Adding upstream version 1:115.7.0.upstream/1%115.7.0 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
commit: 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree: a68f146d7fa01f0134297619fbe7e33db084e0aa /tools/profiler
parent: Initial commit. (diff)
download: thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
212 files changed, 64315 insertions, 0 deletions
diff --git a/tools/profiler/core/EHABIStackWalk.cpp b/tools/profiler/core/EHABIStackWalk.cpp
new file mode 100644
index 0000000000..e3099b89ec
--- /dev/null
+++ b/tools/profiler/core/EHABIStackWalk.cpp
@@ -0,0 +1,597 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI, as described in:
+ *   http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
+ *
+ * This handles only the ARM-defined "personality routines" (chapter
+ * 9), and doesn't track the values of FP registers, because profiling
+ * needs only the chain of PC/SP values.
+ *
+ * Because the exception handling info may not be accurate for all
+ * possible places where an async signal could occur (e.g., in a
+ * prologue or epilogue), this bounds-checks all stack accesses.
+ *
+ * This file uses "struct" for structures in the exception tables and
+ * "class" otherwise.  We should avoid violating the C++11
+ * standard-layout rules in the former.
+ */
+
+#include "EHABIStackWalk.h"
+
+#include "shared-libraries.h"
+#include "platform.h"
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+
+#include <algorithm>
+#include <elf.h>
+#include <stdint.h>
+#include <vector>
+#include <string>
+
+#ifndef PT_ARM_EXIDX
+#  define PT_ARM_EXIDX 0x70000001
+#endif
+
+namespace mozilla {
+
+struct PRel31 {  // 31-bit place-relative offset, viewed in place in a table
+  uint32_t mBits;
+  bool topBit() const { return mBits & 0x80000000; }
+  uint32_t value() const { return mBits & 0x7fffffff; }
+  // Sign-extend bit 30; shifting the unsigned word avoids signed-shift UB.
+  int32_t offset() const { return static_cast<int32_t>(mBits << 1) >> 1; }
+  const void* compute() const {  // address of this word plus offset()
+    return reinterpret_cast<const char*>(this) + offset();
+  }
+ private:
+  PRel31(const PRel31& copied) = delete;
+  PRel31() = delete;
+};
+
+struct EHEntry {
+  PRel31 startPC;  // compared against the PC when searching the table
+  PRel31 exidx;    // 1 (can't unwind), inline unwind data, or an offset to it
+
+ private:
+  EHEntry() = delete;
+  EHEntry(const EHEntry&) = delete;
+};
+
+class EHState {
+ public:
+  explicit EHState(const mcontext_t&);  // snapshot of the core registers
+  // Runs aEntry's unwind program on this state; returns false on failure.
+  bool unwind(const EHEntry* aEntry, const void* stackBase);
+  uint32_t& operator[](int aReg) { return mRegs[aReg]; }
+  const uint32_t& operator[](int aReg) const { return mRegs[aReg]; }
+
+ private:
+  uint32_t mRegs[16];  // all core registers: any may serve as "frame pointer"
+};
+
+enum { R_SP = 13, R_LR = 14, R_PC = 15 };  // register indices into EHState
+
+class EHTable {
+ public:
+  EHTable(const void* aELF, size_t aSize, const std::string& aName);
+  const EHEntry* lookup(uint32_t aPC) const;  // nullptr if aPC isn't covered
+  bool isValid() const { return mEntriesBegin != mEntriesEnd; }
+  const std::string& name() const { return mName; }
+  uint32_t startPC() const { return mStartPC; }
+  uint32_t endPC() const { return mEndPC; }
+  uint32_t baseAddress() const { return mBaseAddress; }
+ private:
+  uint32_t mStartPC;
+  uint32_t mEndPC;
+  uint32_t mBaseAddress;
+  const EHEntry* mEntriesBegin;
+  const EHEntry* mEntriesEnd;
+  std::string mName;
+};
+
+class EHAddrSpace {
+ public:
+  explicit EHAddrSpace(const std::vector<EHTable>& aTables);
+  const EHTable* lookup(uint32_t aPC) const;  // nullptr if no table has aPC
+  static void Update();                       // builds and publishes sCurrent
+  static const EHAddrSpace* Get();            // may be null before Update()
+ private:
+  std::vector<uint32_t> mStarts;  // startPC() of each entry of mTables
+  std::vector<EHTable> mTables;   // sorted by startPC()
+  static mozilla::Atomic<const EHAddrSpace*> sCurrent;
+};
+
+void EHABIStackWalkInit() { EHAddrSpace::Update(); }  // async signal unsafe
+
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, const size_t aNumFrames) {
+  // Records up to aNumFrames (PC, SP) pairs, innermost first, and returns the
+  // number recorded; stops early once a frame can't be unwound any further.
+  const EHAddrSpace* const addrSpace = EHAddrSpace::Get();
+  EHState regs(aContext);
+  size_t numFrames = 0;
+
+  while (numFrames < aNumFrames) {
+    const uint32_t rawPC = regs[R_PC];
+    const uint32_t rawSP = regs[R_SP];
+
+    // ARM instructions are always aligned to 2 or 4 bytes, and the lowest
+    // bit of the pc / lr only selects ARM or Thumb mode, so clear that bit
+    // to report the instruction address itself.
+    aPCs[numFrames] = reinterpret_cast<void*>(rawPC & ~uint32_t(1));
+    aSPs[numFrames] = reinterpret_cast<void*>(rawSP);
+    ++numFrames;
+
+    // Until the tables are published, only the initial frame is known.
+    if (!addrSpace) break;
+    // TODO: cache these lookups.  Binary-searching libxul is
+    // expensive (possibly more expensive than doing the actual
+    // unwind), and even a small cache should help.
+    const EHTable* const table = addrSpace->lookup(rawPC);
+    const EHEntry* const entry = table ? table->lookup(rawPC) : nullptr;
+    if (!entry || !regs.unwind(entry, stackBase)) {
+      break;
+    }
+  }
+
+  return numFrames;
+}
+
+class EHInterp {
+ public:
+  // Note that stackLimit is exclusive and stackBase is inclusive
+  // (i.e., stackLimit < SP <= stackBase), following the convention
+  // set by the AAPCS spec.
+  EHInterp(EHState& aState, const EHEntry* aEntry, uint32_t aStackLimit,
+           uint32_t aStackBase)
+      : mState(aState),
+        mStackLimit(aStackLimit),
+        mStackBase(aStackBase),
+        mNextWord(0),
+        mWordsLeft(0),
+        mFailed(false) {
+    const PRel31& exidx = aEntry->exidx;
+    uint32_t firstWord;
+    if (exidx.mBits == 1) {  // EXIDX_CANTUNWIND
+      mFailed = true;
+      return;
+    }
+    if (exidx.topBit()) {
+      firstWord = exidx.mBits;
+    } else {
+      mNextWord = reinterpret_cast<const uint32_t*>(exidx.compute());
+      firstWord = *mNextWord++;
+    }
+
+    switch (firstWord >> 24) {
+      case 0x80:  // short
+        mWord = firstWord << 8;
+        mBytesLeft = 3;
+        break;
+      case 0x81:
+      case 0x82:  // long; catch descriptor size ignored
+        mWord = firstWord << 16;
+        mBytesLeft = 2;
+        mWordsLeft = (firstWord >> 16) & 0xff;
+        break;
+      default:  // unknown personality
+        mFailed = true;
+    }
+  }
+
+  bool unwind();
+
+ private:
+  // TODO: GCC has been observed not CSEing repeated reads of
+  // mState[R_SP] with writes to mFailed between them, suggesting that
+  // it hasn't determined that they can't alias and is thus missing
+  // optimization opportunities.  So, we may want to flatten EHState
+  // into this class; this may also make the code simpler.
+  EHState& mState;
+  uint32_t mStackLimit;
+  uint32_t mStackBase;
+  const uint32_t* mNextWord;
+  uint32_t mWord;
+  uint8_t mWordsLeft;
+  uint8_t mBytesLeft;
+  bool mFailed;
+
+  enum {
+    I_ADDSP = 0x00,  // 0sxxxxxx (subtract if s)
+    M_ADDSP = 0x80,
+    I_POPMASK = 0x80,  // 1000iiii iiiiiiii (if any i set)
+    M_POPMASK = 0xf0,
+    I_MOVSP = 0x90,  // 1001nnnn
+    M_MOVSP = 0xf0,
+    I_POPN = 0xa0,  // 1010lnnn
+    M_POPN = 0xf0,
+    I_FINISH = 0xb0,    // 10110000
+    I_POPLO = 0xb1,     // 10110001 0000iiii (if any i set)
+    I_ADDSPBIG = 0xb2,  // 10110010 uleb128
+    I_POPFDX = 0xb3,    // 10110011 sssscccc
+    I_POPFDX8 = 0xb8,   // 10111nnn
+    M_POPFDX8 = 0xf8,
+    // "Intel Wireless MMX" extensions omitted.
+    I_POPFDD = 0xc8,  // 1100100h sssscccc
+    M_POPFDD = 0xfe,
+    I_POPFDD8 = 0xd0,  // 11010nnn
+    M_POPFDD8 = 0xf8
+  };
+
+  uint8_t next() {
+    if (mBytesLeft == 0) {
+      if (mWordsLeft == 0) {
+        return I_FINISH;
+      }
+      mWordsLeft--;
+      mWord = *mNextWord++;
+      mBytesLeft = 4;
+    }
+    mBytesLeft--;
+    mWord = (mWord << 8) | (mWord >> 24);  // rotate
+    return mWord;
+  }
+
+  uint32_t& vSP() { return mState[R_SP]; }
+  uint32_t* ptrSP() { return reinterpret_cast<uint32_t*>(vSP()); }
+
+  void checkStackBase() {
+    if (vSP() > mStackBase) mFailed = true;
+  }
+  void checkStackLimit() {
+    if (vSP() <= mStackLimit) mFailed = true;
+  }
+  void checkStackAlign() {
+    if ((vSP() & 3) != 0) mFailed = true;
+  }
+  void checkStack() {
+    checkStackBase();
+    checkStackLimit();
+    checkStackAlign();
+  }
+
+  // Pops the registers selected by mask (bit 0 is register `first`).  Every
+  // word is bounds-checked before it is read, never after.
+  void popRange(uint8_t first, uint8_t last, uint16_t mask) {
+    bool hasSP = false;
+    uint32_t tmpSP;
+    if (mask == 0) mFailed = true;
+    for (uint8_t r = first; r <= last; ++r) {
+      if (mask & 1) {
+        if (vSP() > mStackBase || mStackBase - vSP() < 4) mFailed = true;
+        if (mFailed) return;
+        if (r == R_SP) {
+          hasSP = true;
+          tmpSP = *ptrSP();
+        } else
+          mState[r] = *ptrSP();
+        vSP() += 4;
+      }
+      mask >>= 1;
+    }
+    if (hasSP) {
+      vSP() = tmpSP;
+      checkStack();
+    }
+  }
+};
+
+bool EHState::unwind(const EHEntry* aEntry, const void* stackBasePtr) {
+  // The limit is exclusive, so placing it one word below the current SP means
+  // the unwinding program cannot set SP to less than the initial value.
+  const uint32_t lowestSP = mRegs[R_SP];
+  return EHInterp(*this, aEntry, lowestSP - 4,
+                  reinterpret_cast<uint32_t>(stackBasePtr)).unwind();
+}
+
+bool EHInterp::unwind() {  // true iff the program ran through to Finish
+  mState[R_PC] = 0;  // stays 0 unless the program pops r15; see Finish below
+  checkStack();
+  while (!mFailed) {
+    uint8_t insn = next();
+#ifdef DEBUG_EHABI_UNWIND
+    LOG("unwind insn = %02x", (unsigned)insn);
+#endif
+    // Try to put the common cases first.
+
+    // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
+    // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4
+    if ((insn & M_ADDSP) == I_ADDSP) {
+      uint32_t offset = ((insn & 0x3f) << 2) + 4;
+      if (insn & 0x40) {
+        vSP() -= offset;
+        checkStackLimit();
+      } else {
+        vSP() += offset;
+        checkStackBase();
+      }
+      continue;
+    }
+
+    // 10100nnn: Pop r4-r[4+nnn]
+    // 10101nnn: Pop r4-r[4+nnn], r14
+    if ((insn & M_POPN) == I_POPN) {
+      uint8_t n = (insn & 0x07) + 1;
+      bool lr = insn & 0x08;
+      uint32_t* ptr = ptrSP();
+      vSP() += (n + (lr ? 1 : 0)) * 4;
+      checkStackBase();  // on failure, none of the words below may be read
+      for (uint8_t r = 4; !mFailed && r < 4 + n; ++r) mState[r] = *ptr++;
+      if (lr && !mFailed) mState[R_LR] = *ptr++;
+      continue;
+    }
+
+    // 10110000: Finish
+    if (insn == I_FINISH) {
+      if (mState[R_PC] == 0) {
+        mState[R_PC] = mState[R_LR];
+        // Non-standard change (bug 916106): Prevent the caller from
+        // re-using LR.  Since the caller is by definition not a leaf
+        // routine, it will have to restore LR from somewhere to
+        // return to its own caller, so we can safely zero it here.
+        // This makes a difference only if an error in unwinding
+        // (e.g., caused by starting from within a prologue/epilogue)
+        // causes us to load a pointer to a leaf routine as LR; if we
+        // don't do something, we'll go into an infinite loop of
+        // "returning" to that same function.
+        mState[R_LR] = 0;
+      }
+      return true;
+    }
+
+    // 1001nnnn: Set vsp = r[nnnn]
+    if ((insn & M_MOVSP) == I_MOVSP) {
+      vSP() = mState[insn & 0x0f];
+      checkStack();
+      continue;
+    }
+
+    // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD)
+    // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD)
+    if ((insn & M_POPFDD) == I_POPFDD) {
+      uint8_t n = (next() & 0x0f) + 1;
+      // Note: if the 16+ssss+cccc > 31, the encoding is reserved.
+      // As the space is currently unused, we don't try to check.
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD)
+    if ((insn & M_POPFDD8) == I_POPFDD8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
+    if (insn == I_ADDSPBIG) {
+      uint32_t acc = 0;
+      uint8_t shift = 0;
+      uint8_t byte;
+      do {
+        if (shift >= 32) return false;
+        byte = next();
+        acc |= static_cast<uint32_t>(byte & 0x7f) << shift;  // unsigned shift
+        shift += 7;
+      } while (byte & 0x80);
+      uint32_t offset = 0x204 + (acc << 2);
+      // The calculations above could have overflowed.
+      // But the one we care about is this:
+      if (vSP() + offset < vSP()) mFailed = true;
+      vSP() += offset;
+      // ...so that this is the only other check needed:
+      checkStackBase();
+      continue;
+    }
+
+    // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4}
+    if ((insn & M_POPMASK) == I_POPMASK) {
+      popRange(4, 15, ((insn & 0x0f) << 8) | next());
+      continue;
+    }
+
+    // 10110001 0000iiii (i not all 0): Pop under mask {r3-r0}
+    if (insn == I_POPLO) {
+      popRange(0, 3, next() & 0x0f);
+      continue;
+    }
+
+    // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX)
+    if (insn == I_POPFDX) {
+      uint8_t n = (next() & 0x0f) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX)
+    if ((insn & M_POPFDX8) == I_POPFDX8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // unhandled instruction
+#ifdef DEBUG_EHABI_UNWIND
+    LOG("Unhandled EHABI instruction 0x%02x", insn);
+#endif
+    mFailed = true;
+  }
+  return false;
+}
+
+bool operator<(const EHTable& lhs, const EHTable& rhs) {
+  return rhs.startPC() > lhs.startPC();  // tables order by start address
+}
+
+// Async signal unsafe.  Takes a sorted copy of aTables and records every
+// table's start address in mStarts, in that same order.
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables)
+    : mTables(aTables) {
+  std::sort(mTables.begin(), mTables.end());
+  DebugOnly<uint32_t> prevEnd = 0;  // debug builds: tables must not overlap
+  for (const EHTable& table : mTables) {
+    MOZ_ASSERT(table.startPC() >= prevEnd);
+    mStarts.push_back(table.startPC());
+    prevEnd = table.endPC();
+  }
+}
+
+const EHTable* EHAddrSpace::lookup(uint32_t aPC) const {
+  // The candidate is the last table starting at or below aPC, i.e. the one
+  // just before the first start address that is greater than aPC.
+  const auto next = std::upper_bound(mStarts.begin(), mStarts.end(), aPC);
+  if (next == mStarts.begin()) return nullptr;
+  const EHTable& candidate = mTables[(next - mStarts.begin()) - 1];
+  return aPC < candidate.endPC() ? &candidate : nullptr;
+}
+
+const EHEntry* EHTable::lookup(uint32_t aPC) const {
+  MOZ_ASSERT(aPC >= mStartPC);
+  if (aPC >= mEndPC) return nullptr;
+
+  // Binary search for the last entry whose start address is <= aPC.
+  const EHEntry* lo = mEntriesBegin;
+  const EHEntry* hi = mEntriesEnd;
+  MOZ_ASSERT(lo < hi);
+  if (aPC < reinterpret_cast<uint32_t>(lo->startPC.compute())) return nullptr;
+  while (hi - lo > 1) {
+#ifdef EHABI_UNWIND_MORE_ASSERTS
+    if ((hi - 1)->startPC.compute() < lo->startPC.compute()) {
+      MOZ_CRASH("unsorted exidx");
+    }
+#endif
+    const EHEntry* const mid = lo + (hi - lo) / 2;
+    if (reinterpret_cast<uint32_t>(mid->startPC.compute()) <= aPC) {
+      lo = mid;
+    } else {
+      hi = mid;
+    }
+  }
+  return lo;
+}
+
+#if MOZ_LITTLE_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2LSB;  // compared with EI_DATA
+#elif MOZ_BIG_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2MSB;  // compared with EI_DATA
+#else
+#  error "No endian?"
+#endif
+
+// Async signal unsafe: std::string copy ctor.
+EHTable::EHTable(const void* aELF, size_t aSize, const std::string& aName)
+    : mStartPC(~0),  // largest uint32_t
+      mEndPC(0),
+      mBaseAddress(0),  // defined even when we bail out early below
+      mEntriesBegin(nullptr),
+      mEntriesEnd(nullptr),
+      mName(aName) {
+  const uint32_t fileHeaderAddr = reinterpret_cast<uint32_t>(aELF);
+
+  if (aSize < sizeof(Elf32_Ehdr)) return;
+
+  const Elf32_Ehdr& file = *(reinterpret_cast<Elf32_Ehdr*>(fileHeaderAddr));
+  if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 ||
+      file.e_ident[EI_CLASS] != ELFCLASS32 ||
+      file.e_ident[EI_DATA] != hostEndian ||
+      file.e_ident[EI_VERSION] != EV_CURRENT || file.e_machine != EM_ARM ||
+      file.e_version != EV_CURRENT)
+    // e_flags?
+    return;
+
+  MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize);
+  const Elf32_Phdr *exidxHdr = nullptr, *zeroHdr = nullptr;
+  for (unsigned i = 0; i < file.e_phnum; ++i) {
+    const Elf32_Phdr& phdr = *(reinterpret_cast<Elf32_Phdr*>(
+        fileHeaderAddr + file.e_phoff + i * file.e_phentsize));
+    if (phdr.p_type == PT_ARM_EXIDX) {
+      exidxHdr = &phdr;
+    } else if (phdr.p_type == PT_LOAD) {
+      if (phdr.p_offset == 0) {
+        zeroHdr = &phdr;
+      }
+      if (phdr.p_flags & PF_X) {
+        mStartPC = std::min(mStartPC, phdr.p_vaddr);
+        mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz);
+      }
+    }
+  }
+  if (!exidxHdr || !zeroHdr) return;  // entries stay null: isValid() is false
+  mBaseAddress = fileHeaderAddr - zeroHdr->p_vaddr;
+  mStartPC += mBaseAddress;
+  mEndPC += mBaseAddress;
+  mEntriesBegin =
+      reinterpret_cast<const EHEntry*>(mBaseAddress + exidxHdr->p_vaddr);
+  mEntriesEnd = reinterpret_cast<const EHEntry*>(
+      mBaseAddress + exidxHdr->p_vaddr + exidxHdr->p_memsz);
+}
+
+mozilla::Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr);
+
+// Async signal safe; returns nullptr until Update() has published sCurrent.
+const EHAddrSpace* EHAddrSpace::Get() { return sCurrent; }
+
+// Collects unwinding information from the loaded objects and publishes it.
+// Calls after the first have no effect.  Async signal unsafe.
+void EHAddrSpace::Update() {
+  if (const EHAddrSpace* existing = sCurrent) return;
+
+  SharedLibraryInfo libs = SharedLibraryInfo::GetInfoForSelf();
+  std::vector<EHTable> valid;
+  for (size_t idx = 0; idx < libs.GetSize(); ++idx) {
+    const SharedLibrary& lib = libs.GetEntry(idx);
+    // FIXME: This isn't correct if the start address isn't p_offset 0, because
+    // the start address will not point at the file header. But this is worked
+    // around by magic number checks in the EHTable constructor.
+    EHTable table(reinterpret_cast<const void*>(lib.GetStart()),
+                  lib.GetEnd() - lib.GetStart(), lib.GetNativeDebugPath());
+    if (table.isValid()) valid.push_back(table);
+  }
+
+  // Publish the new address space unless another caller has published one in
+  // the meantime, in which case ours is discarded.
+  const EHAddrSpace* fresh = new EHAddrSpace(valid);
+  if (!sCurrent.compareExchange(nullptr, fresh)) {
+    delete fresh;
+    fresh = sCurrent;
+  }
+}
+
+EHState::EHState(const mcontext_t& context) {  // snapshot the 16 registers
+#ifdef linux
+  mRegs[0] = context.arm_r0;
+  mRegs[1] = context.arm_r1;
+  mRegs[2] = context.arm_r2;
+  mRegs[3] = context.arm_r3;
+  mRegs[4] = context.arm_r4;
+  mRegs[5] = context.arm_r5;
+  mRegs[6] = context.arm_r6;
+  mRegs[7] = context.arm_r7;
+  mRegs[8] = context.arm_r8;
+  mRegs[9] = context.arm_r9;
+  mRegs[10] = context.arm_r10;
+  mRegs[11] = context.arm_fp;  // slot 11
+  mRegs[12] = context.arm_ip;  // slot 12
+  mRegs[13] = context.arm_sp;  // R_SP
+  mRegs[14] = context.arm_lr;  // R_LR
+  mRegs[15] = context.arm_pc;  // R_PC
+#else
+#  error "Unhandled OS for ARM EHABI unwinding"
+#endif
+}
+
+}  // namespace mozilla
diff --git a/tools/profiler/core/EHABIStackWalk.h b/tools/profiler/core/EHABIStackWalk.h
new file mode 100644
index 0000000000..61286290b8
--- /dev/null
+++ b/tools/profiler/core/EHABIStackWalk.h
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI; see the comment at the top of
+ * the .cpp file for details.
+ */
+
+#ifndef mozilla_EHABIStackWalk_h__
+#define mozilla_EHABIStackWalk_h__
+
+#include <stddef.h>
+#include <ucontext.h>
+
+namespace mozilla {
+
+void EHABIStackWalkInit();  // gathers the unwind tables; async signal unsafe
+// Writes up to aNumFrames SP/PC pairs to aSPs/aPCs and returns the count.
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, size_t aNumFrames);
+
+}  // namespace mozilla
+
+#endif
diff --git a/tools/profiler/core/MicroGeckoProfiler.cpp b/tools/profiler/core/MicroGeckoProfiler.cpp
new file mode 100644
index 0000000000..bedb755742
--- /dev/null
+++ b/tools/profiler/core/MicroGeckoProfiler.cpp
@@ -0,0 +1,203 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "GeckoProfiler.h"
+
+#include "mozilla/Maybe.h"
+#include "nsPrintfCString.h"
+#include "public/GeckoTraceEvent.h"
+
+using namespace mozilla;
+using webrtc::trace_event_internal::TraceValueUnion;
+
+void uprofiler_register_thread(const char* name, void* stacktop) {
+#ifdef MOZ_GECKO_PROFILER
+  profiler_register_thread(name, stacktop);  // otherwise this is a no-op
+#endif  // MOZ_GECKO_PROFILER
+}
+
+void uprofiler_unregister_thread() {
+#ifdef MOZ_GECKO_PROFILER
+  profiler_unregister_thread();  // otherwise this is a no-op
+#endif  // MOZ_GECKO_PROFILER
+}
+
+#ifdef MOZ_GECKO_PROFILER
+namespace {
+Maybe<MarkerTiming> ToTiming(char phase) {
+  // 'B' opens an interval, 'E' closes one and 'I' is an instant; any other
+  // phase has no marker timing.
+  if (phase == 'B') {
+    return Some(MarkerTiming::IntervalStart());
+  }
+  if (phase == 'E') {
+    return Some(MarkerTiming::IntervalEnd());
+  }
+  if (phase == 'I') return Some(MarkerTiming::InstantNow());
+  return Nothing();
+}
+
+struct TraceOption {
+  bool mPassed = false;  // set once a caller-supplied argument fills this slot
+  ProfilerString8View mName;
+  Variant<int64_t, bool, double, ProfilerString8View> mValue = AsVariant(false);
+};
+
+struct TraceMarker {
+  static constexpr int MAX_NUM_ARGS = 2;  // OptionsType has one slot per arg
+  using OptionsType = std::tuple<TraceOption, TraceOption>;
+  static constexpr mozilla::Span<const char> MarkerTypeName() {
+    return MakeStringSpan("TraceEvent");
+  }
+  static void StreamJSONMarkerData(
+      mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
+      const OptionsType& aArgs) {
+    auto writeValue = [&](const auto& aName, const auto& aVariant) {
+      aVariant.match(
+          [&](const int64_t& aValue) { aWriter.IntProperty(aName, aValue); },
+          [&](const bool& aValue) { aWriter.BoolProperty(aName, aValue); },
+          [&](const double& aValue) { aWriter.DoubleProperty(aName, aValue); },
+          [&](const ProfilerString8View& aValue) {
+            aWriter.StringProperty(aName, aValue);
+          });
+    };
+    if (const auto& arg = std::get<0>(aArgs); arg.mPassed) {  // else omitted
+      aWriter.StringProperty("name1", arg.mName);
+      writeValue("val1", arg.mValue);
+    }
+    if (const auto& arg = std::get<1>(aArgs); arg.mPassed) {  // else omitted
+      aWriter.StringProperty("name2", arg.mName);
+      writeValue("val2", arg.mValue);
+    }
+  }
+  static mozilla::MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+    schema.SetChartLabel("{marker.name}");
+    schema.SetTableLabel(
+        "{marker.name}  {marker.data.name1} {marker.data.val1}  "
+        "{marker.data.name2} {marker.data.val2}");
+    schema.AddKeyLabelFormatSearchable("name1", "Key 1", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("val1", "Value 1", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("name2", "Key 2", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("val2", "Value 2", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    return schema;
+  }
+};
+}  // namespace
+
+namespace mozilla {
+template <>
+struct ProfileBufferEntryWriter::Serializer<TraceOption> {
+  // Size of the serialized form: the 1-byte flag, plus the name/value pair
+  // when the option was passed.
+  static Length Bytes(const TraceOption& aOption) {
+    if (!aOption.mPassed) return 1;
+    return 1 + SumBytes(aOption.mName, aOption.mValue);
+  }
+
+  // Writes 'T' followed by the name and value, or a lone 't' when the option
+  // wasn't passed; the two letters are arbitrary 1-byte state markers.
+  static void Write(ProfileBufferEntryWriter& aEW, const TraceOption& aOption) {
+    aEW.WriteObject<char>(aOption.mPassed ? 'T' : 't');
+    if (!aOption.mPassed) return;
+    // The name/value pair goes through their own Serializers.
+    aEW.WriteObject(aOption.mName);
+    aEW.WriteObject(aOption.mValue);
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<TraceOption> {
+  static void ReadInto(ProfileBufferEntryReader& aER, TraceOption& aOption) {
+    const char flag = aER.ReadObject<char>();  // 'T': name and value follow
+    MOZ_ASSERT(flag == 'T' || flag == 't');
+    aOption.mPassed = (flag == 'T');
+    if (aOption.mPassed) {
+      aER.ReadIntoObject(aOption.mName);
+      aER.ReadIntoObject(aOption.mValue);
+    }
+  }
+
+  static TraceOption Read(ProfileBufferEntryReader& aER) {
+    TraceOption result;
+    ReadInto(aER, result);
+    return result;
+  }
+};
+}  // namespace mozilla
+#endif  // MOZ_GECKO_PROFILER
+
+void uprofiler_simple_event_marker(const char* name, char phase, int num_args,
+                                   const char** arg_names,
+                                   const unsigned char* arg_types,
+                                   const unsigned long long* arg_values) {
+#ifdef MOZ_GECKO_PROFILER
+  // Records a "TraceEvent" marker named `name` carrying at most
+  // TraceMarker::MAX_NUM_ARGS name/value arguments, provided that markers are
+  // being collected on this thread.
+  if (!profiler_thread_is_being_profiled_for_markers()) {
+    return;
+  }
+  // Phases without a marker-timing equivalent are dropped, optionally with a
+  // note on stderr.
+  Maybe<MarkerTiming> timing = ToTiming(phase);
+  if (!timing) {
+    if (getenv("MOZ_LOG_UNKNOWN_TRACE_EVENT_PHASES")) {
+      fprintf(stderr, "XXX UProfiler: phase not handled: '%c'\n", phase);
+    }
+    return;
+  }
+  MOZ_ASSERT(num_args <= TraceMarker::MAX_NUM_ARGS);
+  TraceMarker::OptionsType options;
+  TraceOption* slots[2] = {&std::get<0>(options), &std::get<1>(options)};
+  const int numUsed = std::min(num_args, TraceMarker::MAX_NUM_ARGS);
+  for (int i = 0; i < numUsed; ++i) {
+    // Each raw 64-bit value is read through the union member that its type
+    // tag selects.
+    const auto* raw = reinterpret_cast<const TraceValueUnion*>(&arg_values[i]);
+    TraceOption& opt = *slots[i];
+    opt.mPassed = true;
+    opt.mName = ProfilerString8View::WrapNullTerminatedString(arg_names[i]);
+    switch (arg_types[i]) {
+      case TRACE_VALUE_TYPE_UINT:
+        MOZ_ASSERT(arg_values[i] <= std::numeric_limits<int64_t>::max());
+        opt.mValue = AsVariant(static_cast<int64_t>(raw->as_uint));
+        break;
+      case TRACE_VALUE_TYPE_INT:
+        opt.mValue = AsVariant(static_cast<int64_t>(raw->as_int));
+        break;
+      case TRACE_VALUE_TYPE_BOOL:
+        opt.mValue = AsVariant(raw->as_bool);
+        break;
+      case TRACE_VALUE_TYPE_DOUBLE:
+        opt.mValue = AsVariant(raw->as_double);
+        break;
+      case TRACE_VALUE_TYPE_POINTER:
+        opt.mValue = AsVariant(
+            ProfilerString8View(nsPrintfCString("%p", raw->as_pointer)));
+        break;
+      case TRACE_VALUE_TYPE_STRING:
+        opt.mValue = AsVariant(
+            ProfilerString8View::WrapNullTerminatedString(raw->as_string));
+        break;
+      case TRACE_VALUE_TYPE_COPY_STRING:
+        opt.mValue = AsVariant(ProfilerString8View(nsCString(raw->as_string)));
+        break;
+      default:
+        MOZ_ASSERT_UNREACHABLE("Unexpected trace value type");
+        opt.mValue = AsVariant(ProfilerString8View(
+            nsPrintfCString("Unexpected type: %u", arg_types[i])));
+        break;
+    }
+  }
+  profiler_add_marker(ProfilerString8View::WrapNullTerminatedString(name),
+                      geckoprofiler::category::MEDIA_RT, {timing.extract()},
+                      TraceMarker{}, options);
+#endif  // MOZ_GECKO_PROFILER
+}
diff --git a/tools/profiler/core/PageInformation.cpp b/tools/profiler/core/PageInformation.cpp
new file mode 100644
index 0000000000..83d2d508a1
--- /dev/null
+++ b/tools/profiler/core/PageInformation.cpp
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PageInformation.h"
+
+#include "mozilla/ProfileJSONWriter.h"
+
+PageInformation::PageInformation(uint64_t aTabID, uint64_t aInnerWindowID,
+                                 const nsCString& aUrl,
+                                 uint64_t aEmbedderInnerWindowID,
+                                 bool aIsPrivateBrowsing)
+    : mTabID(aTabID),
+      mInnerWindowID(aInnerWindowID),  // the only field Equals() compares
+      mUrl(aUrl),
+      mEmbedderInnerWindowID(aEmbedderInnerWindowID),
+      mIsPrivateBrowsing(aIsPrivateBrowsing) {}
+
+bool PageInformation::Equals(PageInformation* aOtherPageInfo) const {
+  // Inner window IDs are unique for each page, so comparing them is enough;
+  // the tab ID and url don't need to be checked.
+  const uint64_t otherID = aOtherPageInfo->InnerWindowID();
+  return otherID == InnerWindowID();
+}
+
+void PageInformation::StreamJSON(SpliceableJSONWriter& aWriter) const {
+  // The uint64_t IDs below are written as doubles. Both tab and inner
+  // window IDs are created using `nsContentUtils::GenerateProcessSpecificId`,
+  // which is specifically designed to only use 53 of the 64 bits to be lossless
+  // when passed into and out of JS as a double.
+  aWriter.StartObjectElement();
+  aWriter.DoubleProperty("tabID", TabID());
+  aWriter.DoubleProperty("innerWindowID", InnerWindowID());
+  aWriter.StringProperty("url", Url());
+  aWriter.DoubleProperty("embedderInnerWindowID", EmbedderInnerWindowID());
+  aWriter.BoolProperty("isPrivateBrowsing", IsPrivateBrowsing());
+  aWriter.EndObject();
+}
+
+size_t PageInformation::SizeOfIncludingThis(
+    mozilla::MallocSizeOf aMallocSizeOf) const {
+  return aMallocSizeOf(this);  // NOTE(review): mUrl's storage is not added
+}
diff --git a/tools/profiler/core/PageInformation.h b/tools/profiler/core/PageInformation.h
new file mode 100644
index 0000000000..6c9039b9a4
--- /dev/null
+++ b/tools/profiler/core/PageInformation.h
@@ -0,0 +1,68 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PageInformation_h
+#define PageInformation_h
+
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+#include "nsISupportsImpl.h"
+#include "nsString.h"
+
+namespace mozilla {
+namespace baseprofiler {
+class SpliceableJSONWriter;
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+// This class contains information that's relevant to a single page only
+// while the page information is important and registered with the profiler,
+// but regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+// When the page gets unregistered, we keep the profiler buffer position
+// to determine if we are still using this page. If not, we unregister
+// it in the next page registration.
+class PageInformation final {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(PageInformation)
+  PageInformation(uint64_t aTabID, uint64_t aInnerWindowID,
+                  const nsCString& aUrl, uint64_t aEmbedderInnerWindowID,
+                  bool aIsPrivateBrowsing);
+
+  // Read-only accessors for the values captured at construction.
+  uint64_t TabID() const { return mTabID; }
+  uint64_t InnerWindowID() const { return mInnerWindowID; }
+  const nsCString& Url() const { return mUrl; }
+  uint64_t EmbedderInnerWindowID() const { return mEmbedderInnerWindowID; }
+  bool IsPrivateBrowsing() const { return mIsPrivateBrowsing; }
+
+  // Two objects compare equal when their inner window IDs match.
+  bool Equals(PageInformation* aOtherPageInfo) const;
+  // Writes this page as one JSON object element into aWriter.
+  void StreamJSON(mozilla::baseprofiler::SpliceableJSONWriter& aWriter) const;
+  size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+  // Records the profiler buffer position at which the page was unregistered.
+  void NotifyUnregistered(uint64_t aBufferPosition) {
+    mBufferPositionWhenUnregistered = mozilla::Some(aBufferPosition);
+  }
+
+  // Returns the position stored by NotifyUnregistered(), if any.
+  mozilla::Maybe<uint64_t> BufferPositionWhenUnregistered() const {
+    return mBufferPositionWhenUnregistered;
+  }
+
+ private:
+  // The destructor is private; instances use the refcounting declared above.
+  virtual ~PageInformation() = default;
+
+  const uint64_t mTabID;
+  const uint64_t mInnerWindowID;
+  const nsCString mUrl;
+  const uint64_t mEmbedderInnerWindowID;
+  const bool mIsPrivateBrowsing;
+
+  // Buffer position at the time the page got unregistered. It is used to
+  // determine whether the profiler still uses this page or not.
+  mozilla::Maybe<uint64_t> mBufferPositionWhenUnregistered;
+};
+
+#endif  // PageInformation_h
diff --git a/tools/profiler/core/PlatformMacros.h b/tools/profiler/core/PlatformMacros.h
new file mode 100644
index 0000000000..c72e94c128
--- /dev/null
+++ b/tools/profiler/core/PlatformMacros.h
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PLATFORM_MACROS_H
+#define PLATFORM_MACROS_H
+
+// Define platform selection macros in a consistent way. Don't add anything
+// else to this file, so it can remain freestanding. The primary factorisation
+// is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined
+// too, since they are sometimes convenient.
+//
+// Note: "GP" is short for "Gecko Profiler".
+
+// Start from a clean slate: every macro that the selection chain below may
+// define is undefined first, so that exactly one PLAT/ARCH/OS triple is set.
+#undef GP_PLAT_x86_android
+#undef GP_PLAT_amd64_android
+#undef GP_PLAT_arm_android
+#undef GP_PLAT_arm64_android
+#undef GP_PLAT_x86_linux
+#undef GP_PLAT_amd64_linux
+#undef GP_PLAT_arm_linux
+#undef GP_PLAT_arm64_linux
+#undef GP_PLAT_mips64_linux
+#undef GP_PLAT_amd64_darwin
+#undef GP_PLAT_arm64_darwin
+#undef GP_PLAT_amd64_freebsd
+#undef GP_PLAT_arm64_freebsd
+#undef GP_PLAT_x86_windows
+#undef GP_PLAT_amd64_windows
+#undef GP_PLAT_arm64_windows
+
+#undef GP_ARCH_x86
+#undef GP_ARCH_amd64
+#undef GP_ARCH_arm
+#undef GP_ARCH_arm64
+#undef GP_ARCH_mips64
+
+#undef GP_OS_android
+#undef GP_OS_linux
+#undef GP_OS_darwin
+#undef GP_OS_freebsd
+#undef GP_OS_windows
+
+// We test __ANDROID__ before __linux__ because __linux__ is defined on both
+// Android and Linux, whereas __ANDROID__ is not defined on vanilla Linux.
+
+#if defined(__ANDROID__) && defined(__i386__)
+#  define GP_PLAT_x86_android 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_android 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__arm__)
+#  define GP_PLAT_arm_android 1
+#  define GP_ARCH_arm 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_android 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_android 1
+
+#elif defined(__linux__) && defined(__i386__)
+#  define GP_PLAT_x86_linux 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_linux 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__arm__)
+#  define GP_PLAT_arm_linux 1
+#  define GP_ARCH_arm 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_linux 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__mips64)
+#  define GP_PLAT_mips64_linux 1
+#  define GP_ARCH_mips64 1
+#  define GP_OS_linux 1
+
+#elif defined(__APPLE__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_darwin 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_darwin 1
+
+#elif defined(__APPLE__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_darwin 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_darwin 1
+
+#elif defined(__FreeBSD__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_freebsd 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_freebsd 1
+
+#elif defined(__FreeBSD__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_freebsd 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_freebsd 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+    (defined(_M_IX86) || defined(__i386__))
+#  define GP_PLAT_x86_windows 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_windows 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+    (defined(_M_X64) || defined(__x86_64__))
+#  define GP_PLAT_amd64_windows 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_windows 1
+
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+#  define GP_PLAT_arm64_windows 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_windows 1
+
+#else
+#  error "Unsupported platform"
+#endif
+
+#endif /* ndef PLATFORM_MACROS_H */
diff --git a/tools/profiler/core/PowerCounters-linux.cpp b/tools/profiler/core/PowerCounters-linux.cpp
new file mode 100644
index 0000000000..006cea4867
--- /dev/null
+++ b/tools/profiler/core/PowerCounters-linux.cpp
@@ -0,0 +1,287 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PowerCounters.h"
+#include "nsXULAppAPI.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Logging.h"
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+#include <linux/perf_event.h>
+
+// From the kernel rapl_scale() function:
+//
+// > users must then scale back: count * 1/(1e9*2^32) to get Joules
+#define PERF_EVENT_SCALE_NANOJOULES 2.3283064365386962890625e-1
+#define SCALE_NANOJOULES_TO_PICOWATTHOUR 3.6
+#define SYSFS_PERF_POWER_TYPE_PATH "/sys/bus/event_source/devices/power/type"
+
+static mozilla::LazyLogModule sRaplEventLog("profiler.rapl");
+#define RAPL_LOG(...) \
+  MOZ_LOG(sRaplEventLog, mozilla::LogLevel::Debug, (__VA_ARGS__));
+
+// RAPL energy domains known to this file. The numeric value of an enumerator
+// is what gets used as the perf event config when sysfs cannot provide one.
+enum class RaplEventType : uint64_t {
+  RAPL_ENERGY_CORES = 0x01,  // "cores" events file
+  RAPL_ENERGY_PKG = 0x02,    // "pkg" events file
+  RAPL_ENERGY_DRAM = 0x03,   // "ram" events file
+  RAPL_ENERGY_GPU = 0x04,    // "gpu" events file
+  RAPL_ENERGY_PSYS = 0x05,   // "psys" events file
+};
+
+// One row of the table of RAPL domains for which a counter may be created.
+struct RaplDomain {
+  RaplEventType mRaplEventType;  // Which RAPL domain this row describes.
+  const char* mLabel;            // Label handed to the profiler counter.
+  const char* mDescription;      // Description handed to the counter.
+};
+
+// Every RAPL domain for which a counter is attempted, in creation order.
+constexpr RaplDomain kSupportedRaplDomains[] = {
+    {RaplEventType::RAPL_ENERGY_CORES, "Power: CPU cores",
+     "Consumption of all physical cores"},
+    {RaplEventType::RAPL_ENERGY_PKG, "Power: CPU package",
+     "Consumption of the whole processor package"},
+    {RaplEventType::RAPL_ENERGY_DRAM, "Power: DRAM",
+     "Consumption of the dram domain"},
+    {RaplEventType::RAPL_ENERGY_GPU, "Power: iGPU",
+     "Consumption of the builtin-gpu domain"},
+    {RaplEventType::RAPL_ENERGY_PSYS, "Power: System",
+     "Consumption of the builtin-psys domain"},
+};
+
+// Maps a RAPL domain to the suffix used in the names of its sysfs event
+// files. Values outside the enumeration map to the empty string.
+static std::string GetSysfsFileID(RaplEventType aEventType) {
+  const char* fileID = "";
+
+  switch (aEventType) {
+    case RaplEventType::RAPL_ENERGY_CORES:
+      fileID = "cores";
+      break;
+    case RaplEventType::RAPL_ENERGY_PKG:
+      fileID = "pkg";
+      break;
+    case RaplEventType::RAPL_ENERGY_DRAM:
+      fileID = "ram";
+      break;
+    case RaplEventType::RAPL_ENERGY_GPU:
+      fileID = "gpu";
+      break;
+    case RaplEventType::RAPL_ENERGY_PSYS:
+      fileID = "psys";
+      break;
+  }
+
+  return fileID;
+}
+
+// Returns the factor that converts a raw counter value of the given domain
+// into nanojoules. The value is read from the domain's sysfs ".scale" file
+// and multiplied by 1e9; if the file cannot be opened or parsed, the
+// built-in PERF_EVENT_SCALE_NANOJOULES default is returned instead.
+static double GetRaplPerfEventScale(RaplEventType aEventType) {
+  std::string scalePath("/sys/bus/event_source/devices/power/events/energy-");
+  scalePath += GetSysfsFileID(aEventType);
+  scalePath += ".scale";
+
+  std::ifstream scaleFile(scalePath);
+  double parsedScale;
+
+  if (scaleFile && (scaleFile >> parsedScale)) {
+    RAPL_LOG("Read scale from %s: %.22e", scalePath.c_str(), parsedScale);
+    return parsedScale * 1e9;
+  }
+
+  return PERF_EVENT_SCALE_NANOJOULES;
+}
+
+// Returns the perf event config value for the given domain. The sysfs events
+// file is expected to start with "event=" followed by a hexadecimal number;
+// whenever the file is missing or does not have that shape, the numeric
+// value of the enumerator itself is returned.
+static uint64_t GetRaplPerfEventConfig(RaplEventType aEventType) {
+  const uint64_t fallbackConfig = static_cast<uint64_t>(aEventType);
+
+  std::string configPath("/sys/bus/event_source/devices/power/events/energy-");
+  configPath += GetSysfsFileID(aEventType);
+
+  std::ifstream configFile(configPath);
+  if (!configFile) {
+    return fallbackConfig;
+  }
+
+  // Read exactly as many characters as the expected prefix has; the array
+  // has one extra element for the terminating NUL.
+  static constexpr char kExpectedPrefix[] = "event=";
+  char prefix[sizeof(kExpectedPrefix)] = {};
+  const bool prefixRead = static_cast<bool>(
+      configFile.get(prefix, static_cast<std::streamsize>(sizeof(prefix))));
+  if (!prefixRead || std::string(prefix) != kExpectedPrefix) {
+    return fallbackConfig;
+  }
+
+  uint64_t parsedConfig;
+  if (!(configFile >> std::hex >> parsedConfig)) {
+    return fallbackConfig;
+  }
+
+  RAPL_LOG("Read config from %s: 0x%" PRIx64, configPath.c_str(),
+           parsedConfig);
+  return parsedConfig;
+}
+
+// Profiler counter that reports the energy consumed by one RAPL domain. The
+// value is read from a perf event file descriptor opened in the constructor;
+// when opening fails, ValidPerfEventFd() returns false and the object must
+// not be sampled.
+class RaplProfilerCount final : public BaseProfilerCount {
+ public:
+  // aPerfEventType is the value for perf_event_attr.type, aPerfEventConfig
+  // selects the RAPL domain, aLabel/aDescription are handed to the base
+  // counter.
+  explicit RaplProfilerCount(int aPerfEventType,
+                             const RaplEventType& aPerfEventConfig,
+                             const char* aLabel, const char* aDescription)
+      : BaseProfilerCount(aLabel, nullptr, nullptr, "power", aDescription),
+        mLastResult(0),
+        mPerfEventFd(-1) {
+    RAPL_LOG("Creating RAPL Event for type: %s", mLabel);
+
+    // Optimize for ease of use and do not set an excludes value. This
+    // ensures we do not require PERF_PMU_CAP_NO_EXCLUDE.
+    struct perf_event_attr attr;
+    memset(&attr, 0, sizeof(attr));
+    attr.type = aPerfEventType;
+    attr.size = sizeof(struct perf_event_attr);
+    attr.config = GetRaplPerfEventConfig(aPerfEventConfig);
+    attr.sample_period = 0;
+    attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+    attr.inherit = 1;
+
+    // The underlying type of attr.config differs between architectures, so
+    // convert explicitly to the type that %llx expects.
+    RAPL_LOG("Config for event %s: 0x%llx", mLabel,
+             static_cast<unsigned long long>(attr.config));
+
+    mEventScale = GetRaplPerfEventScale(aPerfEventConfig);
+    RAPL_LOG("Scale for event %s: %.22e", mLabel, mEventScale);
+
+    long fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
+    if (fd < 0) {
+      RAPL_LOG("Event descriptor creation failed for event: %s", mLabel);
+      mPerfEventFd = -1;
+      return;
+    }
+
+    RAPL_LOG("Created descriptor for event: %s", mLabel);
+    mPerfEventFd = static_cast<int>(fd);
+  }
+
+  // Disables the event and closes its descriptor, if one was opened.
+  ~RaplProfilerCount() {
+    if (ValidPerfEventFd()) {
+      ioctl(mPerfEventFd, PERF_EVENT_IOC_DISABLE, 0);
+      close(mPerfEventFd);
+    }
+  }
+
+  RaplProfilerCount(const RaplProfilerCount&) = delete;
+  RaplProfilerCount& operator=(const RaplProfilerCount&) = delete;
+
+  // Reads the counter and returns its value in picowatt-hours. If the read
+  // fails, a zeroed sample with isSampleNew == false is returned.
+  CountSample Sample() override {
+    CountSample result = {
+        .count = 0,
+        .number = 0,
+        .isSampleNew = false,
+    };
+    mozilla::Maybe<uint64_t> raplEventResult = ReadEventFd();
+
+    if (raplEventResult.isNothing()) {
+      return result;
+    }
+
+    // We need to return picowatthour to be consistent with the Windows
+    // EMI API. As a result, the scale calculation should:
+    //
+    //  - Convert the returned value to nanojoules
+    //  - Convert nanojoules to picowatthour
+    double nanojoules =
+        static_cast<double>(raplEventResult.value()) * mEventScale;
+    double picowatthours = nanojoules / SCALE_NANOJOULES_TO_PICOWATTHOUR;
+    // PRIu64 rather than %lu: uint64_t is not `unsigned long` on 32-bit
+    // targets.
+    RAPL_LOG("Sample %s { count: %" PRIu64 ", last-result: %" PRIu64
+             " } = %lfJ",
+             mLabel, raplEventResult.value(), mLastResult, nanojoules * 1e-9);
+
+    result.count = static_cast<int64_t>(picowatthours);
+
+    // If the tick count is the same as the returned value or if this is the
+    // first sample, treat this sample as a duplicate.
+    result.isSampleNew =
+        (mLastResult != 0 && mLastResult != raplEventResult.value() &&
+         result.count >= 0);
+    mLastResult = raplEventResult.value();
+
+    return result;
+  }
+
+  // True when the perf event was opened successfully in the constructor.
+  bool ValidPerfEventFd() const { return mPerfEventFd >= 0; }
+
+ private:
+  // Reads one 64-bit counter value from the perf event descriptor. Returns
+  // Nothing() when the read does not deliver exactly eight bytes.
+  mozilla::Maybe<uint64_t> ReadEventFd() {
+    MOZ_ASSERT(ValidPerfEventFd());
+
+    uint64_t eventResult;
+    ssize_t readBytes = read(mPerfEventFd, &eventResult, sizeof(uint64_t));
+    if (readBytes != sizeof(uint64_t)) {
+      // %zd is the length modifier that matches ssize_t on every target.
+      RAPL_LOG("Invalid RAPL event read size: %zd", readBytes);
+      return mozilla::Nothing();
+    }
+
+    return mozilla::Some(eventResult);
+  }
+
+  // Raw counter value seen by the previous successful Sample(); 0 before.
+  uint64_t mLastResult;
+  // perf event file descriptor, or -1 if it could not be opened.
+  int mPerfEventFd;
+  // Factor converting a raw counter value into nanojoules.
+  double mEventScale;
+};
+
+// Reads the perf event type of the RAPL PMU from sysfs. Returns -1 when the
+// file cannot be opened or does not start with a decimal integer.
+static int GetRaplPerfEventType() {
+  int eventType = -1;
+
+  if (FILE* typeFile = fopen(SYSFS_PERF_POWER_TYPE_PATH, "r")) {
+    if (fscanf(typeFile, "%d", &eventType) != 1) {
+      RAPL_LOG("Read of " SYSFS_PERF_POWER_TYPE_PATH " failed");
+    }
+    fclose(typeFile);
+  } else {
+    RAPL_LOG("Open of " SYSFS_PERF_POWER_TYPE_PATH " failed");
+  }
+
+  return eventType;
+}
+
+PowerCounters::PowerCounters() {
+  // Energy meters are global, so only sample them on the parent.
+  if (!XRE_IsParentProcess()) {
+    return;
+  }
+
+  // perf_event_attr.type has to be set to this value for RAPL perf events.
+  const int raplPerfType = GetRaplPerfEventType();
+  if (raplPerfType < 0) {
+    RAPL_LOG("Failed to find the event type for RAPL perf events.");
+    return;
+  }
+
+  // Try to create one counter per known domain. A counter is kept only if
+  // its perf event could be opened and it could be stored in mCounters.
+  for (const RaplDomain& domain : kSupportedRaplDomains) {
+    auto* counter =
+        new RaplProfilerCount(raplPerfType, domain.mRaplEventType,
+                              domain.mLabel, domain.mDescription);
+    const bool kept =
+        counter->ValidPerfEventFd() && mCounters.emplaceBack(counter);
+    if (!kept) {
+      delete counter;
+    }
+  }
+}
+
+PowerCounters::~PowerCounters() {
+  // The counters were allocated with new in the constructor; release each
+  // one before emptying the vector.
+  for (auto* counter : mCounters) {
+    delete counter;
+  }
+
+  mCounters.clear();
+}
+
+void PowerCounters::Sample() {
+  // Intentionally empty in this implementation.
+}
diff --git a/tools/profiler/core/PowerCounters-mac-amd64.cpp b/tools/profiler/core/PowerCounters-mac-amd64.cpp
new file mode 100644
index 0000000000..540cee155d
--- /dev/null
+++ b/tools/profiler/core/PowerCounters-mac-amd64.cpp
@@ -0,0 +1,419 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PowerCounters.h"
+#include "nsDebug.h"
+#include "nsPrintfCString.h"
+#include "nsXULAppAPI.h"  // for XRE_IsParentProcess
+
+// Because of the pkg_energy_statistics_t::pkes_version check below, the
+// earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72).
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+// OS X has four kinds of system calls:
+//
+//  1. Mach traps;
+//  2. UNIX system calls;
+//  3. machine-dependent calls;
+//  4. diagnostic calls.
+//
+// (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.)
+//
+// The last category has a single call named diagCall() or diagCall64(). Its
+// mode is controlled by its first argument, and one of the modes allows access
+// to the Intel RAPL MSRs.
+//
+// The interface to diagCall64() is not exported, so we have to import some
+// definitions from the XNU kernel. All imported definitions are annotated with
+// the XNU source file they come from, and information about what XNU versions
+// they were introduced in and (if relevant) modified.
+
+// The diagCall64() mode.
+// From osfmk/i386/Diagnostics.h
+// - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value
+//   17 was used for dgGzallocTest.)
+#define dgPowerStat 17
+
+// From osfmk/i386/cpu_data.h
+// - In 10.8.5 these values were introduced, along with core_energy_stat_t.
+#define CPU_RTIME_BINS (12)
+#define CPU_ITIME_BINS (CPU_RTIME_BINS)
+
+// core_energy_stat_t and pkg_energy_statistics_t are both from
+// osfmk/i386/Diagnostics.c.
+// - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many
+//   fewer fields.
+// - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with
+//   numerous new fields.
+// - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added.
+//   diagCall64(dgPowerStat) fills it with '1' in all versions since (up to
+//   10.10.2 at time of writing).
+// - in 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally
+//   added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the
+//   source code, but it could be defined at compile-time via compiler flags.)
+//   pkg_energy_statistics_t::pkes_version did not change, though.
+
+typedef struct {
+  uint64_t caperf;
+  uint64_t cmperf;
+  uint64_t ccres[6];
+  uint64_t crtimes[CPU_RTIME_BINS];
+  uint64_t citimes[CPU_ITIME_BINS];
+  uint64_t crtime_total;
+  uint64_t citime_total;
+  uint64_t cpu_idle_exits;
+  uint64_t cpu_insns;
+  uint64_t cpu_ucc;
+  uint64_t cpu_urc;
+#if DIAG_ALL_PMCS           // Added in 10.10.2 (xnu-2782.10.72).
+  uint64_t gpmcs[4];        // Added in 10.10.2 (xnu-2782.10.72).
+#endif /* DIAG_ALL_PMCS */  // Added in 10.10.2 (xnu-2782.10.72).
+} core_energy_stat_t;
+
+typedef struct {
+  uint64_t pkes_version;  // Added in 10.9.0 (xnu-2422.1.72).
+  uint64_t pkg_cres[2][7];
+
+  // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT
+  // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT.
+  uint64_t pkg_power_unit;
+
+  // These are the four fields for the four RAPL domains. For each field
+  // we list:
+  //
+  // - the corresponding MSR number;
+  // - Intel's name for that MSR;
+  // - XNU's name for that MSR;
+  // - which Intel processors the MSR is supported on.
+  //
+  // The last of these is determined from chapter 35 of Volume 3 of the
+  // "Intel 64 and IA-32 Architectures Software Developer's Manual",
+  // Order Number 325384. (Note that chapter 35 contradicts section 14.9
+  // to some degree.)
+
+  // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS
+  // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
+  uint64_t pkg_energy;
+
+  // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS
+  // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
+  uint64_t pp0_energy;
+
+  // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_IA32_PP1_ENERGY_STATUS
+  // Sandy Bridge, Haswell.
+  uint64_t pp1_energy;
+
+  // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS
+  // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model
+  // 0x57)
+  uint64_t ddr_energy;
+
+  uint64_t llc_flushed_cycles;
+  uint64_t ring_ratio_instantaneous;
+  uint64_t IA_frequency_clipping_cause;
+  uint64_t GT_frequency_clipping_cause;
+  uint64_t pkg_idle_exits;
+  uint64_t pkg_rtimes[CPU_RTIME_BINS];
+  uint64_t pkg_itimes[CPU_ITIME_BINS];
+  uint64_t mbus_delay_time;
+  uint64_t mint_delay_time;
+  uint32_t ncpus;
+  core_energy_stat_t cest[];
+} pkg_energy_statistics_t;
+
+// Issues the diagnostic system call with the given mode and buffer and
+// returns the value the kernel leaves in %rax, converted to int.
+static int diagCall64(uint64_t aMode, void* aBuf) {
+  // We cannot use syscall() here because it doesn't work with diagnostic
+  // system calls -- it raises SIGSYS if you try. So we have to use asm.
+
+#ifdef __x86_64__
+  // The 0x4000000 prefix indicates it's a diagnostic system call. The 0x01
+  // suffix indicates the syscall number is 1, which also happens to be the
+  // only diagnostic system call. See osfmk/mach/i386/syscall_sw.h for more
+  // details.
+  static const uint64_t diagCallNum = 0x4000001;
+  uint64_t rv;
+
+  __asm__ __volatile__(
+      "syscall"
+
+      // Return value goes in "a" (%rax).
+      : /* outputs */ "=a"(rv)
+
+      // The syscall number goes in "0", a synonym (from outputs) for "a"
+      // (%rax). The syscall arguments go in "D" (%rdi) and "S" (%rsi).
+      : /* inputs */ "0"(diagCallNum), "D"(aMode), "S"(aBuf)
+
+      // The |syscall| instruction clobbers %rcx, %r11, and %rflags ("cc"). And
+      // this particular syscall also writes memory (aBuf).
+      : /* clobbers */ "rcx", "r11", "cc", "memory");
+  // The 64-bit register value is narrowed on purpose; callers only compare
+  // the result against small status values.
+  return static_cast<int>(rv);
+#else
+#  error Sorry, only x86-64 is supported
+#endif
+}
+
+// This is a counter to collect power utilization during profiling.
+// It cannot be a raw `ProfilerCounter` because we need to manually add/remove
+// it while the profiler lock is already held.
+// Counter for a single RAPL domain. AddSample() feeds it raw 32-bit energy
+// readings; Sample() converts the accumulated reading into picowatt-hours.
+class RaplDomain final : public BaseProfilerCount {
+ public:
+  explicit RaplDomain(const char* aLabel, const char* aDescription)
+      : BaseProfilerCount(aLabel, nullptr, nullptr, "power", aDescription),
+        mSample(0),
+        mEnergyStatusUnits(0),
+        mWrapAroundCount(0),
+        mIsSampleNew(false) {}
+
+  // Returns the energy accumulated so far. isSampleNew is true only if
+  // AddSample() accepted a new reading since the previous call.
+  CountSample Sample() override {
+    CountSample result;
+
+    // To be consistent with the Windows EMI API,
+    // return values in picowatt-hour.
+    constexpr double NANOJOULES_PER_JOULE = 1'000'000'000;
+    constexpr double NANOJOULES_TO_PICOWATTHOUR = 3.6;
+
+    uint64_t ticks = (uint64_t(mWrapAroundCount) << 32) + mSample;
+    // Shift a 64-bit one: the units field can be as large as 31, which
+    // would overflow a plain `int` shift.
+    double joulesPerTick =
+        1.0 / static_cast<double>(uint64_t{1} << mEnergyStatusUnits);
+    result.count = static_cast<double>(ticks) * joulesPerTick *
+                   NANOJOULES_PER_JOULE / NANOJOULES_TO_PICOWATTHOUR;
+
+    result.number = 0;
+    result.isSampleNew = mIsSampleNew;
+    mIsSampleNew = false;
+    return result;
+  }
+
+  // Records a raw reading. aSample is the low 32 bits of the energy counter;
+  // aEnergyStatusUnits is the exponent N such that one tick is 1/(2^N) J.
+  void AddSample(uint32_t aSample, uint32_t aEnergyStatusUnits) {
+    if (aSample == mSample) {
+      return;
+    }
+
+    mEnergyStatusUnits = aEnergyStatusUnits;
+
+    if (aSample > mSample) {
+      mIsSampleNew = true;
+      mSample = aSample;
+      return;
+    }
+
+    // Despite being returned in uint64_t fields, the power counter values
+    // only use the lowest 32 bits of their fields, and we need to handle
+    // wraparounds to avoid our power tracks stopping after a few hours.
+    constexpr uint32_t highestBit = uint32_t{1} << 31;
+    if ((mSample & highestBit) && !(aSample & highestBit)) {
+      mIsSampleNew = true;
+      ++mWrapAroundCount;
+      mSample = aSample;
+    } else {
+      NS_WARNING("unexpected sample with smaller value");
+    }
+  }
+
+ private:
+  uint32_t mSample;             // Most recent accepted raw reading.
+  uint32_t mEnergyStatusUnits;  // Exponent of the tick size, see AddSample.
+  uint32_t mWrapAroundCount;    // Number of 32-bit wraparounds detected.
+  bool mIsSampleNew;            // Set by AddSample, cleared by Sample.
+};
+
+// Reads the RAPL energy counters through diagCall64() and feeds them into
+// one RaplDomain counter per supported domain. If anything goes wrong during
+// construction the object stays inert: mPkes is null and Sample() does
+// nothing.
+class RAPL {
+  // Every member has a default so that the early returns in the constructor
+  // leave the object in a well-defined state for Sample() and ~RAPL().
+  bool mIsGpuSupported = false;  // Is the GPU domain supported by the CPU?
+  bool mIsRamSupported = false;  // Is the RAM domain supported by the CPU?
+
+  // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J ==
+  // 15.3 microJoules) which is different to the power unit MSR. (See the
+  // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of
+  // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.)
+  // This field records whether the quirk is present.
+  bool mHasRamUnitsQuirk = false;
+
+  // The abovementioned 15.3 microJoules value. (2^16 = 65536)
+  static constexpr uint32_t kQuirkyRamEnergyStatusUnits = 16;
+
+  // The struct passed to diagCall64(). Non-null only when the domain
+  // counters below were created as well.
+  pkg_energy_statistics_t* mPkes = nullptr;
+
+  RaplDomain* mPkg = nullptr;
+  RaplDomain* mCores = nullptr;
+  RaplDomain* mGpu = nullptr;
+  RaplDomain* mRam = nullptr;
+
+ public:
+  // Detects the supported domains, allocates the diagCall64() buffer and
+  // appends one counter per supported domain to aCounters. The counters stay
+  // owned by this object.
+  explicit RAPL(PowerCounters::CountVector& aCounters) {
+    // Work out which RAPL MSRs this CPU model supports.
+    int cpuModel;
+    size_t size = sizeof(cpuModel);
+    if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, nullptr, 0) != 0) {
+      NS_WARNING("sysctlbyname(\"machdep.cpu.model\") failed");
+      return;
+    }
+
+    // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in
+    // linux-4.1.5/.
+    //
+    // By linux-5.6.14/, this stuff had moved into
+    // arch/x86/events/intel/rapl.c, which references processor families in
+    // arch/x86/include/asm/intel-family.h.
+    switch (cpuModel) {
+      case 0x2a:  // Sandy Bridge
+      case 0x3a:  // Ivy Bridge
+        // Supports package, cores, GPU.
+        mIsGpuSupported = true;
+        mIsRamSupported = false;
+        break;
+
+      case 0x3f:  // Haswell X
+      case 0x4f:  // Broadwell X
+      case 0x55:  // Skylake X
+      case 0x56:  // Broadwell D
+        // Supports package, cores, RAM. Has the units quirk.
+        mIsGpuSupported = false;
+        mIsRamSupported = true;
+        mHasRamUnitsQuirk = true;
+        break;
+
+      case 0x2d:  // Sandy Bridge X
+      case 0x3e:  // Ivy Bridge X
+        // Supports package, cores, RAM.
+        mIsGpuSupported = false;
+        mIsRamSupported = true;
+        break;
+
+      case 0x3c:  // Haswell
+      case 0x3d:  // Broadwell
+      case 0x45:  // Haswell L
+      case 0x46:  // Haswell G
+      case 0x47:  // Broadwell G
+        // Supports package, cores, GPU, RAM.
+        mIsGpuSupported = true;
+        mIsRamSupported = true;
+        break;
+
+      case 0x4e:  // Skylake L
+      case 0x5e:  // Skylake
+      case 0x8e:  // Kaby Lake L
+      case 0x9e:  // Kaby Lake
+      case 0x66:  // Cannon Lake L
+      case 0x7d:  // Ice Lake
+      case 0x7e:  // Ice Lake L
+      case 0xa5:  // Comet Lake
+      case 0xa6:  // Comet Lake L
+        // Supports package, cores, GPU, RAM, PSYS.
+        // XXX: this tool currently doesn't measure PSYS.
+        mIsGpuSupported = true;
+        mIsRamSupported = true;
+        break;
+
+      default:
+        NS_WARNING(nsPrintfCString("unknown CPU model: %d", cpuModel).get());
+        return;
+    }
+
+    // Get the maximum number of logical CPUs so that we know how big to make
+    // |mPkes|.
+    int logicalcpu_max;
+    size = sizeof(logicalcpu_max);
+    if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, nullptr,
+                     0) != 0) {
+      NS_WARNING("sysctlbyname(\"hw.logicalcpu_max\") failed");
+      return;
+    }
+
+    // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around
+    // core_energy_stat_t::gpmcs and for any other future extensions to that
+    // struct. (The fields we read all come before the core_energy_stat_t
+    // array, so it won't matter to us whether gpmcs is present or not.)
+    size_t pkesSize = sizeof(pkg_energy_statistics_t) +
+                      logicalcpu_max * sizeof(core_energy_stat_t) +
+                      logicalcpu_max * 1024;
+    mPkes = static_cast<pkg_energy_statistics_t*>(malloc(pkesSize));
+    if (!mPkes) {
+      return;
+    }
+
+    if (!aCounters.reserve(4)) {
+      // Without room for the counters there is nothing to feed, so drop the
+      // buffer as well; Sample() treats a null mPkes as "nothing to do".
+      free(mPkes);
+      mPkes = nullptr;
+      return;
+    }
+
+    mPkg = new RaplDomain("Power: CPU package", "RAPL PKG");
+    aCounters.infallibleAppend(mPkg);
+
+    mCores = new RaplDomain("Power: CPU cores", "RAPL PP0");
+    aCounters.infallibleAppend(mCores);
+
+    if (mIsGpuSupported) {
+      mGpu = new RaplDomain("Power: iGPU", "RAPL PP1");
+      aCounters.infallibleAppend(mGpu);
+    }
+
+    if (mIsRamSupported) {
+      mRam = new RaplDomain("Power: DRAM", "RAPL DRAM");
+      aCounters.infallibleAppend(mRam);
+    }
+  }
+
+  // This class owns raw pointers, so copying it would lead to double frees.
+  RAPL(const RAPL&) = delete;
+  RAPL& operator=(const RAPL&) = delete;
+
+  ~RAPL() {
+    free(mPkes);
+    delete mPkg;
+    delete mCores;
+    delete mGpu;
+    delete mRam;
+  }
+
+  // Fetches the current energy statistics and forwards the reading of each
+  // supported domain to its counter.
+  void Sample() {
+    constexpr uint64_t kSupportedVersion = 1;
+
+    // If construction did not complete (unknown CPU, failed sysctl or failed
+    // allocation), we have nothing to sample.
+    if (MOZ_UNLIKELY(!mPkes)) {
+      return;
+    }
+
+    // Write an unsupported version number into pkes_version so that the check
+    // below cannot succeed by dumb luck.
+    mPkes->pkes_version = kSupportedVersion - 1;
+
+    // diagCall64() returns 1 on success, and 0 on failure (which can only
+    // happen if the mode is unrecognized, e.g. in 10.7.x or earlier versions).
+    if (diagCall64(dgPowerStat, mPkes) != 1) {
+      NS_WARNING("diagCall64() failed");
+      return;
+    }
+
+    if (mPkes->pkes_version != kSupportedVersion) {
+      NS_WARNING(
+          nsPrintfCString("unexpected pkes_version: %llu", mPkes->pkes_version)
+              .get());
+      return;
+    }
+
+    // Bits 12:8 are the ESU.
+    // Energy measurements come in multiples of 1/(2^ESU).
+    uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f;
+    mPkg->AddSample(mPkes->pkg_energy, energyStatusUnits);
+    mCores->AddSample(mPkes->pp0_energy, energyStatusUnits);
+    if (mIsGpuSupported) {
+      mGpu->AddSample(mPkes->pp1_energy, energyStatusUnits);
+    }
+    if (mIsRamSupported) {
+      mRam->AddSample(mPkes->ddr_energy, mHasRamUnitsQuirk
+                                             ? kQuirkyRamEnergyStatusUnits
+                                             : energyStatusUnits);
+    }
+  }
+};
+
+PowerCounters::PowerCounters() {
+  // RAPL values are global, so only sample them on the parent.
+  if (XRE_IsParentProcess()) {
+    mRapl = new RAPL(mCounters);
+  } else {
+    mRapl = nullptr;
+  }
+}
+
+PowerCounters::~PowerCounters() {
+  // Forget the counter pointers first; the counters themselves are deleted
+  // by the RAPL destructor.
+  mCounters.clear();
+
+  delete mRapl;
+  mRapl = nullptr;
+}
+
+void PowerCounters::Sample() {
+  // mRapl is null outside of the parent process.
+  if (!mRapl) {
+    return;
+  }
+
+  mRapl->Sample();
+}
diff --git a/tools/profiler/core/PowerCounters-mac-arm64.cpp b/tools/profiler/core/PowerCounters-mac-arm64.cpp
new file mode 100644
index 0000000000..3a84a479ef
--- /dev/null
+++ b/tools/profiler/core/PowerCounters-mac-arm64.cpp
@@ -0,0 +1,47 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PowerCounters.h"
+
+#include <mach/mach.h>
+
+// Counter reporting the energy used by the current task, as obtained from
+// task_info(TASK_POWER_INFO_V2).
+class ProcessPower final : public BaseProfilerCount {
+ public:
+  ProcessPower()
+      : BaseProfilerCount("Process Power", nullptr, nullptr, "power",
+                          "Power utilization") {}
+
+  // Every call produces a sample that is flagged as new.
+  CountSample Sample() override {
+    CountSample sample;
+    sample.isSampleNew = true;
+    sample.number = 0;
+    sample.count = GetTaskEnergy();
+    return sample;
+  }
+
+ private:
+  // Returns the task energy in picowatt-hours, or 0 if task_info() fails.
+  int64_t GetTaskEnergy() {
+    task_power_info_v2_data_t powerInfo;
+    mach_msg_type_number_t infoCount = TASK_POWER_INFO_V2_COUNT;
+    const kern_return_t status =
+        task_info(mach_task_self(), TASK_POWER_INFO_V2,
+                  (task_info_t)&powerInfo, &infoCount);
+
+    if (status == KERN_SUCCESS) {
+      // task_energy is in nanojoules. To be consistent with the Windows EMI
+      // API, return values in picowatt-hour.
+      return powerInfo.task_energy / 3.6;
+    }
+
+    return 0;
+  }
+};
+
+PowerCounters::PowerCounters() : mProcessPower(new ProcessPower()) {
+  if (!mProcessPower) {
+    return;
+  }
+
+  // The result of append() is deliberately discarded.
+  (void)mCounters.append(mProcessPower.get());
+}
+
+PowerCounters::~PowerCounters() {
+  // Only the pointers are dropped here; nothing is deleted explicitly.
+  mCounters.clear();
+}
+
+void PowerCounters::Sample() {
+  // Intentionally empty in this implementation.
+}
diff --git a/tools/profiler/core/PowerCounters-win.cpp b/tools/profiler/core/PowerCounters-win.cpp
new file mode 100644
index 0000000000..f1d05389b6
--- /dev/null
+++ b/tools/profiler/core/PowerCounters-win.cpp
@@ -0,0 +1,342 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PowerCounters.h"
+#include "nsXULAppAPI.h"  // for XRE_IsParentProcess
+#include "nsString.h"
+
+#include <windows.h>
+#include <devioctl.h>
+#include <setupapi.h>  // for SetupDi*
+// LogSeverity, defined by setupapi.h to DWORD, messes with other code.
+#undef LogSeverity
+
+#undef NTDDI_VERSION
+#define NTDDI_VERSION NTDDI_WINBLUE
+#include <emi.h>
+
+#ifndef NTDDI_WIN10_RS5
+// EMI v2 API exists in SDK 10.0.17763 (Windows 10 1809 / Redstone 5) and later.
+// Our build machines are still on SDK 10.0.17134, so declare what it lacks.
+// Remove this block when updating the SDK (bug 1774628).
+typedef EMI_METADATA EMI_METADATA_V1;
+typedef EMI_MEASUREMENT_DATA EMI_CHANNEL_MEASUREMENT_DATA;
+#  define EMI_VERSION_V2 2
+
+typedef struct {
+  EMI_MEASUREMENT_UNIT MeasurementUnit;
+  USHORT ChannelNameSize;  // Used as a byte count by EMI_CHANNEL_V2_LENGTH.
+  WCHAR ChannelName[ANYSIZE_ARRAY];
+} EMI_CHANNEL_V2;
+
+typedef struct {
+  WCHAR HardwareOEM[EMI_NAME_MAX];
+  WCHAR HardwareModel[EMI_NAME_MAX];
+  USHORT HardwareRevision;
+  USHORT ChannelCount;
+  EMI_CHANNEL_V2 Channels[ANYSIZE_ARRAY];
+} EMI_METADATA_V2;
+
+#  define EMI_CHANNEL_V2_LENGTH(_ChannelNameSize) \
+    (FIELD_OFFSET(EMI_CHANNEL_V2, ChannelName) + (_ChannelNameSize))
+
+#  define EMI_CHANNEL_V2_NEXT_CHANNEL(_Channel) \
+    ((EMI_CHANNEL_V2*)((PUCHAR)(_Channel) +     \
+                       EMI_CHANNEL_V2_LENGTH((_Channel)->ChannelNameSize)))
+#endif  // NTDDI_WIN10_RS5
+
+using namespace mozilla;
+
+// This is a counter to collect power utilization during profiling.
+// It cannot be a raw `ProfilerCounter` because we need to manually add/remove
+// it while the profiler lock is already held.
+class PowerMeterChannel final : public BaseProfilerCount {
+ public:
+  explicit PowerMeterChannel(const WCHAR* aChannelName, ULONGLONG aInitialValue,
+                             ULONGLONG aInitialTime)
+      : BaseProfilerCount(nullptr, nullptr, nullptr, "power",
+                          "Power utilization"),
+        mChannelName(NS_ConvertUTF16toUTF8(aChannelName)),
+        mPreviousValue(aInitialValue),
+        mPreviousTime(aInitialTime),
+        mIsSampleNew(true) {
+    if (mChannelName.Equals("RAPL_Package0_PKG")) {
+      mLabel = "Power: CPU package";
+      mDescription = mChannelName.get();
+    } else if (mChannelName.Equals("RAPL_Package0_PP0")) {
+      mLabel = "Power: CPU cores";
+      mDescription = mChannelName.get();
+    } else if (mChannelName.Equals("RAPL_Package0_PP1")) {
+      mLabel = "Power: iGPU";
+      mDescription = mChannelName.get();
+    } else if (mChannelName.Equals("RAPL_Package0_DRAM")) {
+      mLabel = "Power: DRAM";
+      mDescription = mChannelName.get();
+    } else {
+      unsigned int coreId;
+      if (sscanf(mChannelName.get(), "RAPL_Package0_Core%u_CORE", &coreId) ==
+          1) {
+        mLabelString = "Power: CPU core ";
+        mLabelString.AppendInt(coreId);
+        mLabel = mLabelString.get();
+        mDescription = mChannelName.get();
+      } else {
+        mLabel = mChannelName.get();  // Unknown channel: use its raw name.
+      }
+    }
+  }
+
+  CountSample Sample() override {
+    CountSample result;
+    result.count = mCounter;
+    result.number = 0;
+    result.isSampleNew = mIsSampleNew;
+    mIsSampleNew = false;  // Stays false until AddSample() flags new data.
+    return result;
+  }
+
+  void AddSample(ULONGLONG aAbsoluteEnergy, ULONGLONG aAbsoluteTime) {
+    // aAbsoluteTime is the time since the system start in 100ns increments.
+    if (aAbsoluteTime == mPreviousTime) {
+      return;
+    }
+    // Accumulate only when the absolute energy reading has grown.
+    if (aAbsoluteEnergy > mPreviousValue) {
+      int64_t increment = aAbsoluteEnergy - mPreviousValue;
+      mCounter += increment;
+      mPreviousValue += increment;
+      mPreviousTime = aAbsoluteTime;
+    }
+
+    mIsSampleNew = true;
+  }
+
+ private:
+  int64_t mCounter = 0;  // Accumulated by AddSample(), read by Sample().
+  nsCString mChannelName;
+
+  // Used as a storage when the label can not be a literal string.
+  nsCString mLabelString;
+
+  ULONGLONG mPreviousValue;
+  ULONGLONG mPreviousTime;
+  bool mIsSampleNew;
+};
+
+class PowerMeterDevice {  // An opened EMI device and its measurement channels.
+ public:
+  explicit PowerMeterDevice(LPCTSTR aDevicePath) {
+    mHandle = ::CreateFile(aDevicePath, GENERIC_READ,
+                           FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr,
+                           OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
+    if (mHandle == INVALID_HANDLE_VALUE) {
+      return;
+    }
+    // Any failure below returns early and leaves the device without channels.
+    EMI_VERSION version = {0};
+    DWORD dwOut;
+
+    if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_VERSION, nullptr, 0, &version,
+                           sizeof(version), &dwOut, nullptr) ||
+        (version.EmiVersion != EMI_VERSION_V1 &&
+         version.EmiVersion != EMI_VERSION_V2)) {
+      return;
+    }
+
+    EMI_METADATA_SIZE size = {0};
+    if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA_SIZE, nullptr, 0,
+                           &size, sizeof(size), &dwOut, nullptr) ||
+        !size.MetadataSize) {
+      return;
+    }
+    // Raw storage for the variable-sized metadata reported by the driver.
+    UniquePtr<uint8_t[]> metadata(new (std::nothrow)
+                                      uint8_t[size.MetadataSize]);
+    if (!metadata) {
+      return;
+    }
+
+    if (version.EmiVersion == EMI_VERSION_V2) {
+      EMI_METADATA_V2* metadata2 =
+          reinterpret_cast<EMI_METADATA_V2*>(metadata.get());
+      if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA, nullptr, 0,
+                             metadata2, size.MetadataSize, &dwOut, nullptr)) {
+        return;
+      }
+
+      if (!mChannels.reserve(metadata2->ChannelCount)) {
+        return;
+      }
+      // One measurement slot per channel, reused by every Sample() call.
+      mDataBuffer =
+          MakeUnique<EMI_CHANNEL_MEASUREMENT_DATA[]>(metadata2->ChannelCount);
+      if (!mDataBuffer) {
+        return;
+      }
+      // Take an initial reading to seed each channel's previous value.
+      if (!::DeviceIoControl(
+              mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(),
+              sizeof(EMI_CHANNEL_MEASUREMENT_DATA) * metadata2->ChannelCount,
+              &dwOut, nullptr)) {
+        return;
+      }
+      // Channel records are variable-length; walk them with the NEXT macro.
+      EMI_CHANNEL_V2* channel = &metadata2->Channels[0];
+      for (int i = 0; i < metadata2->ChannelCount; ++i) {
+        EMI_CHANNEL_MEASUREMENT_DATA* channel_data = &mDataBuffer[i];
+        mChannels.infallibleAppend(new PowerMeterChannel(
+            channel->ChannelName, channel_data->AbsoluteEnergy,
+            channel_data->AbsoluteTime));
+        channel = EMI_CHANNEL_V2_NEXT_CHANNEL(channel);
+      }
+    } else if (version.EmiVersion == EMI_VERSION_V1) {
+      EMI_METADATA_V1* metadata1 =
+          reinterpret_cast<EMI_METADATA_V1*>(metadata.get());
+      if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA, nullptr, 0,
+                             metadata1, size.MetadataSize, &dwOut, nullptr)) {
+        return;
+      }
+      // A v1 device is handled as one channel named after the hardware.
+      mDataBuffer = MakeUnique<EMI_CHANNEL_MEASUREMENT_DATA[]>(1);
+      if (!mDataBuffer) {
+        return;
+      }
+
+      if (!::DeviceIoControl(
+              mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(),
+              sizeof(EMI_CHANNEL_MEASUREMENT_DATA), &dwOut, nullptr)) {
+        return;
+      }
+
+      (void)mChannels.append(MakeUnique<PowerMeterChannel>(
+          metadata1->MeteredHardwareName, mDataBuffer[0].AbsoluteEnergy,
+          mDataBuffer[0].AbsoluteTime));
+    }
+  }
+
+  ~PowerMeterDevice() {
+    if (mHandle != INVALID_HANDLE_VALUE) {
+      ::CloseHandle(mHandle);
+    }
+  }
+  // Reads fresh measurements and forwards them to the channels.
+  void Sample() {
+    MOZ_ASSERT(HasChannels());
+    MOZ_ASSERT(mDataBuffer);
+
+    DWORD dwOut;
+    if (!::DeviceIoControl(
+            mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(),
+            sizeof(EMI_CHANNEL_MEASUREMENT_DATA) * mChannels.length(), &dwOut,
+            nullptr)) {
+      return;
+    }
+
+    for (size_t i = 0; i < mChannels.length(); ++i) {
+      EMI_CHANNEL_MEASUREMENT_DATA* channel_data = &mDataBuffer[i];
+      mChannels[i]->AddSample(channel_data->AbsoluteEnergy,
+                              channel_data->AbsoluteTime);
+    }
+  }
+
+  bool HasChannels() { return mChannels.length() != 0; }
+  void AppendCountersTo(PowerCounters::CountVector& aCounters) {
+    if (aCounters.reserve(aCounters.length() + mChannels.length())) {
+      for (auto& channel : mChannels) {
+        aCounters.infallibleAppend(channel.get());  // Non-owning pointer.
+      }
+    }
+  }
+
+ private:
+  Vector<UniquePtr<PowerMeterChannel>, 4> mChannels;
+  HANDLE mHandle = INVALID_HANDLE_VALUE;
+  UniquePtr<EMI_CHANNEL_MEASUREMENT_DATA[]> mDataBuffer;
+};
+
+PowerCounters::PowerCounters() {
+  class MOZ_STACK_CLASS HDevInfoHolder final {
+   public:
+    explicit HDevInfoHolder(HDEVINFO aHandle) : mHandle(aHandle) {}
+    // Destroys the wrapped device information list.
+    ~HDevInfoHolder() { ::SetupDiDestroyDeviceInfoList(mHandle); }
+
+   private:
+    HDEVINFO mHandle;
+  };
+
+  if (!XRE_IsParentProcess()) {
+    // Energy meters are global, so only sample them on the parent.
+    return;
+  }
+
+  // Energy Metering Device Interface
+  // {45BD8344-7ED6-49cf-A440-C276C933B053}
+  //
+  // Using GUID_DEVICE_ENERGY_METER does not compile as the symbol does not
+  // exist before Windows 10.
+  GUID my_GUID_DEVICE_ENERGY_METER = {
+      0x45bd8344,
+      0x7ed6,
+      0x49cf,
+      {0xa4, 0x40, 0xc2, 0x76, 0xc9, 0x33, 0xb0, 0x53}};
+
+  HDEVINFO hdev =
+      ::SetupDiGetClassDevs(&my_GUID_DEVICE_ENERGY_METER, nullptr, nullptr,
+                            DIGCF_PRESENT | DIGCF_DEVICEINTERFACE);
+  if (hdev == INVALID_HANDLE_VALUE) {
+    return;
+  }
+
+  HDevInfoHolder hdevHolder(hdev);
+  // Enumerate every present energy meter interface and try to open it.
+  DWORD i = 0;
+  SP_DEVICE_INTERFACE_DATA did = {0};
+  did.cbSize = sizeof(did);
+
+  while (::SetupDiEnumDeviceInterfaces(
+      hdev, nullptr, &my_GUID_DEVICE_ENERGY_METER, i++, &did)) {
+    DWORD bufferSize = 0;
+    ::SetupDiGetDeviceInterfaceDetail(hdev, &did, nullptr, 0, &bufferSize,
+                                      nullptr);
+    if (::GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+      continue;
+    }
+    // Allocate the size reported by the sizing call above.
+    UniquePtr<uint8_t[]> buffer(new (std::nothrow) uint8_t[bufferSize]);
+    if (!buffer) {
+      continue;
+    }
+
+    PSP_DEVICE_INTERFACE_DETAIL_DATA pdidd =
+        reinterpret_cast<PSP_DEVICE_INTERFACE_DETAIL_DATA>(buffer.get());
+    MOZ_ASSERT(uintptr_t(buffer.get()) %
+                   alignof(PSP_DEVICE_INTERFACE_DETAIL_DATA) ==
+               0);
+    pdidd->cbSize = sizeof(*pdidd);
+    if (!::SetupDiGetDeviceInterfaceDetail(hdev, &did, pdidd, bufferSize,
+                                           &bufferSize, nullptr)) {
+      continue;
+    }
+    // Open the device; it is kept only if it has at least one channel.
+    UniquePtr<PowerMeterDevice> pmd =
+        MakeUnique<PowerMeterDevice>(pdidd->DevicePath);
+    if (!pmd->HasChannels() ||
+        !mPowerMeterDevices.emplaceBack(std::move(pmd))) {
+      NS_WARNING("PowerMeterDevice without measurement channel (or OOM)");
+    }
+  }
+  // Publish the channels of all kept devices through mCounters.
+  for (auto& device : mPowerMeterDevices) {
+    device->AppendCountersTo(mCounters);
+  }
+}
+
+PowerCounters::~PowerCounters() { mCounters.clear(); }  // Drops raw pointers.
+
+void PowerCounters::Sample() {  // Polls every kept power meter device.
+  for (size_t i = 0; i < mPowerMeterDevices.length(); ++i) {
+    mPowerMeterDevices[i]->Sample();
+  }
+}
diff --git a/tools/profiler/core/PowerCounters.h b/tools/profiler/core/PowerCounters.h
new file mode 100644
index 0000000000..2fd8d5892c
--- /dev/null
+++ b/tools/profiler/core/PowerCounters.h
@@ -0,0 +1,52 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TOOLS_POWERCOUNTERS_H_
+#define TOOLS_POWERCOUNTERS_H_
+
+#include "PlatformMacros.h"
+#include "mozilla/ProfilerCounts.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+
+#if defined(_MSC_VER)
+class PowerMeterDevice;
+#endif
+#if defined(GP_PLAT_arm64_darwin)
+class ProcessPower;
+#endif
+#if defined(GP_PLAT_amd64_darwin)
+class RAPL;
+#endif
+
+class PowerCounters {
+ public:
+#if defined(_MSC_VER) || defined(GP_OS_darwin) || defined(GP_PLAT_amd64_linux)
+  explicit PowerCounters();
+  ~PowerCounters();
+  void Sample();
+#else  // Other platforms: inline no-op implementation.
+  explicit PowerCounters(){};
+  ~PowerCounters(){};
+  void Sample(){};
+#endif
+  // Raw (non-owning) pointers to the counters provided on this platform.
+  using CountVector = mozilla::Vector<BaseProfilerCount*, 4>;
+  const CountVector& GetCounters() { return mCounters; }
+
+ private:
+  CountVector mCounters;
+  // Platform-specific state, declared only where it is used.
+#if defined(_MSC_VER)
+  mozilla::Vector<mozilla::UniquePtr<PowerMeterDevice>> mPowerMeterDevices;
+#endif
+#if defined(GP_PLAT_arm64_darwin)
+  mozilla::UniquePtr<ProcessPower> mProcessPower;
+#endif
+#if defined(GP_PLAT_amd64_darwin)
+  RAPL* mRapl;
+#endif
+};
+
+#endif /* ndef TOOLS_POWERCOUNTERS_H_ */
diff --git a/tools/profiler/core/ProfileAdditionalInformation.cpp b/tools/profiler/core/ProfileAdditionalInformation.cpp
new file mode 100644
index 0000000000..ba3cd80e7c
--- /dev/null
+++ b/tools/profiler/core/ProfileAdditionalInformation.cpp
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileAdditionalInformation.h"
+
+#include "jsapi.h"
+#include "js/JSON.h"
+#include "js/PropertyAndElement.h"
+#include "js/Value.h"
+#include "mozilla/JSONStringWriteFuncs.h"
+#include "mozilla/ipc/IPDLParamTraits.h"
+
+#ifdef MOZ_GECKO_PROFILER
+#  include "platform.h"
+
+void mozilla::ProfileGenerationAdditionalInformation::ToJSValue(
+    JSContext* aCx, JS::MutableHandle<JS::Value> aRetVal) const {
+  // Serialize the shared libraries to JSON text, then parse it to a JS value.
+  JS::Rooted<JS::Value> sharedLibrariesVal(aCx);
+  {
+    JSONStringWriteFunc<nsCString> buffer;
+    JSONWriter w(buffer, JSONWriter::SingleLineStyle);
+    w.StartArrayElement();
+    AppendSharedLibraries(w, mSharedLibraries);
+    w.EndArray();
+    NS_ConvertUTF8toUTF16 buffer16(buffer.StringCRef());
+    MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx,
+                                 static_cast<const char16_t*>(buffer16.get()),
+                                 buffer16.Length(), &sharedLibrariesVal));
+  }
+  // NOTE(review): JS_NewPlainObject/JS_SetProperty results are not checked.
+  JS::Rooted<JSObject*> additionalInfoObj(aCx, JS_NewPlainObject(aCx));
+  JS_SetProperty(aCx, additionalInfoObj, "sharedLibraries", sharedLibrariesVal);
+  aRetVal.setObject(*additionalInfoObj);
+}
+#endif  // MOZ_GECKO_PROFILER
+
+namespace IPC {
+
+#ifdef MOZ_GECKO_PROFILER
+void IPC::ParamTraits<SharedLibrary>::Write(MessageWriter* aWriter,
+                                            const paramType& aParam) {
+  WriteParam(aWriter, aParam.mStart);  // Field order is mirrored by Read().
+  WriteParam(aWriter, aParam.mEnd);
+  WriteParam(aWriter, aParam.mOffset);
+  WriteParam(aWriter, aParam.mBreakpadId);
+  WriteParam(aWriter, aParam.mCodeId);
+  WriteParam(aWriter, aParam.mModuleName);
+  WriteParam(aWriter, aParam.mModulePath);
+  WriteParam(aWriter, aParam.mDebugName);
+  WriteParam(aWriter, aParam.mDebugPath);
+  WriteParam(aWriter, aParam.mVersion);
+  WriteParam(aWriter, aParam.mArch);
+}
+
+bool IPC::ParamTraits<SharedLibrary>::Read(MessageReader* aReader,
+                                           paramType* aResult) {
+  return ReadParam(aReader, &aResult->mStart) &&  // Same order as Write().
+         ReadParam(aReader, &aResult->mEnd) &&
+         ReadParam(aReader, &aResult->mOffset) &&
+         ReadParam(aReader, &aResult->mBreakpadId) &&
+         ReadParam(aReader, &aResult->mCodeId) &&
+         ReadParam(aReader, &aResult->mModuleName) &&
+         ReadParam(aReader, &aResult->mModulePath) &&
+         ReadParam(aReader, &aResult->mDebugName) &&
+         ReadParam(aReader, &aResult->mDebugPath) &&
+         ReadParam(aReader, &aResult->mVersion) &&
+         ReadParam(aReader, &aResult->mArch);
+}
+
+void IPC::ParamTraits<SharedLibraryInfo>::Write(MessageWriter* aWriter,
+                                                const paramType& aParam) {
+  // The entries are serialized through a non-const view of aParam.
+  WriteParam(aWriter, const_cast<paramType&>(aParam).mEntries);
+}
+
+bool IPC::ParamTraits<SharedLibraryInfo>::Read(MessageReader* aReader,
+                                               paramType* aResult) {
+  return ReadParam(aReader, &aResult->mEntries);  // Counterpart of Write().
+}
+#endif  // MOZ_GECKO_PROFILER
+
+void IPC::ParamTraits<mozilla::ProfileGenerationAdditionalInformation>::Write(
+    MessageWriter* aWriter, const paramType& aParam) {
+#ifdef MOZ_GECKO_PROFILER
+  WriteParam(aWriter, aParam.mSharedLibraries);  // Only field that is sent.
+#endif  // MOZ_GECKO_PROFILER
+}
+
+bool IPC::ParamTraits<mozilla::ProfileGenerationAdditionalInformation>::Read(
+    MessageReader* aReader, paramType* aResult) {
+#ifdef MOZ_GECKO_PROFILER
+  return ReadParam(aReader, &aResult->mSharedLibraries);
+#else
+  return true;  // Write() sends nothing in this configuration.
+#endif  // MOZ_GECKO_PROFILER
+}
+
+}  // namespace IPC
diff --git a/tools/profiler/core/ProfileBuffer.cpp b/tools/profiler/core/ProfileBuffer.cpp
new file mode 100644
index 0000000000..170a4f14b4
--- /dev/null
+++ b/tools/profiler/core/ProfileBuffer.cpp
@@ -0,0 +1,243 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBuffer.h"
+
+#include "BaseProfiler.h"
+#include "js/GCAPI.h"
+#include "jsfriendapi.h"
+#include "mozilla/MathAlgorithms.h"
+#include "nsJSPrincipals.h"
+#include "nsScriptSecurityManager.h"
+
+using namespace mozilla;
+
+ProfileBuffer::ProfileBuffer(ProfileChunkedBuffer& aBuffer)
+    : mEntries(aBuffer) {
+  // The given buffer is expected to be in-session already (only asserted).
+  MOZ_ASSERT(mEntries.IsInSession());
+}
+
+/* static */
+ProfileBufferBlockIndex ProfileBuffer::AddEntry(
+    ProfileChunkedBuffer& aProfileChunkedBuffer,
+    const ProfileBufferEntry& aEntry) {
+  switch (aEntry.GetKind()) {
+#define SWITCH_KIND(KIND, TYPE, SIZE)                          \
+  case ProfileBufferEntry::Kind::KIND: {                       \
+    return aProfileChunkedBuffer.PutFrom(&aEntry, 1 + (SIZE)); \
+  }
+    // One case per entry kind, passing a length of 1 + SIZE to PutFrom().
+    FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(SWITCH_KIND)
+
+#undef SWITCH_KIND
+    default:
+      MOZ_ASSERT(false, "Unhandled ProfilerBuffer entry KIND");
+      return ProfileBufferBlockIndex{};
+  }
+}
+
+// Called from a signal handler: only call reentrant functions from here.
+uint64_t ProfileBuffer::AddEntry(const ProfileBufferEntry& aEntry) {
+  return AddEntry(mEntries, aEntry).ConvertToProfileBufferIndex();
+}
+
+// Static. Appends a ThreadId entry for aThreadId to the given buffer.
+ProfileBufferBlockIndex ProfileBuffer::AddThreadIdEntry(
+    ProfileChunkedBuffer& aProfileChunkedBuffer, ProfilerThreadId aThreadId) {
+  const ProfileBufferEntry entry = ProfileBufferEntry::ThreadId(aThreadId);
+  return AddEntry(aProfileChunkedBuffer, entry);
+}
+
+uint64_t ProfileBuffer::AddThreadIdEntry(ProfilerThreadId aThreadId) {
+  return AddEntry(ProfileBufferEntry::ThreadId(aThreadId));
+}
+
+void ProfileBuffer::CollectCodeLocation(
+    const char* aLabel, const char* aStr, uint32_t aFrameFlags,
+    uint64_t aInnerWindowID, const Maybe<uint32_t>& aLineNumber,
+    const Maybe<uint32_t>& aColumnNumber,
+    const Maybe<JS::ProfilingCategoryPair>& aCategoryPair) {
+  AddEntry(ProfileBufferEntry::Label(aLabel));  // Always emitted first.
+  AddEntry(ProfileBufferEntry::FrameFlags(uint64_t(aFrameFlags)));
+
+  if (aStr) {
+    // Store the string using one or more DynamicStringFragment entries.
+    size_t strLen = strlen(aStr) + 1;  // +1 for the null terminator
+    // If larger than the prescribed limit, we will cut the string and end it
+    // with an ellipsis.
+    const bool tooBig = strLen > kMaxFrameKeyLength;
+    if (tooBig) {
+      strLen = kMaxFrameKeyLength;
+    }
+    char chars[ProfileBufferEntry::kNumChars];
+    for (size_t j = 0;; j += ProfileBufferEntry::kNumChars) {
+      // Store up to kNumChars characters in the entry.
+      size_t len = ProfileBufferEntry::kNumChars;
+      const bool last = j + len >= strLen;
+      if (last) {
+        // Only the last entry may be smaller than kNumChars.
+        len = strLen - j;
+        if (tooBig) {
+          // That last entry is part of a too-big string, replace the end
+          // characters with an ellipsis "..." followed by the terminator.
+          len = std::max(len, size_t(4));
+          chars[len - 4] = '.';
+          chars[len - 3] = '.';
+          chars[len - 2] = '.';
+          chars[len - 1] = '\0';
+          // Make sure the memcpy will not overwrite our ellipsis!
+          len -= 4;
+        }
+      }
+      memcpy(chars, &aStr[j], len);
+      AddEntry(ProfileBufferEntry::DynamicStringFragment(chars));
+      if (last) {
+        break;
+      }
+    }
+  }
+  // The entries below are optional: each is only added when it is set.
+  if (aInnerWindowID) {
+    AddEntry(ProfileBufferEntry::InnerWindowID(aInnerWindowID));
+  }
+
+  if (aLineNumber) {
+    AddEntry(ProfileBufferEntry::LineNumber(*aLineNumber));
+  }
+
+  if (aColumnNumber) {
+    AddEntry(ProfileBufferEntry::ColumnNumber(*aColumnNumber));
+  }
+
+  if (aCategoryPair.isSome()) {
+    AddEntry(ProfileBufferEntry::CategoryPair(int(*aCategoryPair)));
+  }
+}
+
+size_t ProfileBuffer::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+  // For now only mEntries itself is counted. If DMD finds it worthwhile, the
+  // memory pointed to by the elements within mEntries may be measured too.
+  const size_t entriesSize = mEntries.SizeOfExcludingThis(aMallocSizeOf);
+  return entriesSize;
+}
+
+size_t ProfileBuffer::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  return SizeOfExcludingThis(aMallocSizeOf) + aMallocSizeOf(this);
+}
+
+void ProfileBuffer::CollectOverheadStats(double aSamplingTimeMs,
+                                         TimeDuration aLocking,
+                                         TimeDuration aCleaning,
+                                         TimeDuration aCounters,
+                                         TimeDuration aThreads) {
+  // Interval statistics: every sampling after the first one closes an
+  // interval, so there is always one interval fewer than samplings. Over the
+  // expected many thousands of iterations that difference is insignificant.
+  const double samplingTimeUs = aSamplingTimeMs * 1000.0;
+  if (mFirstSamplingTimeUs != 0.0) {
+    mIntervalsUs.Count(samplingTimeUs - mLastSamplingTimeUs);
+  } else {
+    mFirstSamplingTimeUs = samplingTimeUs;
+  }
+  mLastSamplingTimeUs = samplingTimeUs;
+  const double lockingUs = aLocking.ToMilliseconds() * 1000.0;
+  const double cleaningUs = aCleaning.ToMilliseconds() * 1000.0;
+  const double countersUs = aCounters.ToMilliseconds() * 1000.0;
+  const double threadsUs = aThreads.ToMilliseconds() * 1000.0;
+
+  mOverheadsUs.Count(lockingUs + cleaningUs + countersUs + threadsUs);
+  mLockingsUs.Count(lockingUs);
+  mCleaningsUs.Count(cleaningUs);
+  mCountersUs.Count(countersUs);
+  mThreadsUs.Count(threadsUs);
+
+  // The environment variable is consulted only once, on the first call.
+  static const bool sRecordSamplingOverhead = []() {
+    const char* envValue = getenv("MOZ_PROFILER_RECORD_OVERHEADS");
+    return envValue != nullptr && *envValue != '\0';
+  }();
+  if (sRecordSamplingOverhead) {
+    AddEntry(ProfileBufferEntry::ProfilerOverheadTime(aSamplingTimeMs));
+    AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(lockingUs));
+    AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(cleaningUs));
+    AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(countersUs));
+    AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(threadsUs));
+  }
+}
+
+ProfilerBufferInfo ProfileBuffer::GetProfilerBufferInfo() const {
+  return {BufferRangeStart(),
+          BufferRangeEnd(),
+          static_cast<uint32_t>(*mEntries.BufferLength() /
+                                8),  // Entry count, at 8 bytes per entry.
+          mIntervalsUs,
+          mOverheadsUs,
+          mLockingsUs,
+          mCleaningsUs,
+          mCountersUs,
+          mThreadsUs};
+}
+
+/* ProfileBufferCollector */
+
+void ProfileBufferCollector::CollectNativeLeafAddr(void* aAddr) {
+  mBuf.AddEntry(ProfileBufferEntry::NativeLeafAddr(aAddr));  // Single entry.
+}
+
+void ProfileBufferCollector::CollectJitReturnAddr(void* aAddr) {
+  mBuf.AddEntry(ProfileBufferEntry::JitReturnAddr(aAddr));  // Single entry.
+}
+
+void ProfileBufferCollector::CollectWasmFrame(const char* aLabel) {
+  mBuf.CollectCodeLocation("", aLabel, 0, 0, Nothing(), Nothing(),
+                           Some(JS::ProfilingCategoryPair::JS_Wasm));
+}  // aLabel is passed as the string argument; the label itself is empty.
+
+void ProfileBufferCollector::CollectProfilingStackFrame(
+    const js::ProfilingStackFrame& aFrame) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(aFrame.isLabelFrame() ||
+             (aFrame.isJsFrame() && !aFrame.isOSRFrame()));
+
+  const char* label = aFrame.label();
+  const char* dynamicString = aFrame.dynamicString();
+  Maybe<uint32_t> line;
+  Maybe<uint32_t> column;
+
+  if (aFrame.isJsFrame()) {
+    // There are two kinds of JS frames that get pushed onto the ProfilingStack.
+    //
+    // - label = "", dynamic string = <something>
+    // - label = "js::RunScript", dynamic string = nullptr
+    //
+    // The line number is only interesting in the first case.
+
+    if (label[0] == '\0') {
+      MOZ_ASSERT(dynamicString);
+
+      // We call aFrame.script() repeatedly -- rather than storing the result in
+      // a local variable -- in order to avoid rooting hazards.
+      if (aFrame.script()) {
+        if (aFrame.pc()) {
+          unsigned col = 0;
+          line = Some(JS_PCToLineNumber(aFrame.script(), aFrame.pc(), &col));
+          column = Some(col);
+        }
+      }
+
+    } else {
+      MOZ_ASSERT(strcmp(label, "js::RunScript") == 0 && !dynamicString);
+    }
+  } else {
+    MOZ_ASSERT(aFrame.isLabelFrame());
+  }
+  // line and column stay Nothing() unless they were resolved above.
+  mBuf.CollectCodeLocation(label, dynamicString, aFrame.flags(),
+                           aFrame.realmID(), line, column,
+                           Some(aFrame.categoryPair()));
+}
diff --git a/tools/profiler/core/ProfileBuffer.h b/tools/profiler/core/ProfileBuffer.h
new file mode 100644
index 0000000000..5da34909cc
--- /dev/null
+++ b/tools/profiler/core/ProfileBuffer.h
@@ -0,0 +1,260 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PROFILE_BUFFER_H
+#define MOZ_PROFILE_BUFFER_H
+
+#include "GeckoProfiler.h"
+#include "ProfileBufferEntry.h"
+
+#include "mozilla/Maybe.h"
+#include "mozilla/PowerOfTwo.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+
+class ProcessStreamingContext;
+class RunningTimes;
+
+// Class storing most profiling data in a ProfileChunkedBuffer.
+//
+// This class is used as a queue of entries which, after construction, never
+// allocates. This makes it safe to use in the profiler's "critical section".
+class ProfileBuffer final {
+ public:
+  // ProfileBuffer constructor
+  // @param aBuffer The in-session ProfileChunkedBuffer to use as buffer
+  // manager.
+  explicit ProfileBuffer(mozilla::ProfileChunkedBuffer& aBuffer);
+
+  mozilla::ProfileChunkedBuffer& UnderlyingChunkedBuffer() const {
+    return mEntries;
+  }
+
+  bool IsThreadSafe() const { return mEntries.IsThreadSafe(); }
+
+  // Add |aEntry| to the buffer, ignoring what kind of entry it is.
+  uint64_t AddEntry(const ProfileBufferEntry& aEntry);
+
+  // Add to the buffer a sample start (ThreadId) entry for aThreadId.
+  // Returns the position of the entry.
+  uint64_t AddThreadIdEntry(ProfilerThreadId aThreadId);
+
+  void CollectCodeLocation(
+      const char* aLabel, const char* aStr, uint32_t aFrameFlags,
+      uint64_t aInnerWindowID, const mozilla::Maybe<uint32_t>& aLineNumber,
+      const mozilla::Maybe<uint32_t>& aColumnNumber,
+      const mozilla::Maybe<JS::ProfilingCategoryPair>& aCategoryPair);
+
+  // Maximum size of a frameKey string that we'll handle.
+  static const size_t kMaxFrameKeyLength = 512;
+
+  // Add JIT frame information to aJITFrameInfo for any JitReturnAddr entries
+  // that are currently in the buffer at or after aRangeStart, in samples
+  // for the given thread.
+  void AddJITInfoForRange(uint64_t aRangeStart, ProfilerThreadId aThreadId,
+                          JSContext* aContext, JITFrameInfo& aJITFrameInfo,
+                          mozilla::ProgressLogger aProgressLogger) const;
+
+  // Stream JSON for samples in the buffer to aWriter, using the supplied
+  // UniqueStacks object.
+  // Only streams samples for the given thread ID and which were taken at or
+  // after aSinceTime. If ID is 0, ignore the stored thread ID; this should only
+  // be used when the buffer contains only one sample.
+  // aUniqueStacks needs to contain information about any JIT frames that we
+  // might encounter in the buffer, before this method is called. In other
+  // words, you need to have called AddJITInfoForRange for every range that
+  // might contain JIT frame information before calling this method.
+  // Return the thread ID of the streamed sample(s), or 0.
+  ProfilerThreadId StreamSamplesToJSON(
+      SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId,
+      double aSinceTime, UniqueStacks& aUniqueStacks,
+      mozilla::ProgressLogger aProgressLogger) const;
+
+  void StreamMarkersToJSON(SpliceableJSONWriter& aWriter,
+                           ProfilerThreadId aThreadId,
+                           const mozilla::TimeStamp& aProcessStartTime,
+                           double aSinceTime, UniqueStacks& aUniqueStacks,
+                           mozilla::ProgressLogger aProgressLogger) const;
+
+  // Stream samples and markers from all threads that `aProcessStreamingContext`
+  // accepts.
+  void StreamSamplesAndMarkersToJSON(
+      ProcessStreamingContext& aProcessStreamingContext,
+      mozilla::ProgressLogger aProgressLogger) const;
+
+  void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
+                                double aSinceTime,
+                                mozilla::ProgressLogger aProgressLogger) const;
+  void StreamProfilerOverheadToJSON(
+      SpliceableJSONWriter& aWriter,
+      const mozilla::TimeStamp& aProcessStartTime, double aSinceTime,
+      mozilla::ProgressLogger aProgressLogger) const;
+  void StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+                            const mozilla::TimeStamp& aProcessStartTime,
+                            double aSinceTime,
+                            mozilla::ProgressLogger aProgressLogger) const;
+
+  // Find (via |aLastSample|) the most recent sample for the thread denoted by
+  // |aThreadId| and clone it, patching in the current time as appropriate.
+  // Mutate |aLastSample| to point to the newly inserted sample.
+  // Returns whether duplication was successful.
+  bool DuplicateLastSample(ProfilerThreadId aThreadId, double aSampleTimeMs,
+                           mozilla::Maybe<uint64_t>& aLastSample,
+                           const RunningTimes& aRunningTimes);
+
+  void DiscardSamplesBeforeTime(double aTime);
+
+  // Read the entry at aPosition; yields a default-constructed entry on a miss.
+  ProfileBufferEntry GetEntry(uint64_t aPosition) const {
+    return mEntries.ReadAt(
+        mozilla::ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            aPosition),
+        [&](mozilla::Maybe<mozilla::ProfileBufferEntryReader>&& aMER) {
+          ProfileBufferEntry entry;
+          if (aMER.isSome()) {
+            if (aMER->CurrentBlockIndex().ConvertToProfileBufferIndex() ==
+                aPosition) {
+              // If we're here, it means `aPosition` pointed at a valid block.
+              MOZ_RELEASE_ASSERT(aMER->RemainingBytes() <= sizeof(entry));
+              aMER->ReadBytes(&entry, aMER->RemainingBytes());
+            } else {
+              // EntryReader at the wrong position, pretend to have read
+              // everything.
+              aMER->SetRemainingBytes(0);
+            }
+          }
+          return entry;
+        });
+  }
+
+  size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+  size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+  void CollectOverheadStats(double aSamplingTimeMs,
+                            mozilla::TimeDuration aLocking,
+                            mozilla::TimeDuration aCleaning,
+                            mozilla::TimeDuration aCounters,
+                            mozilla::TimeDuration aThreads);
+
+  ProfilerBufferInfo GetProfilerBufferInfo() const;
+
+ private:
+  // Add |aEntry| to the provided ProfileChunkedBuffer.
+  // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+  // that is not attached to a `ProfileBuffer`.
+  static mozilla::ProfileBufferBlockIndex AddEntry(
+      mozilla::ProfileChunkedBuffer& aProfileChunkedBuffer,
+      const ProfileBufferEntry& aEntry);
+
+  // Add a sample start (ThreadId) entry for aThreadId to the provided
+  // ProfileChunkedBuffer. Returns the position of the entry.
+  // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+  // that is not attached to a `ProfileBuffer`.
+  static mozilla::ProfileBufferBlockIndex AddThreadIdEntry(
+      mozilla::ProfileChunkedBuffer& aProfileChunkedBuffer,
+      ProfilerThreadId aThreadId);
+
+  // The storage in which this ProfileBuffer stores its entries.
+  mozilla::ProfileChunkedBuffer& mEntries;
+
+ public:
+  // `BufferRangeStart()` and `BufferRangeEnd()` return `uint64_t` values
+  // corresponding to the first entry and past the last entry stored in
+  // `mEntries`.
+  //
+  // The returned values are not guaranteed to be stable, because other threads
+  // may also be accessing the buffer concurrently. But they will always
+  // increase, and can therefore give an indication of how far these values have
+  // *at least* reached. In particular:
+  // - Entries whose index is strictly less than `BufferRangeStart()` have been
+  //   discarded by now, so any related data may also be safely discarded.
+  // - It is safe to try and read entries at any index strictly less than
+  //   `BufferRangeEnd()` -- but note that these reads may fail by the time you
+  //   request them, as old entries get overwritten by new ones.
+  uint64_t BufferRangeStart() const { return mEntries.GetState().mRangeStart; }
+  uint64_t BufferRangeEnd() const { return mEntries.GetState().mRangeEnd; }
+
+ private:
+  // Single pre-allocated chunk (to avoid spurious mallocs), used when:
+  // - Duplicating sleeping stacks (hence scExpectedMaximumStackSize).
+  // - Adding JIT info.
+  // - Streaming stacks to JSON.
+  // Mutable because it's accessed from non-multithreaded const methods.
+  mutable mozilla::Maybe<mozilla::ProfileBufferChunkManagerSingle>
+      mMaybeWorkerChunkManager;
+  mozilla::ProfileBufferChunkManagerSingle& WorkerChunkManager() const {
+    if (mMaybeWorkerChunkManager.isNothing()) {
+      // Only actually allocate it on first use. (Some ProfileBuffers are
+      // temporary and don't actually need this.)
+      mMaybeWorkerChunkManager.emplace(
+          mozilla::ProfileBufferChunk::SizeofChunkMetadata() +
+          mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize);
+    }
+    return *mMaybeWorkerChunkManager;
+  }
+
+  // GetStreamingParametersForThreadCallback:
+  //   (ProfilerThreadId) -> Maybe<StreamingParametersForThread>
+  template <typename GetStreamingParametersForThreadCallback>
+  ProfilerThreadId DoStreamSamplesAndMarkersToJSON(
+      mozilla::FailureLatch& aFailureLatch,
+      GetStreamingParametersForThreadCallback&&
+          aGetStreamingParametersForThreadCallback,
+      double aSinceTime, ProcessStreamingContext* aStreamingContextForMarkers,
+      mozilla::ProgressLogger aProgressLogger) const;
+
+  double mFirstSamplingTimeUs = 0.0;
+  double mLastSamplingTimeUs = 0.0;
+  ProfilerStats mIntervalsUs;
+  ProfilerStats mOverheadsUs;
+  ProfilerStats mLockingsUs;
+  ProfilerStats mCleaningsUs;
+  ProfilerStats mCountersUs;
+  ProfilerStats mThreadsUs;
+};
+
+/**
+ * Helper type used to implement ProfilerStackCollector. This type is used as
+ * the collector for MergeStacks by ProfileBuffer. It holds a reference to the
+ * buffer, as well as the sample position and the buffer range start that it
+ * reports back through the ProfilerStackCollector interface.
+ */
+class ProfileBufferCollector final : public ProfilerStackCollector {
+ public:
+  ProfileBufferCollector(ProfileBuffer& aBuf, uint64_t aSamplePos,
+                         uint64_t aBufferRangeStart)
+      : mBuf(aBuf),
+        mSamplePositionInBuffer(aSamplePos),
+        mBufferRangeStart(aBufferRangeStart) {
+    MOZ_ASSERT(
+        aSamplePos >= aBufferRangeStart,
+        "The sample position should always be after the buffer range start");
+  }
+
+  // Where the sample starts in the profiler buffer. That buffer may differ
+  // from the one this collector writes the sample data into.
+  mozilla::Maybe<uint64_t> SamplePositionInBuffer() override {
+    return mozilla::Some(mSamplePositionInBuffer);
+  }
+
+  // Start of the profiler buffer's range (again, possibly not the buffer
+  // that the sample data is collected into).
+  mozilla::Maybe<uint64_t> BufferRangeStart() override {
+    return mozilla::Some(mBufferRangeStart);
+  }
+
+  void CollectNativeLeafAddr(void* aAddr) override;
+  void CollectJitReturnAddr(void* aAddr) override;
+  void CollectWasmFrame(const char* aLabel) override;
+  void CollectProfilingStackFrame(
+      const js::ProfilingStackFrame& aFrame) override;
+
+ private:
+  ProfileBuffer& mBuf;
+  uint64_t mSamplePositionInBuffer;
+  uint64_t mBufferRangeStart;
+};
+
+#endif
diff --git a/tools/profiler/core/ProfileBufferEntry.cpp b/tools/profiler/core/ProfileBufferEntry.cpp
new file mode 100644
index 0000000000..5429eac0b8
--- /dev/null
+++ b/tools/profiler/core/ProfileBufferEntry.cpp
@@ -0,0 +1,2321 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBufferEntry.h"
+
+#include "mozilla/ProfilerMarkers.h"
+#include "platform.h"
+#include "ProfileBuffer.h"
+#include "ProfiledThreadData.h"
+#include "ProfilerBacktrace.h"
+#include "ProfilerRustBindings.h"
+
+#include "js/ProfilingFrameIterator.h"
+#include "jsapi.h"
+#include "jsfriendapi.h"
+#include "mozilla/Logging.h"
+#include "mozilla/JSONStringWriteFuncs.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/StackWalk.h"
+#include "nsThreadUtils.h"
+#include "nsXULAppAPI.h"
+#include "ProfilerCodeAddressService.h"
+
+#include <ostream>
+#include <type_traits>
+
+using namespace mozilla;
+using namespace mozilla::literals::ProportionValue_literals;
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN ProfileBufferEntry
+
+ProfileBufferEntry::ProfileBufferEntry()
+    : mKind(Kind::INVALID), mStorage{} {}  // Value-init zero-fills mStorage.
+
+// aString must be a static string.
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, const char* aString)
+    : mKind(aKind) {
+  MOZ_ASSERT(aKind == Kind::Label);
+  memcpy(&mStorage[0], &aString, sizeof(const char*));  // Pointer, not text.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, char aChars[kNumChars])
+    : mKind(aKind) {
+  MOZ_ASSERT(aKind == Kind::DynamicStringFragment);
+  memcpy(&mStorage[0], &aChars[0], kNumChars);  // Always kNumChars bytes.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, void* aPtr) : mKind(aKind) {
+  memcpy(&mStorage[0], &aPtr, sizeof(void*));  // Stores the address itself.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, double aDouble)
+    : mKind(aKind) {
+  memcpy(&mStorage[0], &aDouble, sizeof(double));  // Raw object bytes.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt) : mKind(aKind) {
+  memcpy(&mStorage[0], &aInt, sizeof(int));  // Raw object bytes.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64)
+    : mKind(aKind) {
+  memcpy(&mStorage[0], &aInt64, sizeof(int64_t));  // Raw object bytes.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64)
+    : mKind(aKind) {
+  memcpy(&mStorage[0], &aUint64, sizeof(uint64_t));  // Raw object bytes.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, ProfilerThreadId aThreadId)
+    : mKind(aKind) {
+  static_assert(sizeof(ProfilerThreadId) <= sizeof(mStorage));
+  static_assert(std::is_trivially_copyable_v<ProfilerThreadId>);
+  memcpy(&mStorage[0], &aThreadId, sizeof(ProfilerThreadId));
+}
+
+const char* ProfileBufferEntry::GetString() const {
+  const char* str;  // Fully overwritten by the memcpy below.
+  memcpy(&str, &mStorage[0], sizeof(const char*));
+  return str;
+}
+
+void* ProfileBufferEntry::GetPtr() const {
+  void* ptr;  // Fully overwritten by the memcpy below.
+  memcpy(&ptr, &mStorage[0], sizeof(void*));
+  return ptr;
+}
+
+double ProfileBufferEntry::GetDouble() const {
+  double value;  // Fully overwritten by the memcpy below.
+  memcpy(&value, &mStorage[0], sizeof(double));
+  return value;
+}
+
+int ProfileBufferEntry::GetInt() const {
+  int value;  // Fully overwritten by the memcpy below.
+  memcpy(&value, &mStorage[0], sizeof(int));
+  return value;
+}
+
+int64_t ProfileBufferEntry::GetInt64() const {
+  int64_t value;  // Fully overwritten by the memcpy below.
+  memcpy(&value, &mStorage[0], sizeof(int64_t));
+  return value;
+}
+
+uint64_t ProfileBufferEntry::GetUint64() const {
+  uint64_t value;  // Fully overwritten by the memcpy below.
+  memcpy(&value, &mStorage[0], sizeof(uint64_t));
+  return value;
+}
+
+ProfilerThreadId ProfileBufferEntry::GetThreadId() const {
+  static_assert(std::is_trivially_copyable_v<ProfilerThreadId>);
+  ProfilerThreadId threadId;
+  memcpy(&threadId, &mStorage[0], sizeof(ProfilerThreadId));
+  return threadId;
+}
+
+void ProfileBufferEntry::CopyCharsInto(char (&aOutArray)[kNumChars]) const {
+  memcpy(&aOutArray[0], &mStorage[0], sizeof(aOutArray));  // kNumChars bytes.
+}
+
+// END ProfileBufferEntry
+////////////////////////////////////////////////////////////////////////
+
+struct TypeInfo {  // Every field is a Maybe<>, i.e. may be absent.
+  Maybe<nsCString> mKeyedBy;
+  Maybe<nsCString> mName;
+  Maybe<nsCString> mLocation;
+  Maybe<unsigned> mLineNumber;
+};
+
+// As mentioned in ProfileBufferEntry.h, the JSON format contains many
+// arrays whose elements are laid out according to various schemas to help
+// de-duplication. This RAII class helps write these arrays by keeping track of
+// the last non-null element written and adding the appropriate number of null
+// elements when writing new non-null elements. It also automatically opens and
+// closes an array element on the given JSON writer.
+//
+// You grant the AutoArraySchemaWriter exclusive access to the JSONWriter (and,
+// for AutoArraySchemaWithStringsWriter, to the UniqueJSONStrings object) for
+// the lifetime of the writer. Do not access them independently while the
+// writer is alive. Subclasses can reach the underlying JSON writer through
+// the protected Writer() accessor.
+//
+// Example usage:
+//
+//     // Define the schema of elements in this type of array: [FOO, BAR, BAZ]
+//     enum Schema : uint32_t {
+//       FOO = 0,
+//       BAR = 1,
+//       BAZ = 2
+//     };
+//
+//     AutoArraySchemaWriter writer(someJsonWriter);
+//     if (shouldWriteFoo) {
+//       writer.IntElement(FOO, getFoo());
+//     }
+//     ... etc ...
+//
+//     The elements need to be added in-order.
+class MOZ_RAII AutoArraySchemaWriter {
+ public:
+  explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter)
+      : mWriter(aWriter) {
+    mWriter.StartArrayElement();  // Closed again by the destructor.
+  }
+
+  ~AutoArraySchemaWriter() { mWriter.EndArray(); }
+
+  void BoolElement(uint32_t aIndex, bool aValue) {
+    FillUpTo(aIndex);
+    mWriter.BoolElement(aValue);
+  }
+
+  template <typename T>
+  void IntElement(uint32_t aIndex, T aValue) {
+    static_assert(!std::is_same_v<T, uint64_t>,
+                  "Narrowing uint64 -> int64 conversion not allowed");
+    FillUpTo(aIndex);
+    mWriter.IntElement(static_cast<int64_t>(aValue));
+  }
+
+  void DoubleElement(uint32_t aIndex, double aValue) {
+    FillUpTo(aIndex);
+    mWriter.DoubleElement(aValue);
+  }
+
+  void TimeMsElement(uint32_t aIndex, double aTime_ms) {
+    FillUpTo(aIndex);
+    mWriter.TimeDoubleMsElement(aTime_ms);
+  }
+
+ protected:
+  SpliceableJSONWriter& Writer() { return mWriter; }
+
+  void FillUpTo(uint32_t aIndex) {
+    MOZ_ASSERT(aIndex >= mNextIndex);
+    mWriter.NullElements(aIndex - mNextIndex);  // Pad the skipped slots.
+    mNextIndex = aIndex + 1;
+  }
+
+ private:
+  SpliceableJSONWriter& mWriter;
+  uint32_t mNextIndex = 0;  // Schema index of the next unwritten slot.
+};
+
+// Same as AutoArraySchemaWriter, but this can also write strings (output as
+// indexes into the table of unique strings).
+class MOZ_RAII AutoArraySchemaWithStringsWriter : public AutoArraySchemaWriter {
+ public:
+  AutoArraySchemaWithStringsWriter(SpliceableJSONWriter& aWriter,
+                                   UniqueJSONStrings& aStrings)
+      : AutoArraySchemaWriter(aWriter), mUniqueStrings(aStrings) {}
+
+  void StringElement(uint32_t aIndex, const Span<const char>& aValue) {
+    this->FillUpTo(aIndex);  // Null-pad any slots skipped before aIndex.
+    mUniqueStrings.WriteElement(this->Writer(), aValue);
+  }
+
+ private:
+  UniqueJSONStrings& mUniqueStrings;
+};
+
+Maybe<UniqueStacks::StackKey> UniqueStacks::BeginStack(const FrameKey& aFrame) {
+  // Yields Nothing when no index could be obtained for the frame.
+  const Maybe<uint32_t> frameIndex = GetOrAddFrameIndex(aFrame);
+  return frameIndex.isSome() ? Some(StackKey(*frameIndex))
+                             : Maybe<StackKey>();
+}
+
+Vector<JITFrameInfoForBufferRange>&&
+JITFrameInfo::MoveRangesWithNewFailureLatch(FailureLatch& aFailureLatch) && {
+  aFailureLatch.SetFailureFrom(mLocalFailureLatchSource);  // Done before handing out mRanges.
+  return std::move(mRanges);  // Rvalue reference to the member, not a copy.
+}
+
+UniquePtr<UniqueJSONStrings>&&
+JITFrameInfo::MoveUniqueStringsWithNewFailureLatch(
+    FailureLatch& aFailureLatch) && {
+  if (!mUniqueStrings) {  // No string table whose latch could be re-targeted.
+    aFailureLatch.SetFailureFrom(mLocalFailureLatchSource);
+  } else {
+    mUniqueStrings->ChangeFailureLatchAndForwardState(aFailureLatch);
+  }
+  return std::move(mUniqueStrings);
+}
+
+Maybe<UniqueStacks::StackKey> UniqueStacks::AppendFrame(
+    const StackKey& aStack, const FrameKey& aFrame) {
+  // The frame is only looked up once the prefix stack has an index.
+  const Maybe<uint32_t> prefixIndex = GetOrAddStackIndex(aStack);
+  const Maybe<uint32_t> frameIndex =
+      prefixIndex ? GetOrAddFrameIndex(aFrame) : Maybe<uint32_t>();
+  return frameIndex ? Some(StackKey(aStack, *prefixIndex, *frameIndex))
+                    : Maybe<StackKey>();
+}
+
+JITFrameInfoForBufferRange JITFrameInfoForBufferRange::Clone() const {
+  // Deep-copy the address -> frame-keys map; allocation failure is fatal.
+  JITAddressToJITFramesMap addressToFrames;
+  MOZ_RELEASE_ASSERT(
+      addressToFrames.reserve(mJITAddressToJITFramesMap.count()));
+  for (auto it = mJITAddressToJITFramesMap.iter(); !it.done(); it.next()) {
+    const auto& entry = it.get();
+    mozilla::Vector<JITFrameKey> keysCopy;
+    MOZ_RELEASE_ASSERT(keysCopy.appendAll(entry.value()));
+    addressToFrames.putNewInfallible(entry.key(), std::move(keysCopy));
+  }
+
+  // Same for the frame-key -> JSON map: capacity is reserved first, so the
+  // insertions themselves cannot fail.
+  JITFrameToFrameJSONMap frameToJSON;
+  MOZ_RELEASE_ASSERT(frameToJSON.reserve(mJITFrameToFrameJSONMap.count()));
+  for (auto it = mJITFrameToFrameJSONMap.iter(); !it.done(); it.next()) {
+    const auto& entry = it.get();
+    frameToJSON.putNewInfallible(entry.key(), entry.value());
+  }
+
+  return JITFrameInfoForBufferRange{mRangeStart, mRangeEnd,
+                                    std::move(addressToFrames),
+                                    std::move(frameToJSON)};
+}
+
+JITFrameInfo::JITFrameInfo(const JITFrameInfo& aOther,
+                           mozilla::ProgressLogger aProgressLogger)
+    : mUniqueStrings(MakeUniqueFallible<UniqueJSONStrings>(
+          mLocalFailureLatchSource, *aOther.mUniqueStrings,
+          aProgressLogger.CreateSubLoggerFromTo(
+              0_pc, "Creating JIT frame info unique strings...", 49_pc,
+              "Created JIT frame info unique strings"))) {
+  if (!mUniqueStrings) {
+    mLocalFailureLatchSource.SetFailure(
+        "OOM in JITFrameInfo allocating mUniqueStrings");
+    return;
+  }
+  // Space is reserved up front so that the appends below cannot fail.
+  if (!mRanges.reserve(aOther.mRanges.length())) {
+    mLocalFailureLatchSource.SetFailure("OOM in JITFrameInfo resizing mRanges");
+    return;
+  }
+  for (auto&& [i, progressLogger] :
+       aProgressLogger.CreateLoopSubLoggersFromTo(50_pc, 100_pc,
+                                                  aOther.mRanges.length(),
+                                                  "Copying JIT frame info")) {
+    mRanges.infallibleAppend(aOther.mRanges[i].Clone());
+  }
+}
+
+bool UniqueStacks::FrameKey::NormalFrameData::operator==(
+    const NormalFrameData& aOther) const {
+  const NormalFrameData& o = aOther;  // Short alias for the member-wise chain.
+  return mLocation == o.mLocation && mRelevantForJS == o.mRelevantForJS &&
+         mBaselineInterp == o.mBaselineInterp &&
+         mInnerWindowID == o.mInnerWindowID && mLine == o.mLine &&
+         mColumn == o.mColumn && mCategoryPair == o.mCategoryPair;
+}
+
+bool UniqueStacks::FrameKey::JITFrameData::operator==(
+    const JITFrameData& aOther) const {
+  return mRangeIndex == aOther.mRangeIndex && mDepth == aOther.mDepth &&
+         mCanonicalAddress == aOther.mCanonicalAddress;
+}
+
+// Consume aJITFrameInfo by stealing its string table and its JIT frame info
+// ranges. The JIT frame info contains JSON which refers to strings from the
+// JIT frame info's string table, so our string table needs to have the same
+// strings at the same indices.
+UniqueStacks::UniqueStacks(
+    FailureLatch& aFailureLatch, JITFrameInfo&& aJITFrameInfo,
+    ProfilerCodeAddressService* aCodeAddressService /* = nullptr */)
+    : mUniqueStrings(std::move(aJITFrameInfo)
+                         .MoveUniqueStringsWithNewFailureLatch(aFailureLatch)),
+      mCodeAddressService(aCodeAddressService),
+      mFrameTableWriter(aFailureLatch),
+      mStackTableWriter(aFailureLatch),
+      mJITInfoRanges(std::move(aJITFrameInfo)
+                         .MoveRangesWithNewFailureLatch(aFailureLatch)) {
+  if (mUniqueStrings) {
+    mFrameTableWriter.StartBareList();
+    mStackTableWriter.StartBareList();
+  } else {
+    // Without a string table, record the failure and leave the lists unopened.
+    SetFailure("Did not get mUniqueStrings from JITFrameInfo");
+  }
+}
+
+Maybe<uint32_t> UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) {
+  if (Failed()) {
+    return Nothing{};
+  }
+
+  // A stack that is not known yet gets the next free index.
+  uint32_t nextIndex = mStackToIndexMap.count();
+  auto slot = mStackToIndexMap.lookupForAdd(aStack);
+  if (!slot) {
+    if (!mStackToIndexMap.add(slot, aStack, nextIndex)) {
+      SetFailure("OOM in UniqueStacks::GetOrAddStackIndex");
+      return Nothing{};
+    }
+    StreamStack(aStack);  // First sighting: emit it into the stack table.
+    return Some(nextIndex);
+  }
+  MOZ_ASSERT(slot->value() < nextIndex);
+  return Some(slot->value());
+}
+
+Maybe<Vector<UniqueStacks::FrameKey>>
+UniqueStacks::LookupFramesForJITAddressFromBufferPos(void* aJITAddress,
+                                                     uint64_t aBufferPos) {
+  // Ranges are half-open [mRangeStart, mRangeEnd), so skip every range that
+  // ends at or before aBufferPos; the first one left is the only candidate.
+  JITFrameInfoForBufferRange* rangeIter =
+      std::lower_bound(mJITInfoRanges.begin(), mJITInfoRanges.end(), aBufferPos,
+                       [](const JITFrameInfoForBufferRange& aRange,
+                          uint64_t aPos) { return aRange.mRangeEnd <= aPos; });
+  MOZ_RELEASE_ASSERT(
+      rangeIter != mJITInfoRanges.end() &&
+          rangeIter->mRangeStart <= aBufferPos &&
+          aBufferPos < rangeIter->mRangeEnd,
+      "Buffer position of jit address needs to be in one of the ranges");
+  using JITFrameKey = JITFrameInfoForBufferRange::JITFrameKey;
+  const JITFrameInfoForBufferRange& jitFrameInfoRange = *rangeIter;
+  auto jitFrameKeys =
+      jitFrameInfoRange.mJITAddressToJITFramesMap.lookup(aJITAddress);
+  if (!jitFrameKeys) {
+    return Nothing();  // This range has no frames recorded for aJITAddress.
+  }
+
+  // Map the array of JITFrameKeys to an array of FrameKeys, and ensure that
+  // each of the FrameKeys exists in mFrameToIndexMap.
+  Vector<FrameKey> frameKeys;
+  MOZ_RELEASE_ASSERT(frameKeys.initCapacity(jitFrameKeys->value().length()));
+  for (const JITFrameKey& jitFrameKey : jitFrameKeys->value()) {
+    FrameKey frameKey(jitFrameKey.mCanonicalAddress, jitFrameKey.mDepth,
+                      rangeIter - mJITInfoRanges.begin());
+    uint32_t index = mFrameToIndexMap.count();
+    auto entry = mFrameToIndexMap.lookupForAdd(frameKey);
+    if (!entry) {
+      // We need to add this frame to our frame table. The JSON for this frame
+      // already exists in jitFrameInfoRange, we just need to splice it into
+      // the frame table and give it an index.
+      auto frameJSON =
+          jitFrameInfoRange.mJITFrameToFrameJSONMap.lookup(jitFrameKey);
+      MOZ_RELEASE_ASSERT(frameJSON, "Should have cached JSON for this frame");
+      mFrameTableWriter.Splice(frameJSON->value());
+      MOZ_RELEASE_ASSERT(mFrameToIndexMap.add(entry, frameKey, index));
+    }
+    MOZ_RELEASE_ASSERT(frameKeys.append(std::move(frameKey)));
+  }
+  return Some(std::move(frameKeys));
+}
+
+Maybe<uint32_t> UniqueStacks::GetOrAddFrameIndex(const FrameKey& aFrame) {
+  if (Failed()) {
+    return Nothing{};
+  }
+
+  // A frame that is not known yet gets the next free index.
+  uint32_t nextIndex = mFrameToIndexMap.count();
+  auto slot = mFrameToIndexMap.lookupForAdd(aFrame);
+  if (!slot) {
+    if (!mFrameToIndexMap.add(slot, aFrame, nextIndex)) {
+      SetFailure("OOM in UniqueStacks::GetOrAddFrameIndex");
+      return Nothing{};
+    }
+    StreamNonJITFrame(aFrame);  // First sighting: emit it into the frame table.
+    return Some(nextIndex);
+  }
+  MOZ_ASSERT(slot->value() < nextIndex);
+  return Some(slot->value());
+}
+
+void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) {
+  mFrameTableWriter.EndBareList();  // Ends the list started in the constructor.
+  aWriter.TakeAndSplice(mFrameTableWriter.TakeChunkedWriteFunc());
+}
+
+void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) {
+  mStackTableWriter.EndBareList();  // Ends the list started in the constructor.
+  aWriter.TakeAndSplice(mStackTableWriter.TakeChunkedWriteFunc());
+}
+
+[[nodiscard]] nsAutoCString UniqueStacks::FunctionNameOrAddress(void* aPC) {
+  nsAutoCString nameOrAddress;
+
+  const bool resolved = mCodeAddressService &&
+                        mCodeAddressService->GetFunction(aPC, nameOrAddress) &&
+                        !nameOrAddress.IsEmpty();
+  if (!resolved) {
+    // No usable symbol name: fall back to the address in hexadecimal.
+    nameOrAddress.AppendASCII("0x");
+    // `AppendInt` is only overloaded for `uint32_t` and `uint64_t`. Those are
+    // aliases for two of (`unsigned`, `unsigned long`, `unsigned long long`),
+    // and `uintptr_t` may be the third one, which would make the call
+    // ambiguous. So convert to whichever of `uint32_t`/`uint64_t` has the
+    // size of `uintptr_t`; that final cast should be a no-op.
+    using AddressInt = std::conditional_t<sizeof(uintptr_t) <= sizeof(uint32_t),
+                                          uint32_t, uint64_t>;
+    const auto address = reinterpret_cast<uintptr_t>(aPC);
+    nameOrAddress.AppendInt(static_cast<AddressInt>(address), 16);
+  }
+
+  return nameOrAddress;
+}
+
+void UniqueStacks::StreamStack(const StackKey& aStack) {
+  enum Schema : uint32_t { PREFIX = 0, FRAME = 1 };
+  // One [prefix, frame] row; a missing prefix ends up as a null element.
+  AutoArraySchemaWriter row(mStackTableWriter);
+  if (aStack.mPrefixStackIndex) {
+    row.IntElement(PREFIX, *aStack.mPrefixStackIndex);
+  }
+  row.IntElement(FRAME, aStack.mFrameIndex);
+}
+
+void UniqueStacks::StreamNonJITFrame(const FrameKey& aFrame) {
+  if (Failed()) {
+    return;
+  }
+
+  enum Schema : uint32_t {
+    LOCATION = 0,
+    RELEVANT_FOR_JS = 1,
+    INNER_WINDOW_ID = 2,
+    IMPLEMENTATION = 3,
+    LINE = 4,
+    COLUMN = 5,
+    CATEGORY = 6,
+    SUBCATEGORY = 7
+  };
+
+  // Opens the row now and closes it when this function returns.
+  AutoArraySchemaWithStringsWriter row(mFrameTableWriter, *mUniqueStrings);
+
+  const auto& frame = aFrame.mData.as<FrameKey::NormalFrameData>();
+  row.StringElement(LOCATION, frame.mLocation);
+  row.BoolElement(RELEVANT_FOR_JS, frame.mRelevantForJS);
+
+  // It's okay to convert uint64_t to double here because DOM always creates IDs
+  // that are convertible to double.
+  row.DoubleElement(INNER_WINDOW_ID, frame.mInnerWindowID);
+
+  // The C++ interpreter is the default implementation, so this column is only
+  // written for Baseline Interpreter frames.
+  if (frame.mBaselineInterp) {
+    row.StringElement(IMPLEMENTATION, MakeStringSpan("blinterp"));
+  }
+
+  // Line, column and category are optional; absent ones are skipped.
+  if (frame.mLine) {
+    row.IntElement(LINE, *frame.mLine);
+  }
+  if (frame.mColumn) {
+    row.IntElement(COLUMN, *frame.mColumn);
+  }
+  if (frame.mCategoryPair) {
+    const JS::ProfilingCategoryPairInfo& info =
+        JS::GetProfilingCategoryPairInfo(*frame.mCategoryPair);
+    row.IntElement(CATEGORY, static_cast<uint32_t>(info.mCategory));
+    row.IntElement(SUBCATEGORY, info.mSubcategoryIndex);
+  }
+}
+
+static void StreamJITFrame(JSContext* aContext, SpliceableJSONWriter& aWriter,
+                           UniqueJSONStrings& aUniqueStrings,
+                           const JS::ProfiledFrameHandle& aJITFrame) {
+  enum Schema : uint32_t {
+    LOCATION = 0,
+    RELEVANT_FOR_JS = 1,
+    INNER_WINDOW_ID = 2,
+    IMPLEMENTATION = 3,
+    LINE = 4,
+    COLUMN = 5,
+    CATEGORY = 6,
+    SUBCATEGORY = 7
+  };
+
+  AutoArraySchemaWithStringsWriter row(aWriter, aUniqueStrings);
+
+  row.StringElement(LOCATION, MakeStringSpan(aJITFrame.label()));
+  row.BoolElement(RELEVANT_FOR_JS, false);
+
+  // It's okay to convert uint64_t to double here because DOM always creates IDs
+  // that are convertible to double.
+  // Realm ID is the name of innerWindowID inside JS code.
+  row.DoubleElement(INNER_WINDOW_ID, aJITFrame.realmID());
+
+  JS::ProfilingFrameIterator::FrameKind frameKind = aJITFrame.frameKind();
+  MOZ_ASSERT(frameKind == JS::ProfilingFrameIterator::Frame_Ion ||
+             frameKind == JS::ProfilingFrameIterator::Frame_Baseline);
+  // Anything that is not Ion is reported as Baseline.
+  const bool isIon = frameKind == JS::ProfilingFrameIterator::Frame_Ion;
+  const auto implementation =
+      isIon ? MakeStringSpan("ion") : MakeStringSpan("baseline");
+  row.StringElement(IMPLEMENTATION, implementation);
+
+  const JS::ProfilingCategoryPairInfo& info = JS::GetProfilingCategoryPairInfo(
+      isIon ? JS::ProfilingCategoryPair::JS_IonMonkey
+            : JS::ProfilingCategoryPair::JS_Baseline);
+  row.IntElement(CATEGORY, static_cast<uint32_t>(info.mCategory));
+  row.IntElement(SUBCATEGORY, info.mSubcategoryIndex);
+}
+
+// Serializes `aJITFrame` with StreamJITFrame() into a fresh string and returns
+// that string. The temporary writer reports failures to the failure latch
+// obtained from `aUniqueStrings`.
+static nsCString JSONForJITFrame(JSContext* aContext,
+                                 const JS::ProfiledFrameHandle& aJITFrame,
+                                 UniqueJSONStrings& aUniqueStrings) {
+  nsCString frameJSON;
+  JSONStringRefWriteFunc frameJSONSink(frameJSON);
+  SpliceableJSONWriter frameWriter(frameJSONSink,
+                                   aUniqueStrings.SourceFailureLatch());
+  StreamJITFrame(aContext, frameWriter, aUniqueStrings, aJITFrame);
+  return frameJSON;
+}
+
+// Records JIT frame information for the buffer range
+// [aRangeStart, aRangeEnd).
+//
+// `aJITAddressProvider` is invoked once with a callback; it is expected to call
+// that callback for every JIT return address found in the range. For each
+// address not seen yet in this range, the profiled frames are looked up through
+// `aCx` and their JSON is cached. Ranges must be added in increasing,
+// non-overlapping order (release-asserted). Nothing happens if a failure is
+// already latched or if the range is empty; any OOM latches a failure.
+void JITFrameInfo::AddInfoForRange(
+    uint64_t aRangeStart, uint64_t aRangeEnd, JSContext* aCx,
+    const std::function<void(const std::function<void(void*)>&)>&
+        aJITAddressProvider) {
+  if (mLocalFailureLatchSource.Failed() || aRangeStart == aRangeEnd) {
+    return;
+  }
+
+  MOZ_RELEASE_ASSERT(aRangeStart < aRangeEnd);
+
+  if (!mRanges.empty()) {
+    const JITFrameInfoForBufferRange& prevRange = mRanges.back();
+    MOZ_RELEASE_ASSERT(prevRange.mRangeEnd <= aRangeStart,
+                       "Ranges must be non-overlapping and added in-order.");
+  }
+
+  using JITFrameKey = JITFrameInfoForBufferRange::JITFrameKey;
+
+  // Per-range caches, moved into the new range entry at the end.
+  JITFrameInfoForBufferRange::JITAddressToJITFramesMap framesByAddress;
+  JITFrameInfoForBufferRange::JITFrameToFrameJSONMap jsonByFrame;
+
+  aJITAddressProvider([&](void* aJITAddress) {
+    auto addressSlot = framesByAddress.lookupForAdd(aJITAddress);
+    if (addressSlot) {
+      // This address was already processed for this range.
+      return;
+    }
+
+    // One key per profiled frame at this address; the key's depth is the
+    // position of the frame in this list.
+    Vector<JITFrameKey> keysForAddress;
+    for (JS::ProfiledFrameHandle handle :
+         JS::GetProfiledFrames(aCx, aJITAddress)) {
+      const uint32_t depth = keysForAddress.length();
+      JITFrameKey key{handle.canonicalAddress(), depth};
+
+      // Only serialize the frame if this key has no JSON cached yet.
+      auto frameSlot = jsonByFrame.lookupForAdd(key);
+      if (!frameSlot &&
+          !jsonByFrame.add(frameSlot, key,
+                           JSONForJITFrame(aCx, handle, *mUniqueStrings))) {
+        mLocalFailureLatchSource.SetFailure(
+            "OOM in JITFrameInfo::AddInfoForRange adding jit->frame map");
+        return;
+      }
+
+      if (!keysForAddress.append(key)) {
+        mLocalFailureLatchSource.SetFailure(
+            "OOM in JITFrameInfo::AddInfoForRange adding jit frame key");
+        return;
+      }
+    }
+
+    if (!framesByAddress.add(addressSlot, aJITAddress,
+                             std::move(keysForAddress))) {
+      mLocalFailureLatchSource.SetFailure(
+          "OOM in JITFrameInfo::AddInfoForRange adding addr->jit map");
+    }
+  });
+
+  if (!mRanges.append(JITFrameInfoForBufferRange{
+          aRangeStart, aRangeEnd, std::move(framesByAddress),
+          std::move(jsonByFrame)})) {
+    mLocalFailureLatchSource.SetFailure(
+        "OOM in JITFrameInfo::AddInfoForRange adding range");
+  }
+}
+
+// One sample, as handed to WriteSample() for output.
+struct ProfileSample {
+  // Stack index, written as the STACK column. The parser in this file fills it
+  // from UniqueStacks::GetOrAddStackIndex(), or from the previous sample's
+  // index for "same sample" entries.
+  uint32_t mStack = 0;
+  // Sample time, written as the TIME column through TimeMsElement().
+  double mTime = 0.0;
+  // Optional value written as the EVENT_DELAY column; omitted when Nothing.
+  Maybe<double> mResponsiveness;
+  // Running-time measurements; each value that is present gets its own column
+  // (see PROFILER_FOR_EACH_RUNNING_TIME in WriteSample()).
+  RunningTimes mRunningTimes;
+};
+
+// Emits a CPU measurement whose unit is "Delta", i.e. an amount of work done
+// since the previous sample, into column `aProperty` of `aSchemaWriter`.
+// WriteSample() reaches this function through `Write##unit` token pasting, so
+// the name must stay in sync with the unit name.
+static void WriteDelta(AutoArraySchemaWriter& aSchemaWriter, uint32_t aProperty,
+                       uint64_t aDelta) {
+  const int64_t deltaAsInt = static_cast<int64_t>(aDelta);
+  aSchemaWriter.IntElement(aProperty, deltaAsInt);
+}
+
+// Writes `aSample` as one schema-indexed array row into `aWriter`.
+//
+// STACK and TIME are always written. EVENT_DELAY is written only when
+// `mResponsiveness` holds a value, and each running-time column only when the
+// corresponding measurement is present in `mRunningTimes`.
+static void WriteSample(SpliceableJSONWriter& aWriter,
+                        const ProfileSample& aSample) {
+  // Column indices. The running-time columns are appended after EVENT_DELAY,
+  // one enumerator per entry of PROFILER_FOR_EACH_RUNNING_TIME, in that
+  // macro's order.
+  enum Schema : uint32_t {
+    STACK = 0,
+    TIME = 1,
+    EVENT_DELAY = 2
+#define RUNNING_TIME_SCHEMA(index, name, unit, jsonProperty) , name
+    PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SCHEMA)
+#undef RUNNING_TIME_SCHEMA
+  };
+
+  AutoArraySchemaWriter writer(aWriter);
+
+  writer.IntElement(STACK, aSample.mStack);
+
+  writer.TimeMsElement(TIME, aSample.mTime);
+
+  if (aSample.mResponsiveness.isSome()) {
+    writer.DoubleElement(EVENT_DELAY, *aSample.mResponsiveness);
+  }
+
+  // For each running-time entry, fetch the optional JSON value and, when it is
+  // present, write it with the unit-specific writer (`Write##unit`, e.g.
+  // WriteDelta) into the column named after the entry.
+#define RUNNING_TIME_STREAM(index, name, unit, jsonProperty) \
+  aSample.mRunningTimes.GetJson##name##unit().apply(         \
+      [&writer](const uint64_t& aValue) {                    \
+        Write##unit(writer, name, aValue);                   \
+      });
+
+  PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_STREAM)
+
+#undef RUNNING_TIME_STREAM
+}
+
+// Streams one marker entry. `aER` must be positioned just after the entry
+// "kind", i.e. the caller has already read the kind and found it to be a
+// Marker.
+//
+// The work is delegated to DeserializeAfterKindAndStream(), which is given
+// three callbacks:
+//  - thread lookup: maps the marker's thread id to that thread's markers
+//    writer, or nullptr when `aProcessStreamingContext` has no context for the
+//    thread. It also remembers the thread context in `threadData` for the
+//    other two callbacks.
+//  - backtrace streaming: streams a stack stored in a chunked buffer into the
+//    thread's markers writer.
+//  - tag-based payload streaming: hands the remaining raw payload bytes to
+//    gecko_profiler_serialize_marker_for_tag().
+static void StreamMarkerAfterKind(
+    ProfileBufferEntryReader& aER,
+    ProcessStreamingContext& aProcessStreamingContext) {
+  ThreadStreamingContext* threadData = nullptr;
+  mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream(
+      aER,
+      [&](ProfilerThreadId aThreadId) -> baseprofiler::SpliceableJSONWriter* {
+        threadData =
+            aProcessStreamingContext.GetThreadStreamingContext(aThreadId);
+        return threadData ? &threadData->mMarkersDataWriter : nullptr;
+      },
+      [&](ProfileChunkedBuffer& aChunkedBuffer) {
+        ProfilerBacktrace backtrace("", &aChunkedBuffer);
+        MOZ_ASSERT(threadData,
+                   "threadData should have been set before calling here");
+        backtrace.StreamJSON(threadData->mMarkersDataWriter,
+                             aProcessStreamingContext.ProcessStartTime(),
+                             *threadData->mUniqueStacks);
+      },
+      [&](mozilla::base_profiler_markers_detail::Streaming::DeserializerTag
+              aTag) {
+        MOZ_ASSERT(threadData,
+                   "threadData should have been set before calling here");
+
+        // Everything left in the entry is the payload.
+        size_t payloadSize = aER.RemainingBytes();
+
+        ProfileBufferEntryReader::DoubleSpanOfConstBytes spans =
+            aER.ReadSpans(payloadSize);
+        if (MOZ_LIKELY(spans.IsSingleSpan())) {
+          // Only a single span, we can just refer to it directly
+          // instead of copying it.
+          profiler::ffi::gecko_profiler_serialize_marker_for_tag(
+              aTag, spans.mFirstOrOnly.Elements(), payloadSize,
+              &threadData->mMarkersDataWriter);
+        } else {
+          // Two spans, we need to concatenate them by copying. The temporary
+          // buffer is owned by a UniquePtr, so it is released when leaving
+          // this scope without a manual `delete[]`.
+          UniquePtr<uint8_t[]> payloadBuffer =
+              MakeUnique<uint8_t[]>(payloadSize);
+          spans.CopyBytesTo(payloadBuffer.get());
+          profiler::ffi::gecko_profiler_serialize_marker_for_tag(
+              aTag, payloadBuffer.get(), payloadSize,
+              &threadData->mMarkersDataWriter);
+        }
+      });
+}
+
+// Forward cursor over a ProfileChunkedBuffer that only exposes "legacy"
+// entries, i.e. those that can be read into a `ProfileBufferEntry`.
+//
+// "Modern" entries are skipped transparently. If a marker streaming context
+// was provided, Marker entries are streamed through StreamMarkerAfterKind() as
+// they are skipped. The cursor reports "no more entries" as soon as the
+// associated failure latch has failed. Progress is reported to the given
+// ProgressLogger as the relative position inside the buffer range.
+class EntryGetter {
+ public:
+  // Starts reading `aReader` at `aInitialReadPos` and positions the cursor on
+  // the first legacy entry found from there (or at the end).
+  // `aStreamingContextForMarkers` may be null, in which case markers are
+  // skipped without being streamed.
+  explicit EntryGetter(
+      ProfileChunkedBuffer::Reader& aReader,
+      mozilla::FailureLatch& aFailureLatch,
+      mozilla::ProgressLogger aProgressLogger = {},
+      uint64_t aInitialReadPos = 0,
+      ProcessStreamingContext* aStreamingContextForMarkers = nullptr)
+      : mFailureLatch(aFailureLatch),
+        mStreamingContextForMarkers(aStreamingContextForMarkers),
+        mBlockIt(
+            aReader.At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                aInitialReadPos))),
+        mBlockItEnd(aReader.end()),
+        mRangeStart(mBlockIt.BufferRangeStart().ConvertToProfileBufferIndex()),
+        mRangeSize(
+            double(mBlockIt.BufferRangeEnd().ConvertToProfileBufferIndex() -
+                   mRangeStart)),
+        mProgressLogger(std::move(aProgressLogger)) {
+    SetLocalProgress(ProgressLogger::NO_LOCATION_UPDATE);
+    if (!ReadLegacyOrEnd()) {
+      // Find and read the next non-legacy entry.
+      Next();
+    }
+  }
+
+  // True when a legacy entry is available through `Get()`. False at the end of
+  // the buffer, and also whenever the failure latch has failed.
+  bool Has() const {
+    return (!mFailureLatch.Failed()) && (mBlockIt != mBlockItEnd);
+  }
+
+  // Returns the legacy entry at the current position.
+  const ProfileBufferEntry& Get() const {
+    MOZ_ASSERT(Has() || mFailureLatch.Failed(),
+               "Caller should have checked `Has()` before `Get()`");
+    return mEntry;
+  }
+
+  // Advances to the next legacy entry, or to the end.
+  void Next() {
+    MOZ_ASSERT(Has() || mFailureLatch.Failed(),
+               "Caller should have checked `Has()` before `Next()`");
+    ++mBlockIt;
+    ReadUntilLegacyOrEnd();
+  }
+
+  // Hand off the current iterator to the caller, which may be used to read
+  // any kind of entries (legacy or modern).
+  ProfileChunkedBuffer::BlockIterator Iterator() const { return mBlockIt; }
+
+  // After `Iterator()` was used, we can restart from *after* its updated
+  // position.
+  void RestartAfter(const ProfileChunkedBuffer::BlockIterator& it) {
+    mBlockIt = it;
+    if (!Has()) {
+      // `it` was already at the end (or a failure was latched): nothing to
+      // advance past.
+      return;
+    }
+    Next();
+  }
+
+  // Block index of the current position.
+  ProfileBufferBlockIndex CurBlockIndex() const {
+    return mBlockIt.CurrentBlockIndex();
+  }
+
+  // Current position expressed as a profile buffer index.
+  uint64_t CurPos() const {
+    return CurBlockIndex().ConvertToProfileBufferIndex();
+  }
+
+  // Reports progress as (current position - range start) / range size, with
+  // `aLocation` as the accompanying location string.
+  void SetLocalProgress(const char* aLocation) {
+    mProgressLogger.SetLocalProgress(
+        ProportionValue{double(CurBlockIndex().ConvertToProfileBufferIndex() -
+                               mRangeStart) /
+                        mRangeSize},
+        aLocation);
+  }
+
+ private:
+  // Try to read the entry at the current `mBlockIt` position.
+  // * If we're at the end of the buffer, just return `true`.
+  // * If there is a "legacy" entry (containing a real `ProfileBufferEntry`),
+  //   read it into `mEntry`, and return `true` as well.
+  // * Otherwise the entry contains a "modern" type that cannot be read into
+  // `mEntry`, return `false` (so `EntryGetter` can skip to another entry).
+  bool ReadLegacyOrEnd() {
+    if (!Has()) {
+      return true;
+    }
+    // Read the entry "kind", which is always at the start of all entries.
+    ProfileBufferEntryReader er = *mBlockIt;
+    auto type = static_cast<ProfileBufferEntry::Kind>(
+        er.ReadObject<ProfileBufferEntry::KindUnderlyingType>());
+    MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) <
+               static_cast<ProfileBufferEntry::KindUnderlyingType>(
+                   ProfileBufferEntry::Kind::MODERN_LIMIT));
+    if (type >= ProfileBufferEntry::Kind::LEGACY_LIMIT) {
+      // Modern entry. Markers are streamed right away when a streaming
+      // context was provided; every other modern kind is ignored here.
+      if (type == ProfileBufferEntry::Kind::Marker &&
+          mStreamingContextForMarkers) {
+        StreamMarkerAfterKind(er, *mStreamingContextForMarkers);
+        if (!Has()) {
+          // `mBlockIt` was not moved, so `Has()` can only have become false
+          // through the failure latch; report "end" so callers stop.
+          return true;
+        }
+        SetLocalProgress("Processed marker");
+      }
+      // Mark the rest of this entry as consumed before dropping the reader.
+      er.SetRemainingBytes(0);
+      return false;
+    }
+    // Here, we have a legacy item, we need to read it from the start.
+    // Because the above `ReadObject` moved the reader, we need to reset it to
+    // the start of the entry before reading the whole entry.
+    er = *mBlockIt;
+    er.ReadBytes(&mEntry, er.RemainingBytes());
+    return true;
+  }
+
+  // Starting at the current position, skips modern entries until a legacy
+  // entry has been read into `mEntry` or the end is reached, then updates the
+  // progress.
+  void ReadUntilLegacyOrEnd() {
+    for (;;) {
+      if (ReadLegacyOrEnd()) {
+        // Either we're at the end, or we could read a legacy entry -> Done.
+        break;
+      }
+      // Otherwise loop around until we hit a legacy entry or the end.
+      ++mBlockIt;
+    }
+    SetLocalProgress(ProgressLogger::NO_LOCATION_UPDATE);
+  }
+
+  // Once this latch has failed, `Has()` returns false.
+  mozilla::FailureLatch& mFailureLatch;
+
+  // When non-null, Marker entries are streamed into this context as they are
+  // skipped.
+  ProcessStreamingContext* const mStreamingContextForMarkers;
+
+  // Copy of the legacy entry at the current position; only meaningful while
+  // `Has()` is true.
+  ProfileBufferEntry mEntry;
+  ProfileChunkedBuffer::BlockIterator mBlockIt;
+  const ProfileChunkedBuffer::BlockIterator mBlockItEnd;
+
+  // Progress logger, and the data needed to compute the current relative
+  // position in the buffer.
+  const mozilla::ProfileBufferIndex mRangeStart;
+  const double mRangeSize;
+  mozilla::ProgressLogger mProgressLogger;
+};
+
+// The following grammar shows legal sequences of profile buffer entries.
+// The sequences beginning with a ThreadId entry are known as "samples".
+//
+// (
+//   ( /* Samples */
+//     ThreadId
+//     TimeBeforeCompactStack
+//     RunningTimes?
+//     UnresponsivenessDurationMs?
+//     CompactStack
+//         /* internally including:
+//           ( NativeLeafAddr
+//           | Label FrameFlags? DynamicStringFragment*
+//             InnerWindowID? LineNumber? ColumnNumber? CategoryPair?
+//           | JitReturnAddr
+//           )+
+//         */
+//   )
+//   | ( /* Reference to a previous identical sample */
+//       ThreadId
+//       TimeBeforeSameSample
+//       RunningTimes?
+//       SameSample
+//     )
+//   | Marker
+//   | ( /* Counters */
+//       CounterId
+//       Time
+//       (
+//         CounterKey
+//         Count
+//         Number?
+//       )*
+//     )
+//   | CollectionStart
+//   | CollectionEnd
+//   | Pause
+//   | Resume
+//   | ( ProfilerOverheadTime /* Sampling start timestamp */
+//       ProfilerOverheadDuration /* Lock acquisition */
+//       ProfilerOverheadDuration /* Expired markers cleaning */
+//       ProfilerOverheadDuration /* Counters */
+//       ProfilerOverheadDuration /* Threads */
+//     )
+// )*
+//
+// The most complicated part is the stack entry sequence that begins with
+// Label. Here are some examples.
+//
+// - ProfilingStack frames without a dynamic string:
+//
+//     Label("js::RunScript")
+//     CategoryPair(JS::ProfilingCategoryPair::JS)
+//
+//     Label("XREMain::XRE_main")
+//     LineNumber(4660)
+//     CategoryPair(JS::ProfilingCategoryPair::OTHER)
+//
+//     Label("ElementRestyler::ComputeStyleChangeFor")
+//     LineNumber(3003)
+//     CategoryPair(JS::ProfilingCategoryPair::CSS)
+//
+// - ProfilingStack frames with a dynamic string:
+//
+//     Label("nsObserverService::NotifyObservers")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+//     DynamicStringFragment("domwindo")
+//     DynamicStringFragment("wopened")
+//     LineNumber(291)
+//     CategoryPair(JS::ProfilingCategoryPair::OTHER)
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+//     DynamicStringFragment("closeWin")
+//     DynamicStringFragment("dow (chr")
+//     DynamicStringFragment("ome://gl")
+//     DynamicStringFragment("obal/con")
+//     DynamicStringFragment("tent/glo")
+//     DynamicStringFragment("balOverl")
+//     DynamicStringFragment("ay.js:5)")
+//     DynamicStringFragment("")          # this string holds the closing '\0'
+//     LineNumber(25)
+//     CategoryPair(JS::ProfilingCategoryPair::JS)
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+//     DynamicStringFragment("bound (s")
+//     DynamicStringFragment("elf-host")
+//     DynamicStringFragment("ed:914)")
+//     LineNumber(945)
+//     CategoryPair(JS::ProfilingCategoryPair::JS)
+//
+// - A profiling stack frame with an overly long dynamic string:
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+//     DynamicStringFragment("(too lon")
+//     DynamicStringFragment("g)")
+//     LineNumber(100)
+//     CategoryPair(JS::ProfilingCategoryPair::NETWORK)
+//
+// - A wasm JIT frame:
+//
+//     Label("")
+//     FrameFlags(uint64_t(0))
+//     DynamicStringFragment("wasm-fun")
+//     DynamicStringFragment("ction[87")
+//     DynamicStringFragment("36] (blo")
+//     DynamicStringFragment("b:http:/")
+//     DynamicStringFragment("/webasse")
+//     DynamicStringFragment("mbly.org")
+//     DynamicStringFragment("/3dc5759")
+//     DynamicStringFragment("4-ce58-4")
+//     DynamicStringFragment("626-975b")
+//     DynamicStringFragment("-08ad116")
+//     DynamicStringFragment("30bc1:38")
+//     DynamicStringFragment("29856)")
+//
+// - A JS frame in a synchronous sample:
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+//     DynamicStringFragment("u (https")
+//     DynamicStringFragment("://perf-")
+//     DynamicStringFragment("html.io/")
+//     DynamicStringFragment("ac0da204")
+//     DynamicStringFragment("aaa44d75")
+//     DynamicStringFragment("a800.bun")
+//     DynamicStringFragment("dle.js:2")
+//     DynamicStringFragment("5)")
+
+// Because this is a format entirely internal to the Profiler, any parsing
+// error indicates a bug in the ProfileBuffer writing or the parser itself,
+// or possibly flaky hardware.
+//
+// Prints `msg` as its own line on stderr, triggers MOZ_ASSERT, and then
+// `continue`s the enclosing loop, so this macro may only be expanded directly
+// inside a loop body. `msg` is also passed to MOZ_ASSERT as its explanation.
+// The expansion is a plain brace block rather than `do { } while (0)` on
+// purpose: inside a do/while the `continue` would target that do/while instead
+// of the caller's loop.
+#define ERROR_AND_CONTINUE(msg)                              \
+  {                                                          \
+    fprintf(stderr, "ProfileBuffer parse error: %s\n", msg); \
+    MOZ_ASSERT(false, msg);                                  \
+    continue;                                                \
+  }
+
+// Per-thread output state needed while streaming samples, bundled so that the
+// thread lookup callback of DoStreamSamplesAndMarkersToJSON() can return it in
+// a single Maybe<>. All members are references: the referenced objects are
+// owned by the caller and are updated in place while samples are processed.
+struct StreamingParametersForThread {
+  // Writer receiving this thread's samples.
+  SpliceableJSONWriter& mWriter;
+  // Stack/frame tables in which this thread's stacks are registered.
+  UniqueStacks& mUniqueStacks;
+  // Whether a full sample was already processed for this thread and, if so,
+  // whether its stack was empty; consulted for "same sample" entries.
+  ThreadStreamingContext::PreviousStackState& mPreviousStackState;
+  // Stack index of the last full sample, reused by "same sample" entries.
+  uint32_t& mPreviousStack;
+
+  StreamingParametersForThread(
+      SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks,
+      ThreadStreamingContext::PreviousStackState& aPreviousStackState,
+      uint32_t& aPreviousStack)
+      : mWriter(aWriter),
+        mUniqueStacks(aUniqueStacks),
+        mPreviousStackState(aPreviousStackState),
+        mPreviousStack(aPreviousStack) {}
+};
+
+// Parses the whole profile buffer and streams the samples it contains (and,
+// optionally, the markers) as JSON.
+//
+// GetStreamingParametersForThreadCallback:
+//   (ProfilerThreadId) -> Maybe<StreamingParametersForThread>
+//   Called once per sample with the sample's thread id. Returning Nothing
+//   makes the parser ignore that sample.
+//
+// aFailureLatch: parsing stops as soon as this latch has failed.
+// aSinceTime: samples older than this are still parsed (so their stacks are
+//   registered), but are not written.
+// aStreamingContextForMarkers: when non-null, Marker entries met while parsing
+//   are streamed into it through StreamMarkerAfterKind().
+// aProgressLogger: receives progress updates while the buffer is traversed.
+//
+// Returns the thread id of the last sample whose stack was read by this call;
+// "same sample" entries do not update it. The result is a default-constructed
+// ProfilerThreadId when no such sample was found.
+template <typename GetStreamingParametersForThreadCallback>
+ProfilerThreadId ProfileBuffer::DoStreamSamplesAndMarkersToJSON(
+    mozilla::FailureLatch& aFailureLatch,
+    GetStreamingParametersForThreadCallback&&
+        aGetStreamingParametersForThreadCallback,
+    double aSinceTime, ProcessStreamingContext* aStreamingContextForMarkers,
+    mozilla::ProgressLogger aProgressLogger) const {
+  // Scratch buffer in which the dynamic string fragments of a label frame are
+  // reassembled. It is allocated once and reused for every frame.
+  UniquePtr<char[]> dynStrBuf = MakeUnique<char[]>(kMaxFrameKeyLength);
+
+  return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    ProfilerThreadId processedThreadId;
+
+    EntryGetter e(*aReader, aFailureLatch, std::move(aProgressLogger),
+                  /* aInitialReadPos */ 0, aStreamingContextForMarkers);
+
+    for (;;) {
+      // This block skips entries until we find the start of the next sample.
+      // This is useful in three situations.
+      //
+      // - The circular buffer overwrites old entries, so when we start parsing
+      //   we might be in the middle of a sample, and we must skip forward to
+      //   the start of the next sample.
+      //
+      // - We skip samples that don't have an appropriate ThreadId or Time.
+      //
+      // - We skip range Pause, Resume, CollectionStart, Marker, Counter
+      //   and CollectionEnd entries between samples.
+      while (e.Has()) {
+        if (e.Get().IsThreadId()) {
+          break;
+        }
+        e.Next();
+      }
+
+      if (!e.Has()) {
+        break;
+      }
+
+      // Due to the skip_to_next_sample block above, if we have an entry here it
+      // must be a ThreadId entry.
+      MOZ_ASSERT(e.Get().IsThreadId());
+
+      ProfilerThreadId threadId = e.Get().GetThreadId();
+      e.Next();
+
+      Maybe<StreamingParametersForThread> streamingParameters =
+          std::forward<GetStreamingParametersForThreadCallback>(
+              aGetStreamingParametersForThreadCallback)(threadId);
+
+      // Ignore samples that are for the wrong thread.
+      if (!streamingParameters) {
+        continue;
+      }
+
+      SpliceableJSONWriter& writer = streamingParameters->mWriter;
+      UniqueStacks& uniqueStacks = streamingParameters->mUniqueStacks;
+      ThreadStreamingContext::PreviousStackState& previousStackState =
+          streamingParameters->mPreviousStackState;
+      uint32_t& previousStack = streamingParameters->mPreviousStack;
+
+      // Reads consecutive stack frame entries from `e` (which may be the main
+      // entry getter, or one reading a temporary buffer holding a compact
+      // stack), registers the resulting stack in `uniqueStacks`, remembers it
+      // as this thread's previous stack, and writes the sample unless it is
+      // too old or empty. `entryPosition` is the buffer position used for the
+      // JIT frame lookup; when it is 0 the current position of `e` is used
+      // instead.
+      auto ReadStack = [&](EntryGetter& e, double time, uint64_t entryPosition,
+                           const Maybe<double>& unresponsiveDuration,
+                           const RunningTimes& runningTimes) {
+        if (writer.Failed()) {
+          return;
+        }
+
+        Maybe<UniqueStacks::StackKey> maybeStack =
+            uniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)"));
+        if (!maybeStack) {
+          writer.SetFailure("BeginStack failure");
+          return;
+        }
+
+        UniqueStacks::StackKey stack = *maybeStack;
+
+        // Counts buffer entries that start a frame (a JIT return address
+        // counts once, even if it expands to several frames).
+        int numFrames = 0;
+        while (e.Has()) {
+          if (e.Get().IsNativeLeafAddr()) {
+            numFrames++;
+
+            void* pc = e.Get().GetPtr();
+            e.Next();
+
+            nsAutoCString functionNameOrAddress =
+                uniqueStacks.FunctionNameOrAddress(pc);
+
+            maybeStack = uniqueStacks.AppendFrame(
+                stack, UniqueStacks::FrameKey(functionNameOrAddress.get()));
+            if (!maybeStack) {
+              writer.SetFailure("AppendFrame failure");
+              return;
+            }
+            stack = *maybeStack;
+
+          } else if (e.Get().IsLabel()) {
+            numFrames++;
+
+            const char* label = e.Get().GetString();
+            e.Next();
+
+            // The entries below are all optional, and are consumed in this
+            // fixed order: FrameFlags, DynamicStringFragment*, InnerWindowID,
+            // LineNumber, ColumnNumber, CategoryPair.
+            using FrameFlags = js::ProfilingStackFrame::Flags;
+            uint32_t frameFlags = 0;
+            if (e.Has() && e.Get().IsFrameFlags()) {
+              frameFlags = uint32_t(e.Get().GetUint64());
+              e.Next();
+            }
+
+            bool relevantForJS =
+                frameFlags & uint32_t(FrameFlags::RELEVANT_FOR_JS);
+
+            bool isBaselineInterp =
+                frameFlags & uint32_t(FrameFlags::IS_BLINTERP_FRAME);
+
+            // Copy potential dynamic string fragments into dynStrBuf, so that
+            // dynStrBuf will then contain the entire dynamic string.
+            size_t i = 0;
+            dynStrBuf[0] = '\0';
+            while (e.Has()) {
+              if (e.Get().IsDynamicStringFragment()) {
+                char chars[ProfileBufferEntry::kNumChars];
+                e.Get().CopyCharsInto(chars);
+                for (char c : chars) {
+                  // Characters past the buffer capacity are dropped.
+                  if (i < kMaxFrameKeyLength) {
+                    dynStrBuf[i] = c;
+                    i++;
+                  }
+                }
+                e.Next();
+              } else {
+                break;
+              }
+            }
+            // Guarantee termination even if the buffer was filled completely.
+            dynStrBuf[kMaxFrameKeyLength - 1] = '\0';
+            bool hasDynamicString = (i != 0);
+
+            // Build the displayed label from the static label and/or the
+            // dynamic string; the frame flags select how the two are joined.
+            nsAutoCStringN<1024> frameLabel;
+            if (label[0] != '\0' && hasDynamicString) {
+              if (frameFlags & uint32_t(FrameFlags::STRING_TEMPLATE_METHOD)) {
+                frameLabel.AppendPrintf("%s.%s", label, dynStrBuf.get());
+              } else if (frameFlags &
+                         uint32_t(FrameFlags::STRING_TEMPLATE_GETTER)) {
+                frameLabel.AppendPrintf("get %s.%s", label, dynStrBuf.get());
+              } else if (frameFlags &
+                         uint32_t(FrameFlags::STRING_TEMPLATE_SETTER)) {
+                frameLabel.AppendPrintf("set %s.%s", label, dynStrBuf.get());
+              } else {
+                frameLabel.AppendPrintf("%s %s", label, dynStrBuf.get());
+              }
+            } else if (hasDynamicString) {
+              frameLabel.Append(dynStrBuf.get());
+            } else {
+              frameLabel.Append(label);
+            }
+
+            uint64_t innerWindowID = 0;
+            if (e.Has() && e.Get().IsInnerWindowID()) {
+              innerWindowID = uint64_t(e.Get().GetUint64());
+              e.Next();
+            }
+
+            Maybe<unsigned> line;
+            if (e.Has() && e.Get().IsLineNumber()) {
+              line = Some(unsigned(e.Get().GetInt()));
+              e.Next();
+            }
+
+            Maybe<unsigned> column;
+            if (e.Has() && e.Get().IsColumnNumber()) {
+              column = Some(unsigned(e.Get().GetInt()));
+              e.Next();
+            }
+
+            Maybe<JS::ProfilingCategoryPair> categoryPair;
+            if (e.Has() && e.Get().IsCategoryPair()) {
+              categoryPair =
+                  Some(JS::ProfilingCategoryPair(uint32_t(e.Get().GetInt())));
+              e.Next();
+            }
+
+            maybeStack = uniqueStacks.AppendFrame(
+                stack,
+                UniqueStacks::FrameKey(std::move(frameLabel), relevantForJS,
+                                       isBaselineInterp, innerWindowID, line,
+                                       column, categoryPair));
+            if (!maybeStack) {
+              writer.SetFailure("AppendFrame failure");
+              return;
+            }
+            stack = *maybeStack;
+
+          } else if (e.Get().IsJitReturnAddr()) {
+            numFrames++;
+
+            // A JIT frame may expand to multiple frames due to inlining.
+            void* pc = e.Get().GetPtr();
+            const Maybe<Vector<UniqueStacks::FrameKey>>& frameKeys =
+                uniqueStacks.LookupFramesForJITAddressFromBufferPos(
+                    pc, entryPosition ? entryPosition : e.CurPos());
+            MOZ_RELEASE_ASSERT(
+                frameKeys,
+                "Attempting to stream samples for a buffer range "
+                "for which we don't have JITFrameInfo?");
+            for (const UniqueStacks::FrameKey& frameKey : *frameKeys) {
+              maybeStack = uniqueStacks.AppendFrame(stack, frameKey);
+              if (!maybeStack) {
+                writer.SetFailure("AppendFrame failure");
+                return;
+              }
+              stack = *maybeStack;
+            }
+
+            e.Next();
+
+          } else {
+            // Not a frame entry: the stack ends here.
+            break;
+          }
+        }
+
+        // Even if this stack is considered empty, it contains the root frame,
+        // which needs to be in the JSON output because following "same samples"
+        // may refer to it when reusing this sample.mStack.
+        const Maybe<uint32_t> stackIndex =
+            uniqueStacks.GetOrAddStackIndex(stack);
+        if (!stackIndex) {
+          writer.SetFailure("Can't add unique string for stack");
+          return;
+        }
+
+        // And store that possibly-empty stack in case it's followed by "same
+        // sample" entries.
+        previousStack = *stackIndex;
+        previousStackState = (numFrames == 0)
+                                 ? ThreadStreamingContext::eStackWasEmpty
+                                 : ThreadStreamingContext::eStackWasNotEmpty;
+
+        // Even if too old or empty, we did process a sample for this thread id.
+        processedThreadId = threadId;
+
+        // Discard samples that are too old.
+        if (time < aSinceTime) {
+          return;
+        }
+
+        if (numFrames == 0 && runningTimes.IsEmpty()) {
+          // It is possible to have empty stacks if native stackwalking is
+          // disabled. Skip samples with empty stacks, unless we have useful
+          // running times.
+          return;
+        }
+
+        WriteSample(writer, ProfileSample{*stackIndex, time,
+                                          unresponsiveDuration, runningTimes});
+      };  // End of `ReadStack(EntryGetter&)` lambda.
+
+      // The entry following ThreadId selects the kind of sample:
+      // - Time: the stack entries follow directly in this buffer.
+      // - TimeBeforeCompactStack: the stack is stored in a CompactStack entry.
+      // - TimeBeforeSameSample: the previous stack of this thread is reused.
+      if (e.Has() && e.Get().IsTime()) {
+        double time = e.Get().GetDouble();
+        e.Next();
+        // Note: Even if this sample is too old (before aSinceTime), we still
+        // need to read it, so that its frames are in the tables, in case there
+        // is a same-sample following it that would be after aSinceTime, which
+        // would need these frames to be present.
+
+        ReadStack(e, time, 0, Nothing{}, RunningTimes{});
+
+        e.SetLocalProgress("Processed sample");
+      } else if (e.Has() && e.Get().IsTimeBeforeCompactStack()) {
+        double time = e.Get().GetDouble();
+        // Note: Even if this sample is too old (before aSinceTime), we still
+        // need to read it, so that its frames are in the tables, in case there
+        // is a same-sample following it that would be after aSinceTime, which
+        // would need these frames to be present.
+
+        RunningTimes runningTimes;
+        Maybe<double> unresponsiveDuration;
+
+        // The entries up to the CompactStack are "modern" ones, which
+        // `EntryGetter` would skip; walk them with a raw block iterator
+        // instead, and resynchronize `e` afterwards.
+        ProfileChunkedBuffer::BlockIterator it = e.Iterator();
+        for (;;) {
+          ++it;
+          if (it.IsAtEnd()) {
+            break;
+          }
+          ProfileBufferEntryReader er = *it;
+          ProfileBufferEntry::Kind kind =
+              er.ReadObject<ProfileBufferEntry::Kind>();
+
+          // There may be running times before the CompactStack.
+          if (kind == ProfileBufferEntry::Kind::RunningTimes) {
+            er.ReadIntoObject(runningTimes);
+            continue;
+          }
+
+          // There may be an UnresponsiveDurationMs before the CompactStack.
+          if (kind == ProfileBufferEntry::Kind::UnresponsiveDurationMs) {
+            unresponsiveDuration = Some(er.ReadObject<double>());
+            continue;
+          }
+
+          if (kind == ProfileBufferEntry::Kind::CompactStack) {
+            // Deserialize the compact stack into a temporary buffer, and read
+            // the stack from there with a dedicated entry getter.
+            ProfileChunkedBuffer tempBuffer(
+                ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+                WorkerChunkManager());
+            er.ReadIntoObject(tempBuffer);
+            tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+              MOZ_ASSERT(aReader,
+                         "Local ProfileChunkedBuffer cannot be out-of-session");
+              // This is a compact stack, it should only contain one sample.
+              EntryGetter stackEntryGetter(*aReader, aFailureLatch);
+              ReadStack(stackEntryGetter, time,
+                        it.CurrentBlockIndex().ConvertToProfileBufferIndex(),
+                        unresponsiveDuration, runningTimes);
+            });
+            // Hand the temporary buffer's chunks back to the chunk manager.
+            WorkerChunkManager().Reset(tempBuffer.GetAllChunks());
+            break;
+          }
+
+          if (kind == ProfileBufferEntry::Kind::Marker &&
+              aStreamingContextForMarkers) {
+            StreamMarkerAfterKind(er, *aStreamingContextForMarkers);
+            continue;
+          }
+
+          MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT,
+                     "There should be no legacy entries between "
+                     "TimeBeforeCompactStack and CompactStack");
+          // Ignore the rest of this unrelated entry.
+          er.SetRemainingBytes(0);
+        }
+
+        e.RestartAfter(it);
+
+        e.SetLocalProgress("Processed compact sample");
+      } else if (e.Has() && e.Get().IsTimeBeforeSameSample()) {
+        if (previousStackState == ThreadStreamingContext::eNoStackYet) {
+          // We don't have any full sample yet, we cannot duplicate a "previous"
+          // one. This should only happen at most once per thread, for the very
+          // first sample.
+          continue;
+        }
+
+        ProfileSample sample;
+
+        // Keep the same `mStack` as previously output.
+        // Note that it may be empty, this is checked below before writing it.
+        sample.mStack = previousStack;
+
+        sample.mTime = e.Get().GetDouble();
+
+        // Ignore samples that are too old.
+        if (sample.mTime < aSinceTime) {
+          e.Next();
+          continue;
+        }
+
+        sample.mResponsiveness = Nothing{};
+
+        sample.mRunningTimes.Clear();
+
+        // As for compact stacks, the following entries are "modern" ones that
+        // must be read through a raw block iterator.
+        ProfileChunkedBuffer::BlockIterator it = e.Iterator();
+        for (;;) {
+          ++it;
+          if (it.IsAtEnd()) {
+            break;
+          }
+          ProfileBufferEntryReader er = *it;
+          ProfileBufferEntry::Kind kind =
+              er.ReadObject<ProfileBufferEntry::Kind>();
+
+          // There may be running times before the SameSample.
+          if (kind == ProfileBufferEntry::Kind::RunningTimes) {
+            er.ReadIntoObject(sample.mRunningTimes);
+            continue;
+          }
+
+          if (kind == ProfileBufferEntry::Kind::SameSample) {
+            if (previousStackState == ThreadStreamingContext::eStackWasEmpty &&
+                sample.mRunningTimes.IsEmpty()) {
+              // Skip samples with empty stacks, unless we have useful running
+              // times.
+              break;
+            }
+            WriteSample(writer, sample);
+            break;
+          }
+
+          if (kind == ProfileBufferEntry::Kind::Marker &&
+              aStreamingContextForMarkers) {
+            StreamMarkerAfterKind(er, *aStreamingContextForMarkers);
+            continue;
+          }
+
+          MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT,
+                     "There should be no legacy entries between "
+                     "TimeBeforeSameSample and SameSample");
+          // Ignore the rest of this unrelated entry.
+          er.SetRemainingBytes(0);
+        }
+
+        e.RestartAfter(it);
+
+        e.SetLocalProgress("Processed repeated sample");
+      } else {
+        ERROR_AND_CONTINUE("expected a Time entry");
+      }
+    }
+
+    return processedThreadId;
+  });
+}
+
+// Streams the samples stored in this buffer to `aWriter`, keeping only those
+// of `aThreadId` (or of whichever thread is read, when `aThreadId` is
+// unspecified). Markers are not streamed by this function: no marker streaming
+// context is provided. Returns what DoStreamSamplesAndMarkersToJSON() returns.
+ProfilerThreadId ProfileBuffer::StreamSamplesToJSON(
+    SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId,
+    double aSinceTime, UniqueStacks& aUniqueStacks,
+    mozilla::ProgressLogger aProgressLogger) const {
+  // Streaming state handed to the streaming parameters of each accepted
+  // thread; it lives for the duration of the streaming call below.
+  uint32_t lastStack = 0u;
+  ThreadStreamingContext::PreviousStackState lastStackState =
+      ThreadStreamingContext::eNoStackYet;
+#ifdef DEBUG
+  int threadsSeen = 0;
+#endif  // DEBUG
+
+  // Called with the id of the thread being read; the returned Maybe stays
+  // empty for threads that were not requested.
+  auto parametersForThread = [&](ProfilerThreadId aReadThreadId) {
+    Maybe<StreamingParametersForThread> parameters;
+#ifdef DEBUG
+    ++threadsSeen;
+    MOZ_ASSERT(
+        aThreadId.IsSpecified() ||
+            (threadsSeen == 1 && aReadThreadId.IsSpecified()),
+        "Unspecified aThreadId should only be used with 1-sample buffer");
+#endif  // DEBUG
+    const bool isRequestedThread =
+        !aThreadId.IsSpecified() || aThreadId == aReadThreadId;
+    if (isRequestedThread) {
+      parameters.emplace(aWriter, aUniqueStacks, lastStackState, lastStack);
+    }
+    return parameters;
+  };
+
+  return DoStreamSamplesAndMarkersToJSON(
+      aWriter.SourceFailureLatch(), std::move(parametersForThread), aSinceTime,
+      /* aStreamingContextForMarkers */ nullptr, std::move(aProgressLogger));
+}
+
+// Streams samples and markers for every thread that has a streaming context
+// registered in `aProcessStreamingContext`; threads without one get no
+// streaming parameters. The value returned by the underlying streaming
+// function is deliberately ignored.
+void ProfileBuffer::StreamSamplesAndMarkersToJSON(
+    ProcessStreamingContext& aProcessStreamingContext,
+    mozilla::ProgressLogger aProgressLogger) const {
+  // Looks up the per-thread context and, if present, exposes its writer,
+  // unique stacks and previous-stack state as streaming parameters.
+  auto parametersForThread = [&](ProfilerThreadId aReadThreadId) {
+    Maybe<StreamingParametersForThread> parameters;
+    if (ThreadStreamingContext* context =
+            aProcessStreamingContext.GetThreadStreamingContext(
+                aReadThreadId)) {
+      parameters.emplace(context->mSamplesDataWriter, *context->mUniqueStacks,
+                         context->mPreviousStackState,
+                         context->mPreviousStack);
+    }
+    return parameters;
+  };
+
+  (void)DoStreamSamplesAndMarkersToJSON(
+      aProcessStreamingContext.SourceFailureLatch(),
+      std::move(parametersForThread), aProcessStreamingContext.GetSinceTime(),
+      &aProcessStreamingContext, std::move(aProgressLogger));
+}
+
+void ProfileBuffer::AddJITInfoForRange(
+    uint64_t aRangeStart, ProfilerThreadId aThreadId, JSContext* aContext,
+    JITFrameInfo& aJITFrameInfo,
+    mozilla::ProgressLogger aProgressLogger) const {
+  // We can only process JitReturnAddr entries if we have a JSContext.
+  MOZ_RELEASE_ASSERT(aContext);
+  // Clamp the requested start so it is never below BufferRangeStart().
+  aRangeStart = std::max(aRangeStart, BufferRangeStart());
+  aJITFrameInfo.AddInfoForRange(
+      aRangeStart, BufferRangeEnd(), aContext,
+      [&](const std::function<void(void*)>& aJITAddressConsumer) {
+        // Find all JitReturnAddr entries in the given range for the given
+        // thread, and call aJITAddressConsumer with those addresses.
+        // The buffer contents are only accessed inside this Read() callback.
+        mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+          MOZ_ASSERT(aReader,
+                     "ProfileChunkedBuffer cannot be out-of-session when "
+                     "sampler is running");
+          // aRangeStart is passed as the getter's start position.
+          EntryGetter e(*aReader, aJITFrameInfo.LocalFailureLatchSource(),
+                        std::move(aProgressLogger), aRangeStart);
+          // Each iteration handles one ThreadId entry and what follows it.
+          while (true) {
+            // Advance to the next ThreadId entry.
+            while (e.Has() && !e.Get().IsThreadId()) {
+              e.Next();
+            }
+            if (!e.Has()) {
+              break;
+            }
+
+            MOZ_ASSERT(e.Get().IsThreadId());
+            ProfilerThreadId threadId = e.Get().GetThreadId();
+            e.Next();
+
+            // Ignore samples that are for a different thread.
+            if (threadId != aThreadId) {
+              continue;
+            }
+            // The entry after ThreadId tells which sample format follows.
+            if (e.Has() && e.Get().IsTime()) {
+              // Legacy stack: scan entries up to the next ThreadId.
+              e.Next();
+              while (e.Has() && !e.Get().IsThreadId()) {
+                if (e.Get().IsJitReturnAddr()) {
+                  aJITAddressConsumer(e.Get().GetPtr());
+                }
+                e.Next();
+              }
+            } else if (e.Has() && e.Get().IsTimeBeforeCompactStack()) {
+              // Compact stack: search the following blocks for CompactStack.
+              ProfileChunkedBuffer::BlockIterator it = e.Iterator();
+              for (;;) {
+                ++it;
+                if (it.IsAtEnd()) {
+                  break;
+                }
+                ProfileBufferEntryReader er = *it;
+                ProfileBufferEntry::Kind kind =
+                    er.ReadObject<ProfileBufferEntry::Kind>();
+                if (kind == ProfileBufferEntry::Kind::CompactStack) {
+                  ProfileChunkedBuffer tempBuffer(
+                      ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+                      WorkerChunkManager());
+                  er.ReadIntoObject(tempBuffer);
+                  tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+                    MOZ_ASSERT(
+                        aReader,
+                        "Local ProfileChunkedBuffer cannot be out-of-session");
+                    EntryGetter stackEntryGetter(
+                        *aReader, aJITFrameInfo.LocalFailureLatchSource());
+                    while (stackEntryGetter.Has()) {
+                      if (stackEntryGetter.Get().IsJitReturnAddr()) {
+                        aJITAddressConsumer(stackEntryGetter.Get().GetPtr());
+                      }
+                      stackEntryGetter.Next();
+                    }
+                  });
+                  WorkerChunkManager().Reset(tempBuffer.GetAllChunks());
+                  break;
+                }
+                // Not the CompactStack: skip the rest of this block.
+                MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT,
+                           "There should be no legacy entries between "
+                           "TimeBeforeCompactStack and CompactStack");
+                er.SetRemainingBytes(0);
+              }
+              // Only `it` was advanced above; step `e` past the time entry.
+              e.Next();
+            } else if (e.Has() && e.Get().IsTimeBeforeSameSample()) {
+              // Sample index, nothing to do.
+              // `e` is not advanced here; the loop top skips to next ThreadId.
+            } else {
+              ERROR_AND_CONTINUE("expected a Time entry");
+            }
+          }
+        });
+      });
+}
+
+// Streams the markers stored in this buffer to `aWriter`. Every entry is
+// visited: non-marker entries are skipped, and a marker is only given a writer
+// when `aThreadId` is unspecified or equal to the marker's own thread id.
+void ProfileBuffer::StreamMarkersToJSON(
+    SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId,
+    const TimeStamp& aProcessStartTime, double aSinceTime,
+    UniqueStacks& aUniqueStacks,
+    mozilla::ProgressLogger aProgressLogger) const {
+  using KindUnderlyingType = ProfileBufferEntry::KindUnderlyingType;
+
+  mEntries.ReadEach([&](ProfileBufferEntryReader& aER) {
+    const auto kind = static_cast<ProfileBufferEntry::Kind>(
+        aER.ReadObject<KindUnderlyingType>());
+    MOZ_ASSERT(static_cast<KindUnderlyingType>(kind) <
+               static_cast<KindUnderlyingType>(
+                   ProfileBufferEntry::Kind::MODERN_LIMIT));
+
+    if (kind != ProfileBufferEntry::Kind::Marker) {
+      // The entry was not a marker, we need to skip to the end.
+      aER.SetRemainingBytes(0);
+      return;
+    }
+
+    mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream(
+        aER,
+        // Thread filter: yields the writer for wanted markers, else null.
+        [&](const ProfilerThreadId& aMarkerThreadId) {
+          const bool isWanted =
+              !aThreadId.IsSpecified() || aMarkerThreadId == aThreadId;
+          return isWanted ? &aWriter : nullptr;
+        },
+        // Streams a backtrace stored in the given chunked buffer.
+        [&](ProfileChunkedBuffer& aChunkedBuffer) {
+          ProfilerBacktrace backtrace("", &aChunkedBuffer);
+          backtrace.StreamJSON(aWriter, aProcessStartTime, aUniqueStacks);
+        },
+        // Forwards the remaining payload bytes to the FFI serializer for
+        // this tag.
+        [&](mozilla::base_profiler_markers_detail::Streaming::DeserializerTag
+                aTag) {
+          const size_t payloadSize = aER.RemainingBytes();
+          ProfileBufferEntryReader::DoubleSpanOfConstBytes spans =
+              aER.ReadSpans(payloadSize);
+
+          if (MOZ_LIKELY(spans.IsSingleSpan())) {
+            // Only a single span, we can just refer to it directly
+            // instead of copying it.
+            profiler::ffi::gecko_profiler_serialize_marker_for_tag(
+                aTag, spans.mFirstOrOnly.Elements(), payloadSize, &aWriter);
+            return;
+          }
+
+          // Two spans, we need to concatenate them by copying.
+          uint8_t* payloadBuffer = new uint8_t[payloadSize];
+          spans.CopyBytesTo(payloadBuffer);
+          profiler::ffi::gecko_profiler_serialize_marker_for_tag(
+              aTag, payloadBuffer, payloadSize, &aWriter);
+          delete[] payloadBuffer;
+        });
+  });
+}
+
+void ProfileBuffer::StreamProfilerOverheadToJSON(
+    SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime,
+    double aSinceTime, mozilla::ProgressLogger aProgressLogger) const {
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader, aWriter.SourceFailureLatch(),
+                  std::move(aProgressLogger));
+    // Column indices, in the same order as the schema fields written below.
+    enum Schema : uint32_t {
+      TIME = 0,
+      LOCKING = 1,
+      MARKER_CLEANING = 2,
+      COUNTERS = 3,
+      THREADS = 4
+    };
+    // Output: "profilerOverhead": {"samples": {...}, "statistics": {...}?}
+    aWriter.StartObjectProperty("profilerOverhead");
+    aWriter.StartObjectProperty("samples");
+    // Stream all sampling overhead data. We skip other entries, because we
+    // process them in StreamSamplesToJSON()/etc.
+    {
+      JSONSchemaWriter schema(aWriter);
+      schema.WriteField("time");
+      schema.WriteField("locking");
+      schema.WriteField("expiredMarkerCleaning");
+      schema.WriteField("counters");
+      schema.WriteField("threads");
+    }
+    // firstTime == 0.0 doubles as "no overhead sample accepted yet".
+    aWriter.StartArrayProperty("data");
+    double firstTime = 0.0;
+    double lastTime = 0.0;
+    ProfilerStats intervals, overheads, lockings, cleanings, counters, threads;
+    while (e.Has()) {
+      // valid sequence: ProfilerOverheadTime, ProfilerOverheadDuration * 4
+      if (e.Get().IsProfilerOverheadTime()) {
+        double time = e.Get().GetDouble();
+        if (time >= aSinceTime) {
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime");
+          }
+          double locking = e.Get().GetDouble();
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime,ProfilerOverheadDuration");
+          }
+          double cleaning = e.Get().GetDouble();
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime,ProfilerOverheadDuration*2");
+          }
+          double counter = e.Get().GetDouble();
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime,ProfilerOverheadDuration*3");
+          }
+          double thread = e.Get().GetDouble();
+          // All five entries were read: update the stats, then write the row.
+          if (firstTime == 0.0) {
+            firstTime = time;
+          } else {
+            // Note that we'll have 1 fewer interval than other numbers (because
+            // we need both ends of an interval to know its duration). The final
+            // difference should be insignificant over the expected many
+            // thousands of iterations.
+            intervals.Count(time - lastTime);
+          }
+          lastTime = time;
+          overheads.Count(locking + cleaning + counter + thread);
+          lockings.Count(locking);
+          cleanings.Count(cleaning);
+          counters.Count(counter);
+          threads.Count(thread);
+          // One "data" row per accepted sampling, in schema order.
+          AutoArraySchemaWriter writer(aWriter);
+          writer.TimeMsElement(TIME, time);
+          writer.DoubleElement(LOCKING, locking);
+          writer.DoubleElement(MARKER_CLEANING, cleaning);
+          writer.DoubleElement(COUNTERS, counter);
+          writer.DoubleElement(THREADS, thread);
+        }
+      }
+      e.Next();  // Also steps over entries unrelated to profiler overhead.
+    }
+    aWriter.EndArray();   // data
+    aWriter.EndObject();  // samples
+
+    // Only output statistics if there is at least one full interval (and
+    // therefore at least two samplings.)
+    if (intervals.n > 0) {
+      aWriter.StartObjectProperty("statistics");
+      aWriter.DoubleProperty("profiledDuration", lastTime - firstTime);
+      aWriter.IntProperty("samplingCount", overheads.n);
+      aWriter.DoubleProperty("overheadDurations", overheads.sum);
+      aWriter.DoubleProperty("overheadPercentage",
+                             overheads.sum / (lastTime - firstTime));  // NOTE(review): plain quotient, not scaled by 100 here.
+#define PROFILER_STATS(name, var)                           \
+  aWriter.DoubleProperty("mean" name, (var).sum / (var).n); \
+  aWriter.DoubleProperty("min" name, (var).min);            \
+  aWriter.DoubleProperty("max" name, (var).max);
+      PROFILER_STATS("Interval", intervals);
+      PROFILER_STATS("Overhead", overheads);
+      PROFILER_STATS("Lockings", lockings);
+      PROFILER_STATS("Cleaning", cleanings);
+      PROFILER_STATS("Counter", counters);
+      PROFILER_STATS("Thread", threads);
+#undef PROFILER_STATS
+      aWriter.EndObject();  // statistics
+    }
+    aWriter.EndObject();  // profilerOverhead
+  });
+}
+
+struct CounterKeyedSample {  // One counter sample, as parsed by StreamCountersToJSON.
+  double mTime;      // Value of the Time entry following the CounterId.
+  uint64_t mNumber;  // Value of the optional Number entry; 0 when absent.
+  int64_t mCount;    // Value of the Count entry.
+};
+// Samples collected for one counter key, in the order they were parsed.
+using CounterKeyedSamples = Vector<CounterKeyedSample>;
+// In the code visible here, only used to log sample time ordering.
+static LazyLogModule sFuzzyfoxLog("Fuzzyfox");
+// Maps a counter key to the samples parsed for that key.
+using CounterMap = HashMap<uint64_t, CounterKeyedSamples>;
+
+// Returns a reference to the value stored under `aKey` in `aMap`. When the key
+// is not present yet, a value-initialized value is inserted first; a failed
+// insertion is fatal (release assertion).
+template <typename HashM, typename Key>
+static auto& LookupOrAdd(HashM& aMap, Key&& aKey) {
+  auto entry = aMap.lookupForAdd(aKey);
+  if (entry) {
+    // Already present: hand back the existing value.
+    return entry->value();
+  }
+
+  MOZ_RELEASE_ASSERT(aMap.add(entry, std::forward<Key>(aKey),
+                              typename HashM::Entry::ValueType{}));
+  MOZ_ASSERT(!!entry);
+  return entry->value();
+}
+
+// Streams every counter found in the buffer as a "counters" array property on
+// `aWriter`; nothing is written when no CounterId entry was found. Counter
+// samples older than `aSinceTime` are ignored. For each counter key, count and
+// number are written as deltas from the previously written sample, and runs of
+// unchanged samples are collapsed (see the loop below for the exact rules).
+void ProfileBuffer::StreamCountersToJSON(
+    SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime,
+    double aSinceTime, mozilla::ProgressLogger aProgressLogger) const {
+  // Because this is a format entirely internal to the Profiler, any parsing
+  // error indicates a bug in the ProfileBuffer writing or the parser itself,
+  // or possibly flaky hardware.
+
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader, aWriter.SourceFailureLatch(),
+                  std::move(aProgressLogger));
+
+    // Column indices, in the same order as the schema fields written below.
+    enum Schema : uint32_t { TIME = 0, COUNT = 1, NUMBER = 2 };
+
+    // Stream all counters. We skip other entries, because we process them in
+    // StreamSamplesToJSON()/etc.
+    //
+    // Valid sequence in the buffer:
+    // CounterID
+    // Time
+    // ( CounterKey Count Number? )*
+    //
+    // And the JSON (example; count and number are deltas, and "number" is
+    // only present when at least one sample of the group has a number):
+    // "counters": [
+    //  {
+    //   "name": "malloc",
+    //   "category": "Memory",
+    //   "description": "Amount of allocated memory",
+    //   "sample_groups": [
+    //    {
+    //     "id": 0,
+    //     "samples": {
+    //      "schema": {"time": 0, "count": 1, "number": 2},
+    //      "data": [
+    //       [
+    //        16117.033968000002,
+    //        2446216,
+    //        6801320
+    //       ],
+    //       [
+    //        16118.037638,
+    //        0,
+    //        0
+    //       ]
+    //      ]
+    //     }
+    //    }
+    //   ]
+    //  }
+    // ],
+
+    // Build the map of counters and populate it
+    HashMap<void*, CounterMap> counters;
+
+    while (e.Has()) {
+      // skip all non-Counters, including if we start in the middle of a counter
+      if (e.Get().IsCounterId()) {
+        void* id = e.Get().GetPtr();
+        CounterMap& counter = LookupOrAdd(counters, id);
+        e.Next();
+        if (!e.Has() || !e.Get().IsTime()) {
+          ERROR_AND_CONTINUE("expected a Time entry");
+        }
+        double time = e.Get().GetDouble();
+        e.Next();
+        if (time >= aSinceTime) {
+          while (e.Has() && e.Get().IsCounterKey()) {
+            uint64_t key = e.Get().GetUint64();
+            CounterKeyedSamples& data = LookupOrAdd(counter, key);
+            e.Next();
+            if (!e.Has() || !e.Get().IsCount()) {
+              ERROR_AND_CONTINUE("expected a Count entry");
+            }
+            // Use the getter matching the signedness of the destination:
+            // the count is signed, the number is unsigned.
+            int64_t count = e.Get().GetInt64();
+            e.Next();
+            uint64_t number;
+            if (!e.Has() || !e.Get().IsNumber()) {
+              // The Number entry is optional.
+              number = 0;
+            } else {
+              number = e.Get().GetUint64();
+              e.Next();
+            }
+            CounterKeyedSample sample = {time, number, count};
+            MOZ_RELEASE_ASSERT(data.append(sample));
+          }
+        } else {
+          // skip counter sample - only need to skip the initial counter
+          // id, then let the loop at the top skip the rest
+        }
+      } else {
+        e.Next();
+      }
+    }
+    // we have a map of a map of counter entries; dump them to JSON
+    if (counters.count() == 0) {
+      return;
+    }
+
+    aWriter.StartArrayProperty("counters");
+    for (auto iter = counters.iter(); !iter.done(); iter.next()) {
+      CounterMap& counter = iter.get().value();
+      // The CounterId entry stores the address of the counter object itself.
+      const BaseProfilerCount* base_counter =
+          static_cast<const BaseProfilerCount*>(iter.get().key());
+
+      aWriter.Start();
+      aWriter.StringProperty("name", MakeStringSpan(base_counter->mLabel));
+      aWriter.StringProperty("category",
+                             MakeStringSpan(base_counter->mCategory));
+      aWriter.StringProperty("description",
+                             MakeStringSpan(base_counter->mDescription));
+
+      aWriter.StartArrayProperty("sample_groups");
+      for (auto counter_iter = counter.iter(); !counter_iter.done();
+           counter_iter.next()) {
+        CounterKeyedSamples& samples = counter_iter.get().value();
+        uint64_t key = counter_iter.get().key();
+
+        size_t size = samples.length();
+        if (size == 0) {
+          continue;
+        }
+
+        // The "number" column is only emitted if some sample has a non-zero
+        // number.
+        bool hasNumber = false;
+        for (size_t i = 0; i < size; i++) {
+          if (samples[i].mNumber != 0) {
+            hasNumber = true;
+            break;
+          }
+        }
+
+        aWriter.StartObjectElement();
+        {
+          aWriter.IntProperty("id", static_cast<int64_t>(key));
+          aWriter.StartObjectProperty("samples");
+          {
+            JSONSchemaWriter schema(aWriter);
+            schema.WriteField("time");
+            schema.WriteField("count");
+            if (hasNumber) {
+              schema.WriteField("number");
+            }
+          }
+
+          aWriter.StartArrayProperty("data");
+          // Time of the most recent sample that was skipped because nothing
+          // changed; 0.0 means "no skipped sample pending".
+          double previousSkippedTime = 0.0;
+          uint64_t previousNumber = 0;
+          int64_t previousCount = 0;
+          for (size_t i = 0; i < size; i++) {
+            // Encode as deltas, and only encode if different than the previous
+            // or next sample; Always write the first and last samples.
+            if (i == 0 || i == size - 1 ||
+                samples[i].mNumber != previousNumber ||
+                samples[i].mCount != previousCount ||
+                // Ensure we output the first 0 before skipping samples.
+                (i >= 2 && (samples[i - 2].mNumber != previousNumber ||
+                            samples[i - 2].mCount != previousCount))) {
+              // Log only when the ordering asserted just below is violated,
+              // i.e., when this sample is older than the one before it.
+              if (i != 0 && samples[i].mTime < samples[i - 1].mTime) {
+                MOZ_LOG(sFuzzyfoxLog, mozilla::LogLevel::Error,
+                        ("Fuzzyfox Profiler Assertion: %f >= %f",
+                         samples[i].mTime, samples[i - 1].mTime));
+              }
+              MOZ_ASSERT(i == 0 || samples[i].mTime >= samples[i - 1].mTime);
+              MOZ_ASSERT(samples[i].mNumber >= previousNumber);
+              MOZ_ASSERT(samples[i].mNumber - previousNumber <=
+                         uint64_t(std::numeric_limits<int64_t>::max()));
+
+              int64_t numberDelta =
+                  static_cast<int64_t>(samples[i].mNumber - previousNumber);
+              int64_t countDelta = samples[i].mCount - previousCount;
+
+              if (previousSkippedTime != 0.0 &&
+                  (numberDelta != 0 || countDelta != 0)) {
+                // Write the last skipped sample, unless the new one is all
+                // zeroes (that'd be redundant) This is useful to know when a
+                // certain value was last sampled, so that the front-end graph
+                // will be more correct.
+                AutoArraySchemaWriter writer(aWriter);
+                writer.TimeMsElement(TIME, previousSkippedTime);
+                // The deltas are effectively zeroes, since no change happened
+                // between the last actually-written sample and the last skipped
+                // one.
+                writer.IntElement(COUNT, 0);
+                if (hasNumber) {
+                  writer.IntElement(NUMBER, 0);
+                }
+              }
+
+              AutoArraySchemaWriter writer(aWriter);
+              writer.TimeMsElement(TIME, samples[i].mTime);
+              writer.IntElement(COUNT, countDelta);
+              if (hasNumber) {
+                writer.IntElement(NUMBER, numberDelta);
+              }
+
+              previousSkippedTime = 0.0;
+              previousNumber = samples[i].mNumber;
+              previousCount = samples[i].mCount;
+            } else {
+              previousSkippedTime = samples[i].mTime;
+            }
+          }
+          aWriter.EndArray();   // data
+          aWriter.EndObject();  // samples
+        }
+        aWriter.EndObject();  // sample_groups item
+      }
+      aWriter.EndArray();  // sample groups
+      aWriter.End();       // for each counter
+    }
+    aWriter.EndArray();  // counters
+  });
+}
+
+#undef ERROR_AND_CONTINUE
+
+// Writes one paused-range object to `aWriter`: "startTime" and "endTime"
+// (each streamed as null when the corresponding time is absent), followed by
+// the "reason" string.
+static void AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason,
+                           const Maybe<double>& aStartTime,
+                           const Maybe<double>& aEndTime) {
+  aWriter.Start();
+
+  if (aStartTime.isNothing()) {
+    aWriter.NullProperty("startTime");
+  } else {
+    aWriter.TimeDoubleMsProperty("startTime", *aStartTime);
+  }
+
+  if (aEndTime.isNothing()) {
+    aWriter.NullProperty("endTime");
+  } else {
+    aWriter.TimeDoubleMsProperty("endTime", *aEndTime);
+  }
+
+  aWriter.StringProperty("reason", MakeStringSpan(aReason));
+
+  aWriter.End();
+}
+
+// Streams the paused ranges recorded in the buffer to `aWriter`: a
+// "profiler-paused" range for each Pause/Resume pair and a "collecting" range
+// for each CollectionStart/CollectionEnd pair. A range whose start was not
+// seen gets a null start time; a range still open at the end of the buffer
+// gets a null end time.
+void ProfileBuffer::StreamPausedRangesToJSON(
+    SpliceableJSONWriter& aWriter, double aSinceTime,
+    mozilla::ProgressLogger aProgressLogger) const {
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader, aWriter.SourceFailureLatch(),
+                  aProgressLogger.CreateSubLoggerFromTo(
+                      1_pc, "Streaming pauses...", 99_pc, "Streamed pauses"));
+
+    // Start times of the ranges that are currently open, if any.
+    Maybe<double> pauseStart;
+    Maybe<double> collectionStart;
+
+    for (; e.Has(); e.Next()) {
+      auto&& entry = e.Get();
+      if (entry.IsPause()) {
+        pauseStart = Some(entry.GetDouble());
+      } else if (entry.IsResume()) {
+        AddPausedRange(aWriter, "profiler-paused", pauseStart,
+                       Some(entry.GetDouble()));
+        pauseStart = Nothing();
+      } else if (entry.IsCollectionStart()) {
+        collectionStart = Some(entry.GetDouble());
+      } else if (entry.IsCollectionEnd()) {
+        AddPausedRange(aWriter, "collecting", collectionStart,
+                       Some(entry.GetDouble()));
+        collectionStart = Nothing();
+      }
+    }
+
+    // Ranges still open once the buffer is exhausted have no end time.
+    if (pauseStart.isSome()) {
+      AddPausedRange(aWriter, "profiler-paused", pauseStart, Nothing());
+    }
+    if (collectionStart.isSome()) {
+      AddPausedRange(aWriter, "collecting", collectionStart, Nothing());
+    }
+  });
+}
+
+bool ProfileBuffer::DuplicateLastSample(ProfilerThreadId aThreadId,
+                                        double aSampleTimeMs,
+                                        Maybe<uint64_t>& aLastSample,
+                                        const RunningTimes& aRunningTimes) {
+  if (!aLastSample) {  // No previous sample position: nothing to duplicate.
+    return false;
+  }
+  // Two strategies follow: a short SameSample reference, or a full copy.
+  if (mEntries.IsIndexInCurrentChunk(ProfileBufferIndex{*aLastSample})) {
+    // The last (fully-written) sample is in this chunk, we can refer to it.
+
+    // Note that between now and when we write the SameSample below, another
+    // chunk could have been started, so the SameSample will in fact refer to a
+    // block in a previous chunk. This is okay, because:
+    // - When serializing to JSON, if that chunk is still there, we'll still be
+    //   able to find that old stack, so nothing will be lost.
+    // - If unfortunately that chunk has been destroyed, we will lose this
+    //   sample. But this will only happen to the first sample (per thread) in
+    //   the whole JSON output, because the next time we're here to duplicate
+    //   the same sample again, IsIndexInCurrentChunk will say `false` and we
+    //   will fall back to the normal copy or even re-sample. Losing the first
+    //   sample out of many in a whole recording is acceptable.
+    //
+    // |---| = chunk, S = Sample, D = Duplicate, s = same sample
+    // |---S-s-s--| |s-D--s--s-| |s-D--s---s|
+    // Later, the first chunk is destroyed/recycled:
+    //              |s-D--s--s-| |s-D--s---s| |-...
+    // Output:       ^ ^  ^       ^
+    //               `-|--|-------|--- Same but no previous -> lost.
+    //                 `--|-------|--- Full duplicate sample.
+    //                    `-------|--- Same with previous -> okay.
+    //                            `--- Same but now we have a previous -> okay!
+
+    AUTO_PROFILER_STATS(DuplicateLastSample_SameSample);
+
+    // Add the thread id first. We don't update `aLastSample` because we are not
+    // writing a full sample.
+    (void)AddThreadIdEntry(aThreadId);
+
+    // Copy the new time, to be followed by a SameSample.
+    AddEntry(ProfileBufferEntry::TimeBeforeSameSample(aSampleTimeMs));
+
+    // Add running times if they have data.
+    if (!aRunningTimes.IsEmpty()) {
+      mEntries.PutObjects(ProfileBufferEntry::Kind::RunningTimes,
+                          aRunningTimes);
+    }
+
+    // Finish with a SameSample entry.
+    mEntries.PutObjects(ProfileBufferEntry::Kind::SameSample);
+
+    return true;
+  }
+  // Otherwise, copy the last sample's entries through a temporary buffer.
+  AUTO_PROFILER_STATS(DuplicateLastSample_copy);
+
+  ProfileChunkedBuffer tempBuffer(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex, WorkerChunkManager());
+  // Runs WorkerChunkManager().Reset(...) on every exit path from here on.
+  auto retrieveWorkerChunk = MakeScopeExit(
+      [&]() { WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); });
+
+  const bool ok = mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    // DuplicateLastSample is only called during profiling, so we don't need a
+    // progress logger (only useful when capturing the final profile).
+    EntryGetter e(*aReader, mozilla::FailureLatchInfallibleSource::Singleton(),
+                  ProgressLogger{}, *aLastSample);
+
+    if (e.CurPos() != *aLastSample) {
+      // The last sample is no longer within the buffer range, so we cannot
+      // use it. Reset the stored buffer position to Nothing().
+      aLastSample.reset();
+      return false;
+    }
+    // The stored position must hold this thread's own ThreadId entry.
+    MOZ_RELEASE_ASSERT(e.Has() && e.Get().IsThreadId() &&
+                       e.Get().GetThreadId() == aThreadId);
+
+    e.Next();
+
+    // Go through the whole entry and duplicate it, until we find the next
+    // one.
+    while (e.Has()) {
+      switch (e.Get().GetKind()) {
+        case ProfileBufferEntry::Kind::Pause:
+        case ProfileBufferEntry::Kind::Resume:
+        case ProfileBufferEntry::Kind::PauseSampling:
+        case ProfileBufferEntry::Kind::ResumeSampling:
+        case ProfileBufferEntry::Kind::CollectionStart:
+        case ProfileBufferEntry::Kind::CollectionEnd:
+        case ProfileBufferEntry::Kind::ThreadId:
+        case ProfileBufferEntry::Kind::TimeBeforeSameSample:
+          // We're done.
+          return true;
+        case ProfileBufferEntry::Kind::Time:
+          // Copy with new time
+          AddEntry(tempBuffer, ProfileBufferEntry::Time(aSampleTimeMs));
+          break;
+        case ProfileBufferEntry::Kind::TimeBeforeCompactStack: {
+          // Copy with new time, followed by a compact stack.
+          AddEntry(tempBuffer,
+                   ProfileBufferEntry::TimeBeforeCompactStack(aSampleTimeMs));
+
+          // Add running times if they have data.
+          if (!aRunningTimes.IsEmpty()) {
+            tempBuffer.PutObjects(ProfileBufferEntry::Kind::RunningTimes,
+                                  aRunningTimes);
+          }
+
+          // The `CompactStack` *must* be present afterwards, but may not
+          // immediately follow `TimeBeforeCompactStack` (e.g., some markers
+          // could be written in-between), so we need to look for it in the
+          // following entries.
+          ProfileChunkedBuffer::BlockIterator it = e.Iterator();
+          for (;;) {
+            ++it;
+            if (it.IsAtEnd()) {
+              break;
+            }
+            ProfileBufferEntryReader er = *it;
+            auto kind = static_cast<ProfileBufferEntry::Kind>(
+                er.ReadObject<ProfileBufferEntry::KindUnderlyingType>());
+            MOZ_ASSERT(
+                static_cast<ProfileBufferEntry::KindUnderlyingType>(kind) <
+                static_cast<ProfileBufferEntry::KindUnderlyingType>(
+                    ProfileBufferEntry::Kind::MODERN_LIMIT));
+            if (kind == ProfileBufferEntry::Kind::CompactStack) {
+              // Found our CompactStack, just make a copy of the whole entry.
+              er = *it;
+              auto bytes = er.RemainingBytes();
+              MOZ_ASSERT(bytes <
+                         ProfileBufferChunkManager::scExpectedMaximumStackSize);
+              tempBuffer.Put(bytes, [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+                MOZ_ASSERT(aEW.isSome(), "tempBuffer cannot be out-of-session");
+                aEW->WriteFromReader(er, bytes);
+              });
+              // CompactStack marks the end, we're done.
+              break;
+            }
+
+            MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT,
+                       "There should be no legacy entries between "
+                       "TimeBeforeCompactStack and CompactStack");
+            er.SetRemainingBytes(0);
+            // Here, we have encountered a non-legacy entry that was not the
+            // CompactStack we're looking for; just continue the search...
+          }
+          // We're done.
+          return true;
+        }
+        case ProfileBufferEntry::Kind::CounterKey:
+        case ProfileBufferEntry::Kind::Number:
+        case ProfileBufferEntry::Kind::Count:
+          // Don't copy anything not part of a thread's stack sample
+          break;
+        case ProfileBufferEntry::Kind::CounterId:
+          // CounterId is normally followed by Time - if so, we'd like
+          // to skip it.  If we duplicate Time, it won't hurt anything, just
+          // waste buffer space (and this can happen if the CounterId has
+          // fallen off the end of the buffer, but Time (and Number/Count)
+          // are still in the buffer).
+          e.Next();
+          if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) {
+            // this would only happen if there was an invalid sequence
+            // in the buffer.  Don't skip it.
+            continue;
+          }
+          // we've skipped Time
+          break;
+        case ProfileBufferEntry::Kind::ProfilerOverheadTime:
+          // ProfilerOverheadTime is normally followed by
+          // ProfilerOverheadDuration*4 - if so, we'd like to skip it. Don't
+          // duplicate, as we are in the middle of a sampling and will soon
+          // capture its own overhead.
+          e.Next();
+          // A missing ProfilerOverheadDuration would only happen if there was
+          // an invalid sequence in the buffer. Don't skip unexpected entry.
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          e.Next();
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          e.Next();
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          e.Next();
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          // we've skipped ProfilerOverheadTime and
+          // ProfilerOverheadDuration*4.
+          break;
+        default: {
+          // Copy anything else we don't know about.
+          AddEntry(tempBuffer, e.Get());
+          break;
+        }
+      }
+      e.Next();
+    }
+    return true;
+  });
+
+  if (!ok) {
+    return false;
+  }
+
+  // If the buffer was big enough, there won't be any cleared blocks.
+  if (tempBuffer.GetState().mClearedBlockCount != 0) {
+    // No need to try to read stack again as it won't fit. Reset the stored
+    // buffer position to Nothing().
+    aLastSample.reset();
+    return false;
+  }
+  // Start a new full sample; its ThreadId result becomes the last sample.
+  aLastSample = Some(AddThreadIdEntry(aThreadId));
+  // Then append the duplicated entries gathered in the temporary buffer.
+  mEntries.AppendContents(tempBuffer);
+
+  return true;
+}
+
+// Intentionally a no-op: `aTime` is accepted and ignored.
+void ProfileBuffer::DiscardSamplesBeforeTime(double aTime) {
+  // This function does nothing!
+  // The duration limit will be removed from Firefox, see bug 1632365.
+  (void)aTime;
+}
+
+// END ProfileBuffer
+////////////////////////////////////////////////////////////////////////
diff --git a/tools/profiler/core/ProfileBufferEntry.h b/tools/profiler/core/ProfileBufferEntry.h
new file mode 100644
index 0000000000..bfee4923a3
--- /dev/null
+++ b/tools/profiler/core/ProfileBufferEntry.h
@@ -0,0 +1,532 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntry_h
+#define ProfileBufferEntry_h
+
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <utility>
+#include <type_traits>
+#include "gtest/MozGtestFriend.h"
+#include "js/ProfilingCategory.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferEntryKinds.h"
+#include "mozilla/ProfileJSONWriter.h"
+#include "mozilla/ProfilerUtils.h"
+#include "mozilla/UniquePtrExtensions.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+#include "nsString.h"
+
+class ProfilerCodeAddressService;
+struct JSContext;
+
+class ProfileBufferEntry {
+ public:
+  using KindUnderlyingType =
+      std::underlying_type_t<::mozilla::ProfileBufferEntryKind>;
+  using Kind = mozilla::ProfileBufferEntryKind;
+
+  ProfileBufferEntry();
+
+  static constexpr size_t kNumChars = mozilla::ProfileBufferEntryNumChars;
+
+ private:
+  // Kind + payload constructors (private). aString must be a static string.
+  ProfileBufferEntry(Kind aKind, const char* aString);
+  ProfileBufferEntry(Kind aKind, char aChars[kNumChars]);
+  ProfileBufferEntry(Kind aKind, void* aPtr);
+  ProfileBufferEntry(Kind aKind, double aDouble);
+  ProfileBufferEntry(Kind aKind, int64_t aInt64);
+  ProfileBufferEntry(Kind aKind, uint64_t aUint64);
+  ProfileBufferEntry(Kind aKind, int aInt);
+  ProfileBufferEntry(Kind aKind, ProfilerThreadId aThreadId);
+
+ public:  // One static factory KIND(TYPE) per entry kind, from the kind list.
+#define CTOR(KIND, TYPE, SIZE)                   \
+  static ProfileBufferEntry KIND(TYPE aVal) {    \
+    return ProfileBufferEntry(Kind::KIND, aVal); \
+  }
+  FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR)
+#undef CTOR
+
+  Kind GetKind() const { return mKind; }  // Kind tag of this entry.
+
+#define IS_KIND(KIND, TYPE, SIZE) \
+  bool Is##KIND() const { return mKind == Kind::KIND; }
+  FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(IS_KIND)  // One IsKIND() per entry kind.
+#undef IS_KIND
+
+ private:  // The gtests and ProfileBuffer below get access to the privates.
+  FRIEND_TEST(ThreadProfile, InsertOneEntry);
+  FRIEND_TEST(ThreadProfile, InsertOneEntryWithTinyBuffer);
+  FRIEND_TEST(ThreadProfile, InsertEntriesNoWrap);
+  FRIEND_TEST(ThreadProfile, InsertEntriesWrap);
+  FRIEND_TEST(ThreadProfile, MemoryMeasure);
+  friend class ProfileBuffer;
+
+  Kind mKind;                   // Which kind of entry this is.
+  uint8_t mStorage[kNumChars];  // Raw payload bytes.
+
+  const char* GetString() const;  // Typed accessors; defined outside this file.
+  void* GetPtr() const;
+  double GetDouble() const;
+  int GetInt() const;
+  int64_t GetInt64() const;
+  uint64_t GetUint64() const;
+  ProfilerThreadId GetThreadId() const;
+  void CopyCharsInto(char (&aOutArray)[kNumChars]) const;
+};
+
+// Packed layout: 1 byte for the tag + 8 bytes for the value.
+static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size");
+
+// Everything about JIT frames that is needed to stream the stack frames of
+// JitReturnAddr entries found in one range of the profiler buffer.
+// Each return address (void*) maps to one or more JITFrameKeys, and each
+// JITFrameKey maps to the JSON string describing that frame.
+// The mapping is only valid for the buffer range [mRangeStart, mRangeEnd):
+// JitReturnAddr entries outside of that range cannot be processed with this
+// JITFrameInfoForBufferRange object.
+struct JITFrameInfoForBufferRange final {
+  JITFrameInfoForBufferRange Clone() const;
+
+  // First buffer index covered by this mapping.
+  uint64_t mRangeStart;
+  // First buffer index that is NOT covered anymore.
+  uint64_t mRangeEnd;
+
+  // Key made of a canonical address and a depth; two keys are equal when both
+  // members are equal.
+  struct JITFrameKey {
+    bool operator==(const JITFrameKey& aOther) const {
+      if (mCanonicalAddress != aOther.mCanonicalAddress) {
+        return false;
+      }
+      return mDepth == aOther.mDepth;
+    }
+    bool operator!=(const JITFrameKey& aOther) const {
+      return !operator==(aOther);
+    }
+
+    void* mCanonicalAddress;
+    uint32_t mDepth;
+  };
+
+  // Hash policy that lets JITFrameKey be used as a mozilla::HashMap key.
+  struct JITFrameKeyHasher {
+    using Lookup = JITFrameKey;
+
+    static mozilla::HashNumber hash(const JITFrameKey& aLookup) {
+      // Start from zero and fold in the address first, then the depth.
+      const mozilla::HashNumber addressHash = mozilla::AddToHash(
+          mozilla::HashNumber{0}, aLookup.mCanonicalAddress);
+      return mozilla::AddToHash(addressHash, aLookup.mDepth);
+    }
+
+    static bool match(const JITFrameKey& aKey, const JITFrameKey& aLookup) {
+      return aKey.operator==(aLookup);
+    }
+
+    static void rekey(JITFrameKey& aKey, const JITFrameKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  // Return address -> all the frame keys found at that address.
+  using JITAddressToJITFramesMap =
+      mozilla::HashMap<void*, mozilla::Vector<JITFrameKey>>;
+  JITAddressToJITFramesMap mJITAddressToJITFramesMap;
+
+  // Frame key -> JSON string for that frame.
+  using JITFrameToFrameJSONMap =
+      mozilla::HashMap<JITFrameKey, nsCString, JITFrameKeyHasher>;
+  JITFrameToFrameJSONMap mJITFrameToFrameJSONMap;
+};
+
+// Holds the JITFrameInfoForBufferRange objects of multiple profiler buffer
+// ranges, along with the string table that their cached frame JSON refers to.
+class JITFrameInfo final {
+ public:
+  JITFrameInfo()
+      : mUniqueStrings(mozilla::MakeUniqueFallible<UniqueJSONStrings>(
+            mLocalFailureLatchSource)) {
+    if (mUniqueStrings) {
+      return;
+    }
+    // The fallible allocation failed: record it in the local failure latch.
+    mLocalFailureLatchSource.SetFailure(
+        "OOM in JITFrameInfo allocating mUniqueStrings");
+  }
+
+  MOZ_IMPLICIT JITFrameInfo(const JITFrameInfo& aOther,
+                            mozilla::ProgressLogger aProgressLogger);
+
+  // Adds a new JITFrameInfoForBufferRange object to mRanges, looking up
+  // information about the provided JIT return addresses through aCx.
+  // The addresses are supplied in two steps: the caller passes a function in
+  // aJITAddressProvider; that function is called once, synchronously, with an
+  // aJITAddressConsumer argument, which is itself a function that must be
+  // called for every address. Calling it several times for the same address
+  // is allowed.
+  void AddInfoForRange(
+      uint64_t aRangeStart, uint64_t aRangeEnd, JSContext* aCx,
+      const std::function<void(const std::function<void(void*)>&)>&
+          aJITAddressProvider);
+
+  // Tells whether none of the information stored here is relevant anymore for
+  // entries in the buffer.
+  bool HasExpired(uint64_t aCurrentBufferRangeStart) const {
+    // No ranges at all means no relevant information, so this object may be
+    // discarded. Otherwise it has expired once its last range ends at or
+    // before the current start of the buffer.
+    return mRanges.empty() ||
+           mRanges.back().mRangeEnd <= aCurrentBufferRangeStart;
+  }
+
+  mozilla::FailureLatch& LocalFailureLatchSource() {
+    return mLocalFailureLatchSource;
+  }
+
+  // The data handed out below points at the local FailureLatch, so when it is
+  // moved out it must be given a new external FailureLatch to use instead.
+  mozilla::Vector<JITFrameInfoForBufferRange>&& MoveRangesWithNewFailureLatch(
+      mozilla::FailureLatch& aFailureLatch) &&;
+  mozilla::UniquePtr<UniqueJSONStrings>&& MoveUniqueStringsWithNewFailureLatch(
+      mozilla::FailureLatch& aFailureLatch) &&;
+
+ private:
+  // A JITFrameInfo may exist while profiling is ongoing, so it carries its own
+  // fallible FailureLatch. If and when the data below is finally extracted,
+  // any error is forwarded to the caller.
+  mozilla::FailureLatchSource mLocalFailureLatchSource;
+
+  // Ranges of JIT frame information, sorted by buffer position and
+  // non-overlapping. The JSON of the cached frames may contain string
+  // indexes, which refer to strings in mUniqueStrings.
+  mozilla::Vector<JITFrameInfoForBufferRange> mRanges;
+
+  // String table holding the strings used by the frame JSON cached in
+  // mRanges.
+  mozilla::UniquePtr<UniqueJSONStrings> mUniqueStrings;
+};
+
+class UniqueStacks final : public mozilla::FailureLatch {
+ public:
+  // Key identifying one frame. It holds either the data of a "normal" frame
+  // (location string, two flags, inner window ID and optional line, column
+  // and category pair) or the data of a JIT frame (canonical address, depth
+  // and range index).
+  struct FrameKey {
+    // Normal frame described by its location only: both flags are false, the
+    // inner window ID is 0, line and column are Nothing, and no initializer
+    // is given for the category pair.
+    explicit FrameKey(const char* aLocation)
+        : mData(NormalFrameData{nsCString(aLocation), false, false, 0,
+                                mozilla::Nothing(), mozilla::Nothing()}) {}
+
+    // Fully specified normal frame. aLocation is an rvalue reference, but as
+    // a named parameter it is an lvalue inside this constructor: it must be
+    // std::move'd explicitly, otherwise the string would be copied instead of
+    // moved into the stored data.
+    FrameKey(nsCString&& aLocation, bool aRelevantForJS, bool aBaselineInterp,
+             uint64_t aInnerWindowID, const mozilla::Maybe<unsigned>& aLine,
+             const mozilla::Maybe<unsigned>& aColumn,
+             const mozilla::Maybe<JS::ProfilingCategoryPair>& aCategoryPair)
+        : mData(NormalFrameData{std::move(aLocation), aRelevantForJS,
+                                aBaselineInterp, aInnerWindowID, aLine, aColumn,
+                                aCategoryPair}) {}
+
+    // JIT frame, identified by address, depth and range index.
+    FrameKey(void* aJITAddress, uint32_t aJITDepth, uint32_t aRangeIndex)
+        : mData(JITFrameData{aJITAddress, aJITDepth, aRangeIndex}) {}
+
+    FrameKey(const FrameKey& aToCopy) = default;
+
+    uint32_t Hash() const;
+    // Two keys are equal when their variants compare equal.
+    bool operator==(const FrameKey& aOther) const {
+      return mData == aOther.mData;
+    }
+
+    // Payload of a normal (non-JIT) frame.
+    struct NormalFrameData {
+      bool operator==(const NormalFrameData& aOther) const;
+
+      nsCString mLocation;
+      bool mRelevantForJS;
+      bool mBaselineInterp;
+      uint64_t mInnerWindowID;
+      mozilla::Maybe<unsigned> mLine;
+      mozilla::Maybe<unsigned> mColumn;
+      mozilla::Maybe<JS::ProfilingCategoryPair> mCategoryPair;
+    };
+    // Payload of a JIT frame.
+    struct JITFrameData {
+      bool operator==(const JITFrameData& aOther) const;
+
+      void* mCanonicalAddress;
+      uint32_t mDepth;
+      uint32_t mRangeIndex;
+    };
+    mozilla::Variant<NormalFrameData, JITFrameData> mData;
+  };
+
+  // Hash policy that lets FrameKey be used as a mozilla::HashMap key.
+  struct FrameKeyHasher {
+    using Lookup = FrameKey;
+
+    static mozilla::HashNumber hash(const FrameKey& aLookup) {
+      mozilla::HashNumber result = 0;
+
+      if (!aLookup.mData.is<FrameKey::NormalFrameData>()) {
+        // JIT frame: all three members always contribute to the hash.
+        const auto& jit = aLookup.mData.as<FrameKey::JITFrameData>();
+        result = mozilla::AddToHash(result, jit.mCanonicalAddress);
+        result = mozilla::AddToHash(result, jit.mDepth);
+        result = mozilla::AddToHash(result, jit.mRangeIndex);
+        return result;
+      }
+
+      // Normal frame: the location only contributes when non-empty, and the
+      // optional members only contribute when they hold a value.
+      const auto& normal = aLookup.mData.as<FrameKey::NormalFrameData>();
+      if (!normal.mLocation.IsEmpty()) {
+        result = mozilla::AddToHash(
+            result, mozilla::HashString(normal.mLocation.get()));
+      }
+      result = mozilla::AddToHash(result, normal.mRelevantForJS);
+      result = mozilla::AddToHash(result, normal.mBaselineInterp);
+      result = mozilla::AddToHash(result, normal.mInnerWindowID);
+      if (normal.mLine.isSome()) {
+        result = mozilla::AddToHash(result, *normal.mLine);
+      }
+      if (normal.mColumn.isSome()) {
+        result = mozilla::AddToHash(result, *normal.mColumn);
+      }
+      if (normal.mCategoryPair.isSome()) {
+        result = mozilla::AddToHash(
+            result, static_cast<uint32_t>(*normal.mCategoryPair));
+      }
+      return result;
+    }
+
+    static bool match(const FrameKey& aKey, const FrameKey& aLookup) {
+      return aKey.operator==(aLookup);
+    }
+
+    static void rekey(FrameKey& aKey, const FrameKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  // Key identifying a stack: an optional prefix stack index plus a frame
+  // index. The hash is computed once at construction and stored in the key.
+  struct StackKey {
+    mozilla::Maybe<uint32_t> mPrefixStackIndex;
+    uint32_t mFrameIndex;
+
+    // Stack without a prefix; the hash only depends on the frame index.
+    explicit StackKey(uint32_t aFrame)
+        : mFrameIndex(aFrame), mHash(mozilla::HashGeneric(aFrame)) {}
+
+    // Stack with a prefix; the hash chains aFrame onto the hash stored in
+    // aPrefix (aPrefixStackIndex itself is not hashed).
+    StackKey(const StackKey& aPrefix, uint32_t aPrefixStackIndex,
+             uint32_t aFrame)
+        : mPrefixStackIndex(mozilla::Some(aPrefixStackIndex)),
+          mFrameIndex(aFrame),
+          mHash(mozilla::AddToHash(aPrefix.mHash, aFrame)) {}
+
+    mozilla::HashNumber Hash() const { return mHash; }
+
+    // Equality compares both indexes; the stored hash is not compared.
+    bool operator==(const StackKey& aOther) const {
+      if (mFrameIndex != aOther.mFrameIndex) {
+        return false;
+      }
+      return mPrefixStackIndex == aOther.mPrefixStackIndex;
+    }
+
+   private:
+    mozilla::HashNumber mHash;
+  };
+
+  struct StackKeyHasher {  // Hash policy for HashMaps keyed by StackKey.
+    using Lookup = StackKey;
+
+    static mozilla::HashNumber hash(const StackKey& aLookup) {
+      return aLookup.Hash();  // Reuses the hash stored in the key.
+    }
+
+    static bool match(const StackKey& aKey, const StackKey& aLookup) {
+      return aKey == aLookup;  // Compares prefix stack index and frame index.
+    }
+
+    static void rekey(StackKey& aKey, const StackKey& aNewKey) {
+      aKey = aNewKey;  // Overwrites the stored key in place.
+    }
+  };
+
+  UniqueStacks(mozilla::FailureLatch& aFailureLatch,
+               JITFrameInfo&& aJITFrameInfo,
+               ProfilerCodeAddressService* aCodeAddressService = nullptr);
+
+  // Return a StackKey for aFrame as the stack's root frame (no prefix).
+  [[nodiscard]] mozilla::Maybe<StackKey> BeginStack(const FrameKey& aFrame);
+
+  // Return a new StackKey that is obtained by appending aFrame to aStack.
+  [[nodiscard]] mozilla::Maybe<StackKey> AppendFrame(const StackKey& aStack,
+                                                     const FrameKey& aFrame);
+
+  // Look up frame keys for the given JIT address, and ensure that our frame
+  // table has entries for the returned frame keys. The JSON for these frames
+  // is taken from mJITInfoRanges.
+  // aBufferPosition is needed in order to look up the correct JIT frame info
+  // object in mJITInfoRanges.
+  [[nodiscard]] mozilla::Maybe<mozilla::Vector<UniqueStacks::FrameKey>>
+  LookupFramesForJITAddressFromBufferPos(void* aJITAddress,
+                                         uint64_t aBufferPosition);
+
+  [[nodiscard]] mozilla::Maybe<uint32_t> GetOrAddFrameIndex(
+      const FrameKey& aFrame);
+  [[nodiscard]] mozilla::Maybe<uint32_t> GetOrAddStackIndex(
+      const StackKey& aStack);
+
+  void SpliceFrameTableElements(SpliceableJSONWriter& aWriter);
+  void SpliceStackTableElements(SpliceableJSONWriter& aWriter);
+
+  [[nodiscard]] UniqueJSONStrings& UniqueStrings() {
+    MOZ_RELEASE_ASSERT(mUniqueStrings.get());
+    return *mUniqueStrings;
+  }
+
+  // Find the function name at the given PC (if a ProfilerCodeAddressService was
+  // provided), otherwise just stringify that PC.
+  [[nodiscard]] nsAutoCString FunctionNameOrAddress(void* aPC);
+
+  FAILURELATCH_IMPL_PROXY(mFrameTableWriter)
+
+ private:
+  void StreamNonJITFrame(const FrameKey& aFrame);
+  void StreamStack(const StackKey& aStack);
+
+  mozilla::UniquePtr<UniqueJSONStrings> mUniqueStrings;
+
+  ProfilerCodeAddressService* mCodeAddressService = nullptr;
+
+  SpliceableChunkedJSONWriter mFrameTableWriter;
+  mozilla::HashMap<FrameKey, uint32_t, FrameKeyHasher> mFrameToIndexMap;
+
+  SpliceableChunkedJSONWriter mStackTableWriter;
+  mozilla::HashMap<StackKey, uint32_t, StackKeyHasher> mStackToIndexMap;
+
+  mozilla::Vector<JITFrameInfoForBufferRange> mJITInfoRanges;
+};
+
+//
+// Thread profile JSON Format
+// --------------------------
+//
+// The profile contains much duplicate information. The output JSON of the
+// profile attempts to deduplicate strings, frames, and stack prefixes, to cut
+// down on size and to increase JSON streaming speed. Deduplicated values are
+// streamed as indices into their respective tables.
+//
+// Further, arrays of objects with the same set of properties (e.g., samples,
+// frames) are output as arrays according to a schema instead of an object
+// with property names. A property that is not present is represented in the
+// array as null or undefined.
+//
+// The format of the thread profile JSON is shown by the following example
+// with 1 sample and 1 marker:
+//
+// {
+//   "name": "Foo",
+//   "tid": 42,
+//   "samples":
+//   {
+//     "schema":
+//     {
+//       "stack": 0,          /* index into stackTable */
+//       "time": 1,           /* number */
+//       "eventDelay": 2,     /* number */
+//       "ThreadCPUDelta": 3, /* optional number */
+//     },
+//     "data":
+//     [
+//       [ 1, 0.0, 0.0 ]      /* { stack: 1, time: 0.0, eventDelay: 0.0 } */
+//     ]
+//   },
+//
+//   "markers":
+//   {
+//     "schema":
+//     {
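+//       "name": 0,           /* index into stringTable */
+//       "startTime": 1,      /* number */
+//       "endTime": 2,        /* number */
+//       "phase": 3,          /* number */
+//       "category": 4,       /* index into profile.meta.categories */
+//       "data": 5            /* arbitrary JSON */
+//     },
+//     "data":
+//     [
+//       [ 3, 0.1 ]           /* { name: 'example marker', startTime: 0.1 } */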
+//     ]
+//   },
+//
+//   "stackTable":
+//   {
+//     "schema":
+//     {
+//       "prefix": 0,         /* index into stackTable */
+//       "frame": 1           /* index into frameTable */
+//     },
+//     "data":
+//     [
+//       [ null, 0 ],         /* (root) */
+//       [ 0,    1 ]          /* (root) > foo.js */
+//     ]
+//   },
+//
+//   "frameTable":
+//   {
+//     "schema":
+//     {
+//       "location": 0,       /* index into stringTable */
+//       "relevantForJS": 1,  /* bool */
+//       "innerWindowID": 2,  /* inner window ID of global JS `window` object */
+//       "implementation": 3, /* index into stringTable */
+//       "line": 4,           /* number */
+//       "column": 5,         /* number */
+//       "category": 6,       /* index into profile.meta.categories */
+//       "subcategory": 7     /* index into
+//       profile.meta.categories[category].subcategories */
+//     },
+//     "data":
+//     [
+//       [ 0 ],               /* { location: '(root)' } */
+//       [ 1, null, null, 2 ] /* { location: 'foo.js',
+//                                 implementation: 'baseline' } */
+//     ]
+//   },
+//
+//   "stringTable":
+//   [
+//     "(root)",
+//     "foo.js",
+//     "baseline",
+//     "example marker"
+//   ]
+// }
+//
+// Process:
+// {
+//   "name": "Bar",
+//   "pid": 24,
+//   "threads":
+//   [
+//     <0-N threads from above>
+//   ],
+//   "counters": /* includes the memory counter */
+//   [
+//     {
+//       "name": "qwerty",
+//       "category": "uiop",
+//       "description": "this is qwerty uiop",
+//       "sample_groups":
+//       [
+//         {
+//           "id": 42, /* number (thread id, or object identifier (tab), etc) */
+//           "samples":
+//           {
+//             "schema":
+//             {
+//               "time": 1,   /* number */
+//               "number": 2, /* number (of times the counter was touched) */
+//               "count": 3   /* number (total for the counter) */
+//             },
+//             "data":
+//             [
+//               [ 0.1, 1824,
+//                 454622 ]   /* { time: 0.1, number: 1824, count: 454622 } */
+//             ]
+//           },
+//         },
+//         /* more sample-group objects with different id's */
+//       ]
+//     },
+//     /* more counters */
+//   ],
+// }
+//
+#endif /* ndef ProfileBufferEntry_h */
diff --git a/tools/profiler/core/ProfiledThreadData.cpp b/tools/profiler/core/ProfiledThreadData.cpp
new file mode 100644
index 0000000000..febda0d85b
--- /dev/null
+++ b/tools/profiler/core/ProfiledThreadData.cpp
@@ -0,0 +1,455 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfiledThreadData.h"
+
+#include "platform.h"
+#include "ProfileBuffer.h"
+
+#include "mozilla/OriginAttributes.h"
+#include "mozilla/Span.h"
+#include "nsXULAppAPI.h"
+
+#if defined(GP_OS_darwin)
+#  include <pthread.h>
+#endif
+
+using namespace mozilla::literals::ProportionValue_literals;
+
+ProfiledThreadData::ProfiledThreadData(
+    const mozilla::profiler::ThreadRegistrationInfo& aThreadInfo)
+    : mThreadInfo(aThreadInfo.Name(), aThreadInfo.ThreadId(),
+                  aThreadInfo.IsMainThread(), aThreadInfo.RegisterTime()) {
+  MOZ_COUNT_CTOR(ProfiledThreadData);
+}
+
+ProfiledThreadData::ProfiledThreadData(
+    mozilla::profiler::ThreadRegistrationInfo&& aThreadInfo)
+    : mThreadInfo(std::move(aThreadInfo)) {
+  MOZ_COUNT_CTOR(ProfiledThreadData);
+}
+
+ProfiledThreadData::~ProfiledThreadData() {
+  MOZ_COUNT_DTOR(ProfiledThreadData);
+}
+
+// Writes the "stackTable", "frameTable" and "stringTable" properties to
+// aWriter, splicing in the data accumulated in aUniqueStacks and reporting
+// progress through aProgressLogger. aCx and aProcessStartTime are not used in
+// this function body.
+static void StreamTables(UniqueStacks&& aUniqueStacks, JSContext* aCx,
+                         SpliceableJSONWriter& aWriter,
+                         const mozilla::TimeStamp& aProcessStartTime,
+                         mozilla::ProgressLogger aProgressLogger) {
+  // "stackTable": schema, then the spliced stack data.
+  aWriter.StartObjectProperty("stackTable");
+  {
+    // The schema writer lives in its own scope, so it is destroyed before the
+    // "data" array is started.
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("prefix");
+    schema.WriteField("frame");
+  }
+  aWriter.StartArrayProperty("data");
+  aProgressLogger.SetLocalProgress(1_pc, "Splicing stack table...");
+  aUniqueStacks.SpliceStackTableElements(aWriter);
+  aProgressLogger.SetLocalProgress(30_pc, "Spliced stack table");
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  // "frameTable": schema, then the spliced frame data.
+  aWriter.StartObjectProperty("frameTable");
+  {
+    // Same scoping as above for the schema writer.
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("location");
+    schema.WriteField("relevantForJS");
+    schema.WriteField("innerWindowID");
+    schema.WriteField("implementation");
+    schema.WriteField("line");
+    schema.WriteField("column");
+    schema.WriteField("category");
+    schema.WriteField("subcategory");
+  }
+  aWriter.StartArrayProperty("data");
+  aProgressLogger.SetLocalProgress(30_pc, "Splicing frame table...");
+  aUniqueStacks.SpliceFrameTableElements(aWriter);
+  aProgressLogger.SetLocalProgress(60_pc, "Spliced frame table");
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  // "stringTable": the unique strings are spliced from an rvalue, i.e., they
+  // are handed over to the writer.
+  aWriter.StartArrayProperty("stringTable");
+  aProgressLogger.SetLocalProgress(60_pc, "Splicing string table...");
+  std::move(aUniqueStacks.UniqueStrings()).SpliceStringTableElements(aWriter);
+  aProgressLogger.SetLocalProgress(90_pc, "Spliced string table");
+  aWriter.EndArray();
+}
+
+// Builds the UniqueStacks object used to stream this thread. It is seeded
+// with the JIT frame info kept from previous JSContexts (if any, and not
+// expired), plus the JIT info of the current context aCx (if any).
+mozilla::NotNull<mozilla::UniquePtr<UniqueStacks>>
+ProfiledThreadData::PrepareUniqueStacks(
+    const ProfileBuffer& aBuffer, JSContext* aCx,
+    mozilla::FailureLatch& aFailureLatch, ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger) {
+  // Forget the JIT frame info of previous contexts once it has expired
+  // relative to the current start of the buffer.
+  if (mJITFrameInfoForPreviousJSContexts) {
+    if (mJITFrameInfoForPreviousJSContexts->HasExpired(
+            aBuffer.BufferRangeStart())) {
+      mJITFrameInfoForPreviousJSContexts = nullptr;
+    }
+  }
+  aProgressLogger.SetLocalProgress(1_pc, "Checked JIT frame info presence");
+
+  // Start from a copy of the data in mJITFrameInfoForPreviousJSContexts when
+  // there is one, otherwise from an empty JITFrameInfo.
+  JITFrameInfo jitFrameInfo =
+      mJITFrameInfoForPreviousJSContexts
+          ? JITFrameInfo(*mJITFrameInfoForPreviousJSContexts,
+                         aProgressLogger.CreateSubLoggerTo(
+                             "Retrieving JIT frame info...", 10_pc,
+                             "Retrieved JIT frame info"))
+          : JITFrameInfo();
+
+  if (!aCx || !mBufferPositionWhenReceivedJSContext) {
+    // No context, or no recorded buffer position for it: nothing to add.
+    aProgressLogger.SetLocalProgress(90_pc, "No JIT info");
+  } else {
+    aBuffer.AddJITInfoForRange(
+        *mBufferPositionWhenReceivedJSContext, mThreadInfo.ThreadId(), aCx,
+        jitFrameInfo,
+        aProgressLogger.CreateSubLoggerTo("Adding JIT info...", 90_pc,
+                                          "Added JIT info"));
+  }
+
+  return mozilla::MakeNotNull<mozilla::UniquePtr<UniqueStacks>>(
+      aFailureLatch, std::move(jitFrameInfo), aService);
+}
+
+// Streams this thread to aWriter straight from aBuffer: prepares the unique
+// stacks, installs their string table in the writer for the duration of the
+// streaming, then writes samples, markers and tables.
+void ProfiledThreadData::StreamJSON(
+    const ProfileBuffer& aBuffer, JSContext* aCx, SpliceableJSONWriter& aWriter,
+    const nsACString& aProcessName, const nsACString& aETLDplus1,
+    const mozilla::TimeStamp& aProcessStartTime, double aSinceTime,
+    ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger) {
+  auto stacks =
+      PrepareUniqueStacks(aBuffer, aCx, aWriter.SourceFailureLatch(), aService,
+                          aProgressLogger.CreateSubLoggerFromTo(
+                              0_pc, "Preparing unique stacks...", 10_pc,
+                              "Prepared Unique stacks"));
+
+  // The writer uses the stacks' string table until ResetUniqueStrings() below.
+  aWriter.SetUniqueStrings(stacks->UniqueStrings());
+
+  aWriter.Start();
+
+  StreamSamplesAndMarkers(
+      mThreadInfo.Name(), mThreadInfo.ThreadId(), aBuffer, aWriter,
+      aProcessName, aETLDplus1, aProcessStartTime, mThreadInfo.RegisterTime(),
+      mUnregisterTime, aSinceTime, *stacks,
+      aProgressLogger.CreateSubLoggerTo(
+          90_pc,
+          "ProfiledThreadData::StreamJSON: Streamed samples and markers"));
+
+  // The tables are written last; the stacks are passed as an rvalue.
+  StreamTables(std::move(*stacks), aCx, aWriter, aProcessStartTime,
+               aProgressLogger.CreateSubLoggerTo(
+                   99_pc, "Streamed tables and trace logger"));
+
+  aWriter.End();
+
+  aWriter.ResetUniqueStrings();
+}
+
+// Streams this thread to aWriter from an already-populated
+// ThreadStreamingContext: writes its samples and markers, then the tables
+// built from its unique stacks.
+void ProfiledThreadData::StreamJSON(
+    ThreadStreamingContext&& aThreadStreamingContext,
+    SpliceableJSONWriter& aWriter, const nsACString& aProcessName,
+    const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime,
+    ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger) {
+  aWriter.Start();
+
+  StreamSamplesAndMarkers(
+      mThreadInfo.Name(), aThreadStreamingContext, aWriter, aProcessName,
+      aETLDplus1, aProcessStartTime, mThreadInfo.RegisterTime(),
+      mUnregisterTime,
+      aProgressLogger.CreateSubLoggerFromTo(
+          1_pc, "ProfiledThreadData::StreamJSON(context): Streaming...", 90_pc,
+          "ProfiledThreadData::StreamJSON(context): Streamed samples and "
+          "markers"));
+
+  // The context's unique stacks are passed as an rvalue to StreamTables.
+  StreamTables(
+      std::move(*aThreadStreamingContext.mUniqueStacks),
+      aThreadStreamingContext.mJSContext, aWriter, aProcessStartTime,
+      aProgressLogger.CreateSubLoggerTo(
+          "ProfiledThreadData::StreamJSON(context): Streaming tables...",
+          99_pc, "ProfiledThreadData::StreamJSON(context): Streamed tables"));
+
+  aWriter.End();
+}
+
+// Common implementation of the StreamSamplesAndMarkers overloads: writes the
+// thread's header properties, the "samples" and "markers" objects (schema
+// written here, data written by the callbacks), and finally "pid" and "tid".
+// StreamSamplesDataCallback: (ProgressLogger) -> ProfilerThreadId
+// StreamMarkersDataCallback: (ProgressLogger) -> void
+// The return value is the ProfilerThreadId given back by
+// StreamSamplesDataCallback, which should be the thread id of the last sample
+// that was processed (if any; otherwise it is left unspecified). This is
+// mostly useful when the caller doesn't know where the sample comes from,
+// e.g., when it's a backtrace in a marker.
+template <typename StreamSamplesDataCallback,
+          typename StreamMarkersDataCallback>
+ProfilerThreadId DoStreamSamplesAndMarkers(
+    const char* aName, SpliceableJSONWriter& aWriter,
+    const nsACString& aProcessName, const nsACString& aETLDplus1,
+    const mozilla::TimeStamp& aProcessStartTime,
+    const mozilla::TimeStamp& aRegisterTime,
+    const mozilla::TimeStamp& aUnregisterTime,
+    mozilla::ProgressLogger aProgressLogger,
+    StreamSamplesDataCallback&& aStreamSamplesDataCallback,
+    StreamMarkersDataCallback&& aStreamMarkersDataCallback) {
+  ProfilerThreadId processedThreadId;
+
+  aWriter.StringProperty("processType",
+                         mozilla::MakeStringSpan(XRE_GetProcessTypeString()));
+
+  aWriter.StringProperty("name", mozilla::MakeStringSpan(aName));
+
+  // The parent process always gets a fixed name; other processes use the
+  // given process name, if there is one.
+  if (XRE_IsParentProcess()) {
+    aWriter.StringProperty("processName", "Parent Process");
+  } else if (!aProcessName.IsEmpty()) {
+    aWriter.StringProperty("processName", aProcessName);
+  }
+
+  if (!aETLDplus1.IsEmpty()) {
+    nsAutoCString originNoSuffix;
+    mozilla::OriginAttributes attrs;
+    if (attrs.PopulateFromOrigin(aETLDplus1, originNoSuffix)) {
+      // Origin attributes were extracted: output the origin without its
+      // suffix, along with the attributes of interest.
+      aWriter.StringProperty("eTLD+1", originNoSuffix);
+      aWriter.BoolProperty("isPrivateBrowsing", attrs.mPrivateBrowsingId > 0);
+      aWriter.IntProperty("userContextId", attrs.mUserContextId);
+    } else {
+      // Otherwise output the given string as is.
+      aWriter.StringProperty("eTLD+1", aETLDplus1);
+    }
+  }
+
+  // Register/unregister times are relative to the process start time, in
+  // milliseconds; a null timestamp is output as a JSON null.
+  if (!aRegisterTime) {
+    aWriter.NullProperty("registerTime");
+  } else {
+    aWriter.DoubleProperty(
+        "registerTime", (aRegisterTime - aProcessStartTime).ToMilliseconds());
+  }
+
+  if (!aUnregisterTime) {
+    aWriter.NullProperty("unregisterTime");
+  } else {
+    aWriter.DoubleProperty(
+        "unregisterTime",
+        (aUnregisterTime - aProcessStartTime).ToMilliseconds());
+  }
+
+  // "samples": schema, then the data streamed by the samples callback.
+  aWriter.StartObjectProperty("samples");
+  {
+    // The schema writer lives in its own scope, so it is destroyed before the
+    // "data" array is started.
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("stack");
+    schema.WriteField("time");
+    schema.WriteField("eventDelay");
+#define RUNNING_TIME_FIELD(index, name, unit, jsonProperty) \
+  schema.WriteField(#jsonProperty);
+    PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_FIELD)
+#undef RUNNING_TIME_FIELD
+  }
+  aWriter.StartArrayProperty("data");
+  processedThreadId = std::forward<StreamSamplesDataCallback>(
+      aStreamSamplesDataCallback)(aProgressLogger.CreateSubLoggerFromTo(
+      1_pc, "Streaming samples...", 49_pc, "Streamed samples"));
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  // "markers": schema, then the data streamed by the markers callback.
+  aWriter.StartObjectProperty("markers");
+  {
+    // Same scoping as above for the schema writer.
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("name");
+    schema.WriteField("startTime");
+    schema.WriteField("endTime");
+    schema.WriteField("phase");
+    schema.WriteField("category");
+    schema.WriteField("data");
+  }
+  aWriter.StartArrayProperty("data");
+  std::forward<StreamMarkersDataCallback>(aStreamMarkersDataCallback)(
+      aProgressLogger.CreateSubLoggerFromTo(50_pc, "Streaming markers...",
+                                            99_pc, "Streamed markers"));
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  // Tech note: If `ToNumber()` returns a uint64_t, the conversion to int64_t is
+  // "implementation-defined" before C++20. This is acceptable here, because
+  // this is a one-way conversion to a unique identifier that's used to visually
+  // separate data by thread on the front-end.
+  aWriter.IntProperty(
+      "pid", static_cast<int64_t>(profiler_current_process_id().ToNumber()));
+  aWriter.IntProperty("tid",
+                      static_cast<int64_t>(processedThreadId.ToNumber()));
+
+  return processedThreadId;
+}
+
+// Streams the samples and markers of aThreadId directly from aBuffer.
+// Returns aThreadId when it is specified, otherwise the thread id given back
+// by the sample streaming.
+ProfilerThreadId StreamSamplesAndMarkers(
+    const char* aName, ProfilerThreadId aThreadId, const ProfileBuffer& aBuffer,
+    SpliceableJSONWriter& aWriter, const nsACString& aProcessName,
+    const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime,
+    const mozilla::TimeStamp& aRegisterTime,
+    const mozilla::TimeStamp& aUnregisterTime, double aSinceTime,
+    UniqueStacks& aUniqueStacks, mozilla::ProgressLogger aProgressLogger) {
+  // Samples callback: stream from the buffer, then pick the thread id to
+  // report.
+  auto streamSamples = [&](mozilla::ProgressLogger aSubProgressLogger) {
+    ProfilerThreadId processedThreadId = aBuffer.StreamSamplesToJSON(
+        aWriter, aThreadId, aSinceTime, aUniqueStacks,
+        std::move(aSubProgressLogger));
+    if (aThreadId.IsSpecified()) {
+      return aThreadId;
+    }
+    return processedThreadId;
+  };
+
+  // Markers callback: stream from the buffer.
+  auto streamMarkers = [&](mozilla::ProgressLogger aSubProgressLogger) {
+    aBuffer.StreamMarkersToJSON(aWriter, aThreadId, aProcessStartTime,
+                                aSinceTime, aUniqueStacks,
+                                std::move(aSubProgressLogger));
+  };
+
+  return DoStreamSamplesAndMarkers(
+      aName, aWriter, aProcessName, aETLDplus1, aProcessStartTime,
+      aRegisterTime, aUnregisterTime, std::move(aProgressLogger),
+      streamSamples, streamMarkers);
+}
+
+// Streams the samples and markers that were already written into
+// aThreadData's own writers, by splicing them into aWriter. The thread id
+// returned by the shared implementation is intentionally ignored.
+void StreamSamplesAndMarkers(const char* aName,
+                             ThreadStreamingContext& aThreadData,
+                             SpliceableJSONWriter& aWriter,
+                             const nsACString& aProcessName,
+                             const nsACString& aETLDplus1,
+                             const mozilla::TimeStamp& aProcessStartTime,
+                             const mozilla::TimeStamp& aRegisterTime,
+                             const mozilla::TimeStamp& aUnregisterTime,
+                             mozilla::ProgressLogger aProgressLogger) {
+  // Samples callback: splice the pre-streamed samples, and report the thread
+  // id of the profiled thread. The sub-logger is not used.
+  auto spliceSamples = [&](mozilla::ProgressLogger aSubProgressLogger) {
+    aWriter.TakeAndSplice(
+        aThreadData.mSamplesDataWriter.TakeChunkedWriteFunc());
+    return aThreadData.mProfiledThreadData.Info().ThreadId();
+  };
+
+  // Markers callback: splice the pre-streamed markers. The sub-logger is not
+  // used.
+  auto spliceMarkers = [&](mozilla::ProgressLogger aSubProgressLogger) {
+    aWriter.TakeAndSplice(
+        aThreadData.mMarkersDataWriter.TakeChunkedWriteFunc());
+  };
+
+  (void)DoStreamSamplesAndMarkers(
+      aName, aWriter, aProcessName, aETLDplus1, aProcessStartTime,
+      aRegisterTime, aUnregisterTime, std::move(aProgressLogger),
+      spliceSamples, spliceMarkers);
+}
+
+void ProfiledThreadData::NotifyAboutToLoseJSContext(
+    JSContext* aContext, const mozilla::TimeStamp& aProcessStartTime,
+    ProfileBuffer& aBuffer) {
+  // Nothing to do unless a JSContext-received position is currently recorded.
+  if (!mBufferPositionWhenReceivedJSContext) {
+    return;
+  }
+  MOZ_RELEASE_ASSERT(aContext);
+
+  // Take over any previously-gathered JIT frame info, dropping it if expired.
+  mozilla::UniquePtr<JITFrameInfo> frameInfo =
+      std::move(mJITFrameInfoForPreviousJSContexts);
+  if (frameInfo && frameInfo->HasExpired(aBuffer.BufferRangeStart())) {
+    frameInfo = nullptr;
+  }
+  if (!frameInfo) {
+    frameInfo = mozilla::MakeUnique<JITFrameInfo>();
+  }
+
+  // Add JIT info for the range given by the JSContext-received position.
+  aBuffer.AddJITInfoForRange(*mBufferPositionWhenReceivedJSContext,
+                             mThreadInfo.ThreadId(), aContext, *frameInfo,
+                             mozilla::ProgressLogger{});
+
+  mJITFrameInfoForPreviousJSContexts = std::move(frameInfo);
+  mBufferPositionWhenReceivedJSContext = mozilla::Nothing();
+}
+
+ThreadStreamingContext::ThreadStreamingContext(
+    ProfiledThreadData& aProfiledThreadData, const ProfileBuffer& aBuffer,
+    JSContext* aCx, mozilla::FailureLatch& aFailureLatch,
+    ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger)
+    : mProfiledThreadData(aProfiledThreadData),
+      mJSContext(aCx),
+      mSamplesDataWriter(aFailureLatch),
+      mMarkersDataWriter(aFailureLatch),
+      mUniqueStacks(mProfiledThreadData.PrepareUniqueStacks(
+          aBuffer, aCx, aFailureLatch, aService,
+          aProgressLogger.CreateSubLoggerFromTo(
+              0_pc, "Preparing thread streaming context unique stacks...",
+              99_pc, "Prepared thread streaming context Unique stacks"))) {
+  // Only hook up and open the two writers if nothing has failed so far.
+  if (!aFailureLatch.Failed()) {
+    mSamplesDataWriter.SetUniqueStrings(mUniqueStacks->UniqueStrings());
+    mSamplesDataWriter.StartBareList();
+    mMarkersDataWriter.SetUniqueStrings(mUniqueStacks->UniqueStrings());
+    mMarkersDataWriter.StartBareList();
+  }
+}
+
+void ThreadStreamingContext::FinalizeWriter() {
+  mSamplesDataWriter.EndBareList();  // Matches StartBareList() in the ctor.
+  mMarkersDataWriter.EndBareList();  // Matches StartBareList() in the ctor.
+}
+
+ProcessStreamingContext::ProcessStreamingContext(
+    size_t aThreadCount, mozilla::FailureLatch& aFailureLatch,
+    const mozilla::TimeStamp& aProcessStartTime, double aSinceTime)
+    : mFailureLatch(aFailureLatch),
+      mProcessStartTime(aProcessStartTime),
+      mSinceTime(aSinceTime) {
+  // Nothing to set up if a failure was already recorded.
+  if (mFailureLatch.Failed()) {
+    return;
+  }
+  // Reserve both lists up front; on OOM, record the failure in the latch.
+  if (!mTIDList.initCapacity(aThreadCount)) {
+    mFailureLatch.SetFailure(
+        "OOM in ProcessStreamingContext allocating TID list");
+  } else if (!mThreadStreamingContextList.initCapacity(aThreadCount)) {
+    mFailureLatch.SetFailure(
+        "OOM in ProcessStreamingContext allocating context list");
+    // Also clear the TID list when the context list can't be allocated.
+    mTIDList.clear();
+  }
+}
+
+ProcessStreamingContext::~ProcessStreamingContext() {
+  // The bookkeeping checks only make sense when no failure was recorded.
+  if (!mFailureLatch.Failed()) {
+    MOZ_ASSERT(mTIDList.length() == mThreadStreamingContextList.length());
+    MOZ_ASSERT(mTIDList.length() == mTIDList.capacity(),
+               "Didn't pre-allocate exactly right");
+  }
+}
+
+void ProcessStreamingContext::AddThreadStreamingContext(
+    ProfiledThreadData& aProfiledThreadData, const ProfileBuffer& aBuffer,
+    JSContext* aCx, ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger) {
+  // Both lists grow in lockstep, within the capacity reserved at construction.
+  if (!mFailureLatch.Failed()) {
+    MOZ_ASSERT(mTIDList.length() == mThreadStreamingContextList.length());
+    MOZ_ASSERT(mTIDList.length() < mTIDList.capacity(),
+               "Didn't pre-allocate enough");
+    mTIDList.infallibleAppend(aProfiledThreadData.Info().ThreadId());
+    mThreadStreamingContextList.infallibleEmplaceBack(
+        aProfiledThreadData, aBuffer, aCx, mFailureLatch, aService,
+        aProgressLogger.CreateSubLoggerFromTo(
+            1_pc, "Prepared streaming thread id", 100_pc,
+            "Added thread streaming context"));
+  }
+}
diff --git a/tools/profiler/core/ProfiledThreadData.h b/tools/profiler/core/ProfiledThreadData.h
new file mode 100644
index 0000000000..47ae0c579c
--- /dev/null
+++ b/tools/profiler/core/ProfiledThreadData.h
@@ -0,0 +1,250 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfiledThreadData_h
+#define ProfiledThreadData_h
+
+#include "platform.h"
+#include "ProfileBuffer.h"
+#include "ProfileBufferEntry.h"
+
+#include "mozilla/FailureLatch.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/ProfileJSONWriter.h"
+#include "mozilla/ProfilerThreadRegistrationInfo.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "nsStringFwd.h"
+
+class nsIEventTarget;
+class ProfilerCodeAddressService;
+struct JSContext;
+struct ThreadStreamingContext;
+
+// This class contains information about a thread that is only relevant while
+// the profiler is running, for any threads (both alive and dead) whose thread
+// name matches the "thread filter" in the current profiler run.
+// ProfiledThreadData objects may be kept alive even after the thread is
+// unregistered, as long as there is still data for that thread in the profiler
+// buffer.
+//
+// Accesses to this class are protected by the profiler state lock.
+//
+// Created as soon as the following are true for the thread:
+//  - The profiler is running, and
+//  - the thread matches the profiler's thread filter, and
+//  - the thread is registered with the profiler.
+// So it gets created in response to either (1) the profiler being started (for
+// an existing registered thread) or (2) the thread being registered (if the
+// profiler is already running).
+//
+// The thread may be unregistered during the lifetime of ProfiledThreadData.
+// If that happens, NotifyUnregistered() is called.
+//
+// This class is the right place to store buffer positions. Profiler buffer
+// positions become invalid if the profiler buffer is destroyed, which happens
+// when the profiler is stopped.
+class ProfiledThreadData final {
+ public:
+  explicit ProfiledThreadData(
+      const mozilla::profiler::ThreadRegistrationInfo& aThreadInfo);
+  explicit ProfiledThreadData(
+      mozilla::profiler::ThreadRegistrationInfo&& aThreadInfo);
+  ~ProfiledThreadData();
+  // Records unregistration time and buffer position; resets live-only state.
+  void NotifyUnregistered(uint64_t aBufferPosition) {
+    mLastSample = mozilla::Nothing();
+    MOZ_ASSERT(!mBufferPositionWhenReceivedJSContext,
+               "JSContext should have been cleared before the thread was "
+               "unregistered");
+    mUnregisterTime = mozilla::TimeStamp::Now();
+    mBufferPositionWhenUnregistered = mozilla::Some(aBufferPosition);
+    mPreviousThreadRunningTimes.Clear();
+  }
+  mozilla::Maybe<uint64_t> BufferPositionWhenUnregistered() {
+    return mBufferPositionWhenUnregistered;
+  }
+  // Mutable access to the buffer position of this thread's most recent sample.
+  mozilla::Maybe<uint64_t>& LastSample() { return mLastSample; }
+
+  mozilla::NotNull<mozilla::UniquePtr<UniqueStacks>> PrepareUniqueStacks(
+      const ProfileBuffer& aBuffer, JSContext* aCx,
+      mozilla::FailureLatch& aFailureLatch,
+      ProfilerCodeAddressService* aService,
+      mozilla::ProgressLogger aProgressLogger);
+
+  void StreamJSON(const ProfileBuffer& aBuffer, JSContext* aCx,
+                  SpliceableJSONWriter& aWriter, const nsACString& aProcessName,
+                  const nsACString& aETLDplus1,
+                  const mozilla::TimeStamp& aProcessStartTime,
+                  double aSinceTime, ProfilerCodeAddressService* aService,
+                  mozilla::ProgressLogger aProgressLogger);
+  void StreamJSON(ThreadStreamingContext&& aThreadStreamingContext,
+                  SpliceableJSONWriter& aWriter, const nsACString& aProcessName,
+                  const nsACString& aETLDplus1,
+                  const mozilla::TimeStamp& aProcessStartTime,
+                  ProfilerCodeAddressService* aService,
+                  mozilla::ProgressLogger aProgressLogger);
+
+  const mozilla::profiler::ThreadRegistrationInfo& Info() const {
+    return mThreadInfo;
+  }
+  // Remembers the buffer position at which this thread received a JSContext.
+  void NotifyReceivedJSContext(uint64_t aCurrentBufferPosition) {
+    mBufferPositionWhenReceivedJSContext =
+        mozilla::Some(aCurrentBufferPosition);
+  }
+
+  // Call this method when the JS entries inside the buffer are about to
+  // become invalid, i.e., just before JS shutdown.
+  void NotifyAboutToLoseJSContext(JSContext* aCx,
+                                  const mozilla::TimeStamp& aProcessStartTime,
+                                  ProfileBuffer& aBuffer);
+  // Mutable access to the RunningTimes recorded at the previous sample.
+  RunningTimes& PreviousThreadRunningTimesRef() {
+    return mPreviousThreadRunningTimes;
+  }
+
+ private:
+  // Group A:
+  // The following fields are interesting for the entire lifetime of a
+  // ProfiledThreadData object.
+
+  // This thread's thread info. Local copy because the one in ThreadRegistration
+  // may be destroyed while ProfiledThreadData stays alive.
+  const mozilla::profiler::ThreadRegistrationInfo mThreadInfo;
+
+  // Contains JSON for JIT frames from any JSContexts that were used for this
+  // thread in the past.
+  // Null if this thread has never lost a JSContext or if all samples from
+  // previous JSContexts have been evicted from the profiler buffer.
+  mozilla::UniquePtr<JITFrameInfo> mJITFrameInfoForPreviousJSContexts;
+
+  // Group B:
+  // The following fields are only used while this thread is alive and
+  // registered. They become Nothing() or empty once the thread is unregistered.
+
+  // When sampling, this holds the position in ActivePS::mBuffer of the most
+  // recent sample for this thread, or Nothing() if there is no sample for this
+  // thread in the buffer.
+  mozilla::Maybe<uint64_t> mLastSample;
+
+  // Only non-Nothing() if the thread currently has a JSContext.
+  mozilla::Maybe<uint64_t> mBufferPositionWhenReceivedJSContext;
+
+  // RunningTimes at the previous sample if any, or empty.
+  RunningTimes mPreviousThreadRunningTimes;
+
+  // Group C:
+  // The following fields are only used once this thread has been unregistered.
+
+  mozilla::Maybe<uint64_t> mBufferPositionWhenUnregistered;
+  mozilla::TimeStamp mUnregisterTime;
+};
+
+// This class will be used when outputting the profile data for one thread.
+struct ThreadStreamingContext {
+  ProfiledThreadData& mProfiledThreadData;
+  JSContext* mJSContext;
+  SpliceableChunkedJSONWriter mSamplesDataWriter;
+  SpliceableChunkedJSONWriter mMarkersDataWriter;
+  mozilla::NotNull<mozilla::UniquePtr<UniqueStacks>> mUniqueStacks;
+
+  // These are updated when writing samples, and reused for "same-sample"s.
+  enum PreviousStackState { eNoStackYet, eStackWasNotEmpty, eStackWasEmpty };
+  PreviousStackState mPreviousStackState = eNoStackYet;
+  uint32_t mPreviousStack = 0;
+  // Prepares mUniqueStacks; starts both bare lists unless already failed.
+  ThreadStreamingContext(ProfiledThreadData& aProfiledThreadData,
+                         const ProfileBuffer& aBuffer, JSContext* aCx,
+                         mozilla::FailureLatch& aFailureLatch,
+                         ProfilerCodeAddressService* aService,
+                         mozilla::ProgressLogger aProgressLogger);
+  // Ends the bare lists of the samples and markers writers.
+  void FinalizeWriter();
+};
+
+// This class will be used when outputting the profile data for all threads.
+class ProcessStreamingContext final : public mozilla::FailureLatch {
+ public:
+  // Reserves room for exactly `aThreadCount` thread streaming contexts.
+  ProcessStreamingContext(size_t aThreadCount,
+                          mozilla::FailureLatch& aFailureLatch,
+                          const mozilla::TimeStamp& aProcessStartTime,
+                          double aSinceTime);
+
+  ~ProcessStreamingContext();
+
+  // Appends the streaming context for one profiled thread. Expected to be
+  // called once per thread counted in `aThreadCount` at construction.
+  void AddThreadStreamingContext(ProfiledThreadData& aProfiledThreadData,
+                                 const ProfileBuffer& aBuffer, JSContext* aCx,
+                                 ProfilerCodeAddressService* aService,
+                                 mozilla::ProgressLogger aProgressLogger);
+
+  // Looks up the ThreadStreamingContext added for `aThreadId`; null when no
+  // profiled thread was added with that id.
+  ThreadStreamingContext* GetThreadStreamingContext(
+      const ProfilerThreadId& aThreadId) {
+    for (size_t index = 0, count = mTIDList.length(); index < count; ++index) {
+      if (mTIDList[index] == aThreadId) {
+        return &mThreadStreamingContextList[index];
+      }
+    }
+    return nullptr;
+  }
+
+  const mozilla::TimeStamp& ProcessStartTime() const {
+    return mProcessStartTime;
+  }
+
+  double GetSinceTime() const { return mSinceTime; }
+
+  ThreadStreamingContext* begin() {
+    return mThreadStreamingContextList.begin();
+  }
+  ThreadStreamingContext* end() { return mThreadStreamingContextList.end(); }
+
+  FAILURELATCH_IMPL_PROXY(mFailureLatch)
+
+ private:
+  // Thread ids are kept in their own list: a linear search over these small
+  // items is much faster than over the bigger ThreadStreamingContext objects.
+  mozilla::Vector<ProfilerThreadId> mTIDList;
+  // mThreadStreamingContextList[i] belongs to the thread id mTIDList[i].
+  mozilla::Vector<ThreadStreamingContext> mThreadStreamingContextList;
+
+  mozilla::FailureLatch& mFailureLatch;
+
+  const mozilla::TimeStamp mProcessStartTime;
+
+  const double mSinceTime;
+};
+
+// Stream all samples and markers from aBuffer with the given aThreadId (or an
+// unspecified id for everything, which is assumed to be a single backtrace
+// sample). Returns aThreadId if specified, else the id found while streaming.
+ProfilerThreadId StreamSamplesAndMarkers(
+    const char* aName, ProfilerThreadId aThreadId, const ProfileBuffer& aBuffer,
+    SpliceableJSONWriter& aWriter, const nsACString& aProcessName,
+    const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime,
+    const mozilla::TimeStamp& aRegisterTime,
+    const mozilla::TimeStamp& aUnregisterTime, double aSinceTime,
+    UniqueStacks& aUniqueStacks, mozilla::ProgressLogger aProgressLogger);
+void StreamSamplesAndMarkers(const char* aName,
+                             ThreadStreamingContext& aThreadData,
+                             SpliceableJSONWriter& aWriter,
+                             const nsACString& aProcessName,
+                             const nsACString& aETLDplus1,
+                             const mozilla::TimeStamp& aProcessStartTime,
+                             const mozilla::TimeStamp& aRegisterTime,
+                             const mozilla::TimeStamp& aUnregisterTime,
+                             mozilla::ProgressLogger aProgressLogger);
+
+#endif  // ProfiledThreadData_h
diff --git a/tools/profiler/core/ProfilerBacktrace.cpp b/tools/profiler/core/ProfilerBacktrace.cpp
new file mode 100644
index 0000000000..a264d85d64
--- /dev/null
+++ b/tools/profiler/core/ProfilerBacktrace.cpp
@@ -0,0 +1,101 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerBacktrace.h"
+
+#include "ProfileBuffer.h"
+#include "ProfiledThreadData.h"
+
+#include "mozilla/ProfileJSONWriter.h"
+
+ProfilerBacktrace::ProfilerBacktrace(
+    const char* aName,
+    mozilla::UniquePtr<mozilla::ProfileChunkedBuffer>
+        aProfileChunkedBufferStorage,
+    mozilla::UniquePtr<ProfileBuffer>
+        aProfileBufferStorageOrNull /* = nullptr */)
+    : mName(aName),
+      mOptionalProfileChunkedBufferStorage(
+          std::move(aProfileChunkedBufferStorage)),
+      mProfileChunkedBuffer(mOptionalProfileChunkedBufferStorage.get()),
+      mOptionalProfileBufferStorage(std::move(aProfileBufferStorageOrNull)),
+      mProfileBuffer(mOptionalProfileBufferStorage.get()) {
+  MOZ_COUNT_CTOR(ProfilerBacktrace);
+  if (mProfileBuffer) {  // An owned ProfileBuffer needs its chunked buffer too.
+    MOZ_RELEASE_ASSERT(mProfileChunkedBuffer,
+                       "If we take ownership of a ProfileBuffer, we must also "
+                       "receive ownership of a ProfileChunkedBuffer");
+    MOZ_RELEASE_ASSERT(
+        mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+        "If we take ownership of a ProfileBuffer, we must also receive "
+        "ownership of its ProfileChunkedBuffer");
+  }
+  MOZ_ASSERT(
+      !mProfileChunkedBuffer || !mProfileChunkedBuffer->IsThreadSafe(),
+      "ProfilerBacktrace only takes a non-thread-safe ProfileChunkedBuffer");
+}
+
+ProfilerBacktrace::ProfilerBacktrace(
+    const char* aName,
+    mozilla::ProfileChunkedBuffer* aExternalProfileChunkedBuffer,
+    ProfileBuffer* aExternalProfileBuffer)
+    : mName(aName),
+      mProfileChunkedBuffer(aExternalProfileChunkedBuffer),
+      mProfileBuffer(aExternalProfileBuffer) {
+  MOZ_COUNT_CTOR(ProfilerBacktrace);
+  if (mProfileChunkedBuffer) {
+    // A chunked buffer was given; if there is a ProfileBuffer too, it must
+    // already be using that same chunked buffer.
+    if (mProfileBuffer) {
+      MOZ_RELEASE_ASSERT(
+          mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+          "If we reference both ProfileChunkedBuffer and ProfileBuffer, they "
+          "must already be connected");
+    }
+    MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(),
+               "ProfilerBacktrace only takes a non-thread-safe "
+               "ProfileChunkedBuffer");
+  } else if (mProfileBuffer) {
+    // Only a ProfileBuffer was given: adopt its underlying chunked buffer.
+    mProfileChunkedBuffer = &mProfileBuffer->UnderlyingChunkedBuffer();
+    MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(),
+               "ProfilerBacktrace only takes a non-thread-safe "
+               "ProfileChunkedBuffer");
+  }
+}
+
+// Pairs with the MOZ_COUNT_CTOR in each constructor.
+ProfilerBacktrace::~ProfilerBacktrace() { MOZ_COUNT_DTOR(ProfilerBacktrace); }
+
+ProfilerThreadId ProfilerBacktrace::StreamJSON(
+    SpliceableJSONWriter& aWriter, const mozilla::TimeStamp& aProcessStartTime,
+    UniqueStacks& aUniqueStacks) {
+  // Unlike ProfiledThreadData::StreamJSON, we don't need to call
+  // ProfileBuffer::AddJITInfoForRange because ProfileBuffer does not contain
+  // any JitReturnAddr entries. For synchronous samples, JIT frames get expanded
+  // at sample time.
+  //
+  // Both buffer cases below stream with these same arguments.
+  const auto streamFrom = [&](const ProfileBuffer& aSource) {
+    return StreamSamplesAndMarkers(
+        mName.c_str(), ProfilerThreadId{}, aSource, aWriter, ""_ns, ""_ns,
+        aProcessStartTime,
+        /* aRegisterTime */ mozilla::TimeStamp(),
+        /* aUnregisterTime */ mozilla::TimeStamp(),
+        /* aSinceTime */ 0, aUniqueStacks, mozilla::ProgressLogger{});
+  };
+
+  if (mProfileBuffer) {
+    return streamFrom(*mProfileBuffer);
+  }
+  if (mProfileChunkedBuffer) {
+    // Only the chunked buffer is known: view it through a local ProfileBuffer.
+    ProfileBuffer localBuffer(*mProfileChunkedBuffer);
+    return streamFrom(localBuffer);
+  }
+  // If there are no buffers, the backtrace is empty and nothing is streamed.
+  return ProfilerThreadId{};
+}
diff --git a/tools/profiler/core/ProfilerBacktrace.h b/tools/profiler/core/ProfilerBacktrace.h
new file mode 100644
index 0000000000..55811f4422
--- /dev/null
+++ b/tools/profiler/core/ProfilerBacktrace.h
@@ -0,0 +1,184 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __PROFILER_BACKTRACE_H
+#define __PROFILER_BACKTRACE_H
+
+#include "ProfileBuffer.h"
+
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/UniquePtrExtensions.h"
+
+#include <string>
+
+class ProfileBuffer;
+class ProfilerCodeAddressService;
+class ThreadInfo;
+class UniqueStacks;
+
+namespace mozilla {
+class ProfileChunkedBuffer;
+class TimeStamp;
+namespace baseprofiler {
+class SpliceableJSONWriter;
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+// ProfilerBacktrace encapsulates a synchronous sample.
+// It can work with a ProfileBuffer and/or a ProfileChunkedBuffer (if both, they
+// must already be linked together). The ProfileChunkedBuffer contains all the
+// data; the ProfileBuffer is not strictly needed, only provide it if it is
+// already available at the call site.
+// And these buffers can either be:
+// - owned here (so that the ProfilerBacktrace object can be kept for later
+//   use), OR
+// - referenced through pointers (in cases where the backtrace is immediately
+//   streamed out, so we only need temporary references to external buffers);
+//   these pointers may be null for empty backtraces.
+class ProfilerBacktrace {
+ public:
+  // Take ownership of external buffers and use them to keep, and to stream a
+  // backtrace. If a ProfileBuffer is given, its underlying chunked buffer must
+  // be provided as well.
+  explicit ProfilerBacktrace(
+      const char* aName,
+      mozilla::UniquePtr<mozilla::ProfileChunkedBuffer>
+          aProfileChunkedBufferStorage,
+      mozilla::UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull = nullptr);
+
+  // Take pointers to external buffers and use them to stream a backtrace.
+  // If null, the backtrace is effectively empty.
+  // If both are provided, they must already be connected.
+  explicit ProfilerBacktrace(
+      const char* aName,
+      mozilla::ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull =
+          nullptr,
+      ProfileBuffer* aExternalProfileBufferOrNull = nullptr);
+
+  ~ProfilerBacktrace();
+  // No chunked buffer, or one that needs no more bytes than a ULEB128 0.
+  [[nodiscard]] bool IsEmpty() const {
+    return !mProfileChunkedBuffer ||
+           mozilla::ProfileBufferEntryWriter::Serializer<
+               mozilla::ProfileChunkedBuffer>::Bytes(*mProfileChunkedBuffer) <=
+               mozilla::ULEB128Size(0u);
+  }
+
+  // ProfilerBacktraces' stacks are deduplicated in the context of the
+  // profile that contains the backtrace as a marker payload.
+  //
+  // That is, markers that contain backtraces should not need their own stack,
+  // frame, and string tables. They should instead reuse their parent
+  // profile's tables.
+  ProfilerThreadId StreamJSON(
+      mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
+      const mozilla::TimeStamp& aProcessStartTime, UniqueStacks& aUniqueStacks);
+
+ private:
+  // Used to serialize a ProfilerBacktrace.
+  friend struct mozilla::ProfileBufferEntryWriter::Serializer<
+      ProfilerBacktrace>;
+  friend struct mozilla::ProfileBufferEntryReader::Deserializer<
+      ProfilerBacktrace>;
+
+  std::string mName;
+
+  // `ProfileChunkedBuffer` in which `mProfileBuffer` stores its data; must be
+  // located before `mProfileBuffer` so that it's destroyed after.
+  mozilla::UniquePtr<mozilla::ProfileChunkedBuffer>
+      mOptionalProfileChunkedBufferStorage;
+  // If null, there is no need to check mProfileBuffer's (if present) underlying
+  // buffer because this is done when constructed.
+  mozilla::ProfileChunkedBuffer* mProfileChunkedBuffer;
+  // Owned ProfileBuffer storage (if any), and the ProfileBuffer in use.
+  mozilla::UniquePtr<ProfileBuffer> mOptionalProfileBufferStorage;
+  ProfileBuffer* mProfileBuffer;
+};
+
+namespace mozilla {
+
+// Format: [ UniquePtr<ProfileChunkedBuffer> | name ]
+// Initial len==0 marks a nullptr or empty backtrace.
+template <>
+struct mozilla::ProfileBufferEntryWriter::Serializer<ProfilerBacktrace> {
+  static Length Bytes(const ProfilerBacktrace& aBacktrace) {
+    // A missing or empty buffer is encoded as a lone ULEB128 zero.
+    const auto emptyBytes = ULEB128Size(0u);
+    auto* const chunkedBuffer = aBacktrace.mProfileChunkedBuffer;
+    if (chunkedBuffer) {
+      const auto bufferBytes = SumBytes(*chunkedBuffer);
+      if (bufferBytes > emptyBytes) {
+        return bufferBytes + SumBytes(aBacktrace.mName);
+      }
+    }
+    return emptyBytes;
+  }
+
+  static void Write(mozilla::ProfileBufferEntryWriter& aEW,
+                    const ProfilerBacktrace& aBacktrace) {
+    auto* const chunkedBuffer = aBacktrace.mProfileChunkedBuffer;
+    if (chunkedBuffer && SumBytes(*chunkedBuffer) > ULEB128Size(0u)) {
+      aEW.WriteObject(*chunkedBuffer);
+      aEW.WriteObject(aBacktrace.mName);
+    } else {
+      // No buffer, or empty buffer.
+      aEW.WriteULEB128(0u);
+    }
+  }
+};
+
+template <typename Destructor>
+struct mozilla::ProfileBufferEntryWriter::Serializer<
+    mozilla::UniquePtr<ProfilerBacktrace, Destructor>> {
+  static Length Bytes(
+      const mozilla::UniquePtr<ProfilerBacktrace, Destructor>& aBacktrace) {
+    if (aBacktrace) {
+      return SumBytes(*aBacktrace);
+    }
+    // Null backtrace pointer (treated like an empty backtrace).
+    return ULEB128Size(0u);
+  }
+
+  static void Write(
+      mozilla::ProfileBufferEntryWriter& aEW,
+      const mozilla::UniquePtr<ProfilerBacktrace, Destructor>& aBacktrace) {
+    if (aBacktrace) {
+      aEW.WriteObject(*aBacktrace);
+    } else {
+      // Null backtrace pointer (treated like an empty backtrace).
+      aEW.WriteULEB128(0u);
+    }
+  }
+};
+
+template <typename Destructor>
+struct mozilla::ProfileBufferEntryReader::Deserializer<
+    mozilla::UniquePtr<ProfilerBacktrace, Destructor>> {
+  static void ReadInto(
+      mozilla::ProfileBufferEntryReader& aER,
+      mozilla::UniquePtr<ProfilerBacktrace, Destructor>& aBacktrace) {
+    aBacktrace = Read(aER);
+  }
+
+  static mozilla::UniquePtr<ProfilerBacktrace, Destructor> Read(
+      mozilla::ProfileBufferEntryReader& aER) {
+    auto chunkedBuffer = aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>();
+    if (!chunkedBuffer) {
+      // Null or empty backtrace: nothing else (no name) was written for it.
+      return nullptr;
+    }
+    MOZ_ASSERT(
+        !chunkedBuffer->IsThreadSafe(),
+        "ProfilerBacktrace only stores non-thread-safe ProfileChunkedBuffers");
+    const std::string storedName = aER.ReadObject<std::string>();
+    return UniquePtr<ProfilerBacktrace, Destructor>{
+        new ProfilerBacktrace(storedName.c_str(), std::move(chunkedBuffer))};
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // __PROFILER_BACKTRACE_H
diff --git a/tools/profiler/core/ProfilerBindings.cpp b/tools/profiler/core/ProfilerBindings.cpp
new file mode 100644
index 0000000000..c3af5c5b56
--- /dev/null
+++ b/tools/profiler/core/ProfilerBindings.cpp
@@ -0,0 +1,386 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* FFI functions for Profiler Rust API to call into profiler */
+
+#include "ProfilerBindings.h"
+
+#include "GeckoProfiler.h"
+
+#include <set>
+#include <type_traits>
+
+// FFI entry point: forwards aName to the PROFILER_REGISTER_THREAD macro.
+void gecko_profiler_register_thread(const char* aName) {
+  PROFILER_REGISTER_THREAD(aName);
+}
+
+// FFI entry point: invokes the PROFILER_UNREGISTER_THREAD macro.
+void gecko_profiler_unregister_thread() { PROFILER_UNREGISTER_THREAD(); }
+
+// FFI entry point: placement-constructs an AutoProfilerLabel in the storage
+// pointed to by aAutoLabel. The label text is empty, there is no dynamic
+// string, and the frame is flagged LABEL_DETERMINED_BY_CATEGORY_PAIR.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_label(mozilla::AutoProfilerLabel* aAutoLabel,
+                                    JS::ProfilingCategoryPair aCategoryPair) {
+#ifdef MOZ_GECKO_PROFILER
+  using FrameFlags = js::ProfilingStackFrame::Flags;
+  const auto labelFlags =
+      uint32_t(FrameFlags::LABEL_DETERMINED_BY_CATEGORY_PAIR);
+  new (aAutoLabel)
+      mozilla::AutoProfilerLabel("", nullptr, aCategoryPair, labelFlags);
+#endif
+}
+
+// FFI entry point: runs the AutoProfilerLabel destructor in place on
+// *aAutoLabel. The storage itself is not freed here.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_destruct_label(mozilla::AutoProfilerLabel* aAutoLabel) {
+#ifdef MOZ_GECKO_PROFILER
+  aAutoLabel->~AutoProfilerLabel();
+#endif
+}
+
+// FFI entry point: placement-constructs, in the storage at aTimeStamp, a
+// TimeStamp holding the value returned by TimeStamp::Now().
+void gecko_profiler_construct_timestamp_now(mozilla::TimeStamp* aTimeStamp) {
+  const mozilla::TimeStamp now = mozilla::TimeStamp::Now();
+  new (aTimeStamp) mozilla::TimeStamp(now);
+}
+
+// FFI entry point: placement-constructs a copy of *aSrcTimeStamp in the
+// storage pointed to by aDestTimeStamp.
+void gecko_profiler_clone_timestamp(const mozilla::TimeStamp* aSrcTimeStamp,
+                                    mozilla::TimeStamp* aDestTimeStamp) {
+  new (aDestTimeStamp) mozilla::TimeStamp(*aSrcTimeStamp);
+}
+
+// FFI entry point: runs the TimeStamp destructor in place on *aTimeStamp.
+// The storage itself is not freed here.
+void gecko_profiler_destruct_timestamp(mozilla::TimeStamp* aTimeStamp) {
+  aTimeStamp->~TimeStamp();
+}
+
+// FFI entry point: placement-constructs, in the storage at aDestTimeStamp,
+// the result of adding aMicroseconds (converted to a TimeDuration) to
+// *aTimeStamp.
+void gecko_profiler_add_timestamp(const mozilla::TimeStamp* aTimeStamp,
+                                  mozilla::TimeStamp* aDestTimeStamp,
+                                  double aMicroseconds) {
+  const auto offset = mozilla::TimeDuration::FromMicroseconds(aMicroseconds);
+  new (aDestTimeStamp) mozilla::TimeStamp(*aTimeStamp + offset);
+}
+
+// FFI entry point: placement-constructs, in the storage at aDestTimeStamp,
+// the result of subtracting aMicroseconds (converted to a TimeDuration) from
+// *aTimeStamp.
+void gecko_profiler_subtract_timestamp(const mozilla::TimeStamp* aTimeStamp,
+                                       mozilla::TimeStamp* aDestTimeStamp,
+                                       double aMicroseconds) {
+  const auto offset = mozilla::TimeDuration::FromMicroseconds(aMicroseconds);
+  new (aDestTimeStamp) mozilla::TimeStamp(*aTimeStamp - offset);
+}
+
+// FFI entry point: builds, in the storage at aMarkerTiming, a MarkerTiming
+// with phase Instant whose first time is *aTime and whose second time is a
+// default-constructed TimeStamp.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_timing_instant_at(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) {
+#ifdef MOZ_GECKO_PROFILER
+  using Timing = mozilla::MarkerTiming;
+  static_assert(std::is_trivially_copyable_v<Timing>);
+  const mozilla::TimeStamp unsetTime{};
+  Timing::UnsafeConstruct(aMarkerTiming, *aTime, unsetTime,
+                          Timing::Phase::Instant);
+#endif
+}
+
+// FFI entry point: builds, in the storage at aMarkerTiming, a MarkerTiming
+// with phase Instant whose first time is TimeStamp::Now() and whose second
+// time is a default-constructed TimeStamp.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_timing_instant_now(
+    mozilla::MarkerTiming* aMarkerTiming) {
+#ifdef MOZ_GECKO_PROFILER
+  using Timing = mozilla::MarkerTiming;
+  static_assert(std::is_trivially_copyable_v<Timing>);
+  const mozilla::TimeStamp now = mozilla::TimeStamp::Now();
+  const mozilla::TimeStamp unsetTime{};
+  Timing::UnsafeConstruct(aMarkerTiming, now, unsetTime,
+                          Timing::Phase::Instant);
+#endif
+}
+
+// FFI entry point: builds, in the storage at aMarkerTiming, a MarkerTiming
+// with phase Interval spanning *aStartTime to *aEndTime.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_timing_interval(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aStartTime,
+    const mozilla::TimeStamp* aEndTime) {
+#ifdef MOZ_GECKO_PROFILER
+  using Timing = mozilla::MarkerTiming;
+  static_assert(std::is_trivially_copyable_v<Timing>);
+  Timing::UnsafeConstruct(aMarkerTiming, *aStartTime, *aEndTime,
+                          Timing::Phase::Interval);
+#endif
+}
+
+// FFI entry point: builds, in the storage at aMarkerTiming, a MarkerTiming
+// with phase Interval spanning *aStartTime to TimeStamp::Now().
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_timing_interval_until_now_from(
+    mozilla::MarkerTiming* aMarkerTiming,
+    const mozilla::TimeStamp* aStartTime) {
+#ifdef MOZ_GECKO_PROFILER
+  using Timing = mozilla::MarkerTiming;
+  static_assert(std::is_trivially_copyable_v<Timing>);
+  const mozilla::TimeStamp now = mozilla::TimeStamp::Now();
+  Timing::UnsafeConstruct(aMarkerTiming, *aStartTime, now,
+                          Timing::Phase::Interval);
+#endif
+}
+
+// FFI entry point: builds, in the storage at aMarkerTiming, a MarkerTiming
+// with phase IntervalStart whose first time is *aTime and whose second time
+// is a default-constructed TimeStamp.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_timing_interval_start(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) {
+#ifdef MOZ_GECKO_PROFILER
+  using Timing = mozilla::MarkerTiming;
+  static_assert(std::is_trivially_copyable_v<Timing>);
+  const mozilla::TimeStamp unsetTime{};
+  Timing::UnsafeConstruct(aMarkerTiming, *aTime, unsetTime,
+                          Timing::Phase::IntervalStart);
+#endif
+}
+
+// FFI entry point: builds, in the storage at aMarkerTiming, a MarkerTiming
+// with phase IntervalEnd whose first time is a default-constructed TimeStamp
+// and whose second time is *aTime.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_timing_interval_end(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) {
+#ifdef MOZ_GECKO_PROFILER
+  using Timing = mozilla::MarkerTiming;
+  static_assert(std::is_trivially_copyable_v<Timing>);
+  const mozilla::TimeStamp unsetTime{};
+  Timing::UnsafeConstruct(aMarkerTiming, unsetTime, *aTime,
+                          Timing::Phase::IntervalEnd);
+#endif
+}
+
+// FFI entry point: runs the MarkerTiming destructor in place on
+// *aMarkerTiming. The storage itself is not freed here.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_destruct_marker_timing(
+    mozilla::MarkerTiming* aMarkerTiming) {
+#ifdef MOZ_GECKO_PROFILER
+  aMarkerTiming->~MarkerTiming();
+#endif
+}
+
+// FFI entry point: placement-constructs a MarkerSchema in the storage at
+// aMarkerSchema from the pointer/length pair (aLocations, aLength).
+// NOTE(review): aLocations presumably points to aLength contiguous Location
+// values -- confirm against the MarkerSchema constructor.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_schema(
+    mozilla::MarkerSchema* aMarkerSchema,
+    const mozilla::MarkerSchema::Location* aLocations, size_t aLength) {
+#ifdef MOZ_GECKO_PROFILER
+  new (aMarkerSchema) mozilla::MarkerSchema(aLocations, aLength);
+#endif
+}
+
+// FFI entry point: placement-constructs a MarkerSchema in the storage at
+// aMarkerSchema using the SpecialFrontendLocation tag constructor.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_construct_marker_schema_with_special_front_end_location(
+    mozilla::MarkerSchema* aMarkerSchema) {
+#ifdef MOZ_GECKO_PROFILER
+  using Schema = mozilla::MarkerSchema;
+  new (aMarkerSchema) Schema(Schema::SpecialFrontendLocation{});
+#endif
+}
+
+// FFI entry point: runs the MarkerSchema destructor in place on
+// *aMarkerSchema. The storage itself is not freed here.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_destruct_marker_schema(
+    mozilla::MarkerSchema* aMarkerSchema) {
+#ifdef MOZ_GECKO_PROFILER
+  aMarkerSchema->~MarkerSchema();
+#endif
+}
+
+// FFI entry point: copies the aLabelLength bytes at aLabel into a std::string
+// and passes it to aSchema->SetChartLabel(). The input need not be
+// null-terminated since the length is explicit.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_set_chart_label(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string chartLabel(aLabel, aLabelLength);
+  aSchema->SetChartLabel(std::move(chartLabel));
+#endif
+}
+
+// FFI entry point: copies the aLabelLength bytes at aLabel into a std::string
+// and passes it to aSchema->SetTooltipLabel(). The input need not be
+// null-terminated since the length is explicit.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_set_tooltip_label(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string tooltipLabel(aLabel, aLabelLength);
+  aSchema->SetTooltipLabel(std::move(tooltipLabel));
+#endif
+}
+
+// FFI entry point: copies the aLabelLength bytes at aLabel into a std::string
+// and passes it to aSchema->SetTableLabel(). The input need not be
+// null-terminated since the length is explicit.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_set_table_label(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string tableLabel(aLabel, aLabelLength);
+  aSchema->SetTableLabel(std::move(tableLabel));
+#endif
+}
+
+// FFI entry point: copies the aLabelLength bytes at aLabel into a std::string
+// and passes it to aSchema->SetAllLabels(). The input need not be
+// null-terminated since the length is explicit.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_set_all_labels(mozilla::MarkerSchema* aSchema,
+                                                 const char* aLabel,
+                                                 size_t aLabelLength) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string sharedLabel(aLabel, aLabelLength);
+  aSchema->SetAllLabels(std::move(sharedLabel));
+#endif
+}
+
+// FFI entry point: copies the aKeyLength bytes at aKey into a std::string and
+// calls aSchema->AddKeyFormat() with that key and aFormat.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_add_key_format(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    mozilla::MarkerSchema::Format aFormat) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string key(aKey, aKeyLength);
+  aSchema->AddKeyFormat(std::move(key), aFormat);
+#endif
+}
+
+// FFI entry point: copies the key and label byte ranges into std::strings and
+// calls aSchema->AddKeyLabelFormat() with them and aFormat.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_add_key_label_format(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    const char* aLabel, size_t aLabelLength,
+    mozilla::MarkerSchema::Format aFormat) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string key(aKey, aKeyLength);
+  std::string label(aLabel, aLabelLength);
+  aSchema->AddKeyLabelFormat(std::move(key), std::move(label), aFormat);
+#endif
+}
+
+// FFI entry point: copies the aKeyLength bytes at aKey into a std::string and
+// calls aSchema->AddKeyFormatSearchable() with that key, aFormat and
+// aSearchable.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_add_key_format_searchable(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    mozilla::MarkerSchema::Format aFormat,
+    mozilla::MarkerSchema::Searchable aSearchable) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string key(aKey, aKeyLength);
+  aSchema->AddKeyFormatSearchable(std::move(key), aFormat, aSearchable);
+#endif
+}
+
+// FFI entry point: copies the key and label byte ranges into std::strings and
+// calls aSchema->AddKeyLabelFormatSearchable() with them, aFormat and
+// aSearchable.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_add_key_label_format_searchable(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    const char* aLabel, size_t aLabelLength,
+    mozilla::MarkerSchema::Format aFormat,
+    mozilla::MarkerSchema::Searchable aSearchable) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string key(aKey, aKeyLength);
+  std::string label(aLabel, aLabelLength);
+  aSchema->AddKeyLabelFormatSearchable(std::move(key), std::move(label),
+                                       aFormat, aSearchable);
+#endif
+}
+
+// FFI entry point: copies the label and value byte ranges into std::strings
+// and calls aSchema->AddStaticLabelValue() with them.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_add_static_label_value(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength,
+    const char* aValue, size_t aValueLength) {
+#ifdef MOZ_GECKO_PROFILER
+  std::string label(aLabel, aLabelLength);
+  std::string value(aValue, aValueLength);
+  aSchema->AddStaticLabelValue(std::move(label), std::move(value));
+#endif
+}
+
+// FFI entry point: streams *aMarkerSchema to *aWriter under the given name,
+// but only the first time that name is seen.
+// aStreamedNamesSet is an opaque pointer that is cast to
+// std::set<std::string>*; the name is inserted into that set, and the schema
+// is streamed only if the insertion actually added a new element.
+// *aMarkerSchema is passed to Stream() as an rvalue, so callers should treat
+// it as moved-from afterwards.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_marker_schema_stream(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, mozilla::MarkerSchema* aMarkerSchema,
+    void* aStreamedNamesSet) {
+#ifdef MOZ_GECKO_PROFILER
+  auto* seenNames = static_cast<std::set<std::string>*>(aStreamedNamesSet);
+  // insert() returns a pair whose `second` is false when the element was
+  // already present, in which case this schema has been streamed before.
+  if (!seenNames->insert(std::string(aName, aNameLength)).second) {
+    return;
+  }
+  std::move(*aMarkerSchema)
+      .Stream(*aWriter, mozilla::Span(aName, aNameLength));
+#endif
+}
+
+// FFI entry point: writes an integer property to *aWriter. The property name
+// is the aNameLength bytes starting at aName, passed as a Span.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_json_writer_int_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, int64_t aValue) {
+#ifdef MOZ_GECKO_PROFILER
+  const mozilla::Span propertyName(aName, aNameLength);
+  aWriter->IntProperty(propertyName, aValue);
+#endif
+}
+
+// FFI entry point: writes a double property to *aWriter. The property name is
+// the aNameLength bytes starting at aName, passed as a Span.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_json_writer_float_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, double aValue) {
+#ifdef MOZ_GECKO_PROFILER
+  const mozilla::Span propertyName(aName, aNameLength);
+  aWriter->DoubleProperty(propertyName, aValue);
+#endif
+}
+
+// FFI entry point: writes a boolean property to *aWriter. The property name
+// is the aNameLength bytes starting at aName, passed as a Span.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_json_writer_bool_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, bool aValue) {
+#ifdef MOZ_GECKO_PROFILER
+  const mozilla::Span propertyName(aName, aNameLength);
+  aWriter->BoolProperty(propertyName, aValue);
+#endif
+}
+// FFI entry point: writes a string property to *aWriter. Both the property
+// name and the value are given as explicit pointer/length pairs and are
+// passed on as Spans.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_json_writer_string_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, const char* aValue, size_t aValueLength) {
+#ifdef MOZ_GECKO_PROFILER
+  const mozilla::Span propertyName(aName, aNameLength);
+  const mozilla::Span propertyValue(aValue, aValueLength);
+  aWriter->StringProperty(propertyName, propertyValue);
+#endif
+}
+
+// FFI entry point: writes a null property to *aWriter. The property name is
+// the aNameLength bytes starting at aName, passed as a Span.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_json_writer_null_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength) {
+#ifdef MOZ_GECKO_PROFILER
+  const mozilla::Span propertyName(aName, aNameLength);
+  aWriter->NullProperty(propertyName);
+#endif
+}
+
+// FFI entry point: adds a marker without payload through
+// profiler_add_marker().
+// The marker name is the aNameLength bytes at aName. The marker options are
+// built from *aMarkerTiming (passed through std::move) and a MarkerStack
+// configured with aStackCaptureOptions.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_add_marker_untyped(
+    const char* aName, size_t aNameLength,
+    mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair,
+    mozilla::MarkerTiming* aMarkerTiming,
+    mozilla::StackCaptureOptions aStackCaptureOptions) {
+#ifdef MOZ_GECKO_PROFILER
+  mozilla::MarkerOptions options(
+      std::move(*aMarkerTiming),
+      mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions));
+  profiler_add_marker(mozilla::ProfilerString8View(aName, aNameLength),
+                      mozilla::MarkerCategory{aCategoryPair},
+                      std::move(options));
+#endif
+}
+
+// FFI entry point: adds a TextMarker through profiler_add_marker().
+// The marker name and the text payload are explicit pointer/length pairs.
+// The marker options are built from *aMarkerTiming (passed through std::move)
+// and a MarkerStack configured with aStackCaptureOptions.
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_add_marker_text(
+    const char* aName, size_t aNameLength,
+    mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair,
+    mozilla::MarkerTiming* aMarkerTiming,
+    mozilla::StackCaptureOptions aStackCaptureOptions, const char* aText,
+    size_t aTextLength) {
+#ifdef MOZ_GECKO_PROFILER
+  mozilla::MarkerOptions options(
+      std::move(*aMarkerTiming),
+      mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions));
+  profiler_add_marker(mozilla::ProfilerString8View(aName, aNameLength),
+                      mozilla::MarkerCategory{aCategoryPair},
+                      std::move(options), geckoprofiler::markers::TextMarker{},
+                      mozilla::ProfilerString8View(aText, aTextLength));
+#endif
+}
+
+// FFI entry point: writes a marker entry with an opaque, pre-serialized
+// payload directly into the profiler core buffer.
+//
+// aName/aNameLength: marker name bytes.
+// aCategoryPair: marker category.
+// aMarkerTiming: timing, passed through std::move into the marker options.
+// aStackCaptureOptions: whether/how a stack should be captured.
+// aMarkerTag: value wrapped in a Streaming::DeserializerTag and stored in the
+//   entry.
+// aPayload/aPayloadSize: payload bytes, stored as a Span after the
+//   MarkerPayloadType::Rust tag.
+//
+// The body is compiled out unless MOZ_GECKO_PROFILER is defined.
+void gecko_profiler_add_marker(
+    const char* aName, size_t aNameLength,
+    mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair,
+    mozilla::MarkerTiming* aMarkerTiming,
+    mozilla::StackCaptureOptions aStackCaptureOptions, uint8_t aMarkerTag,
+    const uint8_t* aPayload, size_t aPayloadSize) {
+#ifdef MOZ_GECKO_PROFILER
+  // Build the marker options from the given timing and stack request.
+  mozilla::MarkerOptions options(
+      std::move(*aMarkerTiming),
+      mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions));
+
+  // Currently it's not possible to add a threadId option, but we will
+  // have it soon.
+  if (options.ThreadId().IsUnspecified()) {
+    // If yet unspecified, set thread to this thread where the marker is added.
+    options.Set(mozilla::MarkerThreadId::CurrentThread());
+  }
+
+  auto& coreBuffer = profiler_get_core_buffer();
+  mozilla::Span payloadBytes(aPayload, aPayloadSize);
+
+  // Writes the marker entry. Shared by the stack-capturing path and the plain
+  // path so both store exactly the same sequence of objects.
+  const auto putMarkerEntry = [&]() {
+    coreBuffer.PutObjects(
+        mozilla::ProfileBufferEntryKind::Marker, options,
+        mozilla::ProfilerString8View(aName, aNameLength),
+        mozilla::MarkerCategory{aCategoryPair},
+        mozilla::base_profiler_markers_detail::Streaming::DeserializerTag(
+            aMarkerTag),
+        mozilla::MarkerPayloadType::Rust, payloadBytes);
+  };
+
+  mozilla::StackCaptureOptions requestedCapture =
+      options.Stack().CaptureOptions();
+  // Do not capture a stack if the NoMarkerStacks feature is set.
+  const bool shouldCaptureStack =
+      requestedCapture != mozilla::StackCaptureOptions::NoStack &&
+      profiler_active_without_feature(ProfilerFeature::NoMarkerStacks);
+  if (!shouldCaptureStack) {
+    putMarkerEntry();
+    return;
+  }
+
+  // A capture was requested, let's attempt to do it here&now. This avoids a
+  // lot of allocations that would be necessary if capturing a backtrace
+  // separately.
+  // TODO use a local on-stack byte buffer to remove last allocation.
+  // TODO reduce internal profiler stack levels, see bug 1659872.
+  mozilla::ProfileBufferChunkManagerSingle stackChunkManager(
+      mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize);
+  mozilla::ProfileChunkedBuffer stackBuffer(
+      mozilla::ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+      stackChunkManager);
+  options.StackRef().UseRequestedBacktrace(
+      profiler_capture_backtrace_into(stackBuffer, requestedCapture)
+          ? &stackBuffer
+          : nullptr);
+
+  // The entry must be written here, while stackBuffer is still in scope,
+  // because the options may point at it.
+  putMarkerEntry();
+#endif
+}
diff --git a/tools/profiler/core/ProfilerCodeAddressService.cpp b/tools/profiler/core/ProfilerCodeAddressService.cpp
new file mode 100644
index 0000000000..5a65e06379
--- /dev/null
+++ b/tools/profiler/core/ProfilerCodeAddressService.cpp
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerCodeAddressService.h"
+
+#include "platform.h"
+#include "mozilla/StackWalk.h"
+
+using namespace mozilla;
+
+#if defined(XP_LINUX) || defined(XP_FREEBSD)
+// Looks up the symbol covering aOffset in aTable and returns its name as a
+// newly strdup()'d string, or nullptr when no symbol covers the offset.
+//
+// aTable.mAddrs is binary-searched for aOffset. Symbol number i has its name
+// stored in aTable.mBuffer between byte offsets mIndex[i] and mIndex[i + 1].
+// The name is demangled as Rust first, then as C++; if neither produces
+// anything, the mangled name is returned instead.
+static char* SearchSymbolTable(SymbolTable& aTable, uint32_t aOffset) {
+  size_t index;
+  bool exact =
+      BinarySearch(aTable.mAddrs, 0, aTable.mAddrs.Length(), aOffset, &index);
+
+  if (index == 0 && !exact) {
+    // Our offset is before the first symbol in the table; no result.
+    return nullptr;
+  }
+
+  // On an exact hit `index` is the matching symbol itself. Otherwise it is the
+  // insertion point, so the symbol containing aOffset is the one just before.
+  // Always using `index - 1` would return the previous symbol for an exact
+  // hit, and would underflow for an exact hit on the very first symbol.
+  const size_t symbolIndex = exact ? index : index - 1;
+  if (symbolIndex + 1 >= aTable.mIndex.Length()) {
+    // The string index has no end offset for this symbol; no result.
+    return nullptr;
+  }
+
+  // Extract the (mangled) symbol name out of the string table.
+  auto strings = reinterpret_cast<char*>(aTable.mBuffer.Elements());
+  nsCString symbol;
+  symbol.Append(strings + aTable.mIndex[symbolIndex],
+                aTable.mIndex[symbolIndex + 1] - aTable.mIndex[symbolIndex]);
+
+  // First try demangling as a Rust identifier.
+  char demangled[1024];
+  if (!profiler_demangle_rust(symbol.get(), demangled,
+                              ArrayLength(demangled))) {
+    // Then as a C++ identifier.
+    DemangleSymbol(symbol.get(), demangled, ArrayLength(demangled));
+  }
+  // Guarantee termination regardless of what the demanglers wrote.
+  demangled[ArrayLength(demangled) - 1] = '\0';
+
+  // Use the mangled name if we didn't successfully demangle.
+  return strdup(demangled[0] != '\0' ? demangled : symbol.get());
+}
+#endif
+
+// Resolves the function name for the code address aPc.
+// On success, aResult is set to depend on the entry's function name and true
+// is returned. Returns false, leaving aResult untouched, when the entry has
+// no function name or an empty one.
+bool ProfilerCodeAddressService::GetFunction(const void* aPc,
+                                             nsACString& aResult) {
+  Entry& entry = GetEntry(aPc);
+
+#if defined(XP_LINUX) || defined(XP_FREEBSD)
+  // On Linux, most symbols will not be found by the MozDescribeCodeAddress call
+  // that GetEntry does.  So we read the symbol table directly from the ELF
+  // image.
+
+  // SymbolTable currently assumes library offsets will not be larger than
+  // 4 GiB.
+  const bool offsetFitsIn32Bits = entry.mLOffset <= 0xFFFFFFFF;
+  if (offsetFitsIn32Bits && !entry.mFunction) {
+    auto tableSlot = mSymbolTables.lookupForAdd(entry.mLibrary);
+    if (!tableSlot) {
+      // First request for this library: insert an empty table, then fill it.
+      if (!mSymbolTables.add(tableSlot, entry.mLibrary, SymbolTable())) {
+        MOZ_CRASH("ProfilerCodeAddressService OOM");
+      }
+      profiler_get_symbol_table(entry.mLibrary, nullptr, &tableSlot->value());
+    }
+    entry.mFunction = SearchSymbolTable(tableSlot->value(),
+                                        static_cast<uint32_t>(entry.mLOffset));
+  }
+#endif
+
+  const bool hasFunctionName = entry.mFunction && entry.mFunction[0] != '\0';
+  if (hasFunctionName) {
+    aResult = nsDependentCString(entry.mFunction);
+  }
+  return hasFunctionName;
+}
diff --git a/tools/profiler/core/ProfilerMarkers.cpp b/tools/profiler/core/ProfilerMarkers.cpp
new file mode 100644
index 0000000000..7c299678d1
--- /dev/null
+++ b/tools/profiler/core/ProfilerMarkers.cpp
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ProfilerMarkers.h"
+
+// Explicit instantiations of the marker-adding function templates for a few
+// payload combinations: no payload, TextMarker with std::string / nsCString,
+// and Tracing with ProfilerString8View.
+// NOTE(review): presumably these exist so the common instantiations are
+// compiled once in this translation unit -- confirm against matching
+// `extern template` declarations in the header.
+
+// AddMarkerToBuffer, marker without payload.
+template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer(
+    mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&,
+    const mozilla::MarkerCategory&, mozilla::MarkerOptions&&,
+    mozilla::baseprofiler::markers::NoPayload);
+
+// AddMarkerToBuffer, text marker with a std::string payload.
+template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer(
+    mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&,
+    const mozilla::MarkerCategory&, mozilla::MarkerOptions&&,
+    mozilla::baseprofiler::markers::TextMarker, const std::string&);
+
+// profiler_add_marker, text marker with a std::string payload.
+template mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&,
+    mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker,
+    const std::string&);
+
+// profiler_add_marker, text marker with an nsCString payload.
+template mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&,
+    mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker,
+    const nsCString&);
+
+// profiler_add_marker, tracing marker with a ProfilerString8View payload.
+template mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&,
+    mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::Tracing,
+    const mozilla::ProfilerString8View&);
diff --git a/tools/profiler/core/ProfilerThreadRegistration.cpp b/tools/profiler/core/ProfilerThreadRegistration.cpp
new file mode 100644
index 0000000000..c81d00573d
--- /dev/null
+++ b/tools/profiler/core/ProfilerThreadRegistration.cpp
@@ -0,0 +1,198 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ProfilerThreadRegistration.h"
+
+#include "mozilla/ProfilerMarkers.h"
+#include "mozilla/ProfilerThreadRegistry.h"
+#include "nsString.h"
+#ifdef MOZ_GECKO_PROFILER
+#  include "platform.h"
+#else
+#  define profiler_mark_thread_awake()
+#  define profiler_mark_thread_asleep()
+#endif
+
+namespace mozilla::profiler {
+
+// Definition of the static thread-local slot holding a ThreadRegistration
+// pointer for the current thread.
+/* static */
+MOZ_THREAD_LOCAL(ThreadRegistration*) ThreadRegistration::tlsThreadRegistration;
+
+// Registers the current thread with the profiler, unless it is already
+// registered.
+// If the TLS slot is empty, this object becomes the thread's root
+// registration: it is stored in the TLS, registered with the ThreadRegistry,
+// and the thread is marked awake. If the TLS already holds a registration,
+// this object is a nested one and only records a marker carrying aName.
+ThreadRegistration::ThreadRegistration(const char* aName, const void* aStackTop)
+    : mData(aName, aStackTop) {
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return;
+  }
+
+  ThreadRegistration* const existingRoot = tls->get();
+  if (!existingRoot) {
+    // First registration on this thread: this object becomes the root.
+    tls->set(this);
+    ThreadRegistry::Register(OnThreadRef{*this});
+    profiler_mark_thread_awake();
+    return;
+  }
+
+  // This is a nested ThreadRegistration object, so the thread is already
+  // registered in the TLS and ThreadRegistry and we don't need to register
+  // again.
+  MOZ_ASSERT(mData.Info().ThreadId() == existingRoot->mData.Info().ThreadId(),
+             "Thread being re-registered has changed its TID");
+  // TODO: Use new name. This is currently not possible because the
+  // TLS-stored RegisteredThread's ThreadInfo cannot be changed.
+  // In the meantime, we record a marker that could be used in the frontend.
+  PROFILER_MARKER_TEXT("Nested ThreadRegistration()", OTHER_Profiling,
+                       MarkerOptions{},
+                       ProfilerString8View::WrapNullTerminatedString(aName));
+}
+
+// Unregisters the current thread if this object is its root registration.
+// There are three cases, depending on what the TLS slot holds:
+//  - empty: this thread was already unregistered; a marker is sent to the
+//    main thread (unless this is the main thread);
+//  - another registration: this object was nested; only a marker is recorded;
+//  - this object: the thread is marked asleep, unregistered from the
+//    ThreadRegistry, and the TLS slot is cleared.
+ThreadRegistration::~ThreadRegistration() {
+  MOZ_ASSERT(profiler_current_thread_id() == mData.mInfo.ThreadId(),
+             "ThreadRegistration must be destroyed on its thread");
+  MOZ_ASSERT(!mDataMutex.IsLockedOnCurrentThread(),
+             "Mutex shouldn't be locked here, as it's about to be destroyed "
+             "in ~ThreadRegistration()");
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return;
+  }
+
+  ThreadRegistration* const currentRoot = tls->get();
+  if (!currentRoot) {
+    // Already removed from the TLS!? This could happen with improperly-nested
+    // register/unregister calls, and the first ThreadRegistration has already
+    // been unregistered.
+    // We cannot record a marker on this thread because it was already
+    // unregistered. Send it to the main thread (unless this *is* already the
+    // main thread, which has been unregistered); this may be useful to catch
+    // mismatched register/unregister pairs in Firefox.
+    if (!profiler_is_main_thread()) {
+      nsAutoCString description("thread id: ");
+      description.AppendInt(profiler_current_thread_id().ToNumber());
+      description.AppendLiteral(", name: \"");
+      description.AppendASCII(mData.Info().Name());
+      description.AppendLiteral("\"");
+      PROFILER_MARKER_TEXT(
+          "~ThreadRegistration() but TLS is empty", OTHER_Profiling,
+          MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()),
+          description);
+    }
+    return;
+  }
+
+  if (currentRoot != this) {
+    // `this` is not in the TLS, so it was a nested registration, there is
+    // nothing to unregister yet.
+    PROFILER_MARKER_TEXT(
+        "Nested ~ThreadRegistration()", OTHER_Profiling, MarkerOptions{},
+        ProfilerString8View::WrapNullTerminatedString(mData.Info().Name()));
+    return;
+  }
+
+  // `this` is the root registration: tear everything down.
+  profiler_mark_thread_asleep();
+#ifdef NIGHTLY_BUILD
+  mData.RecordWakeCount();
+#endif
+  ThreadRegistry::Unregister(OnThreadRef{*this});
+#ifdef DEBUG
+  // After ThreadRegistry::Unregister, other threads should not be able to
+  // find this ThreadRegistration, and shouldn't have kept any reference to
+  // it across the ThreadRegistry mutex.
+  MOZ_ASSERT(mDataMutex.TryLock(),
+             "Mutex shouldn't be locked in any thread, as it's about to be "
+             "destroyed in ~ThreadRegistration()");
+  // Undo the above successful TryLock.
+  mDataMutex.Unlock();
+#endif  // DEBUG
+
+  tls->set(nullptr);
+}
+
+// Registers the current thread and returns a pointer to its ProfilingStack.
+// Returns nullptr when no TLS is available.
+// If the thread is already registered, the root registration's nesting
+// counter is incremented, a marker carrying aName is recorded, and the
+// existing ProfilingStack is returned. Otherwise a heap-allocated
+// ThreadRegistration is created and flagged as being on the heap.
+/* static */
+ProfilingStack* ThreadRegistration::RegisterThread(const char* aName,
+                                                   const void* aStackTop) {
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return nullptr;
+  }
+
+  ThreadRegistration* const existingRoot = tls->get();
+  if (!existingRoot) {
+    // Create on heap, it self-registers with the TLS (its effective owner, so
+    // we can forget the pointer after this), and with the Profiler.
+    ThreadRegistration* created = new ThreadRegistration(aName, aStackTop);
+    created->mIsOnHeap = true;
+    return &created->mData.mProfilingStack;
+  }
+
+  // Already registered, record the extra depth to ignore the matching
+  // UnregisterThread.
+  ++existingRoot->mOtherRegistrations;
+  // TODO: Use new name. This is currently not possible because the
+  // TLS-stored RegisteredThread's ThreadInfo cannot be changed.
+  // In the meantime, we record a marker that could be used in the frontend.
+  PROFILER_MARKER_TEXT("Nested ThreadRegistration::RegisterThread()",
+                       OTHER_Profiling, MarkerOptions{},
+                       ProfilerString8View::WrapNullTerminatedString(aName));
+  return &existingRoot->mData.mProfilingStack;
+}
+
+// Undoes one RegisterThread() call on the current thread.
+// Depending on the state found through the TLS slot, this either:
+//  - sends a marker to the main thread when the slot is empty (unless this is
+//    the main thread);
+//  - decrements the nesting counter when nested registrations are
+//    outstanding;
+//  - records an "Excess" marker when the root registration was not created on
+//    the heap, and leaves it alone;
+//  - or deletes the heap-allocated root registration.
+/* static */
+void ThreadRegistration::UnregisterThread() {
+  auto* tls = GetTLS();
+  if (MOZ_UNLIKELY(!tls)) {
+    // No TLS, nothing can be done without it.
+    return;
+  }
+
+  ThreadRegistration* const currentRoot = tls->get();
+  if (!currentRoot) {
+    // There is no known ThreadRegistration for this thread, ignore this
+    // request. We cannot record a marker on this thread because it was already
+    // unregistered. Send it to the main thread (unless this *is* already the
+    // main thread, which has been unregistered); this may be useful to catch
+    // mismatched register/unregister pairs in Firefox.
+    if (!profiler_is_main_thread()) {
+      nsAutoCString description("thread id: ");
+      description.AppendInt(profiler_current_thread_id().ToNumber());
+      PROFILER_MARKER_TEXT(
+          "ThreadRegistration::UnregisterThread() but TLS is empty",
+          OTHER_Profiling,
+          MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()),
+          description);
+    }
+    return;
+  }
+
+  if (currentRoot->mOtherRegistrations != 0) {
+    // This is assumed to be a matching UnregisterThread() for a nested
+    // RegisterThread(). Decrease depth and we're done.
+    --currentRoot->mOtherRegistrations;
+    // We don't know what name was used in the related RegisterThread().
+    PROFILER_MARKER_UNTYPED("Nested ThreadRegistration::UnregisterThread()",
+                            OTHER_Profiling);
+    return;
+  }
+
+  if (!currentRoot->mIsOnHeap) {
+    // The root registration was not added by `RegisterThread()`, so it
+    // shouldn't be deleted!
+    // This could happen if there are un-paired `UnregisterThread` calls when
+    // the initial registration (still alive) was done on the stack. We don't
+    // know what name was used in the related RegisterThread().
+    PROFILER_MARKER_UNTYPED("Excess ThreadRegistration::UnregisterThread()",
+                            OTHER_Profiling, MarkerStack::Capture());
+    return;
+  }
+
+  // This is the last `UnregisterThread()` that should match the first
+  // `RegisterThread()` that created this ThreadRegistration on the heap.
+  // Just delete this root registration, it will de-register itself from the
+  // TLS (and from the Profiler).
+  delete currentRoot;
+}
+
+}  // namespace mozilla::profiler
diff --git a/tools/profiler/core/ProfilerThreadRegistrationData.cpp b/tools/profiler/core/ProfilerThreadRegistrationData.cpp
new file mode 100644
index 0000000000..e70f9e749a
--- /dev/null
+++ b/tools/profiler/core/ProfilerThreadRegistrationData.cpp
@@ -0,0 +1,303 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ProfilerThreadRegistrationData.h"
+
+#include "mozilla/FOGIPC.h"
+#include "mozilla/glean/GleanMetrics.h"
+#include "mozilla/ProfilerMarkers.h"
+#include "js/AllocationRecording.h"
+#include "js/ProfilingStack.h"
+
+#if defined(XP_WIN)
+#  include <windows.h>
+#elif defined(XP_DARWIN)
+#  include <pthread.h>
+#endif
+
+#ifdef NIGHTLY_BUILD
+namespace geckoprofiler::markers {
+
+using namespace mozilla;
+
+struct ThreadCpuUseMarker {  // Payload of the "Thread CPU use" markers.
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("ThreadCpuUse");
+  }
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   ProfilerThreadId aThreadId,
+                                   int64_t aCpuTimeMs, int64_t aWakeUps,
+                                   const ProfilerString8View& aThreadName) {
+    aWriter.IntProperty("threadId", static_cast<int64_t>(aThreadId.ToNumber()));
+    aWriter.IntProperty("time", aCpuTimeMs);  // CPU time, in milliseconds.
+    aWriter.IntProperty("wakeups", aWakeUps);  // Number of thread wake-ups.
+    aWriter.StringProperty("label", aThreadName);  // Used by the labels below.
+  }
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+    schema.AddKeyLabelFormat("time", "CPU Time", MS::Format::Milliseconds);
+    schema.AddKeyLabelFormat("wakeups", "Wake ups", MS::Format::Integer);
+    schema.SetTooltipLabel("{marker.name} - {marker.data.label}");
+    schema.SetTableLabel(
+        "{marker.name} - {marker.data.label}: {marker.data.time} of CPU time, "
+        "{marker.data.wakeups} wake ups");
+    return schema;  // "threadId" and "label" get no key/label/format entry.
+  }
+};
+
+}  // namespace geckoprofiler::markers
+#endif
+
+namespace mozilla::profiler {
+
+ThreadRegistrationData::ThreadRegistrationData(const char* aName,
+                                               const void* aStackTop)
+    : mInfo(aName),
+      mPlatformData(mInfo.ThreadId()),
+      mStackTop(
+#if defined(XP_WIN)
+          // Windows: no guessing needed, read the stack base from the TEB/TIB.
+          reinterpret_cast<const void*>(
+              reinterpret_cast<PNT_TIB>(NtCurrentTeb())->StackBase)
+#elif defined(XP_DARWIN)
+          // Mac/Darwin: no guessing needed, pthreads reports the stack address.
+          reinterpret_cast<const void*>(
+              pthread_get_stackaddr_np(pthread_self()))
+#else
+          // Other platforms: use the caller-provided guess, `aStackTop`.
+          aStackTop
+#endif
+      ) {  // Note: `aStackTop` is ignored on Windows and Darwin.
+}
+
+// Simplified version of profiler_add_marker that can be easily passed into the
+// JS engine: adds a JS-category text marker from two null-terminated strings.
+static void profiler_add_js_marker(const char* aMarkerName,
+                                   const char* aMarkerText) {
+  PROFILER_MARKER_TEXT(
+      mozilla::ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS,
+      {}, mozilla::ProfilerString8View::WrapNullTerminatedString(aMarkerText));
+}
+
+static void profiler_add_js_allocation_marker(JS::RecordAllocationInfo&& info) {
+  if (!profiler_thread_is_being_profiled_for_markers()) {
+    return;  // Nothing to record when this thread takes no markers.
+  }
+  // Marker type for one recorded JS allocation; empty names are not streamed.
+  struct JsAllocationMarker {
+    static constexpr mozilla::Span<const char> MarkerTypeName() {
+      return mozilla::MakeStringSpan("JS allocation");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
+        const mozilla::ProfilerString16View& aTypeName,
+        const mozilla::ProfilerString8View& aClassName,
+        const mozilla::ProfilerString16View& aDescriptiveTypeName,
+        const mozilla::ProfilerString8View& aCoarseType, uint64_t aSize,
+        bool aInNursery) {
+      if (aClassName.Length() != 0) {
+        aWriter.StringProperty("className", aClassName);
+      }
+      if (aTypeName.Length() != 0) {
+        aWriter.StringProperty("typeName", NS_ConvertUTF16toUTF8(aTypeName));
+      }
+      if (aDescriptiveTypeName.Length() != 0) {
+        aWriter.StringProperty("descriptiveTypeName",
+                               NS_ConvertUTF16toUTF8(aDescriptiveTypeName));
+      }
+      aWriter.StringProperty("coarseType", aCoarseType);
+      aWriter.IntProperty("size", aSize);
+      aWriter.BoolProperty("inNursery", aInNursery);
+    }
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      return mozilla::MarkerSchema::SpecialFrontendLocation{};
+    }
+  };
+  // Add the marker with a captured stack; `info` strings are wrapped in views.
+  profiler_add_marker(
+      "JS allocation", geckoprofiler::category::JS,
+      mozilla::MarkerStack::Capture(), JsAllocationMarker{},
+      mozilla::ProfilerString16View::WrapNullTerminatedString(info.typeName),
+      mozilla::ProfilerString8View::WrapNullTerminatedString(info.className),
+      mozilla::ProfilerString16View::WrapNullTerminatedString(
+          info.descriptiveTypeName),
+      mozilla::ProfilerString8View::WrapNullTerminatedString(info.coarseType),
+      info.size, info.inNursery);
+}
+
+void ThreadRegistrationLockedRWFromAnyThread::SetProfilingFeaturesAndData(
+    ThreadProfilingFeatures aProfilingFeatures,
+    ProfiledThreadData* aProfiledThreadData, const PSAutoLock&) {
+  // Preconditions: not profiled yet, no data attached, and real data given.
+  MOZ_ASSERT(mProfilingFeatures == ThreadProfilingFeatures::NotProfiled);
+  MOZ_ASSERT(!mProfiledThreadData);
+  MOZ_ASSERT(aProfiledThreadData);
+  mProfilingFeatures = aProfilingFeatures;
+  mProfiledThreadData = aProfiledThreadData;
+
+  // If a JSContext is already present on this now-profiled thread, allocate
+  // the JsFrame buffer that allows profiler-unlocked on-thread sampling.
+  if (mJSContext) {
+    MOZ_ASSERT(!mJsFrameBuffer);
+    mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+  }
+
+  // Check invariants.
+  MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
+             !!mProfiledThreadData);
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+void ThreadRegistrationLockedRWFromAnyThread::ClearProfilingFeaturesAndData(
+    const PSAutoLock&) {
+  mProfilingFeatures = ThreadProfilingFeatures::NotProfiled;
+  mProfiledThreadData = nullptr;
+
+  // Release the JsFrame buffer, if there is one. `delete[]` on a null pointer
+  // is a no-op, so no explicit null check is needed.
+  delete[] mJsFrameBuffer;
+  mJsFrameBuffer = nullptr;
+
+  // Check invariants.
+  MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
+             !!mProfiledThreadData);
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+void ThreadRegistrationLockedRWOnThread::SetJSContext(JSContext* aJSContext) {
+  MOZ_ASSERT(aJSContext && !mJSContext);
+  // Attach the (non-null) JSContext; none may be attached already.
+  mJSContext = aJSContext;
+
+  if (mProfiledThreadData) {
+    MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
+               !!mProfiledThreadData);
+    // We now have a JSContext, and the thread is already being profiled,
+    // allocate a JsFramesBuffer to allow profiler-unlocked on-thread sampling.
+    MOZ_ASSERT(!mJsFrameBuffer);
+    mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+  }
+
+  // We give the JS engine a non-owning reference to the ProfilingStack. It's
+  // important that the JS engine doesn't touch this once the thread dies.
+  js::SetContextProfilingStack(aJSContext, &ProfilingStackRef());
+
+  // Check invariants: the buffer exists iff there is a JSContext and profiling.
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+void ThreadRegistrationLockedRWOnThread::ClearJSContext() {
+  mJSContext = nullptr;
+
+  // Without a JSContext the JsFrame buffer must go too. `delete[]` on a null
+  // pointer is a no-op, so no explicit null check is needed.
+  delete[] mJsFrameBuffer;
+  mJsFrameBuffer = nullptr;
+
+  // Check invariants.
+  MOZ_ASSERT((mJSContext &&
+              (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) ==
+             !!mJsFrameBuffer);
+}
+
+void ThreadRegistrationLockedRWOnThread::PollJSSampling() {
+  // We can't start/stop profiling until we have the thread's JSContext.
+  if (!mJSContext) {
+    return;
+  }
+
+  // It is possible for mJSSampling to go through the following sequences.
+  //
+  // - INACTIVE, ACTIVE_REQUESTED, INACTIVE_REQUESTED, INACTIVE
+  //
+  // - ACTIVE, INACTIVE_REQUESTED, ACTIVE_REQUESTED, ACTIVE
+  //
+  // Therefore, the two branches below aren't always taken alternately.
+  // This is ok because the JS engine can handle that.
+  if (mJSSampling == ACTIVE_REQUESTED) {
+    mJSSampling = ACTIVE;
+    js::EnableContextProfilingStack(mJSContext, true);
+
+    if (JSAllocationsEnabled()) {
+      // TODO - This probability should not be hardcoded. See Bug 1547284.
+      JS::EnableRecordingAllocations(mJSContext,
+                                     profiler_add_js_allocation_marker, 0.01);
+    }
+    js::RegisterContextProfilingEventMarker(mJSContext,
+                                            profiler_add_js_marker);
+  } else if (mJSSampling == INACTIVE_REQUESTED) {
+    mJSSampling = INACTIVE;
+    js::EnableContextProfilingStack(mJSContext, false);
+
+    if (JSAllocationsEnabled()) {
+      JS::DisableRecordingAllocations(mJSContext);
+    }
+  }
+}
+
+#ifdef NIGHTLY_BUILD
+void ThreadRegistrationUnlockedConstReaderAndAtomicRW::RecordWakeCount() const {
+  baseprofiler::detail::BaseProfilerAutoLock lock(mRecordWakeCountMutex);
+  // Reports the wake-ups and CPU time accumulated since the previous report.
+  uint64_t newWakeCount = mWakeCount - mAlreadyRecordedWakeCount;
+  if (newWakeCount == 0 && mSleep != AWAKE) {
+    // If no new wake-up was counted, and the thread is not marked awake,
+    // we can be pretty sure there is no CPU activity to record.
+    // Threads that are never annotated as asleep/awake (typically rust threads)
+    // start as awake.
+    return;
+  }
+
+  uint64_t cpuTimeNs;
+  if (!GetCpuTimeSinceThreadStartInNs(&cpuTimeNs, PlatformDataCRef())) {
+    cpuTimeNs = 0;
+  }
+
+  constexpr uint64_t NS_PER_MS = 1'000'000;
+  uint64_t cpuTimeMs = cpuTimeNs / NS_PER_MS;
+
+  uint64_t newCpuTimeMs = MOZ_LIKELY(cpuTimeMs > mAlreadyRecordedCpuTimeInMs)
+                              ? cpuTimeMs - mAlreadyRecordedCpuTimeInMs
+                              : 0;
+
+  if (!newWakeCount && !newCpuTimeMs) {
+    // Nothing to report, avoid computing the Glean friendly thread name.
+    return;
+  }
+
+  nsAutoCString threadName(mInfo.Name());
+  // Trim the trailing number of threads that are part of a thread pool.
+  for (size_t length = threadName.Length(); length > 0; --length) {
+    const char c = threadName.CharAt(length - 1);
+    if ((c < '0' || c > '9') && c != '#' && c != ' ') {
+      if (length != threadName.Length()) {
+        threadName.SetLength(length);
+      }
+      break;
+    }
+  }
+
+  mozilla::glean::RecordThreadCpuUse(threadName, newCpuTimeMs, newWakeCount);
+  // The thread id is provided as part of the payload because this call is
+  // inside a ThreadRegistration data function, which could be invoked with
+  // the ThreadRegistry locked. We cannot call any function/option that could
+  // attempt to lock the ThreadRegistry again, like MarkerThreadId.
+  PROFILER_MARKER("Thread CPU use", OTHER, {}, ThreadCpuUseMarker,
+                  mInfo.ThreadId(), newCpuTimeMs, newWakeCount, threadName);
+  // Advance by what was reported, so a failed CPU-time read (0) can't rewind.
+  mAlreadyRecordedCpuTimeInMs += newCpuTimeMs;
+  mAlreadyRecordedWakeCount += newWakeCount;
+}
+#endif
+
+}  // namespace mozilla::profiler
diff --git a/tools/profiler/core/ProfilerThreadRegistry.cpp b/tools/profiler/core/ProfilerThreadRegistry.cpp
new file mode 100644
index 0000000000..cb456471d9
--- /dev/null
+++ b/tools/profiler/core/ProfilerThreadRegistry.cpp
@@ -0,0 +1,40 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ProfilerThreadRegistry.h"
+
+namespace mozilla::profiler {
+
+/* static */
+ThreadRegistry::RegistryContainer ThreadRegistry::sRegistryContainer;
+
+/* static */
+ThreadRegistry::RegistryMutex ThreadRegistry::sRegistryMutex;
+
+#if !defined(MOZ_GECKO_PROFILER)
+// When MOZ_GECKO_PROFILER is not defined, the function definitions in
+// platform.cpp are not built, causing link errors. So we keep these simple
+// definitions here.
+
+/* static */
+void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) {
+  LockedRegistry lock;  // presumably guards sRegistryContainer -- TODO confirm
+  MOZ_RELEASE_ASSERT(sRegistryContainer.append(OffThreadRef{aOnThreadRef}));
+}  // A failed append() trips the release assert above.
+
+/* static */
+void ThreadRegistry::Unregister(ThreadRegistration::OnThreadRef aOnThreadRef) {
+  LockedRegistry lock;  // presumably guards sRegistryContainer -- TODO confirm
+  for (OffThreadRef& thread : sRegistryContainer) {
+    if (thread.IsPointingAt(*aOnThreadRef.mThreadRegistration)) {
+      sRegistryContainer.erase(&thread);
+      break;  // Only the first match is removed; stop iterating.
+    }
+  }
+}  // An unknown registration is silently ignored.
+#endif  // !defined(MOZ_GECKO_PROFILER)
+
+}  // namespace mozilla::profiler
diff --git a/tools/profiler/core/ProfilerUtils.cpp b/tools/profiler/core/ProfilerUtils.cpp
new file mode 100644
index 0000000000..6a46878ad7
--- /dev/null
+++ b/tools/profiler/core/ProfilerUtils.cpp
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file implements functions from ProfilerUtils.h on all platforms.
+// Functions with platform-specific implementations are separated in #if blocks
+// below, with each block being self-contained with all the #includes and
+// definitions it needs, to keep platform code easier to maintain in isolation.
+
+#include "mozilla/ProfilerUtils.h"
+
+// --------------------------------------------- Windows process & thread ids
+#if defined(XP_WIN)
+
+#  include <process.h>
+#  include <processthreadsapi.h>
+
+ProfilerProcessId profiler_current_process_id() {  // Windows implementation.
+  return ProfilerProcessId::FromNativeId(_getpid());
+}
+
+ProfilerThreadId profiler_current_thread_id() {  // Windows implementation.
+  static_assert(std::is_same_v<ProfilerThreadId::NativeType,
+                               decltype(GetCurrentThreadId())>,
+                "ProfilerThreadId::NativeType must be exactly the type "
+                "returned by GetCurrentThreadId()");
+  return ProfilerThreadId::FromNativeId(GetCurrentThreadId());
+}  // NOTE(review): relies on <type_traits> being included indirectly.
+
+// --------------------------------------------- Non-Windows process id
+#else
+// All non-Windows platforms are assumed to be POSIX, which has getpid().
+
+#  include <unistd.h>
+
+ProfilerProcessId profiler_current_process_id() {  // POSIX implementation.
+  return ProfilerProcessId::FromNativeId(getpid());
+}
+
+// --------------------------------------------- Non-Windows thread id
+// ------------------------------------------------------- macOS
+#  if defined(XP_MACOSX)
+
+#    include <pthread.h>
+
+ProfilerThreadId profiler_current_thread_id() {  // macOS implementation.
+  uint64_t tid;  // Written by pthread_threadid_np() on success.
+  if (pthread_threadid_np(nullptr, &tid) != 0) {
+    return ProfilerThreadId{};  // Query failed: default-constructed id.
+  }
+  return ProfilerThreadId::FromNativeId(tid);
+}
+
+// ------------------------------------------------------- Android
+// Test Android before Linux, because Linux includes Android.
+#  elif defined(__ANDROID__) || defined(ANDROID)
+
+ProfilerThreadId profiler_current_thread_id() {  // Android implementation.
+  return ProfilerThreadId::FromNativeId(gettid());
+}
+
+// ------------------------------------------------------- Linux
+#  elif defined(XP_LINUX)
+
+#    include <sys/syscall.h>
+
+ProfilerThreadId profiler_current_thread_id() {  // Linux implementation.
+  // glibc doesn't provide a wrapper for gettid() until 2.30, so use the syscall.
+  return ProfilerThreadId::FromNativeId(syscall(SYS_gettid));
+}
+
+// ------------------------------------------------------- FreeBSD
+#  elif defined(XP_FREEBSD)
+
+#    include <sys/thr.h>
+
+ProfilerThreadId profiler_current_thread_id() {  // FreeBSD implementation.
+  long id;  // Written by thr_self() on success.
+  if (thr_self(&id) != 0) {
+    return ProfilerThreadId{};  // Query failed: default-constructed id.
+  }
+  return ProfilerThreadId::FromNativeId(id);
+}
+
+// ------------------------------------------------------- Others
+#  else
+
+ProfilerThreadId profiler_current_thread_id() {  // Generic C++ fallback.
+  return ProfilerThreadId::FromNativeId(std::this_thread::get_id());
+}  // NOTE(review): relies on <thread> being included indirectly -- confirm.
+
+#  endif
+#endif  // End of non-XP_WIN.
+
+// --------------------------------------------- Platform-agnostic definitions
+
+#include "MainThreadUtils.h"
+#include "mozilla/Assertions.h"
+
+static ProfilerThreadId scProfilerMainThreadId;
+
+void profiler_init_main_thread_id() {
+  MOZ_ASSERT(NS_IsMainThread());  // Must be called on the main thread.
+  mozilla::baseprofiler::profiler_init_main_thread_id();
+  if (!scProfilerMainThreadId.IsSpecified()) {  // Keep an already-set id.
+    scProfilerMainThreadId = profiler_current_thread_id();
+  }
+}
+
+[[nodiscard]] ProfilerThreadId profiler_main_thread_id() {
+  return scProfilerMainThreadId;  // Only set by profiler_init_main_thread_id().
+}
+
+[[nodiscard]] bool profiler_is_main_thread() {
+  return profiler_current_thread_id() == scProfilerMainThreadId;
+}  // Compares against the id stored by profiler_init_main_thread_id().
diff --git a/tools/profiler/core/VTuneProfiler.cpp b/tools/profiler/core/VTuneProfiler.cpp
new file mode 100644
index 0000000000..58a39c51ee
--- /dev/null
+++ b/tools/profiler/core/VTuneProfiler.cpp
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_WIN
+#  undef UNICODE
+#  undef _UNICODE
+#endif
+
+#include "VTuneProfiler.h"
+#include "mozilla/Bootstrap.h"
+#include <memory>
+
+VTuneProfiler* VTuneProfiler::mInstance = nullptr;
+
+void VTuneProfiler::Initialize() {
+  // This is just a 'dirty trick' to find out if the ittnotify DLL was found.
+  // If it wasn't, __itt_event_create always returns 0; otherwise it returns
+  // incrementing numbers. When the library is found this wastes 2 events, but
+  // that should be okay.
+  __itt_event testEvent =
+      __itt_event_create("Test event", strlen("Test event"));
+  testEvent = __itt_event_create("Test event 2", strlen("Test event 2"));
+
+  if (testEvent) {  // Only the second event's value is tested.
+    mInstance = new VTuneProfiler();
+  }
+}
+
+void VTuneProfiler::Shutdown() {}  // No-op: mInstance is left allocated.
+
+void VTuneProfiler::TraceInternal(const char* aName, TracingKind aKind) {
+  // Events are created once per distinct name and then cached in mStrings.
+  const std::string name(aName);
+  const auto cached = mStrings.find(name);
+
+  __itt_event event;
+  if (cached == mStrings.end()) {
+    event = __itt_event_create(aName, name.length());
+    mStrings.insert({name, event});
+  } else {
+    event = cached->second;
+  }
+
+  // VTune will consider starts not matched with an end to be single point in
+  // time events, so a TRACING_EVENT is reported as a start as well.
+  if (aKind == TRACING_EVENT || aKind == TRACING_INTERVAL_START) {
+    __itt_event_start(event);
+  } else {
+    __itt_event_end(event);
+  }
+}
+
+void VTuneProfiler::RegisterThreadInternal(const char* aName) {
+  if (std::string(aName) != "GeckoMain") {
+    __itt_thread_set_name(aName);  // Not a process main thread: keep its name.
+    return;
+  }
+  // Process main thread: name it after the type of the current process.
+  const char* processLabel = "Unknown Process";
+  switch (XRE_GetProcessType()) {
+    case GeckoProcessType::GeckoProcessType_Default:
+      processLabel = "Main Process";
+      break;
+    case GeckoProcessType::GeckoProcessType_Content:
+      processLabel = "Content Process";
+      break;
+    case GeckoProcessType::GeckoProcessType_GMPlugin:
+      processLabel = "Plugin Process";
+      break;
+    case GeckoProcessType::GeckoProcessType_GPU:
+      processLabel = "GPU Process";
+      break;
+    default:
+      break;
+  }
+  __itt_thread_set_name(processLabel);
+}
diff --git a/tools/profiler/core/VTuneProfiler.h b/tools/profiler/core/VTuneProfiler.h
new file mode 100644
index 0000000000..e3abe6b90d
--- /dev/null
+++ b/tools/profiler/core/VTuneProfiler.h
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VTuneProfiler_h
+#define VTuneProfiler_h
+
+// The intent here is to add 0 overhead for regular users. In order to build
+// the VTune profiler code at all --enable-vtune-instrumentation needs to be
+// set as a build option. Even then, when none of the environment variables
+// is specified that allow us to find the ittnotify DLL, these functions
+// should be minimal overhead. When starting Firefox under VTune, these
+// env vars will be automatically defined, otherwise INTEL_LIBITTNOTIFY32/64
+// should be set to point at the ittnotify DLL.
+#ifndef MOZ_VTUNE_INSTRUMENTATION
+
+#  define VTUNE_INIT()
+#  define VTUNE_SHUTDOWN()
+
+#  define VTUNE_TRACING(name, kind)
+#  define VTUNE_REGISTER_THREAD(name)
+
+#else
+
+#  include "GeckoProfiler.h"
+
+// This is the regular Intel header, these functions are actually defined for
+// us inside js/src/vtune by an intel C file which actually dynamically resolves
+// them to the correct DLL. Through libxul these will 'magically' resolve.
+#  include "vtune/ittnotify.h"
+
+#  include <stddef.h>
+#  include <unordered_map>
+#  include <string>
+
+class VTuneProfiler {
+ public:
+  static void Initialize();  // Creates mInstance if ittnotify is available.
+  static void Shutdown();
+  // Kind of trace point; only TRACING_INTERVAL_END ends an event.
+  enum TracingKind {
+    TRACING_EVENT,
+    TRACING_INTERVAL_START,
+    TRACING_INTERVAL_END,
+  };
+  // Static entry points: these do nothing while mInstance is null.
+  static void Trace(const char* aName, TracingKind aKind) {
+    if (mInstance) {
+      mInstance->TraceInternal(aName, aKind);
+    }
+  }
+  static void RegisterThread(const char* aName) {
+    if (mInstance) {
+      mInstance->RegisterThreadInternal(aName);
+    }
+  }
+
+ private:
+  void TraceInternal(const char* aName, TracingKind aKind);
+  void RegisterThreadInternal(const char* aName);
+
+  // This is null when the ittnotify DLL could not be found.
+  static VTuneProfiler* mInstance;
+  // Cache of the VTune events created so far, keyed by event name.
+  std::unordered_map<std::string, __itt_event> mStrings;
+};
+
+#  define VTUNE_INIT() VTuneProfiler::Initialize()
+#  define VTUNE_SHUTDOWN() VTuneProfiler::Shutdown()
+
+#  define VTUNE_TRACING(name, kind) VTuneProfiler::Trace(name, kind)
+#  define VTUNE_REGISTER_THREAD(name) VTuneProfiler::RegisterThread(name)
+
+#endif
+
+#endif /* VTuneProfiler_h */
diff --git a/tools/profiler/core/memory_hooks.cpp b/tools/profiler/core/memory_hooks.cpp
new file mode 100644
index 0000000000..59e87d607c
--- /dev/null
+++ b/tools/profiler/core/memory_hooks.cpp
@@ -0,0 +1,632 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "memory_hooks.h"
+
+#include "nscore.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/FastBernoulliTrial.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/JSONWriter.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/PlatformMutex.h"
+#include "mozilla/ProfilerCounts.h"
+#include "mozilla/ThreadLocal.h"
+
+#include "GeckoProfiler.h"
+#include "prenv.h"
+#include "replace_malloc.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef XP_WIN
+#  include <windows.h>
+#  include <process.h>
+#else
+#  include <pthread.h>
+#  include <sys/types.h>
+#  include <unistd.h>
+#endif
+
+#ifdef ANDROID
+#  include <android/log.h>
+#endif
+
+// The counters start out as a nullptr, and then get initialized only once. They
+// are never destroyed, as it would cause race conditions for the memory hooks
+// that use the counters. This helps guard against potentially expensive
+// operations like using a mutex.
+//
+// In addition, this is a raw pointer and not a UniquePtr, as the counter
+// machinery will try and de-register itself from the profiler. This could
+// happen after the profiler and its PSMutex was already destroyed, resulting in
+// a crash.
+static ProfilerCounterTotal* sCounter;
+
+// The gBernoulli value starts out as a nullptr, and only gets initialized once.
+// It then lives for the entire lifetime of the process. It cannot be deleted
+// without additional multi-threaded protections, since if we deleted it during
+// profiler_stop then there could be a race between threads already in a
+// memory hook that might try to access the value after or during deletion.
+static mozilla::FastBernoulliTrial* gBernoulli;
+
+namespace mozilla::profiler {
+
+//---------------------------------------------------------------------------
+// Utilities
+//---------------------------------------------------------------------------
+
+// Returns true or false depending on whether the marker was actually added
+// or not.
+static bool profiler_add_native_allocation_marker(int64_t aSize,
+                                                  uintptr_t aMemoryAddress) {
+  if (!profiler_thread_is_being_profiled_for_markers(
+          profiler_main_thread_id())) {
+    return false;  // The main thread, where markers are sent, takes none.
+  }
+
+  // Because native allocations may be intercepted anywhere, blocking while
+  // locking the profiler mutex here could end up causing a deadlock if another
+  // mutex is taken, which the profiler may indirectly need elsewhere.
+  // See bug 1642726 for such a scenario.
+  // So instead we bail out if the mutex is already locked. Native allocations
+  // are statistically sampled anyway, so missing a few because of this is
+  // acceptable.
+  if (profiler_is_locked_on_current_thread()) {
+    return false;
+  }
+  // Marker payload: allocation size, address, and the allocating thread's id.
+  struct NativeAllocationMarker {
+    static constexpr mozilla::Span<const char> MarkerTypeName() {
+      return mozilla::MakeStringSpan("Native allocation");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int64_t aSize,
+        uintptr_t aMemoryAddress, ProfilerThreadId aThreadId) {
+      aWriter.IntProperty("size", aSize);
+      aWriter.IntProperty("memoryAddress",
+                          static_cast<int64_t>(aMemoryAddress));
+      // Tech note: If `ToNumber()` returns a uint64_t, the conversion to
+      // int64_t is "implementation-defined" before C++20. This is acceptable
+      // here, because this is a one-way conversion to a unique identifier
+      // that's used to visually separate data by thread on the front-end.
+      aWriter.IntProperty("threadId",
+                          static_cast<int64_t>(aThreadId.ToNumber()));
+    }
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      return mozilla::MarkerSchema::SpecialFrontendLocation{};
+    }
+  };
+  // Sent to the main thread; the current thread's id travels in the payload.
+  profiler_add_marker("Native allocation", geckoprofiler::category::OTHER,
+                      {MarkerThreadId::MainThread(), MarkerStack::Capture()},
+                      NativeAllocationMarker{}, aSize, aMemoryAddress,
+                      profiler_current_thread_id());
+  return true;
+}
+
+static malloc_table_t gMallocTable;
+
+// Adapter needed only because of the |const void*| vs |void*| arg mismatch.
+static size_t MallocSizeOf(const void* aPtr) {
+  return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr));
+}
+
+// The values for the Bernoulli trial are taken from DMD. According to DMD:
+//
+//   In testing, a probability of 0.003 resulted in ~25% of heap blocks getting
+//   a stack trace and ~80% of heap bytes getting a stack trace. (This is
+//   possible because big heap blocks are more likely to get a stack trace.)
+//
+//   The random number seeds are arbitrary and were obtained from random.org.
+//
+// However, this value resulted in a lot of slowdown since the profiler stacks
+// are pretty heavy to collect. The value was therefore lowered to 10% of the
+// original, i.e. 0.0003.
+static void EnsureBernoulliIsInstalled() {
+  if (gBernoulli) {
+    return;
+  }
+  // Installed only once; see the gBernoulli definition for more information.
+  gBernoulli =
+      new FastBernoulliTrial(0.0003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0);
+}
+
+// This class provides infallible allocations (they abort on OOM) like
+// mozalloc's InfallibleAllocPolicy, except that memory hooks are bypassed. This
+// policy is used by the HashSet.
+class InfallibleAllocWithoutHooksPolicy {
+  static void ExitOnFailure(const void* aP) {
+    if (!aP) {
+      MOZ_CRASH("Profiler memory hooks out of memory; aborting");
+    }
+  }
+
+ public:  // The maybe_* variants may return nullptr; the others crash instead.
+  template <typename T>
+  static T* maybe_pod_malloc(size_t aNumElems) {
+    if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+    return static_cast<T*>(gMallocTable.malloc(aNumElems * sizeof(T)));
+  }
+
+  template <typename T>
+  static T* maybe_pod_calloc(size_t aNumElems) {
+    return static_cast<T*>(gMallocTable.calloc(aNumElems, sizeof(T)));
+  }
+
+  template <typename T>
+  static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+    return static_cast<T*>(gMallocTable.realloc(aPtr, aNewSize * sizeof(T)));
+  }
+  // Infallible variants: same as above, but crash when the result is null.
+  template <typename T>
+  static T* pod_malloc(size_t aNumElems) {
+    T* result = maybe_pod_malloc<T>(aNumElems);
+    ExitOnFailure(result);
+    return result;
+  }
+
+  template <typename T>
+  static T* pod_calloc(size_t aNumElems) {
+    T* result = maybe_pod_calloc<T>(aNumElems);
+    ExitOnFailure(result);
+    return result;
+  }
+
+  template <typename T>
+  static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    T* result = maybe_pod_realloc(aPtr, aOldSize, aNewSize);
+    ExitOnFailure(result);
+    return result;
+  }
+  // Frees through the same malloc table; `aSize` is not used.
+  template <typename T>
+  static void free_(T* aPtr, size_t aSize = 0) {
+    gMallocTable.free(aPtr);
+  }
+
+  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
+  bool checkSimulatedOOM() const { return true; }
+};
+
+// We can't use mozilla::Mutex because it causes re-entry into the memory hooks.
+// Define a custom implementation here.
+class Mutex : private ::mozilla::detail::MutexImpl {
+ public:
+  Mutex() : ::mozilla::detail::MutexImpl() {}
+  // Thin forwarders to the private MutexImpl base; nothing else is exposed.
+  void Lock() { ::mozilla::detail::MutexImpl::lock(); }
+  void Unlock() { ::mozilla::detail::MutexImpl::unlock(); }
+};
+
+class MutexAutoLock {  // Scoped lock: Lock() on entry, Unlock() on exit.
+ public:
+  explicit MutexAutoLock(Mutex& aMutex) : mMutex(aMutex) { mMutex.Lock(); }
+  ~MutexAutoLock() { mMutex.Unlock(); }
+  MutexAutoLock(const MutexAutoLock&) = delete;
+  void operator=(const MutexAutoLock&) = delete;
+
+ private:
+  Mutex& mMutex;
+};
+
+//---------------------------------------------------------------------------
+// Tracked allocations
+//---------------------------------------------------------------------------
+
+// The allocation tracker is shared between multiple threads, and is the
+// coordinator for knowing when allocations have been tracked. The mutable
+// internal state is protected by a mutex, and managed by the methods.
+//
+// The tracker knows about all the allocations that we have added to the
+// profiler. This way, whenever any given piece of memory is freed, we can see
+// if it was previously tracked, and we can track its deallocation.
+
+class AllocationTracker {
+  // Set of every allocation that has been captured so far. It lets us tell
+  // whether a deallocation concerns a tracked block, so that the profiler gets
+  // a balanced view into the allocations and deallocations.
+  using AllocationSet =
+      mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>,
+                       InfallibleAllocWithoutHooksPolicy>;
+
+ public:
+  AllocationTracker() : mAllocations(), mMutex() {}
+
+  void AddMemoryAddress(const void* memoryAddress) {
+    MutexAutoLock lock(mMutex);
+    if (!mAllocations.put(memoryAddress)) {
+      MOZ_CRASH("Out of memory while tracking native allocations.");
+    }
+  }
+
+  void Reset() {
+    MutexAutoLock lock(mMutex);
+    mAllocations.clearAndCompact();
+  }
+
+  // Stops tracking the given memory address. Returns true when the address was
+  // tracked (and has now been removed), false when it was not being tracked.
+  bool RemoveMemoryAddressIfFound(const void* memoryAddress) {
+    MutexAutoLock lock(mMutex);
+
+    auto entry = mAllocations.lookup(memoryAddress);
+    if (!entry) {
+      return false;
+    }
+
+    // The memory was present. It no longer needs to be tracked.
+    mAllocations.remove(entry);
+    return true;
+  }
+
+ private:
+  AllocationSet mAllocations;
+  Mutex mMutex MOZ_UNANNOTATED;
+};
+
+static AllocationTracker* gAllocationTracker;
+
+// Creates the global AllocationTracker on first call; later calls do nothing.
+// The tracker is never deleted here.
+static void EnsureAllocationTrackerIsInstalled() {
+  if (gAllocationTracker) {
+    // This is only installed once.
+    return;
+  }
+  gAllocationTracker = new AllocationTracker();
+}
+
+//---------------------------------------------------------------------------
+// Per-thread blocking of intercepts
+//---------------------------------------------------------------------------
+
+// On macOS and Linux, the first __thread/thread_local access calls malloc,
+// which leads to an infinite loop. So we use pthread-based TLS instead, which
+// somehow doesn't have this problem.
+#if !defined(XP_DARWIN) && !defined(XP_LINUX)
+#  define PROFILER_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T)
+#else
+#  define PROFILER_THREAD_LOCAL(T) \
+    ::mozilla::detail::ThreadLocal<T, ::mozilla::detail::ThreadLocalKeyStorage>
+#endif
+
+// This class is used to determine if allocations on this thread should be
+// intercepted or not.
+// Creating a ThreadIntercept object on the stack will implicitly block nested
+// ones. There are other reasons to block: The feature is off, or we're inside a
+// profiler function that is locking a mutex.
+class MOZ_RAII ThreadIntercept {
+  // Per-thread flag. While it is true, the memory hooks do not intercept
+  // further allocations on this thread. This is required because collecting
+  // stacks itself allocates; those nested allocations must be ignored.
+  static PROFILER_THREAD_LOCAL(bool) tlsIsBlocked;
+
+  // Cheap global switch telling whether the allocations feature is on.
+  static mozilla::Atomic<bool, mozilla::Relaxed> sAllocationsFeatureEnabled;
+
+  // True if this ThreadIntercept has set tlsIsBlocked.
+  bool mIsBlockingTLS;
+
+  // True if interception is blocked for any reason.
+  bool mIsBlocked;
+
+ public:
+  // Initializes the thread-local storage backing tlsIsBlocked.
+  static void Init() {
+    tlsIsBlocked.infallibleInit();
+    // infallibleInit should zero-initialize, which corresponds to `false`.
+    MOZ_ASSERT(!tlsIsBlocked.get());
+  }
+
+  // Becomes the blocking (top-level) intercept only when the feature is
+  // enabled and no other ThreadIntercept is already blocking this thread.
+  // In every other case this object reports itself as blocked.
+  ThreadIntercept()
+      : mIsBlockingTLS(sAllocationsFeatureEnabled && !tlsIsBlocked.get()),
+        mIsBlocked(true) {
+    if (!mIsBlockingTLS) {
+      // The feature is off, or the TLS was already blocked: stay blocked.
+      return;
+    }
+
+    // Block the TLS now; the destructor unblocks it.
+    MOZ_ASSERT(!tlsIsBlocked.get());
+    tlsIsBlocked.set(true);
+    // As the top-level ThreadIntercept, interception is allowed unless the
+    // profiler itself holds a locked mutex on this thread; allocations made
+    // from such a profiler call must not be intercepted.
+    mIsBlocked = profiler_is_locked_on_current_thread();
+  }
+
+  // Unblocks the TLS, but only if this object was the one that blocked it.
+  ~ThreadIntercept() {
+    if (!mIsBlockingTLS) {
+      return;
+    }
+    MOZ_ASSERT(tlsIsBlocked.get());
+    tlsIsBlocked.set(false);
+  }
+
+  // Is this ThreadIntercept effectively blocked? (Feature is off, or this
+  // ThreadIntercept is nested, or we're inside a locked-Profiler function.)
+  bool IsBlocked() const { return mIsBlocked; }
+
+  // Turns the allocations feature flag on.
+  static void EnableAllocationFeature() { sAllocationsFeatureEnabled = true; }
+
+  // Turns the allocations feature flag off.
+  static void DisableAllocationFeature() { sAllocationsFeatureEnabled = false; }
+};
+
+PROFILER_THREAD_LOCAL(bool) ThreadIntercept::tlsIsBlocked;
+
+mozilla::Atomic<bool, mozilla::Relaxed>
+    ThreadIntercept::sAllocationsFeatureEnabled(false);
+
+//---------------------------------------------------------------------------
+// malloc/free callbacks
+//---------------------------------------------------------------------------
+
+// Called by the replace_* allocation functions with the pointer they are about
+// to return. Always updates the memory counter; additionally, when
+// interception is not blocked and the Bernoulli trial passes, records an
+// allocation marker and starts tracking the address.
+// |aReqSize| is ignored: the allocator-reported usable size is used instead.
+static void AllocCallback(void* aPtr, size_t aReqSize) {
+  if (!aPtr) {
+    return;
+  }
+
+  // The first part of this function does not allocate.
+  const size_t usableSize = gMallocTable.malloc_usable_size(aPtr);
+  if (usableSize > 0) {
+    sCounter->Add(usableSize);
+  }
+
+  ThreadIntercept threadIntercept;
+  if (threadIntercept.IsBlocked()) {
+    // Either the native allocations feature is off, or we may be recursing
+    // into a memory hook. The counter above has still been updated, but no
+    // stack is collected.
+    return;
+  }
+
+  AUTO_PROFILER_LABEL("AllocCallback", PROFILER);
+
+  // Sample with a Bernoulli trial. The trial is given the byte size, so that
+  // larger allocations are weighted heavier than smaller allocations.
+  MOZ_ASSERT(gBernoulli,
+             "gBernoulli must be properly installed for the memory hooks.");
+  if (!gBernoulli->trial(usableSize)) {
+    return;
+  }
+
+  // The trial passed: attempt to add a marker.
+  const bool markerAdded = profiler_add_native_allocation_marker(
+      static_cast<int64_t>(usableSize), reinterpret_cast<uintptr_t>(aPtr));
+  if (!markerAdded) {
+    return;
+  }
+
+  MOZ_ASSERT(gAllocationTracker,
+             "gAllocationTracker must be properly installed for the memory "
+             "hooks.");
+  // Only track the memory if the allocation marker was actually added to the
+  // profiler.
+  gAllocationTracker->AddMemoryAddress(aPtr);
+}
+
+// Called by the replace_* functions before a block is released (|aPtr| must
+// still be a live block, since its size is queried). Always subtracts the
+// block's size from the memory counter; additionally, when interception is not
+// blocked and the address was tracked, records a deallocation marker.
+static void FreeCallback(void* aPtr) {
+  if (!aPtr) {
+    return;
+  }
+
+  // The first part of this function does not allocate.
+  const size_t usableSize = MallocSizeOf(aPtr);
+  const int64_t sizeDelta = -(static_cast<int64_t>(usableSize));
+  sCounter->Add(sizeDelta);
+
+  ThreadIntercept threadIntercept;
+  if (threadIntercept.IsBlocked()) {
+    // Either the native allocations feature is off, or we may be recursing
+    // into a memory hook. The counter above has still been updated, but no
+    // marker is added.
+    return;
+  }
+
+  AUTO_PROFILER_LABEL("FreeCallback", PROFILER);
+
+  // Only frees of addresses whose allocation was recorded produce a marker,
+  // so that recorded allocations and deallocations stay balanced.
+  MOZ_ASSERT(
+      gAllocationTracker,
+      "gAllocationTracker must be properly installed for the memory hooks.");
+  if (!gAllocationTracker->RemoveMemoryAddressIfFound(aPtr)) {
+    return;
+  }
+
+  // The size passed here is negative, indicating a deallocation.
+  profiler_add_native_allocation_marker(sizeDelta,
+                                        reinterpret_cast<uintptr_t>(aPtr));
+}
+
+}  // namespace mozilla::profiler
+
+//---------------------------------------------------------------------------
+// malloc/free interception
+//---------------------------------------------------------------------------
+
+using namespace mozilla::profiler;
+
+// malloc replacement: allocates through the saved malloc table, then reports
+// the result to AllocCallback.
+static void* replace_malloc(size_t aSize) {
+  // This must be a call to malloc from outside.  Intercept it.
+  void* const block = gMallocTable.malloc(aSize);
+  AllocCallback(block, aSize);
+  return block;
+}
+
+// calloc replacement: allocates through the saved malloc table, then reports
+// the result to AllocCallback.
+static void* replace_calloc(size_t aCount, size_t aSize) {
+  void* const block = gMallocTable.calloc(aCount, aSize);
+  const size_t requested = aCount * aSize;
+  AllocCallback(block, requested);
+  return block;
+}
+
+// realloc replacement. The old block is reported as freed before the real
+// realloc runs, and the resulting block is then reported as allocated.
+static void* replace_realloc(void* aOldPtr, size_t aSize) {
+  // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|.
+  if (!aOldPtr) {
+    return replace_malloc(aSize);
+  }
+
+  // Must happen before the realloc: FreeCallback queries the old block's size.
+  FreeCallback(aOldPtr);
+
+  void* const newPtr = gMallocTable.realloc(aOldPtr, aSize);
+  if (!newPtr) {
+    // realloc failed, so the old block was not released. Undo the
+    // FreeCallback above by reporting the old block as allocated again. It
+    // will look like a brand new allocation made here, which is untrue, but
+    // this case is rare and doing better isn't worth the effort.
+    AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr));
+    return newPtr;
+  }
+
+  AllocCallback(newPtr, aSize);
+  return newPtr;
+}
+
+// memalign replacement: allocates through the saved malloc table, then
+// reports the result to AllocCallback.
+static void* replace_memalign(size_t aAlignment, size_t aSize) {
+  void* const block = gMallocTable.memalign(aAlignment, aSize);
+  AllocCallback(block, aSize);
+  return block;
+}
+
+static void replace_free(void* aPtr) {
+  FreeCallback(aPtr);  // Runs before the free: it queries aPtr's size.
+  gMallocTable.free(aPtr);
+}
+
+// moz_arena_malloc replacement: allocates from |aArena| through the saved
+// malloc table, then reports the result to AllocCallback.
+static void* replace_moz_arena_malloc(arena_id_t aArena, size_t aSize) {
+  void* const block = gMallocTable.moz_arena_malloc(aArena, aSize);
+  AllocCallback(block, aSize);
+  return block;
+}
+
+// moz_arena_calloc replacement: allocates from |aArena| through the saved
+// malloc table, then reports the result to AllocCallback.
+static void* replace_moz_arena_calloc(arena_id_t aArena, size_t aCount,
+                                      size_t aSize) {
+  void* const block = gMallocTable.moz_arena_calloc(aArena, aCount, aSize);
+  const size_t requested = aCount * aSize;
+  AllocCallback(block, requested);
+  return block;
+}
+
+// moz_arena_realloc replacement. Mirrors replace_realloc: the old block is
+// reported as freed before the real realloc runs, and the resulting block is
+// then reported as allocated. Without the FreeCallback, the memory counter
+// would never be decremented for the old block, and a tracked old address
+// would stay in the allocation tracker without a matching deallocation
+// marker.
+static void* replace_moz_arena_realloc(arena_id_t aArena, void* aPtr,
+                                       size_t aSize) {
+  // Must happen before the realloc: FreeCallback queries the old block's
+  // size. It is a no-op when |aPtr| is nullptr.
+  FreeCallback(aPtr);
+
+  void* ptr = gMallocTable.moz_arena_realloc(aArena, aPtr, aSize);
+  if (ptr) {
+    AllocCallback(ptr, aSize);
+  } else if (aPtr) {
+    // realloc failed, so the old block was not released. Undo the
+    // FreeCallback above by reporting the old block as allocated again.
+    AllocCallback(aPtr, gMallocTable.malloc_usable_size(aPtr));
+  }
+  return ptr;
+}
+
+static void replace_moz_arena_free(arena_id_t aArena, void* aPtr) {
+  FreeCallback(aPtr);  // Runs before the free: it queries aPtr's size.
+  gMallocTable.moz_arena_free(aArena, aPtr);
+}
+
+// moz_arena_memalign replacement: allocates from |aArena| through the saved
+// malloc table, then reports the result to AllocCallback.
+static void* replace_moz_arena_memalign(arena_id_t aArena, size_t aAlignment,
+                                        size_t aSize) {
+  void* const block =
+      gMallocTable.moz_arena_memalign(aArena, aAlignment, aSize);
+  AllocCallback(block, aSize);
+  return block;
+}
+
+// we have to replace these or jemalloc will assume we don't implement any
+// of the arena replacements!
+// Pure pass-through to the saved malloc table; nothing is recorded.
+static arena_id_t replace_moz_create_arena_with_params(
+    arena_params_t* aParams) {
+  const arena_id_t arenaId = gMallocTable.moz_create_arena_with_params(aParams);
+  return arenaId;
+}
+
+// Pure pass-through to the saved malloc table; nothing is recorded.
+static void replace_moz_dispose_arena(arena_id_t aArenaId) {
+  gMallocTable.moz_dispose_arena(aArenaId);
+}
+
+// Pure pass-through to the saved malloc table; nothing is recorded.
+static void replace_moz_set_max_dirty_page_modifier(int32_t aModifier) {
+  gMallocTable.moz_set_max_dirty_page_modifier(aModifier);
+}
+
+// Must come after all the replace_* funcs, which MALLOC_DECL names below.
+void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) {
+  gMallocTable = *aMallocTable;  // Copy the incoming table before patching it.
+#define MALLOC_FUNCS (MALLOC_FUNCS_MALLOC_BASE | MALLOC_FUNCS_ARENA)
+#define MALLOC_DECL(name, ...) aMallocTable->name = replace_##name;
+#include "malloc_decls.h"
+}
+
+void profiler_replace_remove() {}  // No-op: the body is empty.
+
+namespace mozilla::profiler {
+//---------------------------------------------------------------------------
+// Initialization
+//---------------------------------------------------------------------------
+
+// Installs replace_init through jemalloc_replace_dynamic and returns the
+// memory counter. The counter is created on the first call and cleared (not
+// recreated) on later calls.
+BaseProfilerCount* install_memory_hooks() {
+  if (sCounter) {
+    sCounter->Clear();
+  } else {
+    sCounter = new ProfilerCounterTotal("malloc", "Memory",
+                                        "Amount of allocated memory");
+    // Also initialize the ThreadIntercept, even if native allocation tracking
+    // won't be turned on. This way the TLS will be initialized.
+    ThreadIntercept::Init();
+  }
+
+  jemalloc_replace_dynamic(replace_init);
+  return sCounter;
+}
+
+// Uninstalls the hooks but deliberately keeps the sCounter machinery alive.
+// Deleting the counter would race with memory hooks that are still running,
+// and guarding it with mutexes would add overhead; leaking these values is
+// the cheaper option.
+void remove_memory_hooks() {
+  jemalloc_replace_dynamic(nullptr);
+}
+
+void enable_native_allocations() {
+  // The bloat log tracks allocations and deallocations. This can conflict
+  // with the memory hook machinery, as the bloat log creates its own
+  // allocations. This means we can re-enter inside the bloat log machinery. At
+  // this time, the bloat log does not know about and cannot handle the native
+  // allocation feature.
+  //
+  // At the time of this writing, we hit this assertion:
+  // IsIdle(oldState) || IsRead(oldState) in Checker::StartReadOp()
+  //
+  //    #01: GetBloatEntry(char const*, unsigned int)
+  //    #02: NS_LogCtor
+  //    #03: profiler_get_backtrace()
+  //    #04: profiler_add_native_allocation_marker(long long)
+  //    #05: mozilla::profiler::AllocCallback(void*, unsigned long)
+  //    #06: replace_calloc(unsigned long, unsigned long)
+  //    #07: PLDHashTable::ChangeTable(int)
+  //    #08: PLDHashTable::Add(void const*, std::nothrow_t const&)
+  //    #09: nsBaseHashtable<nsDepCharHashKey, nsAutoPtr<BloatEntry>, ...
+  //    #10: GetBloatEntry(char const*, unsigned int)
+  //    #11: NS_LogCtor
+  //    #12: profiler_get_backtrace()
+  //    ...
+  MOZ_ASSERT(!PR_GetEnv("XPCOM_MEM_BLOAT_LOG"),
+             "The bloat log feature is not compatible with the native "
+             "allocations instrumentation.");
+
+  EnsureBernoulliIsInstalled();
+  EnsureAllocationTrackerIsInstalled();
+  ThreadIntercept::EnableAllocationFeature();  // Only after both are installed.
+}
+
+// Turns the allocations feature off and forgets all tracked addresses.
+// This is safe to call even if native allocations hasn't been enabled.
+void disable_native_allocations() {
+  ThreadIntercept::DisableAllocationFeature();
+
+  if (!gAllocationTracker) {
+    // No tracker was ever installed, so there is nothing to reset.
+    return;
+  }
+  gAllocationTracker->Reset();
+}
+
+}  // namespace mozilla::profiler
diff --git a/tools/profiler/core/memory_hooks.h b/tools/profiler/core/memory_hooks.h
new file mode 100644
index 0000000000..a6ace771dd
--- /dev/null
+++ b/tools/profiler/core/memory_hooks.h
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef memory_hooks_h
+#define memory_hooks_h
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+class BaseProfilerCount;
+
+namespace mozilla::profiler {
+
+// Installs the malloc replacement hooks and returns the memory counter.
+BaseProfilerCount* install_memory_hooks();
+
+// Uninstalls the malloc replacement hooks.
+void remove_memory_hooks();
+
+// Turns the native allocations feature on.
+void enable_native_allocations();
+
+// Turns the native allocations feature off. Safe to call even if the feature
+// was never enabled.
+void disable_native_allocations();
+
+}  // namespace mozilla::profiler
+#endif
+
+#endif
diff --git a/tools/profiler/core/platform-linux-android.cpp b/tools/profiler/core/platform-linux-android.cpp
new file mode 100644
index 0000000000..6bcb9cf38b
--- /dev/null
+++ b/tools/profiler/core/platform-linux-android.cpp
@@ -0,0 +1,636 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// This file is used for both Linux and Android as well as FreeBSD.
+
+#include <stdio.h>
+#include <math.h>
+
+#include <pthread.h>
+#if defined(GP_OS_freebsd)
+#  include <sys/thr.h>
+#endif
+#include <semaphore.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <ucontext.h>
+// Ubuntu Dapper requires memory pages to be marked as
+// executable. Otherwise, OS raises an exception when executing code
+// in that page.
+#include <sys/types.h>  // mmap & munmap
+#include <sys/mman.h>   // mmap & munmap
+#include <sys/stat.h>   // open
+#include <fcntl.h>      // open
+#include <unistd.h>     // sysconf
+#include <semaphore.h>
+#ifdef __GLIBC__
+#  include <execinfo.h>  // backtrace, backtrace_symbols
+#endif                   // def __GLIBC__
+#include <strings.h>     // index
+#include <errno.h>
+#include <stdarg.h>
+
+#include "prenv.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/DebugOnly.h"
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+#  include "common/linux/breakpad_getcontext.h"
+#endif
+
+#include <string.h>
+#include <list>
+
+using namespace mozilla;
+
+// Fills |aRegs| from the signal context |aContext|: stores the context pointer
+// itself, then copies the program counter, stack pointer, frame pointer and
+// link register out of the platform-specific mcontext layout. On x86/amd64
+// mLR is set to 0.
+static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
+  aRegs.mContext = aContext;
+  mcontext_t& mcontext = aContext->uc_mcontext;
+
+  // Extracting the sample from the context is extremely machine dependent.
+#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_amd64_freebsd)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
+#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
+#elif defined(GP_PLAT_arm64_freebsd)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr);
+#elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
+  // $31 is the MIPS return-address register ($ra). Assign it so that mLR is
+  // set on this platform like on every other one.
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.gregs[31]);
+#else
+#  error "bad platform"
+#endif
+}
+
+#if defined(GP_OS_android)
+#  define SYS_tgkill __NR_tgkill
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+// Issues the tgkill system call directly through syscall() and returns its
+// result.
+int tgkill(pid_t tgid, pid_t tid, int signalno) {
+  const long result = syscall(SYS_tgkill, tgid, tid, signalno);
+  return static_cast<int>(result);
+}
+#endif
+
+#if defined(GP_OS_freebsd)
+#  define tgkill thr_kill2
+#endif
+
+// Must run on the thread identified by |aThreadId| (asserted). Records the
+// calling thread's CPU-time clock id; mClockId is left unset when
+// pthread_getcpuclockid fails.
+mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId) {
+  MOZ_ASSERT(aThreadId == profiler_current_thread_id());
+
+  clockid_t cpuClockId;
+  const int rv = pthread_getcpuclockid(pthread_self(), &cpuClockId);
+  if (rv == 0) {
+    mClockId = Some(cpuClockId);
+  }
+}
+
+mozilla::profiler::PlatformData::~PlatformData() = default;
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The only way to reliably interrupt a Linux thread and inspect its register
+// and stack state is by sending a signal to it, and doing the work inside the
+// signal handler.  But we don't want to run much code inside the signal
+// handler, since POSIX severely restricts what we can do in signal handlers.
+// So we use a system of semaphores to suspend the thread and allow the
+// sampler thread to do all the work of unwinding and copying out whatever
+// data it wants.
+//
+// A four-message protocol is used to reliably suspend and later resume the
+// thread to be sampled (the samplee):
+//
+// Sampler (signal sender) thread              Samplee (thread to be sampled)
+//
+// Prepare the SigHandlerCoordinator
+// and point sSigHandlerCoordinator at it
+//
+// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
+// wait(mMessage2)                             Copy register state
+//                                               into sSigHandlerCoordinator
+//                         <------ MSG 2 ----- post(mMessage2)
+// Samplee is now suspended.                   wait(mMessage3)
+//   Examine its stack/register
+//   state at leisure
+//
+// Release samplee:
+//   post(mMessage3)       ------- MSG 3 ----->
+// wait(mMessage4)                              Samplee now resumes.  Tell
+//                                                the sampler that we are done.
+//                         <------ MSG 4 ------ post(mMessage4)
+// Now we know the samplee's signal             (leave signal handler)
+//   handler has finished using
+//   sSigHandlerCoordinator.  We can
+//   safely reuse it for some other thread.
+//
+
+// A type used to coordinate between the sampler (signal sending) thread and
+// the thread currently being sampled (the samplee, which receives the
+// signals).
+//
+// The first message is sent using a SIGPROF signal delivery.  The subsequent
+// three are sent using sem_wait/sem_post pairs.  They are named accordingly
+// in the following struct.
+struct SigHandlerCoordinator {
+  // Zeroes the saved context and creates the three semaphores, each
+  // process-private (pshared = 0) with an initial count of 0.
+  SigHandlerCoordinator() {
+    PodZero(&mUContext);
+
+    sem_t* const semaphores[] = {&mMessage2, &mMessage3, &mMessage4};
+    int status = 0;
+    for (sem_t* semaphore : semaphores) {
+      status |= sem_init(semaphore, /* pshared */ 0, 0);
+    }
+    MOZ_ASSERT(status == 0);
+    (void)status;
+  }
+
+  // Destroys the three semaphores.
+  ~SigHandlerCoordinator() {
+    sem_t* const semaphores[] = {&mMessage2, &mMessage3, &mMessage4};
+    int status = 0;
+    for (sem_t* semaphore : semaphores) {
+      status |= sem_destroy(semaphore);
+    }
+    MOZ_ASSERT(status == 0);
+    (void)status;
+  }
+
+  sem_t mMessage2;       // To sampler: "context is in sSigHandlerCoordinator"
+  sem_t mMessage3;       // To samplee: "resume"
+  sem_t mMessage4;       // To sampler: "finished with sSigHandlerCoordinator"
+  ucontext_t mUContext;  // Context at signal
+};
+
+struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
+
+// SIGPROF handler, run on the samplee thread. Implements the samplee side of
+// the four-message protocol: save the register context, announce it
+// (message 2), stay parked until told to resume (message 3), then report
+// completion (message 4). errno is preserved across the handler.
+static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
+  // Avoid TSan warning about clobbering errno.
+  const int savedErrno = errno;
+
+  MOZ_ASSERT(aSignal == SIGPROF);
+  MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
+
+  // Receiving this signal is message 1 from the sampler thread, meaning
+  // "|sSigHandlerCoordinator| is ready for use, please copy your register
+  // context into it."
+  Sampler::sSigHandlerCoordinator->mUContext =
+      *static_cast<ucontext_t*>(aContext);
+
+  // Message 2: tell the sampler thread that the context is now available in
+  // |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by being
+  // interrupted by a signal, so no retry loop is needed.
+  int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
+  MOZ_ASSERT(r == 0);
+
+  // From here on the sampler thread assumes we are suspended, so we must
+  // not touch any global state.
+
+  // Message 3: wait until the sampler thread tells us to resume, retrying
+  // whenever the wait is interrupted by a signal.
+  do {
+    r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
+  } while (r == -1 && errno == EINTR);
+  // We don't expect any other kind of failure
+  MOZ_ASSERT(r == 0);
+
+  // Message 4: tell the sampler thread that we are done with
+  // |sSigHandlerCoordinator|.  After this point it is not safe to touch
+  // |sSigHandlerCoordinator|.
+  r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
+  MOZ_ASSERT(r == 0);
+
+  errno = savedErrno;
+}
+
+// Records the current process id and installs SigprofHandler as the SIGPROF
+// handler, saving the previous handler in mOldSigprofHandler. Crashes if the
+// handler cannot be installed.
+Sampler::Sampler(PSLockRef aLock)
+    : mMyPid(profiler_current_process_id()),
+      // The sampler thread's ID isn't known until that thread runs, so
+      // mSamplerTid starts out as a dummy value and is filled in for real in
+      // SuspendAndSampleAndResumeThread().
+      mSamplerTid{} {
+#if defined(USE_EHABI_STACKWALK)
+  mozilla::EHABIStackWalkInit();
+#endif
+
+  // NOTE: LUL is not initialized here but in SamplerThread's constructor.
+  // With the profiler_suspend_and_sample_thread entry point, we want to be
+  // able to sample without waiting for LUL to be initialized.
+
+  // Request profiling signals.
+  struct sigaction action;
+  sigemptyset(&action.sa_mask);
+  action.sa_flags = SA_RESTART | SA_SIGINFO;
+  action.sa_sigaction = SigprofHandler;
+
+  const int rv = sigaction(SIGPROF, &action, &mOldSigprofHandler);
+  if (rv != 0) {
+    MOZ_CRASH("Error installing SIGPROF handler in the profiler");
+  }
+}
+
+// Reinstalls the SIGPROF handler that was saved in mOldSigprofHandler.
+void Sampler::Disable(PSLockRef aLock) {
+  // The signal handler is global state, so it's important that it is
+  // restored now, while gPSMutex is locked.
+  sigaction(SIGPROF, &mOldSigprofHandler, nullptr);
+}
+
+static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
+                                          SpliceableJSONWriter& aWriter) {
+  aWriter.StringProperty("threadCPUDelta", "ns");  // Unit: nanoseconds.
+}
+
+/* static */
+uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) {
+  return aRawValue;  // Identity: the raw value is returned unchanged.
+}
+
+namespace mozilla::profiler {
+// Reads the clock whose id is stored in |aPlatformData| and writes its value,
+// in nanoseconds, to |*aResult|. Returns false, leaving |*aResult| untouched,
+// when there is no clock id or when clock_gettime fails.
+bool GetCpuTimeSinceThreadStartInNs(
+    uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) {
+  const Maybe<clockid_t> clockId = aPlatformData.GetClockId();
+  if (MOZ_UNLIKELY(!clockId)) {
+    return false;
+  }
+
+  timespec now;
+  const bool succeeded = clock_gettime(*clockId, &now) == 0;
+  if (succeeded) {
+    const uint64_t secondsAsNs = uint64_t(now.tv_sec) * 1'000'000'000u;
+    *aResult = secondsAsNs + uint64_t(now.tv_nsec);
+  }
+  return succeeded;
+}
+}  // namespace mozilla::profiler
+
+// Measures the process CPU time now, returns the difference from
+// |aPreviousRunningTimesToBeUpdated|, and then overwrites that argument with
+// the new measurement.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) {
+  AUTO_PROFILER_STATS(GetProcessRunningTimes);
+
+  RunningTimes currentRunningTimes;
+  {
+    AUTO_PROFILER_STATS(GetProcessRunningTimes_clock_gettime);
+    timespec ts;
+    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0) {
+      const uint64_t cpuNs =
+          uint64_t(ts.tv_sec) * 1'000'000'000u + uint64_t(ts.tv_nsec);
+      currentRunningTimes.SetThreadCPUDelta(cpuNs);
+    }
+    currentRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now());
+  }
+
+  const RunningTimes diff =
+      currentRunningTimes - aPreviousRunningTimesToBeUpdated;
+  aPreviousRunningTimesToBeUpdated = currentRunningTimes;
+  return diff;
+}
+
+// Measures the thread's CPU time now, returns the difference from the
+// thread's previously stored running times, and then stores the new
+// measurement as the previous one. Without a clock id, only a timestamp is
+// returned.
+static RunningTimes GetThreadRunningTimesDiff(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
+  AUTO_PROFILER_STATS(GetRunningTimes_clock_gettime_thread);
+
+  const Maybe<clockid_t> clockId = aThreadData.PlatformDataCRef().GetClockId();
+
+  if (MOZ_UNLIKELY(!clockId)) {
+    // No clock id -> Nothing to measure apart from the timestamp.
+    RunningTimes timestampOnly;
+    timestampOnly.SetPostMeasurementTimeStamp(TimeStamp::Now());
+    return timestampOnly;
+  }
+
+  const RunningTimes currentRunningTimes = GetRunningTimesWithTightTimestamp(
+      [cid = *clockId](RunningTimes& aRunningTimes) {
+        AUTO_PROFILER_STATS(GetRunningTimes_clock_gettime);
+        timespec ts;
+        if (clock_gettime(cid, &ts) != 0) {
+          aRunningTimes.ClearThreadCPUDelta();
+          return;
+        }
+        aRunningTimes.ResetThreadCPUDelta(uint64_t(ts.tv_sec) * 1'000'000'000u +
+                                          uint64_t(ts.tv_nsec));
+      });
+
+  ProfiledThreadData* profiledThreadData =
+      aThreadData.GetProfiledThreadData(aLock);
+  MOZ_ASSERT(profiledThreadData);
+  RunningTimes& storedRunningTimes =
+      profiledThreadData->PreviousThreadRunningTimesRef();
+
+  const RunningTimes diff = currentRunningTimes - storedRunningTimes;
+  storedRunningTimes = currentRunningTimes;
+  return diff;
+}
+
+// Overwrites the thread's stored "previous" CPU time with the CPU time
+// measured right now. Does nothing when the thread has no clock id.
+static void DiscardSuspendedThreadRunningTimes(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
+  AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes);
+
+  // On Linux, a thread is suspended by sending it a signal, and handling that
+  // signal costs the thread some CPU time. That extra running time, added
+  // since the call to GetThreadRunningTimesDiff, is discarded here by
+  // overwriting the thread's PreviousThreadRunningTimesRef() with the current
+  // running time.
+
+  const Maybe<clockid_t> clockId = aThreadData.PlatformDataCRef().GetClockId();
+
+  if (MOZ_UNLIKELY(!clockId)) {
+    // No clock id -> Nothing to measure.
+    return;
+  }
+
+  ProfiledThreadData* profiledThreadData =
+      aThreadData.GetProfiledThreadData(aLock);
+  MOZ_ASSERT(profiledThreadData);
+  RunningTimes& storedRunningTimes =
+      profiledThreadData->PreviousThreadRunningTimesRef();
+
+  timespec ts;
+  if (clock_gettime(*clockId, &ts) != 0) {
+    storedRunningTimes.ClearThreadCPUDelta();
+    return;
+  }
+  storedRunningTimes.ResetThreadCPUDelta(uint64_t(ts.tv_sec) * 1'000'000'000u +
+                                         uint64_t(ts.tv_nsec));
+}
+
+// Suspends the thread described by |aThreadData| by signalling it with
+// SIGPROF, passes its register state to |aProcessRegs| together with |aNow|,
+// and then lets it resume. The sampler and the samplee synchronise through
+// the semaphores in |sSigHandlerCoordinator| (messages 1 to 4 below). If the
+// signal cannot be sent, |aProcessRegs| is never invoked.
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  // Only one sampler thread can be sampling at once.  So we expect to have
+  // complete control over |sSigHandlerCoordinator|.
+  MOZ_ASSERT(!sSigHandlerCoordinator);
+
+  // Lazily record the sampler's own thread id on first use.
+  if (!mSamplerTid.IsSpecified()) {
+    mSamplerTid = profiler_current_thread_id();
+  }
+  // The sampler must never try to suspend itself.
+  ProfilerThreadId sampleeTid = aThreadData.Info().ThreadId();
+  MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  SigHandlerCoordinator coord;  // on sampler thread's stack
+  sSigHandlerCoordinator = &coord;
+
+  // Send message 1 to the samplee (the thread to be sampled), by
+  // signalling at it.
+  // This could fail if the thread doesn't exist anymore.
+  int r = tgkill(mMyPid.ToNumber(), sampleeTid.ToNumber(), SIGPROF);
+  if (r == 0) {
+    // Wait for message 2 from the samplee, indicating that the context
+    // is available and that the thread is suspended.
+    while (true) {
+      r = sem_wait(&sSigHandlerCoordinator->mMessage2);
+      if (r == -1 && errno == EINTR) {
+        // Interrupted by a signal.  Try again.
+        continue;
+      }
+      // We don't expect any other kind of failure.
+      MOZ_ASSERT(r == 0);
+      break;
+    }
+
+    //----------------------------------------------------------------//
+    // Sample the target thread.
+
+    // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+    //
+    // The profiler's "critical section" begins here.  In the critical section,
+    // we must not do any dynamic memory allocation, nor try to acquire any lock
+    // or any other unshareable resource.  This is because the thread to be
+    // sampled has been suspended at some entirely arbitrary point, and we have
+    // no idea which unsharable resources (locks, essentially) it holds.  So any
+    // attempt to acquire any lock, including the implied locks used by the
+    // malloc implementation, risks deadlock.  This includes TimeStamp::Now(),
+    // which gets a lock on Windows.
+
+    // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
+    // valid.  We can poke around in it and unwind its stack as we like.
+
+    // Extract the current register values.
+    Registers regs;
+    PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
+    aProcessRegs(regs, aNow);
+
+    //----------------------------------------------------------------//
+    // Resume the target thread.
+
+    // Send message 3 to the samplee, which tells it to resume.
+    r = sem_post(&sSigHandlerCoordinator->mMessage3);
+    MOZ_ASSERT(r == 0);
+
+    // Wait for message 4 from the samplee, which tells us that it has
+    // finished with |sSigHandlerCoordinator|.
+    while (true) {
+      r = sem_wait(&sSigHandlerCoordinator->mMessage4);
+      if (r == -1 && errno == EINTR) {
+        // Interrupted by a signal.  Try again.
+        continue;
+      }
+      // As above, no other failure is expected.
+      MOZ_ASSERT(r == 0);
+      break;
+    }
+
+    // The profiler's critical section ends here.  After this point, none of the
+    // critical section limitations documented above apply.
+    //
+    // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  }
+
+  // This isn't strictly necessary, but doing so does help pick up anomalies
+  // in which the signal handler is running when it shouldn't be.
+  sSigHandlerCoordinator = nullptr;
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+// pthread start routine for the sampler thread. |aArg| is the SamplerThread
+// handed to pthread_create(); its Run() loop is executed on the new thread.
+static void* ThreadEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+// Builds the sampler thread object and immediately starts the underlying
+// pthread, which runs ThreadEntry() with this object as its argument. The
+// sampling interval is converted from milliseconds to whole microseconds
+// (rounded, at least 1). When USE_LUL_STACKWALK is defined and stack walking
+// is requested, the LUL unwinder is created and primed here if CorePS does
+// not hold one yet.
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds, uint32_t aFeatures)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
+#if defined(USE_LUL_STACKWALK)
+  if (!CorePS::Lul() && ProfilerFeature::HasStackWalk(aFeatures)) {
+    CorePS::SetLul(MakeUnique<lul::LUL>(logging_sink_for_LUL));
+    lul::LUL* const unwinder = CorePS::Lul();
+
+    // Load every piece of unwind info that is available right now.
+    read_procmaps(unwinder);
+
+    // From here on the LUL instance is in unwind mode: unwind info can no
+    // longer be added to or removed from it, only Unwind() may be called.
+    unwinder->EnableUnwinding();
+
+    // Run the LUL self-tests when requested through the environment.
+    if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
+      int nTests = 0;
+      int nTestsPassed = 0;
+      RunLulUnitTests(&nTests, &nTestsPassed, unwinder);
+    }
+  }
+#endif
+
+  // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
+  // the signal ourselves instead of relying on itimer provides much better
+  // accuracy.
+  //
+  // At least 350 KiB of stack space are needed when built with TSAN. This
+  // includes lul::N_STACK_BYTES plus whatever else is needed for the sampler
+  // thread. The stack size is set to 800 KiB to keep a safe margin above that.
+  constexpr size_t kSamplerStackBytes = 800 * 1024;
+
+  pthread_attr_t attr;
+  // Each step only runs if the previous one succeeded.
+  const bool started =
+      pthread_attr_init(&attr) == 0 &&
+      pthread_attr_setstacksize(&attr, kSamplerStackBytes) == 0 &&
+      pthread_create(&mThread, &attr, ThreadEntry, this) == 0;
+  if (!started) {
+    MOZ_CRASH("pthread_create failed");
+  }
+  pthread_attr_destroy(&attr);
+}
+
+// Waits for the sampler pthread to finish, then flushes any post-sampling
+// callbacks that are still queued.
+SamplerThread::~SamplerThread() {
+  // Block until the thread started in the constructor has exited.
+  pthread_join(mThread, nullptr);
+  // Just in the unlikely case some callbacks were added between the end of the
+  // thread and now.
+  InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList),
+                              SamplingState::JustStopped);
+}
+
+// Sleeps the calling thread for |aMicroseconds|.
+//
+// A single nanosleep() path is used for every interval length: the interval
+// is split into whole seconds and a sub-second remainder, so intervals of one
+// second or more no longer go through usleep() (whose behaviour for values of
+// 1,000,000 or more is not guaranteed by POSIX, and which returns early when
+// a signal arrives).
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  constexpr uint32_t kMicrosecondsPerSecond = 1000000;
+  constexpr long kNanosecondsPerMicrosecond = 1000;
+
+  struct timespec ts;
+  ts.tv_sec = static_cast<time_t>(aMicroseconds / kMicrosecondsPerSecond);
+  // The remainder is below one million, so tv_nsec stays below one second.
+  ts.tv_nsec = static_cast<long>(aMicroseconds % kMicrosecondsPerSecond) *
+               kNanosecondsPerMicrosecond;
+
+  int rv = ::nanosleep(&ts, &ts);
+
+  while (rv != 0 && errno == EINTR) {
+    // Keep waiting in case of interrupt.
+    // nanosleep puts the remaining time back into ts.
+    rv = ::nanosleep(&ts, &ts);
+  }
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
+}
+
+// Disables the platform sampler; called with the profiler lock |aLock| held.
+void SamplerThread::Stop(PSLockRef aLock) {
+  // Restore old signal handler. This is global state so it's important that
+  // we do it now, while gPSMutex is locked. It's safe to do this now even
+  // though this SamplerThread is still alive, because the next time the main
+  // loop of Run() iterates it won't get past the mActivityGeneration check,
+  // and so won't send any signals.
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+
+// We use pthread_atfork() to temporarily disable signal delivery during any
+// fork() call. Without that, fork() can be repeatedly interrupted by signal
+// delivery, requiring it to be repeatedly restarted, which can lead to *long*
+// delays. See bug 837390.
+//
+// We provide no paf_child() function to run in the child after forking. This
+// is fine because we always immediately exec() after fork(), and exec()
+// clobbers all process state. Also, we don't want the sampler to resume in the
+// child process between fork() and exec(), it would be wasteful.
+//
+// Unfortunately all this is only doable on non-Android because Bionic doesn't
+// have pthread_atfork.
+
+// In the parent, before the fork, increase gSkipSampling to ensure that
+// profiler sampling loops will be skipped. There could be one in progress now,
+// causing a small delay, but further sampling will be skipped, allowing `fork`
+// to complete.
+// Registered as the `prepare` handler with pthread_atfork() in PlatformInit().
+static void paf_prepare() { ++gSkipSampling; }
+
+// In the parent, after the fork, decrease gSkipSampling to let the sampler
+// resume sampling (unless other places have made it non-zero as well).
+// Registered as the `parent` handler with pthread_atfork() in PlatformInit().
+static void paf_parent() { --gSkipSampling; }
+
+// Platform-specific profiler initialisation for Linux and FreeBSD: installs
+// the fork handlers defined above. |aLock| is not used by the body.
+static void PlatformInit(PSLockRef aLock) {
+  // Set up the fork handlers.
+  // No child handler is registered (third argument is null).
+  pthread_atfork(paf_prepare, paf_parent, nullptr);
+}
+
+#else
+
+// No fork handlers are installed on the remaining platforms covered by this
+// file, so there is nothing to initialise.
+static void PlatformInit(PSLockRef aLock) {}
+
+#endif
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Fills |regs| from the calling thread's own context, captured with
+// getcontext() into regs.mContextSyncStorage. If getcontext() fails, |regs|
+// is left untouched.
+// NOTE(review): this expands to a bare `if` statement, so take care when
+// using it directly before an `else`.
+#  define REGISTERS_SYNC_POPULATE(regs)                         \
+    if (!getcontext(&regs.mContextSyncStorage)) {               \
+      PopulateRegsFromContext(regs, &regs.mContextSyncStorage); \
+    }
+#endif
diff --git a/tools/profiler/core/platform-macos.cpp b/tools/profiler/core/platform-macos.cpp
new file mode 100644
index 0000000000..b69a346d64
--- /dev/null
+++ b/tools/profiler/core/platform-macos.cpp
@@ -0,0 +1,297 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <mach/mach_init.h>
+#include <mach-o/getsect.h>
+
+#include <AvailabilityMacros.h>
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <libkern/OSAtomic.h>
+#include <libproc.h>
+#include <mach/mach.h>
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/thread_act.h>
+#include <mach/vm_statistics.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+
+// this port is based off of v8 svn revision 9837
+
+// Stores the Mach thread port returned by mach_thread_self() as the profiled
+// thread. |aThreadId| is not used by this implementation.
+// NOTE(review): mach_thread_self() refers to the calling thread, so this
+// presumably has to be constructed on the thread being profiled -- confirm.
+mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId)
+    : mProfiledThread(mach_thread_self()) {}
+
+// Releases the thread port obtained in the constructor.
+mozilla::profiler::PlatformData::~PlatformData() {
+  // Deallocate Mach port for thread.
+  mach_port_deallocate(mach_task_self(), mProfiledThread);
+}
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The macOS sampler has no state to set up: the body is intentionally empty.
+Sampler::Sampler(PSLockRef aLock) {}
+
+// Nothing to undo on macOS: the body is intentionally empty.
+void Sampler::Disable(PSLockRef aLock) {}
+
+// Writes the unit used for the "threadCPUDelta" sample field to |aWriter|.
+// |aLock| is not used by the body.
+static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
+                                          SpliceableJSONWriter& aWriter) {
+  // Microseconds.
+  aWriter.StringProperty("threadCPUDelta", "\u00B5s");
+}
+
+// On macOS the raw value is emitted unchanged: no conversion is applied.
+/* static */
+uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) {
+  return aRawValue;
+}
+
+namespace mozilla::profiler {
+// Queries THREAD_EXTENDED_INFO for the profiled thread and stores the sum of
+// its user and system run times in |*aResult|.
+// Returns false, leaving |*aResult| untouched, if thread_info() fails.
+bool GetCpuTimeSinceThreadStartInNs(
+    uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) {
+  thread_extended_info_data_t extendedInfo;
+  mach_msg_type_number_t infoCount = THREAD_EXTENDED_INFO_COUNT;
+
+  const kern_return_t status = thread_info(
+      aPlatformData.ProfiledThread(), THREAD_EXTENDED_INFO,
+      reinterpret_cast<thread_info_t>(&extendedInfo), &infoCount);
+  if (status != KERN_SUCCESS) {
+    return false;
+  }
+
+  *aResult = extendedInfo.pth_user_time + extendedInfo.pth_system_time;
+  return true;
+}
+}  // namespace mozilla::profiler
+
+// Measures the process-wide CPU usage reported by proc_pidinfo(), returns the
+// difference from |aPreviousRunningTimesToBeUpdated|, and overwrites that
+// argument with the new measurement. When proc_pidinfo() returns fewer bytes
+// than expected, no CPU value is set on the new measurement.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) {
+  AUTO_PROFILER_STATS(GetProcessRunningTimes);
+
+  RunningTimes currentTimes;
+  {
+    AUTO_PROFILER_STATS(GetProcessRunningTimes_task_info);
+
+    static const auto pid = getpid();
+    struct proc_taskinfo taskInfo;
+    const auto bytesWritten = static_cast<unsigned long>(proc_pidinfo(
+        pid, PROC_PIDTASKINFO, 0, &taskInfo, PROC_PIDTASKINFO_SIZE));
+    if (bytesWritten >= PROC_PIDTASKINFO_SIZE) {
+      currentTimes.SetThreadCPUDelta(taskInfo.pti_total_user +
+                                     taskInfo.pti_total_system);
+    }
+    // Timestamp taken right after the measurement.
+    currentTimes.SetPostMeasurementTimeStamp(TimeStamp::Now());
+  }
+
+  const RunningTimes delta = currentTimes - aPreviousRunningTimesToBeUpdated;
+  aPreviousRunningTimesToBeUpdated = currentTimes;
+  return delta;
+}
+
+// Measures the CPU time (user + system, in microseconds) of the thread behind
+// |aThreadData| via THREAD_BASIC_INFO, returns the difference from the
+// thread's previously stored running times, and stores the new measurement
+// as the previous one for the next call.
+static RunningTimes GetThreadRunningTimesDiff(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
+  AUTO_PROFILER_STATS(GetRunningTimes);
+
+  const mozilla::profiler::PlatformData& platformData =
+      aThreadData.PlatformDataCRef();
+
+  const RunningTimes currentTimes = GetRunningTimesWithTightTimestamp(
+      [&platformData](RunningTimes& aRunningTimes) {
+        AUTO_PROFILER_STATS(GetRunningTimes_thread_info);
+
+        thread_basic_info_data_t basicInfo;
+        mach_msg_type_number_t infoCount = THREAD_BASIC_INFO_COUNT;
+        const kern_return_t status =
+            thread_info(platformData.ProfiledThread(), THREAD_BASIC_INFO,
+                        reinterpret_cast<thread_info_t>(&basicInfo),
+                        &infoCount);
+
+        // The measurement is only usable if the call succeeded and filled in
+        // the whole structure.
+        if (status != KERN_SUCCESS || infoCount != THREAD_BASIC_INFO_COUNT) {
+          aRunningTimes.ClearThreadCPUDelta();
+          return;
+        }
+
+        // Flattens a seconds/microseconds pair into microseconds.
+        const auto toMicroseconds = [](const auto& aTimeValue) {
+          return uint64_t(aTimeValue.seconds) * uint64_t(USEC_PER_SEC) +
+                 uint64_t(aTimeValue.microseconds);
+        };
+        const uint64_t userTimeUs = toMicroseconds(basicInfo.user_time);
+        const uint64_t systemTimeUs = toMicroseconds(basicInfo.system_time);
+        aRunningTimes.ResetThreadCPUDelta(userTimeUs + systemTimeUs);
+      });
+
+  ProfiledThreadData* profiledThreadData =
+      aThreadData.GetProfiledThreadData(aLock);
+  MOZ_ASSERT(profiledThreadData);
+  RunningTimes& storedTimes =
+      profiledThreadData->PreviousThreadRunningTimesRef();
+
+  const RunningTimes delta = currentTimes - storedTimes;
+  storedTimes = currentTimes;
+  return delta;
+}
+
+// Intentionally a no-op on macOS; neither argument is used.
+static void DiscardSuspendedThreadRunningTimes(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
+  // Nothing to do!
+  // On macOS, suspending a thread doesn't make that thread work.
+}
+
+// Suspends the thread described by |aThreadData| with thread_suspend(), reads
+// its register state with thread_get_state(), passes it to |aProcessRegs|
+// together with |aNow|, and resumes the thread. If the thread cannot be
+// suspended, or its state cannot be read, |aProcessRegs| is not invoked.
+// |aLock| is not used by the body.
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  thread_act_t samplee_thread = aThreadData.PlatformDataCRef().ProfiledThread();
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  // We're using thread_suspend on OS X because pthread_kill (which is what we
+  // at one time used on Linux) has less consistent performance and causes
+  // strange crashes, see bug 1166778 and bug 1166808.  thread_suspend
+  // is also just a lot simpler to use.
+
+  if (KERN_SUCCESS != thread_suspend(samplee_thread)) {
+    return;
+  }
+
+  //----------------------------------------------------------------//
+  // Sample the target thread.
+
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  //
+  // The profiler's "critical section" begins here.  We must be very careful
+  // what we do here, or risk deadlock.  See the corresponding comment in
+  // platform-linux-android.cpp for details.
+
+  // Select the thread-state flavor for the build architecture. REGISTER_FIELD
+  // maps a short register name to the matching struct member, whose spelling
+  // depends on __DARWIN_UNIX03.
+#if defined(__x86_64__)
+  thread_state_flavor_t flavor = x86_THREAD_STATE64;
+  x86_thread_state64_t state;
+  mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+#  if __DARWIN_UNIX03
+#    define REGISTER_FIELD(name) __r##name
+#  else
+#    define REGISTER_FIELD(name) r##name
+#  endif  // __DARWIN_UNIX03
+#elif defined(__aarch64__)
+  thread_state_flavor_t flavor = ARM_THREAD_STATE64;
+  arm_thread_state64_t state;
+  mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT;
+#  if __DARWIN_UNIX03
+#    define REGISTER_FIELD(name) __##name
+#  else
+#    define REGISTER_FIELD(name) name
+#  endif  // __DARWIN_UNIX03
+#else
+#  error "unknown architecture"
+#endif
+
+  if (thread_get_state(samplee_thread, flavor,
+                       reinterpret_cast<natural_t*>(&state),
+                       &count) == KERN_SUCCESS) {
+    Registers regs;
+#if defined(__x86_64__)
+    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
+    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
+    // No link register value is recorded on x86-64.
+    regs.mLR = 0;
+#elif defined(__aarch64__)
+    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(pc));
+    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(fp));
+    regs.mLR = reinterpret_cast<Address>(state.REGISTER_FIELD(lr));
+#else
+#  error "unknown architecture"
+#endif
+
+    aProcessRegs(regs, aNow);
+  }
+
+#undef REGISTER_FIELD
+
+  //----------------------------------------------------------------//
+  // Resume the target thread.
+
+  // Reached whether or not the register state could be read.
+  thread_resume(samplee_thread);
+
+  // The profiler's critical section ends here.
+  //
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+// pthread start routine for the sampler thread. |aArg| is the SamplerThread
+// handed to pthread_create(); its Run() loop is executed on the new thread.
+static void* ThreadEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+// Builds the sampler thread object and immediately starts the underlying
+// pthread with default attributes, running ThreadEntry() with this object as
+// its argument. The sampling interval is converted from milliseconds to whole
+// microseconds (rounded, at least 1). |aFeatures| is not used by the body.
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds, uint32_t aFeatures)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
+      mThread{nullptr} {
+  const int rv =
+      pthread_create(&mThread, /* attr */ nullptr, ThreadEntry, this);
+  if (rv != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+// Waits for the sampler pthread to finish, then flushes any post-sampling
+// callbacks that are still queued.
+SamplerThread::~SamplerThread() {
+  // Block until the thread started in the constructor has exited.
+  pthread_join(mThread, nullptr);
+  // Just in the unlikely case some callbacks were added between the end of the
+  // thread and now.
+  InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList),
+                              SamplingState::JustStopped);
+}
+
+// Sleeps the calling thread for |aMicroseconds|.
+//
+// This uses nanosleep() rather than the obsolescent usleep(), and resumes
+// sleeping when the call is interrupted by a signal, so the full interval
+// elapses. The interval is split into whole seconds and a sub-second
+// remainder so that any uint32_t value is accepted.
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  constexpr uint32_t kMicrosecondsPerSecond = 1000000;
+  constexpr long kNanosecondsPerMicrosecond = 1000;
+
+  struct timespec ts;
+  ts.tv_sec = static_cast<time_t>(aMicroseconds / kMicrosecondsPerSecond);
+  // The remainder is below one million, so tv_nsec stays below one second.
+  ts.tv_nsec = static_cast<long>(aMicroseconds % kMicrosecondsPerSecond) *
+               kNanosecondsPerMicrosecond;
+
+  int rv = ::nanosleep(&ts, &ts);
+
+  while (rv != 0 && errno == EINTR) {
+    // Keep waiting in case of interrupt.
+    // nanosleep puts the remaining time back into ts.
+    rv = ::nanosleep(&ts, &ts);
+  }
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
+}
+
+// Forwards to the platform sampler's Disable() with the profiler lock held.
+void SamplerThread::Stop(PSLockRef aLock) { mSampler.Disable(aLock); }
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+// No platform-specific initialisation is performed on macOS.
+static void PlatformInit(PSLockRef aLock) {}
+
+// clang-format off
+#if defined(HAVE_NATIVE_UNWIND)
+// Fills |regs| for the calling thread using compiler builtins.
+// The stack pointer is derived from the frame pointer. The 0x10 offset is
+// 8 bytes for the previous frame pointer plus 8 bytes for the return
+// address, both of which are stored on the stack at the beginning of the
+// current frame.
+// The -Wframe-address diagnostic is silenced around the
+// __builtin_frame_address(1) call. The link register is always set to 0.
+#  define REGISTERS_SYNC_POPULATE(regs)                                       \
+    regs.mSP = reinterpret_cast<Address>(__builtin_frame_address(0)) + 0x10;  \
+    _Pragma("GCC diagnostic push")                                            \
+    _Pragma("GCC diagnostic ignored \"-Wframe-address\"")                     \
+    regs.mFP = reinterpret_cast<Address>(__builtin_frame_address(1));         \
+    _Pragma("GCC diagnostic pop")                                             \
+    regs.mPC = reinterpret_cast<Address>(                                     \
+        __builtin_extract_return_addr(__builtin_return_address(0)));          \
+    regs.mLR = 0;
+#endif
+// clang-format on
diff --git a/tools/profiler/core/platform-win32.cpp b/tools/profiler/core/platform-win32.cpp
new file mode 100644
index 0000000000..5e10e04c89
--- /dev/null
+++ b/tools/profiler/core/platform-win32.cpp
@@ -0,0 +1,496 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <windows.h>
+#include <mmsystem.h>
+#include <process.h>
+
+#include "nsWindowsDllInterceptor.h"
+#include "mozilla/StackWalk_windows.h"
+#include "mozilla/WindowsVersion.h"
+
+#include <type_traits>
+
+// Copies the program counter, stack pointer, frame pointer and (where the
+// architecture has one) link register from a Windows CONTEXT into |aRegs|.
+// The CONTEXT field names differ per architecture, hence the #if ladder.
+static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) {
+#if defined(GP_ARCH_amd64)
+  aRegs.mPC = reinterpret_cast<Address>(aContext->Rip);
+  aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp);
+  aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp);
+  // No link register value is recorded on x86-64.
+  aRegs.mLR = 0;
+#elif defined(GP_ARCH_x86)
+  aRegs.mPC = reinterpret_cast<Address>(aContext->Eip);
+  aRegs.mSP = reinterpret_cast<Address>(aContext->Esp);
+  aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp);
+  // No link register value is recorded on x86.
+  aRegs.mLR = 0;
+#elif defined(GP_ARCH_arm64)
+  aRegs.mPC = reinterpret_cast<Address>(aContext->Pc);
+  aRegs.mSP = reinterpret_cast<Address>(aContext->Sp);
+  aRegs.mFP = reinterpret_cast<Address>(aContext->Fp);
+  aRegs.mLR = reinterpret_cast<Address>(aContext->Lr);
+#else
+#  error "bad arch"
+#endif
+}
+
+// Duplicates the current thread's pseudo handle into a real handle that
+// carries the access rights the profiler needs (reading the context,
+// suspending/resuming, and querying information).
+// @return the duplicated HANDLE, or nullptr if DuplicateHandle() fails.
+static HANDLE GetRealCurrentThreadHandleForProfiling() {
+  const DWORD profilingAccess =
+      THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION;
+  const HANDLE currentProcess = ::GetCurrentProcess();
+
+  HANDLE duplicatedHandle = nullptr;
+  const BOOL succeeded = ::DuplicateHandle(
+      currentProcess, ::GetCurrentThread(), currentProcess, &duplicatedHandle,
+      profilingAccess, FALSE, 0);
+
+  return succeeded ? duplicatedHandle : nullptr;
+}
+
+static_assert(
+    std::is_same_v<mozilla::profiler::PlatformData::WindowsHandle, HANDLE>);
+
+// Stores a real handle to the calling thread, obtained from
+// GetRealCurrentThreadHandleForProfiling() (nullptr if duplication failed).
+// Must run on the thread identified by |aThreadId|, as asserted below.
+mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId)
+    : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
+  MOZ_ASSERT(aThreadId == ProfilerThreadId::FromNumber(::GetCurrentThreadId()));
+}
+
+// Closes the thread handle duplicated in the constructor, if there is one.
+mozilla::profiler::PlatformData::~PlatformData() {
+  if (!mProfiledThread) {
+    // Duplication failed at construction time; nothing to release.
+    return;
+  }
+
+  CloseHandle(mProfiledThread);
+  mProfiledThread = nullptr;
+}
+
+// Sentinel handle value meaning "no thread"; thread handles are compared
+// against it before being closed.
+static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The Windows sampler has no state to set up: the body is intentionally empty.
+Sampler::Sampler(PSLockRef aLock) {}
+
+// Nothing to undo on Windows: the body is intentionally empty.
+void Sampler::Disable(PSLockRef aLock) {}
+
+// Writes the unit used for the "threadCPUDelta" sample field to |aWriter|.
+// |aLock| is not used by the body.
+static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
+                                          SpliceableJSONWriter& aWriter) {
+  // Chosen once, on the first call: "ns" when a cycle-time frequency is
+  // available (non-zero), otherwise the raw cycle counts are reported as
+  // "variable CPU cycles".
+  static const Span<const char> units =
+      (GetCycleTimeFrequencyMHz() != 0) ? MakeStringSpan("ns")
+                                        : MakeStringSpan("variable CPU cycles");
+  aWriter.StringProperty("threadCPUDelta", units);
+}
+
+// Converts a raw CPU-cycle count to nanoseconds when the cycle-time frequency
+// is known; otherwise the raw value is returned unchanged.
+/* static */
+uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) {
+  // Queried once, on the first call.
+  static const uint64_t frequencyMHz = GetCycleTimeFrequencyMHz();
+
+  if (frequencyMHz != 0u) {
+    // ns = cycles / f_GHz = cycles / (f_MHz / 1000). To keep the integer
+    // precision of f_MHz this is evaluated as (cycles * 1000) / f_MHz, and
+    // half of the multiplier is added first so that the truncating division
+    // rounds to nearest.
+    constexpr uint64_t kGHzPerMHz = 1'000u;
+    constexpr uint64_t kRoundingBias = kGHzPerMHz / 2u;
+    return (aRawValue * kGHzPerMHz + kRoundingBias) / frequencyMHz;
+  }
+
+  return aRawValue;
+}
+
+// Converts a FILETIME duration to nanoseconds.
+static inline uint64_t ToNanoSeconds(const FILETIME& aFileTime) {
+  // A FILETIME is a 64-bit count of 100-nanosecond units split into two
+  // 32-bit halves; reassemble it before scaling.
+  const uint64_t hundredNsUnits =
+      (uint64_t(aFileTime.dwHighDateTime) << 32) |
+      uint64_t(aFileTime.dwLowDateTime);
+  return hundredNsUnits * 100;
+}
+
+namespace mozilla::profiler {
+// Stores the CPU time consumed by the profiled thread, in nanoseconds, in
+// |*aResult|. When a cycle-time frequency is available the value is derived
+// from QueryThreadCycleTime(); otherwise it is the sum of the kernel and user
+// times from GetThreadTimes().
+// Returns false, leaving |*aResult| untouched, if the underlying query fails.
+bool GetCpuTimeSinceThreadStartInNs(
+    uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) {
+  const HANDLE profiledThread = aPlatformData.ProfiledThread();
+  int frequencyInMHz = GetCycleTimeFrequencyMHz();
+
+  if (!frequencyInMHz) {
+    // No usable frequency: fall back to the thread's kernel + user times.
+    FILETIME createTime, exitTime, kernelTime, userTime;
+    if (!GetThreadTimes(profiledThread, &createTime, &exitTime, &kernelTime,
+                        &userTime)) {
+      return false;
+    }
+
+    *aResult = ToNanoSeconds(kernelTime) + ToNanoSeconds(userTime);
+    return true;
+  }
+
+  uint64_t cpuCycleCount;
+  if (!QueryThreadCycleTime(profiledThread, &cpuCycleCount)) {
+    return false;
+  }
+
+  // cycles / MHz gives microseconds; scale by 1000 first to get nanoseconds.
+  constexpr uint64_t kNsecPerUsec = 1000L;
+  *aResult = cpuCycleCount * kNsecPerUsec / frequencyInMHz;
+  return true;
+}
+}  // namespace mozilla::profiler
+
+// Measures the process-wide cycle count with QueryProcessCycleTime(), returns
+// the difference from |aPreviousRunningTimesToBeUpdated|, and overwrites that
+// argument with the new measurement. If the query fails, no CPU value is set
+// on the new measurement.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) {
+  AUTO_PROFILER_STATS(GetProcessRunningTimes);
+
+  static const HANDLE processHandle = GetCurrentProcess();
+
+  RunningTimes currentTimes;
+  {
+    AUTO_PROFILER_STATS(GetProcessRunningTimes_QueryProcessCycleTime);
+    ULONG64 cycles;
+    const bool gotCycles = QueryProcessCycleTime(processHandle, &cycles) != 0;
+    if (gotCycles) {
+      currentTimes.SetThreadCPUDelta(cycles);
+    }
+    // Timestamp taken right after the measurement.
+    currentTimes.SetPostMeasurementTimeStamp(TimeStamp::Now());
+  }
+
+  const RunningTimes delta = currentTimes - aPreviousRunningTimesToBeUpdated;
+  aPreviousRunningTimesToBeUpdated = currentTimes;
+  return delta;
+}
+
+// Measures the cycle count of the thread behind |aThreadData| with
+// QueryThreadCycleTime(), returns the difference from the thread's previously
+// stored running times, and stores the new measurement as the previous one
+// for the next call.
+static RunningTimes GetThreadRunningTimesDiff(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
+  AUTO_PROFILER_STATS(GetThreadRunningTimes);
+
+  const mozilla::profiler::PlatformData& platformData =
+      aThreadData.PlatformDataCRef();
+  const HANDLE profiledThread = platformData.ProfiledThread();
+
+  const RunningTimes currentTimes = GetRunningTimesWithTightTimestamp(
+      [profiledThread](RunningTimes& aRunningTimes) {
+        AUTO_PROFILER_STATS(GetThreadRunningTimes_QueryThreadCycleTime);
+        ULONG64 cycles;
+        if (QueryThreadCycleTime(profiledThread, &cycles) == 0) {
+          // The query failed: there is no CPU value for this measurement.
+          aRunningTimes.ClearThreadCPUDelta();
+          return;
+        }
+        aRunningTimes.ResetThreadCPUDelta(cycles);
+      });
+
+  ProfiledThreadData* profiledThreadData =
+      aThreadData.GetProfiledThreadData(aLock);
+  MOZ_ASSERT(profiledThreadData);
+  RunningTimes& storedTimes =
+      profiledThreadData->PreviousThreadRunningTimesRef();
+
+  const RunningTimes delta = currentTimes - storedTimes;
+  storedTimes = currentTimes;
+  return delta;
+}
+
+// Re-reads the thread's cycle count and stores it as the thread's "previous"
+// running time, so that work done since the last GetThreadRunningTimesDiff()
+// call is not attributed to the next sample.
+static void DiscardSuspendedThreadRunningTimes(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
+  AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes);
+
+  // On Windows, suspending a thread makes that thread work a little bit. That
+  // extra running time is discarded by overwriting the thread's
+  // PreviousThreadRunningTimesRef() with the running time as of now.
+
+  const HANDLE profiledThread =
+      aThreadData.PlatformDataCRef().ProfiledThread();
+
+  ProfiledThreadData* profiledThreadData =
+      aThreadData.GetProfiledThreadData(aLock);
+  MOZ_ASSERT(profiledThreadData);
+  RunningTimes& storedTimes =
+      profiledThreadData->PreviousThreadRunningTimesRef();
+
+  ULONG64 cycles;
+  if (QueryThreadCycleTime(profiledThread, &cycles) == 0) {
+    // The query failed: drop the stored CPU value instead.
+    storedTimes.ClearThreadCPUDelta();
+    return;
+  }
+  storedTimes.ResetThreadCPUDelta(cycles);
+}
+
+// Suspends the thread described by |aThreadData| with SuspendThread(), reads
+// its register state with GetThreadContext(), passes it to |aProcessRegs|
+// together with |aNow|, and resumes the thread. Returns without sampling if
+// there is no thread handle, if the thread cannot be suspended, or if its
+// context cannot be read. |aLock| is not used by the body.
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  HANDLE profiled_thread = aThreadData.PlatformDataCRef().ProfiledThread();
+  if (profiled_thread == nullptr) {
+    return;
+  }
+
+  // Context used for sampling the register state of the profiled thread.
+  CONTEXT context;
+  memset(&context, 0, sizeof(context));
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  // SuspendThread() reports failure by returning (DWORD)-1.
+  static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
+  if (SuspendThread(profiled_thread) == kSuspendFailed) {
+    return;
+  }
+
+  // SuspendThread is asynchronous, so the thread may still be running.
+  // Call GetThreadContext first to ensure the thread is really suspended.
+  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+
+  // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
+  // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
+#if defined(GP_ARCH_amd64)
+  context.ContextFlags = CONTEXT_FULL;
+#else
+  context.ContextFlags = CONTEXT_CONTROL;
+#endif
+  if (!GetThreadContext(profiled_thread, &context)) {
+    // The thread is already suspended at this point, so undo that before
+    // giving up.
+    ResumeThread(profiled_thread);
+    return;
+  }
+
+  //----------------------------------------------------------------//
+  // Sample the target thread.
+
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  //
+  // The profiler's "critical section" begins here.  We must be very careful
+  // what we do here, or risk deadlock.  See the corresponding comment in
+  // platform-linux-android.cpp for details.
+
+  Registers regs;
+  PopulateRegsFromContext(regs, &context);
+  aProcessRegs(regs, aNow);
+
+  //----------------------------------------------------------------//
+  // Resume the target thread.
+
+  ResumeThread(profiled_thread);
+
+  // The profiler's critical section ends here.
+  //
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+// _beginthreadex() start routine for the sampler thread. |aArg| is the
+// SamplerThread passed at thread creation; its Run() loop is executed on the
+// new thread.
+static unsigned int __stdcall ThreadEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->Run();
+  return 0;
+}
+
+// _beginthreadex() start routine for the unregistered-thread spy. |aArg| is
+// the SamplerThread passed at thread creation; its RunUnregisteredThreadSpy()
+// method is executed on the new thread.
+static unsigned int __stdcall UnregisteredThreadSpyEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->RunUnregisteredThreadSpy();
+  return 0;
+}
+
+// Builds the sampler thread object, optionally raises the system timer
+// resolution, optionally starts the unregistered-thread spy thread, and then
+// starts the sampler thread itself. The sampling interval is converted from
+// milliseconds to whole microseconds (rounded, at least 1). Crashes if either
+// thread cannot be created.
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds, uint32_t aFeatures)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
+      mNoTimerResolutionChange(
+          ProfilerFeature::HasNoTimerResolutionChange(aFeatures)) {
+  if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
+    // By default the timer resolution (which tends to be 1/64Hz, around 16ms)
+    // is not changed. However, if the requested interval is sufficiently low,
+    // the resolution will be adjusted to match. Note that this affects all
+    // timers in Firefox, and could therefore hide issues while profiling. This
+    // change may be prevented with the "notimerresolutionchange" feature.
+    // NOTE(review): for intervals below 1000 microseconds the integer division
+    // below yields 0 -- confirm that timeBeginPeriod(0) behaves as intended.
+    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  if (ProfilerFeature::HasUnregisteredThreads(aFeatures)) {
+    // Sampler&spy threads are not running yet, so it's safe to modify
+    // mSpyingState without locking the monitor.
+    mSpyingState = SpyingState::Spy_Initializing;
+    mUnregisteredThreadSpyThread = reinterpret_cast<HANDLE>(
+        _beginthreadex(nullptr,
+                       /* stack_size */ 0, UnregisteredThreadSpyEntry, this,
+                       /* initflag */ 0, nullptr));
+    if (mUnregisteredThreadSpyThread == 0) {
+      MOZ_CRASH("_beginthreadex failed");
+    }
+  }
+
+  // Create a new thread. It is important to use _beginthreadex() instead of
+  // the Win32 function CreateThread(), because the CreateThread() does not
+  // initialize thread-specific structures in the C runtime library.
+  mThread = reinterpret_cast<HANDLE>(_beginthreadex(nullptr,
+                                                    /* stack_size */ 0,
+                                                    ThreadEntry, this,
+                                                    /* initflag */ 0, nullptr));
+  if (mThread == 0) {
+    MOZ_CRASH("_beginthreadex failed");
+  }
+}
+
+// Shuts down and joins the spy thread (if one was started), joins the sampler
+// thread, closes both thread handles, and finally runs any post-sampling
+// callbacks that are still pending.
+SamplerThread::~SamplerThread() {
+  if (mUnregisteredThreadSpyThread) {
+    {
+      MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
+
+      // Wait until the spying thread is not actively working, because the
+      // win32 function it's using could deadlock with the WaitForSingleObject
+      // call below.
+      while (!(mSpyingState == SpyingState::Spy_Waiting ||
+               mSpyingState == SpyingState::SamplerToSpy_Start)) {
+        spyingStateLock.Wait();
+      }
+
+      // Request the shutdown...
+      mSpyingState = SpyingState::MainToSpy_Shutdown;
+      spyingStateLock.NotifyAll();
+
+      // ...and wait until the spy thread acknowledges it.
+      do {
+        spyingStateLock.Wait();
+      } while (mSpyingState != SpyingState::SpyToMain_ShuttingDown);
+    }
+
+    // Join the spy thread.
+    WaitForSingleObject(mUnregisteredThreadSpyThread, INFINITE);
+
+    // Release our own handle for the spy thread.
+    if (mUnregisteredThreadSpyThread != kNoThread) {
+      CloseHandle(mUnregisteredThreadSpyThread);
+    }
+  }
+
+  // Join the sampler thread.
+  WaitForSingleObject(mThread, INFINITE);
+
+  // Release our own handle for the sampler thread.
+  if (mThread != kNoThread) {
+    CloseHandle(mThread);
+  }
+
+  // Covers the unlikely case where some callbacks were added between the end
+  // of the thread and now.
+  InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList),
+                              SamplingState::JustStopped);
+}
+
+// Body of the spy thread: repeatedly announces that it is waiting, sleeps on
+// the monitor until another thread changes mSpyingState, and then either
+// acknowledges a shutdown request and exits, or runs one round of
+// SpyOnUnregisteredThreads().
+void SamplerThread::RunUnregisteredThreadSpy() {
+  // TODO: Consider registering this thread.
+  // Pros: Remove from list of unregistered threads; Not useful to profiling
+  //       Firefox itself.
+  // Cons: Doesn't appear in the profile, so users may miss the expensive CPU
+  //       cost of this work on Windows.
+  PR_SetCurrentThreadName("UnregisteredThreadSpy");
+
+  for (;;) {
+    {
+      MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
+      // We get here either on the very first iteration, or right after a
+      // round of work.
+      MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing ||
+                 mSpyingState == SpyingState::Spy_Working);
+
+      // Publish the waiting state, then sleep until somebody changes it.
+      mSpyingState = SpyingState::Spy_Waiting;
+      mSpyingStateMonitor.NotifyAll();
+      do {
+        spyingStateLock.Wait();
+      } while (mSpyingState == SpyingState::Spy_Waiting);
+
+      if (mSpyingState == SpyingState::MainToSpy_Shutdown) {
+        // Acknowledge the shutdown request and leave; the monitor is released
+        // when spyingStateLock goes out of scope.
+        mSpyingState = SpyingState::SpyToMain_ShuttingDown;
+        mSpyingStateMonitor.NotifyAll();
+        return;
+      }
+
+      MOZ_ASSERT(mSpyingState == SpyingState::SamplerToSpy_Start);
+      mSpyingState = SpyingState::Spy_Working;
+    }
+
+    // The work is done without the lock, so other threads can read the
+    // current state in the meantime.
+    SpyOnUnregisteredThreads();
+  }
+}
+
+// Pauses the calling thread for roughly aMicroseconds. With a profiler
+// interval of at least 1ms this is a plain ::Sleep() of at least 1ms; with a
+// sub-millisecond interval it sleeps for the whole milliseconds and spins for
+// the remainder.
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  const uint32_t wholeMilliseconds = aMicroseconds / 1000;
+
+  // For now, keep the old behaviour of minimum Sleep(1), even for
+  // smaller-than-usual sleeps after an overshoot, unless the user has
+  // explicitly opted into a sub-millisecond profiler interval.
+  if (mIntervalMicroseconds >= 1000) {
+    ::Sleep(std::max(1u, wholeMilliseconds));
+    return;
+  }
+
+  const TimeStamp end =
+      TimeStamp::Now() + TimeDuration::FromMicroseconds(aMicroseconds);
+
+  // First, sleep for as many whole milliseconds as possible.
+  if (wholeMilliseconds > 0) {
+    ::Sleep(wholeMilliseconds);
+  }
+
+  // Then, spin until the deadline has passed.
+  while (TimeStamp::Now() < end) {
+    YieldProcessor();
+  }
+}
+
+// Undoes the timer resolution change made by the constructor (if any) and
+// disables the sampler.
+void SamplerThread::Stop(PSLockRef aLock) {
+  // Same condition as the one guarding ::timeBeginPeriod() in the constructor.
+  const bool timerResolutionWasChanged =
+      !mNoTimerResolutionChange && mIntervalMicroseconds < 10 * 1000;
+  if (timerResolutionWasChanged) {
+    // Disable any timer resolution changes we've made. Do it now while
+    // gPSMutex is locked, i.e. before any other SamplerThread can be created
+    // and call ::timeBeginPeriod().
+    //
+    // It's safe to do this now even though this SamplerThread is still alive,
+    // because the next time the main loop of Run() iterates it won't get past
+    // the mActivityGeneration check, and so it won't make any more ::Sleep()
+    // calls.
+    ::timeEndPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+// Platform-specific initialization hook; intentionally a no-op in this file.
+static void PlatformInit(PSLockRef aLock) {}
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Captures the calling thread's current register context with
+// RtlCaptureContext() and copies it into `regs` through
+// PopulateRegsFromContext(). Note that this expands to several statements and
+// declares a local variable named `context` in the enclosing scope.
+#  define REGISTERS_SYNC_POPULATE(regs) \
+    CONTEXT context;                    \
+    RtlCaptureContext(&context);        \
+    PopulateRegsFromContext(regs, &context);
+#endif
+
+#if defined(GP_PLAT_amd64_windows)
+
+// Use InitializeWin64ProfilerHooks from the base profiler.
+
+namespace mozilla {
+namespace baseprofiler {
+MFBT_API void InitializeWin64ProfilerHooks();
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+using mozilla::baseprofiler::InitializeWin64ProfilerHooks;
+
+#endif  // defined(GP_PLAT_amd64_windows)
diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp
new file mode 100644
index 0000000000..8950c48b58
--- /dev/null
+++ b/tools/profiler/core/platform.cpp
@@ -0,0 +1,7067 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// There are three kinds of samples done by the profiler.
+//
+// - A "periodic" sample is the most complex kind. It is done in response to a
+//   timer while the profiler is active. It involves writing a stack trace plus
+//   a variety of other values (memory measurements, responsiveness
+//   measurements, markers, etc.) into the main ProfileBuffer. The sampling is
+//   done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
+//   get the register values.
+//
+// - A "synchronous" sample is a simpler kind. It is done in response to an API
+//   call (profiler_get_backtrace()). It involves writing a stack trace and
+//   little else into a temporary ProfileBuffer, and wrapping that up in a
+//   ProfilerBacktrace that can be subsequently used in a marker. The sampling
+//   is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the
+//   register values.
+//
+// - A "backtrace" sample is the simplest kind. It is done in response to an
+//   API call (profiler_suspend_and_sample_thread()). It involves getting a
+//   stack trace via a ProfilerStackCollector; it does not write to a
+//   ProfileBuffer. The sampling is done from off-thread, and so uses
+//   SuspendAndSampleAndResumeThread() to get the register values.
+
+#include "platform.h"
+
+#include "GeckoProfiler.h"
+#include "GeckoProfilerReporter.h"
+#include "PageInformation.h"
+#include "PowerCounters.h"
+#include "ProfileBuffer.h"
+#include "ProfiledThreadData.h"
+#include "ProfilerBacktrace.h"
+#include "ProfilerChild.h"
+#include "ProfilerCodeAddressService.h"
+#include "ProfilerControl.h"
+#include "ProfilerIOInterposeObserver.h"
+#include "ProfilerParent.h"
+#include "ProfilerRustBindings.h"
+#include "mozilla/MozPromise.h"
+#include "shared-libraries.h"
+#include "VTuneProfiler.h"
+
+#include "js/ProfilingFrameIterator.h"
+#include "memory_hooks.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/AutoProfilerLabel.h"
+#include "mozilla/BaseAndGeckoProfilerDetail.h"
+#include "mozilla/ExtensionPolicyService.h"
+#include "mozilla/extensions/WebExtensionPolicy.h"
+#include "mozilla/glean/GleanMetrics.h"
+#include "mozilla/Monitor.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/Printf.h"
+#include "mozilla/ProcInfo.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+#include "mozilla/SchedulerGroup.h"
+#include "mozilla/Services.h"
+#include "mozilla/StackWalk.h"
+#ifdef XP_WIN
+#  include "mozilla/StackWalkThread.h"
+#endif
+#include "mozilla/StaticPtr.h"
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "BaseProfiler.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsIDocShell.h"
+#include "nsIHttpProtocolHandler.h"
+#include "nsIObserverService.h"
+#include "nsIPropertyBag2.h"
+#include "nsIXULAppInfo.h"
+#include "nsIXULRuntime.h"
+#include "nsJSPrincipals.h"
+#include "nsMemoryReporterManager.h"
+#include "nsPIDOMWindow.h"
+#include "nsProfilerStartParams.h"
+#include "nsScriptSecurityManager.h"
+#include "nsSystemInfo.h"
+#include "nsThreadUtils.h"
+#include "nsXULAppAPI.h"
+#include "Tracing.h"
+#include "prdtoa.h"
+#include "prtime.h"
+
+#include <algorithm>
+#include <errno.h>
+#include <fstream>
+#include <ostream>
+#include <set>
+#include <sstream>
+#include <string_view>
+#include <type_traits>
+
+#if defined(GP_OS_android)
+#  include "JavaExceptions.h"
+#  include "mozilla/java/GeckoJavaSamplerNatives.h"
+#  include "mozilla/jni/Refs.h"
+#endif
+
+#if defined(GP_OS_darwin)
+#  include "nsCocoaFeatures.h"
+#endif
+
+#if defined(GP_PLAT_amd64_darwin)
+#  include <cpuid.h>
+#endif
+
+#if defined(GP_OS_windows)
+#  include <processthreadsapi.h>
+
+// GetThreadInformation is not available on Windows 7.
+WINBASEAPI
+BOOL WINAPI GetThreadInformation(
+    _In_ HANDLE hThread, _In_ THREAD_INFORMATION_CLASS ThreadInformationClass,
+    _Out_writes_bytes_(ThreadInformationSize) LPVOID ThreadInformation,
+    _In_ DWORD ThreadInformationSize);
+
+#endif
+
+// Win32 builds always have frame pointers, so FramePointerStackWalk() always
+// works.
+#if defined(GP_PLAT_x86_windows)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// Win64 builds always omit frame pointers, so we use the slower
+// MozStackWalk(), which works in that case.
+#if defined(GP_PLAT_amd64_windows)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_MOZ_STACK_WALK
+#endif
+
+// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
+// MozStackWalk().
+#if defined(GP_PLAT_arm64_windows)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_MOZ_STACK_WALK
+#endif
+
+// Mac builds use FramePointerStackWalk(). Even if we build without
+// frame pointers, we'll still get useful stacks in system libraries
+// because those always have frame pointers.
+// We don't use MozStackWalk() on Mac.
+#if defined(GP_OS_darwin)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// 32-bit ARM Linux and Android builds use the ARM Exception Handling ABI to
+// unwind.
+#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_EHABI_STACKWALK
+#  include "EHABIStackWalk.h"
+#endif
+
+// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
+#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||       \
+    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||   \
+    defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||    \
+    defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
+    defined(GP_PLAT_arm64_freebsd)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_LUL_STACKWALK
+#  include "lul/LulMain.h"
+#  include "lul/platform-linux-lul.h"
+
+// On linux we use LUL for periodic samples and synchronous samples, but we use
+// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
+// (See the comment at the top of the file for a definition of
+// periodic/synchronous/backtrace.)
+//
+// FramePointerStackWalk can produce incomplete stacks when the current entry is
+// in a shared library without framepointers, however LUL can take a long time
+// to initialize, which is undesirable for consumers of
+// profiler_suspend_and_sample_thread like the Background Hang Reporter.
+#  if defined(MOZ_PROFILING)
+#    define USE_FRAME_POINTER_STACK_WALK
+#  endif
+#endif
+
+// We can only stackwalk without expensive initialization on platforms which
+// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
+// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
+// which can be expensive.
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+#  define HAVE_FASTINIT_NATIVE_UNWIND
+#endif
+
+#ifdef MOZ_VALGRIND
+#  include <valgrind/memcheck.h>
+#else
+#  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+#  include <ucontext.h>
+#endif
+
+using namespace mozilla;
+using namespace mozilla::literals::ProportionValue_literals;
+
+using mozilla::profiler::detail::RacyFeatures;
+using ThreadRegistration = mozilla::profiler::ThreadRegistration;
+using ThreadRegistrationInfo = mozilla::profiler::ThreadRegistrationInfo;
+using ThreadRegistry = mozilla::profiler::ThreadRegistry;
+
+LazyLogModule gProfilerLog("prof");
+
+// Returns the core profile buffer shared with the Base Profiler.
+ProfileChunkedBuffer& profiler_get_core_buffer() {
+  // The Base Profiler in mozglue creates the core buffer if needed; the
+  // reference is fetched once and cached here, for quick access in xul.
+  static ProfileChunkedBuffer& sCoreBuffer =
+      baseprofiler::profiler_get_core_buffer();
+  return sCoreBuffer;
+}
+
+mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed> gSkipSampling;
+
+#if defined(GP_OS_android)
+// Native methods backing java::GeckoJavaSampler: they let the Java side read
+// the profiler clock and start/stop the Gecko profiler.
+class GeckoJavaSampler
+    : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
+ private:
+  GeckoJavaSampler();
+
+ public:
+  // Returns profiler_time(), or 0.0 when the profiler is not active.
+  static double GetProfilerTime() {
+    if (!profiler_is_active()) {
+      return 0.0;
+    }
+    return profiler_time();
+  }
+
+  // Appends the UTF-8 contents of every string in aJavaArray to aCharArray.
+  // The appended pointers come from JNIEnv::GetStringUTFChars() and are never
+  // released (see the FIXME below).
+  static void JavaStringArrayToCharArray(jni::ObjectArray::Param& aJavaArray,
+                                         Vector<const char*>& aCharArray,
+                                         JNIEnv* aJni) {
+    int arraySize = aJavaArray->Length();
+    for (int i = 0; i < arraySize; i++) {
+      jstring javaString =
+          (jstring)(aJni->GetObjectArrayElement(aJavaArray.Get(), i));
+      const char* filterString = aJni->GetStringUTFChars(javaString, 0);
+      // FIXME. These strings are leaked.
+      MOZ_RELEASE_ASSERT(aCharArray.append(filterString));
+    }
+  }
+
+  // Starts the profiler with the given thread filters and feature names, using
+  // a fixed buffer size and sampling interval.
+  static void StartProfiler(jni::ObjectArray::Param aFiltersArray,
+                            jni::ObjectArray::Param aFeaturesArray) {
+    JNIEnv* jni = jni::GetEnvForThread();
+    Vector<const char*> filtersTemp;
+    Vector<const char*> featureStringArray;
+
+    JavaStringArrayToCharArray(aFiltersArray, filtersTemp, jni);
+    JavaStringArrayToCharArray(aFeaturesArray, featureStringArray, jni);
+
+    const uint32_t features = ParseFeaturesFromStringArray(
+        featureStringArray.begin(), featureStringArray.length());
+
+    // 128 * 1024 * 1024 is the entries preset that is given in
+    // devtools/client/performance-new/shared/background.jsm.js
+    profiler_start(PowerOfTwo32(128 * 1024 * 1024), 5.0, features,
+                   filtersTemp.begin(), filtersTemp.length(), 0, Nothing());
+  }
+
+  // Pauses the profiler, asynchronously captures the profile as a gzipped
+  // byte array, and completes aGeckoResult with it (or with an
+  // IllegalStateException carrying the error code in hexadecimal). The
+  // profiler is stopped in both cases.
+  static void StopProfiler(jni::Object::Param aGeckoResult) {
+    auto result = java::GeckoResult::LocalRef(aGeckoResult);
+    profiler_pause();
+    nsCOMPtr<nsIProfiler> nsProfiler(
+        do_GetService("@mozilla.org/tools/profiler;1"));
+    nsProfiler->GetProfileDataAsGzippedArrayBufferAndroid(0)->Then(
+        GetMainThreadSerialEventTarget(), __func__,
+        [result](FallibleTArray<uint8_t> compressedProfile) {
+          result->Complete(jni::ByteArray::New(
+              reinterpret_cast<const int8_t*>(compressedProfile.Elements()),
+              compressedProfile.Length()));
+
+          // Done with capturing a profile. Stop the profiler.
+          profiler_stop();
+        },
+        [result](nsresult aRv) {
+          // 8 hexadecimal digits plus the terminating NUL. The write is
+          // bounded by the buffer size, and the error code is converted to an
+          // integer type that matches the "%x" conversion.
+          char errorString[9];
+          snprintf(errorString, sizeof(errorString), "%08x",
+                   static_cast<uint32_t>(aRv));
+          result->CompleteExceptionally(
+              mozilla::java::sdk::IllegalStateException::New(errorString)
+                  .Cast<jni::Throwable>());
+
+          // Failed to capture a profile. Stop the profiler.
+          profiler_stop();
+        });
+  }
+};
+#endif
+
+// Compile-time check of the feature list: returns true only if the feature
+// numbers enumerated by PROFILER_FOR_EACH_FEATURE start at 0 and increase by
+// 1 each.
+constexpr static bool ValidateFeatures() {
+  int nextExpectedNumber = 0;
+
+  // Compares each feature number with the running counter, then advances the
+  // counter; bails out on the first mismatch.
+#define CHECK_FEATURE(n_, str_, Name_, desc_) \
+  if ((n_) != nextExpectedNumber++) {         \
+    return false;                             \
+  }
+
+  PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
+
+#undef CHECK_FEATURE
+
+  return true;
+}
+
+static_assert(ValidateFeatures(), "Feature list is invalid");
+
+// Returns the set of features that can actually be used on this platform and
+// in this build configuration.
+static uint32_t AvailableFeatures() {
+  uint32_t available = 0;
+
+  // Start with every known feature...
+#define ADD_FEATURE(n_, str_, Name_, desc_) \
+  ProfilerFeature::Set##Name_(available);
+
+  PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
+
+#undef ADD_FEATURE
+
+  // ...then drop the ones that are not supported here.
+#if !defined(GP_OS_android)
+  ProfilerFeature::ClearJava(available);
+#endif
+#if !defined(HAVE_NATIVE_UNWIND)
+  ProfilerFeature::ClearStackWalk(available);
+#endif
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  if (getenv("XPCOM_MEM_BLOAT_LOG")) {
+    NS_WARNING("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations.");
+    // The memory hooks are available, but the bloat log is enabled, which is
+    // not compatible with the native allocations tracking. See the comment in
+    // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for
+    // more information.
+    ProfilerFeature::ClearNativeAllocations(available);
+  }
+#else
+  // The memory hooks are not available.
+  ProfilerFeature::ClearNativeAllocations(available);
+#endif
+
+#if !defined(GP_OS_windows)
+  ProfilerFeature::ClearNoTimerResolutionChange(available);
+#endif
+
+  return available;
+}
+
+// Features enabled by default in every context. The set may include features
+// that are not available on the current platform.
+static constexpr uint32_t DefaultFeatures() {
+  const uint32_t defaults =
+      ProfilerFeature::Java | ProfilerFeature::JS |
+      ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization |
+      ProfilerFeature::Screenshots | ProfilerFeature::ProcessCPU;
+  return defaults;
+}
+
+// Additional default features used when MOZ_PROFILER_STARTUP is set. The set
+// may include features that are not available on the current platform.
+static constexpr uint32_t StartupExtraDefaultFeatures() {
+  // File I/O is recorded by default for startup profiles, because startup is
+  // heavy on I/O operations.
+  const uint32_t startupExtras =
+      ProfilerFeature::FileIOAll | ProfilerFeature::IPCMessages;
+  return startupExtras;
+}
+
+// Serializes aJsonValue into a compact JSON string.
+Json::String ToCompactString(const Json::Value& aJsonValue) {
+  Json::StreamWriterBuilder compactWriter;
+  // An empty indentation string: no indentations, and no newlines.
+  compactWriter["indentation"] = "";
+  // Turning off YAML compatibility removes the spaces after colons.
+  compactWriter["enableYAMLCompatibility"] = false;
+  // Keep only 6 digits after the decimal point; timestamps in ms then have ns
+  // precision.
+  compactWriter["precisionType"] = "decimal";
+  compactWriter["precision"] = 6;
+
+  return Json::writeString(compactWriter, aJsonValue);
+}
+
+/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex
+    ProfilingLog::gMutex;
+/* static */ mozilla::UniquePtr<Json::Value> ProfilingLog::gLog;
+
+// Creates the (initially empty) JSON log object under the log mutex and
+// records the time at which logging began. The allocation is fallible; on
+// failure the log simply stays null.
+/* static */ void ProfilingLog::Init() {
+  using mozilla::baseprofiler::detail::BaseProfilerAutoLock;
+
+  BaseProfilerAutoLock lock{gMutex};
+  MOZ_ASSERT(!gLog);
+  gLog = mozilla::MakeUniqueFallible<Json::Value>(Json::objectValue);
+  if (!gLog) {
+    return;
+  }
+
+  (*gLog)[Json::StaticString{"profilingLogBegin" TIMESTAMP_JSON_SUFFIX}] =
+      ProfilingLog::Timestamp();
+}
+
+// Discards the JSON log object under the log mutex. The log is expected to
+// exist at this point.
+/* static */ void ProfilingLog::Destroy() {
+  using mozilla::baseprofiler::detail::BaseProfilerAutoLock;
+
+  BaseProfilerAutoLock lock{gMutex};
+  MOZ_ASSERT(gLog);
+  gLog = nullptr;
+}
+
+// Tells whether the log mutex is currently locked on the calling thread.
+/* static */ bool ProfilingLog::IsLockedOnCurrentThread() {
+  const bool lockedOnThisThread = gMutex.IsLockedOnCurrentThread();
+  return lockedOnThisThread;
+}
+
+// Scoped lock for the profiler mutex (gPSMutex): the mutex is taken on
+// construction and held for the lifetime of the object.
+// IsLockedOnCurrentThread() lets memory hooks find out whether the profiler is
+// already locked on this thread, so they can avoid re-entering the profiler
+// locked state.
+// Locking order: Profiler, ThreadRegistry, ThreadRegistration.
+class MOZ_RAII PSAutoLock {
+ public:
+  PSAutoLock() : mLock(MutexAfterLockOrderChecks()) {}
+
+  PSAutoLock(const PSAutoLock&) = delete;
+  void operator=(const PSAutoLock&) = delete;
+
+  static bool IsLockedOnCurrentThread() {
+    return gPSMutex.IsLockedOnCurrentThread();
+  }
+
+ private:
+  // Returns gPSMutex, ready to be locked by mLock. In DEBUG builds, *before*
+  // the lock attempt, this checks that the ThreadRegistry, ThreadRegistration,
+  // and ProfilingLog mutexes are *not* locked on this thread, to avoid
+  // inversion deadlocks.
+  static mozilla::baseprofiler::detail::BaseProfilerMutex&
+  MutexAfterLockOrderChecks() {
+    MOZ_ASSERT(!ThreadRegistry::IsRegistryMutexLockedOnCurrentThread());
+    MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread());
+    MOZ_ASSERT(!ProfilingLog::IsLockedOnCurrentThread());
+    return gPSMutex;
+  }
+
+  static mozilla::baseprofiler::detail::BaseProfilerMutex gPSMutex;
+  mozilla::baseprofiler::detail::BaseProfilerAutoLock mLock;
+};
+
+/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex
+    PSAutoLock::gPSMutex{"Gecko Profiler mutex"};
+
+// A reference to a live PSAutoLock, passed around as proof that gPSMutex is
+// held. Only functions that take a PSLockRef arg can access CorePS's and
+// ActivePS's fields.
+using PSLockRef = const PSAutoLock&;
+
+// Declares a static getter `name_(PSLockRef)` returning `sInstance->m<name_>`
+// as `type_`. The unnamed PSLockRef parameter makes callers show that they
+// hold a PSAutoLock.
+#define PS_GET(type_, name_)      \
+  static type_ name_(PSLockRef) { \
+    MOZ_ASSERT(sInstance);        \
+    return sInstance->m##name_;   \
+  }
+
+// Same as PS_GET, but the generated getter takes no PSLockRef.
+#define PS_GET_LOCKLESS(type_, name_) \
+  static type_ name_() {              \
+    MOZ_ASSERT(sInstance);            \
+    return sInstance->m##name_;       \
+  }
+
+// Declares the PS_GET getter plus a static setter
+// `Set<name_>(PSLockRef, type_)` that assigns to `sInstance->m<name_>`.
+#define PS_GET_AND_SET(type_, name_)                  \
+  PS_GET(type_, name_)                                \
+  static void Set##name_(PSLockRef, type_ a##name_) { \
+    MOZ_ASSERT(sInstance);                            \
+    sInstance->m##name_ = a##name_;                   \
+  }
+
+static constexpr size_t MAX_JS_FRAMES =
+    mozilla::profiler::ThreadRegistrationData::MAX_JS_FRAMES;
+using JsFrame = mozilla::profiler::ThreadRegistrationData::JsFrame;
+using JsFrameBuffer = mozilla::profiler::ThreadRegistrationData::JsFrameBuffer;
+
+// All functions in this file can run on multiple threads unless they have an
+// NS_IsMainThread() assertion.
+
+// This class contains the profiler's core global state, i.e. that which is
+// valid even when the profiler is not active. Most profile operations can't do
+// anything useful when this class is not instantiated, so we release-assert
+// its non-nullness in all such operations.
+//
+// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
+// PSAutoLock reference as an argument as proof that the gPSMutex is currently
+// locked. This makes it clear when gPSMutex is locked and helps avoid
+// accidental unlocked accesses to global state. There are ways to circumvent
+// this mechanism, but please don't do so without *very* good reason and a
+// detailed explanation.
+//
+// The exceptions to this rule:
+//
+// - mProcessStartTime, because it's immutable;
+class CorePS {
+ private:
+  CorePS()
+      : mProcessStartTime(TimeStamp::ProcessCreation())
+#ifdef USE_LUL_STACKWALK
+        ,
+        mLul(nullptr)
+#endif
+  {
+    MOZ_ASSERT(NS_IsMainThread(),
+               "CorePS must be created from the main thread");
+  }
+
+  ~CorePS() {
+#ifdef USE_LUL_STACKWALK
+    // Release the LUL object owned by *this* instance. Going through the
+    // member (rather than through sInstance) keeps the destructor correct
+    // regardless of what the global pointer currently refers to.
+    lul::LUL* lulPtr = mLul;
+    delete lulPtr;
+#endif
+  }
+
+ public:
+  // Creates the singleton. It must not exist yet.
+  static void Create(PSLockRef aLock) {
+    MOZ_ASSERT(!sInstance);
+    sInstance = new CorePS();
+  }
+
+  // Destroys the singleton. It must exist.
+  static void Destroy(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+    delete sInstance;
+    sInstance = nullptr;
+  }
+
+  // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
+  // being locked. This is because CorePS is instantiated so early on the main
+  // thread that we don't have to worry about it being racy.
+  static bool Exists() { return !!sInstance; }
+
+  // Adds the memory used by the singleton, the thread registry and the
+  // registered pages to aProfSize, and the memory used by LUL (if any) to
+  // aLulSize.
+  static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
+                        size_t& aProfSize, size_t& aLulSize) {
+    MOZ_ASSERT(sInstance);
+
+    aProfSize += aMallocSizeOf(sInstance);
+
+    aProfSize += ThreadRegistry::SizeOfIncludingThis(aMallocSizeOf);
+
+    for (const auto& registeredPage : sInstance->mRegisteredPages) {
+      aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
+    }
+
+    // Measurement of the following things may be added later if DMD finds it
+    // is worthwhile:
+    // - CorePS::mRegisteredPages itself (its elements' children are
+    // measured above)
+
+#if defined(USE_LUL_STACKWALK)
+    if (lul::LUL* lulPtr = sInstance->mLul; lulPtr) {
+      aLulSize += lulPtr->SizeOfIncludingThis(aMallocSizeOf);
+    }
+#endif
+  }
+
+  // No PSLockRef is needed for this field because it's immutable.
+  PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)
+
+  PS_GET(JsFrameBuffer&, JsFrames)
+
+  PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
+
+  // Registers aRegisteredPage, unless an equal page is already registered. An
+  // equal page whose URL is about:blank is replaced by the new one.
+  static void AppendRegisteredPage(PSLockRef,
+                                   RefPtr<PageInformation>&& aRegisteredPage) {
+    MOZ_ASSERT(sInstance);
+    struct RegisteredPageComparator {
+      PageInformation* aA;
+      bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
+    };
+
+    auto foundPageIter = std::find_if(
+        sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
+        RegisteredPageComparator{aRegisteredPage.get()});
+
+    if (foundPageIter != sInstance->mRegisteredPages.end()) {
+      if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) {
+        // When a BrowsingContext is loaded, the first url loaded in it will be
+        // about:blank, and if the principal matches, the first document loaded
+        // in it will share an inner window. That's why we should delete the
+        // intermediate about:blank if they share the inner window.
+        sInstance->mRegisteredPages.erase(foundPageIter);
+      } else {
+        // Do not register the same page again.
+        return;
+      }
+    }
+
+    MOZ_RELEASE_ASSERT(
+        sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
+  }
+
+  static void RemoveRegisteredPage(PSLockRef,
+                                   uint64_t aRegisteredInnerWindowID) {
+    MOZ_ASSERT(sInstance);
+    // Remove RegisteredPage from mRegisteredPages by given inner window ID.
+    sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
+      return rd->InnerWindowID() == aRegisteredInnerWindowID;
+    });
+  }
+
+  // Forgets all the registered pages.
+  static void ClearRegisteredPages(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    sInstance->mRegisteredPages.clear();
+  }
+
+  PS_GET(const Vector<BaseProfilerCount*>&, Counters)
+
+  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
+    MOZ_ASSERT(sInstance);
+    // we don't own the counter; they may be stored in static objects
+    MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
+  }
+
+  // Removes aCounter from the list of counters. When the singleton exists, the
+  // counter must be in the list.
+  static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
+    // we may be called to remove a counter after the profiler is stopped or
+    // late in shutdown.
+    if (sInstance) {
+      auto* counter = std::find(sInstance->mCounters.begin(),
+                                sInstance->mCounters.end(), aCounter);
+      MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
+      sInstance->mCounters.erase(counter);
+    }
+  }
+
+#ifdef USE_LUL_STACKWALK
+  static lul::LUL* Lul() {
+    MOZ_RELEASE_ASSERT(sInstance);
+    return sInstance->mLul;
+  }
+  // Hands ownership of aLul over to CorePS. This may only succeed once: the
+  // stored pointer must still be null.
+  static void SetLul(UniquePtr<lul::LUL> aLul) {
+    MOZ_RELEASE_ASSERT(sInstance);
+    MOZ_RELEASE_ASSERT(
+        sInstance->mLul.compareExchange(nullptr, aLul.release()));
+  }
+#endif
+
+  PS_GET_AND_SET(const nsACString&, ProcessName)
+  PS_GET_AND_SET(const nsACString&, ETLDplus1)
+
+ private:
+  // The singleton instance
+  static CorePS* sInstance;
+
+  // The time that the process started.
+  const TimeStamp mProcessStartTime;
+
+  // Info on all the registered pages.
+  // InnerWindowIDs in mRegisteredPages are unique.
+  Vector<RefPtr<PageInformation>> mRegisteredPages;
+
+  // Non-owning pointers to all active counters
+  Vector<BaseProfilerCount*> mCounters;
+
+#ifdef USE_LUL_STACKWALK
+  // LUL's state. Null prior to the first activation, non-null thereafter.
+  // Owned by this CorePS.
+  mozilla::Atomic<lul::LUL*> mLul;
+#endif
+
+  // Process name, provided by child process initialization code.
+  nsAutoCString mProcessName;
+  // Private name, provided by child process initialization code (eTLD+1 in
+  // fission)
+  nsAutoCString mETLDplus1;
+
+  // This memory buffer is used by the MergeStacks mechanism. Previously it was
+  // stack allocated, but this led to a stack overflow, as it was too much
+  // memory. Here the buffer can be pre-allocated, and shared with the
+  // MergeStacks feature as needed. MergeStacks is only run while holding the
+  // lock, so it is safe to have only one instance allocated for all of the
+  // threads.
+  JsFrameBuffer mJsFrames;
+};
+
+CorePS* CorePS::sInstance = nullptr;
+
+// Adds aCounter to CorePS's list of counters. aLock is the caller's proof
+// that the profiler mutex is held.
+void locked_profiler_add_sampled_counter(PSLockRef aLock,
+                                         BaseProfilerCount* aCounter) {
+  CorePS::AppendCounter(aLock, aCounter);
+}
+
+// Removes aCounter from CorePS's list of counters. aLock is the caller's
+// proof that the profiler mutex is held.
+void locked_profiler_remove_sampled_counter(PSLockRef aLock,
+                                            BaseProfilerCount* aCounter) {
+  // Note: we don't enforce a final sample, though we could do so if the
+  // profiler was active
+  CorePS::RemoveCounter(aLock, aCounter);
+}
+
+class SamplerThread;
+
+static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+                                       double aInterval, uint32_t aFeatures);
+
+// Owning wrapper around one thread's ProfiledThreadData.
+// NOTE(review): presumably one entry exists per live thread being profiled --
+// confirm against the users of this struct.
+struct LiveProfiledThreadData {
+  UniquePtr<ProfiledThreadData> mProfiledThreadData;
+};
+
+// The buffer size is provided as a number of "entries"; this is the size of
+// one entry, in bytes.
+constexpr static uint32_t scBytesPerEntry = 8;
+
+// This class contains the profiler's global state that is valid only when the
+// profiler is active. When not instantiated, the profiler is inactive.
+//
+// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
+// CorePS.
+//
+class ActivePS {
+ private:
+  // We need to decide how many chunks of what size we want to fit in the given
+  // total maximum capacity for this process, in the (likely) context of
+  // multiple processes doing the same choice and having an inter-process
+  // mechanism to control the overall memory limit.
+
+  // Minimum chunk size allowed, enough for at least one stack.
+  constexpr static uint32_t scMinimumChunkSize =
+      2 * ProfileBufferChunkManager::scExpectedMaximumStackSize;
+
+  // Ideally we want at least 2 unreleased chunks to work with (1 current and 1
+  // next), and 2 released chunks (so that one can be recycled when old, leaving
+  // one with some data).
+  constexpr static uint32_t scMinimumNumberOfChunks = 4;
+
+  // And we want to limit chunks to a maximum size, which is a compromise
+  // between:
+  // - A big size, which helps with reducing the rate of allocations and IPCs.
+  // - A small size, which helps with equalizing the duration of recorded data
+  //   (as the inter-process controller will discard the oldest chunks in all
+  //   Firefox processes).
+  constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;
+
+ public:
+  // We should be able to store at least the minimum number of the smallest-
+  // possible chunks.
+  constexpr static uint32_t scMinimumBufferSize =
+      scMinimumNumberOfChunks * scMinimumChunkSize;
+  // Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler:
+  // https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java
+  constexpr static uint32_t scMinimumBufferEntries =
+      scMinimumBufferSize / scBytesPerEntry;
+
+  // Limit to 2GiB.
+  constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
+  constexpr static uint32_t scMaximumBufferEntries =
+      scMaximumBufferSize / scBytesPerEntry;
+
+  // Restricts aEntries to the inclusive range
+  // [scMinimumBufferEntries, scMaximumBufferEntries].
+  constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
+    return (aEntries <= scMinimumBufferEntries)
+               ? scMinimumBufferEntries
+               : ((aEntries >= scMaximumBufferEntries) ? scMaximumBufferEntries
+                                                       : aEntries);
+  }
+
+ private:
+  // Chunk size in bytes for a buffer of aEntries entries: the clamped buffer
+  // size in bytes divided by scMinimumNumberOfChunks, capped at
+  // scMaximumChunkSize.
+  constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
+    // The arithmetic is done in size_t before narrowing the final result.
+    const size_t bufferBytes =
+        size_t(ClampToAllowedEntries(aEntries)) * scBytesPerEntry;
+    const size_t bytesPerChunk = bufferBytes / scMinimumNumberOfChunks;
+    const size_t maxChunkBytes = size_t(scMaximumChunkSize);
+    return uint32_t(bytesPerChunk < maxChunkBytes ? bytesPerChunk
+                                                  : maxChunkBytes);
+  }
+
+  // Returns the feature set that will actually be used: aFeatures restricted
+  // to AvailableFeatures(), plus any features implied by the ones requested.
+  // aFilterCount is not used here.
+  static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
+    // Drop anything this platform/configuration cannot provide.
+    uint32_t features = aFeatures & AvailableFeatures();
+
+    // Implication chain: FileIOAll => FileIO => MainThreadIO.
+    if (features & ProfilerFeature::FileIOAll) {
+      features |= ProfilerFeature::FileIO;
+    }
+    if (features & ProfilerFeature::FileIO) {
+      features |= ProfilerFeature::MainThreadIO;
+    }
+
+    // CPUAllThreads => CPUUtilization.
+    if (features & ProfilerFeature::CPUAllThreads) {
+      features |= ProfilerFeature::CPUUtilization;
+    }
+
+    return features;
+  }
+
+  // True if any of the IO-related features (MainThreadIO, FileIO, FileIOAll)
+  // is set in mFeatures. The constructor and destructor use this to decide
+  // whether to (un)register the IO interposer observer.
+  bool ShouldInterposeIOs() {
+    return ProfilerFeature::HasMainThreadIO(mFeatures) ||
+           ProfilerFeature::HasFileIO(mFeatures) ||
+           ProfilerFeature::HasFileIOAll(mFeatures);
+  }
+
+  // Builds the active-profiler state. Within this class, Create() is the
+  // caller.
+  //
+  // - aFeatures is filtered/expanded by AdjustFeatures() before being stored
+  //   in mFeatures; the CPU/power counters and the sampler thread are set up
+  //   from the aFeatures value as passed in.
+  // - If aChunkManagerOrNull is null, a new chunk manager is created, sized
+  //   from aCapacity (clamped to the allowed number of entries).
+  // - aFilters (aFilterCount C strings) are deep-copied, together with a
+  //   lower-cased copy of each one.
+  ActivePS(
+      PSLockRef aLock, const TimeStamp& aProfilingStartTime,
+      PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+      const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
+      const Maybe<double>& aDuration,
+      UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull)
+      : mProfilingStartTime(aProfilingStartTime),
+        mGeneration(sNextGeneration++),
+        mCapacity(aCapacity),
+        mDuration(aDuration),
+        mInterval(aInterval),
+        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
+        mActiveTabID(aActiveTabID),
+        mProfileBufferChunkManager(
+            aChunkManagerOrNull
+                ? std::move(aChunkManagerOrNull)
+                : MakeUnique<ProfileBufferChunkManagerWithLocalLimit>(
+                      size_t(ClampToAllowedEntries(aCapacity.Value())) *
+                          scBytesPerEntry,
+                      ChunkSizeForEntries(aCapacity.Value()))),
+        // Attach our chunk manager to the shared core buffer, and use that
+        // buffer as the profile buffer.
+        mProfileBuffer([this]() -> ProfileChunkedBuffer& {
+          ProfileChunkedBuffer& coreBuffer = profiler_get_core_buffer();
+          coreBuffer.SetChunkManagerIfDifferent(*mProfileBufferChunkManager);
+          return coreBuffer;
+        }()),
+        mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures)
+                                    ? new ProcessCPUCounter(aLock)
+                                    : nullptr),
+        mMaybePowerCounters(nullptr),
+        // The new sampler thread doesn't start sampling immediately because the
+        // main loop within Run() is blocked until this function's caller
+        // unlocks gPSMutex.
+        mSamplerThread(
+            NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)),
+        mIsPaused(false),
+        mIsSamplingPaused(false) {
+    ProfilingLog::Init();
+
+    // Deep copy and lower-case aFilters.
+    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
+    MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount));
+    for (uint32_t i = 0; i < aFilterCount; ++i) {
+      mFilters[i] = aFilters[i];
+      mFiltersLowered[i].reserve(mFilters[i].size());
+      // tolower() requires a value representable as unsigned char (or EOF);
+      // passing a plain, possibly negative, char is undefined behavior, so
+      // each byte is converted to unsigned char first.
+      std::transform(
+          mFilters[i].cbegin(), mFilters[i].cend(),
+          std::back_inserter(mFiltersLowered[i]),
+          [](unsigned char aChar) { return char(::tolower(aChar)); });
+    }
+
+#if !defined(RELEASE_OR_BETA)
+    if (ShouldInterposeIOs()) {
+      // We need to register the observer on the main thread, because we want
+      // to observe IO that happens on the main thread.
+      // IOInterposer needs to be initialized before calling
+      // IOInterposer::Register or our observer will be silently dropped.
+      if (NS_IsMainThread()) {
+        IOInterposer::Init();
+        IOInterposer::Register(IOInterposeObserver::OpAll,
+                               &ProfilerIOInterposeObserver::GetInstance());
+      } else {
+        NS_DispatchToMainThread(
+            NS_NewRunnableFunction("ActivePS::ActivePS", []() {
+              // Note: This could theoretically happen after ActivePS gets
+              // destroyed, but it's ok:
+              // - The Observer always checks that the profiler is (still)
+              //   active before doing its work.
+              // - The destruction should happen on the same thread as this
+              //   construction, so the un-registration will also be dispatched
+              //   and queued on the main thread, and run after this.
+              IOInterposer::Init();
+              IOInterposer::Register(
+                  IOInterposeObserver::OpAll,
+                  &ProfilerIOInterposeObserver::GetInstance());
+            }));
+      }
+    }
+#endif
+
+    // Power counters are created here (not in the initializer list) and each
+    // one is added to the sampled counters.
+    if (ProfilerFeature::HasPower(aFeatures)) {
+      mMaybePowerCounters = new PowerCounters();
+      for (const auto& powerCounter : mMaybePowerCounters->GetCounters()) {
+        locked_profiler_add_sampled_counter(aLock, powerCounter);
+      }
+    }
+  }
+
+  // Tears down the active-profiler state. The CPU and power counters must
+  // already have been deleted and nulled (asserted below); Destroy() does
+  // that before deleting the instance.
+  ~ActivePS() {
+    MOZ_ASSERT(
+        !mMaybeProcessCPUCounter,
+        "mMaybeProcessCPUCounter should have been deleted before ~ActivePS()");
+    MOZ_ASSERT(
+        !mMaybePowerCounters,
+        "mMaybePowerCounters should have been deleted before ~ActivePS()");
+
+#if !defined(RELEASE_OR_BETA)
+    if (ShouldInterposeIOs()) {
+      // We need to unregister the observer on the main thread, because that's
+      // where we've registered it.
+      if (NS_IsMainThread()) {
+        IOInterposer::Unregister(IOInterposeObserver::OpAll,
+                                 &ProfilerIOInterposeObserver::GetInstance());
+      } else {
+        NS_DispatchToMainThread(
+            NS_NewRunnableFunction("ActivePS::~ActivePS", []() {
+              IOInterposer::Unregister(
+                  IOInterposeObserver::OpAll,
+                  &ProfilerIOInterposeObserver::GetInstance());
+            }));
+      }
+    }
+#endif
+    if (mProfileBufferChunkManager) {
+      // We still control the chunk manager, remove it from the core buffer.
+      profiler_get_core_buffer().ResetChunkManager();
+    }
+
+    // Counterpart of the ProfilingLog::Init() call made in the constructor.
+    ProfilingLog::Destroy();
+  }
+
+  // Returns true if a thread named aThreadName matches the thread filters:
+  // - there are no filters at all, or
+  // - a filter is exactly "*", or
+  // - a lower-cased filter is a substring of the lower-cased thread name, or
+  // - FilterHasPid() accepts the filter.
+  bool ThreadSelected(const char* aThreadName) {
+    if (mFiltersLowered.empty()) {
+      return true;
+    }
+
+    std::string name = aThreadName;
+    // tolower() requires a value representable as unsigned char (or EOF);
+    // passing a plain, possibly negative, char is undefined behavior, so each
+    // byte is converted to unsigned char first.
+    std::transform(name.begin(), name.end(), name.begin(),
+                   [](unsigned char aChar) { return char(::tolower(aChar)); });
+
+    for (const auto& filter : mFiltersLowered) {
+      if (filter == "*") {
+        return true;
+      }
+
+      // Crude, non UTF-8 compatible, case insensitive substring search
+      if (name.find(filter) != std::string::npos) {
+        return true;
+      }
+
+      // If the filter is "pid:<my pid>", profile all threads.
+      if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+ public:
+  // Instantiates the ActivePS singleton (none may exist yet, asserted) by
+  // forwarding all arguments to the private constructor.
+  static void Create(
+      PSLockRef aLock, const TimeStamp& aProfilingStartTime,
+      PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+      const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
+      const Maybe<double>& aDuration,
+      UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull) {
+    MOZ_ASSERT(!sInstance);
+    sInstance = new ActivePS(aLock, aProfilingStartTime, aCapacity, aInterval,
+                             aFeatures, aFilters, aFilterCount, aActiveTabID,
+                             aDuration, std::move(aChunkManagerOrNull));
+  }
+
+  // Destroys the ActivePS singleton and hands back its SamplerThread, which
+  // the caller is responsible for destroying. The optional CPU and power
+  // counters are removed from the sampled counters and deleted first.
+  [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+
+    if (ProcessCPUCounter* cpuCounter = sInstance->mMaybeProcessCPUCounter) {
+      locked_profiler_remove_sampled_counter(aLock, cpuCounter);
+      delete cpuCounter;
+      sInstance->mMaybeProcessCPUCounter = nullptr;
+    }
+
+    if (PowerCounters* powerCounters = sInstance->mMaybePowerCounters) {
+      for (const auto& counter : powerCounters->GetCounters()) {
+        locked_profiler_remove_sampled_counter(aLock, counter);
+      }
+      delete powerCounters;
+      sInstance->mMaybePowerCounters = nullptr;
+    }
+
+    // Grab the sampler thread pointer before the instance goes away.
+    SamplerThread* const samplerThread = sInstance->mSamplerThread;
+    delete sInstance;
+    sInstance = nullptr;
+
+    return samplerThread;
+  }
+
+  // True if the ActivePS singleton is currently instantiated.
+  static bool Exists(PSLockRef) { return !!sInstance; }
+
+  // Returns true if the given settings are identical to the ones stored in
+  // the current ActivePS instance, including every filter string in order.
+  static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
+                     const Maybe<double>& aDuration, double aInterval,
+                     uint32_t aFeatures, const char** aFilters,
+                     uint32_t aFilterCount, uint64_t aActiveTabID) {
+    MOZ_ASSERT(sInstance);
+    const ActivePS& active = *sInstance;
+
+    // Scalar settings first; this also guarantees that the filter counts
+    // match before aFilters is indexed below.
+    if (active.mCapacity != aCapacity || active.mDuration != aDuration ||
+        active.mInterval != aInterval || active.mFeatures != aFeatures ||
+        active.mFilters.length() != aFilterCount ||
+        active.mActiveTabID != aActiveTabID) {
+      return false;
+    }
+
+    const size_t filterCount = active.mFilters.length();
+    for (size_t index = 0; index != filterCount; ++index) {
+      const char* const stored = active.mFilters[index].c_str();
+      if (strcmp(stored, aFilters[index]) != 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Returns the size reported by aMallocSizeOf for the singleton itself plus
+  // what mProfileBuffer reports through SizeOfExcludingThis().
+  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
+    MOZ_ASSERT(sInstance);
+
+    size_t n = aMallocSizeOf(sInstance);
+
+    n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
+
+    // Measurement of the following members may be added later if DMD finds it
+    // is worthwhile:
+    // - mLiveProfiledThreads (both the array itself, and the contents)
+    // - mDeadProfiledThreads (both the array itself, and the contents)
+    //
+
+    return n;
+  }
+
+  // Computes which profiling features apply to the thread described by aInfo:
+  // everything if its name passes the thread filters, otherwise only the
+  // features that are enabled for all threads.
+  static ThreadProfilingFeatures ProfilingFeaturesForThread(
+      PSLockRef aLock, const ThreadRegistrationInfo& aInfo) {
+    MOZ_ASSERT(sInstance);
+
+    if (!sInstance->ThreadSelected(aInfo.Name())) {
+      // Not explicitly selected: start from nothing and add each
+      // "all threads" feature that is active.
+      ThreadProfilingFeatures result = ThreadProfilingFeatures::NotProfiled;
+      if (ActivePS::FeatureCPUAllThreads(aLock)) {
+        result = Combine(result, ThreadProfilingFeatures::CPUUtilization);
+      }
+      if (ActivePS::FeatureSamplingAllThreads(aLock)) {
+        result = Combine(result, ThreadProfilingFeatures::Sampling);
+      }
+      if (ActivePS::FeatureMarkersAllThreads(aLock)) {
+        result = Combine(result, ThreadProfilingFeatures::Markers);
+      }
+      return result;
+    }
+
+    // This thread was selected by the user, record everything.
+    return ThreadProfilingFeatures::Any;
+  }
+
+  [[nodiscard]] static bool AppendPostSamplingCallback(
+      PSLockRef, PostSamplingCallback&& aCallback);
+
+  // Writes out the current active configuration of the profile.
+  static void WriteActiveConfiguration(
+      PSLockRef aLock, JSONWriter& aWriter,
+      const Span<const char>& aPropertyName = MakeStringSpan("")) {
+    // Without an active profiler, write a null (as a named property when
+    // aPropertyName is non-empty, as an array element otherwise).
+    if (!sInstance) {
+      if (!aPropertyName.empty()) {
+        aWriter.NullProperty(aPropertyName);
+      } else {
+        aWriter.NullElement();
+      }
+      return;
+    };
+
+    // Same property-vs-element choice for the object that holds the
+    // configuration.
+    if (!aPropertyName.empty()) {
+      aWriter.StartObjectProperty(aPropertyName);
+    } else {
+      aWriter.StartObjectElement();
+    }
+
+    {
+      // "features": the string name of every feature for which
+      // profiler_feature_active() returns true.
+      aWriter.StartArrayProperty("features");
+#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_)    \
+  if (profiler_feature_active(ProfilerFeature::Name_)) { \
+    aWriter.StringElement(str_);                         \
+  }
+
+      PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES)
+#undef WRITE_ACTIVE_FEATURES
+      aWriter.EndArray();
+    }
+    {
+      // "threads": the thread filters as originally given (not lower-cased).
+      aWriter.StartArrayProperty("threads");
+      for (const auto& filter : sInstance->mFilters) {
+        aWriter.StringElement(filter);
+      }
+      aWriter.EndArray();
+    }
+    {
+      // Now write all the simple values.
+
+      // The interval is also available on profile.meta.interval
+      aWriter.DoubleProperty("interval", sInstance->mInterval);
+      aWriter.IntProperty("capacity", sInstance->mCapacity.Value());
+      // "duration" is only written when a duration was set.
+      if (sInstance->mDuration) {
+        aWriter.DoubleProperty("duration", sInstance->mDuration.value());
+      }
+      // Here, we are converting uint64_t to double. Tab IDs are
+      // being created using `nsContentUtils::GenerateProcessSpecificId`, which
+      // is specifically designed to only use 53 of the 64 bits to be lossless
+      // when passed into and out of JS as a double.
+      aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
+    }
+    aWriter.EndObject();
+  }
+
+  PS_GET_LOCKLESS(TimeStamp, ProfilingStartTime)
+
+  PS_GET(uint32_t, Generation)
+
+  PS_GET(PowerOfTwo32, Capacity)
+
+  PS_GET(Maybe<double>, Duration)
+
+  PS_GET(double, Interval)
+
+  PS_GET(uint32_t, Features)
+
+  PS_GET(uint64_t, ActiveTabID)
+
+#define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
+  static bool Feature##Name_(PSLockRef) {                     \
+    MOZ_ASSERT(sInstance);                                    \
+    return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
+  }
+
+  PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
+
+#undef PS_GET_FEATURE
+
+  // Translates the active JS-related features into a bitmask of
+  // JSInstrumentationFlags values.
+  static uint32_t JSFlags(PSLockRef aLock) {
+    uint32_t flags = 0;
+    if (FeatureJS(aLock)) {
+      flags |= uint32_t(JSInstrumentationFlags::StackSampling);
+    }
+    if (FeatureJSAllocations(aLock)) {
+      flags |= uint32_t(JSInstrumentationFlags::Allocations);
+    }
+    return flags;
+  }
+
+  PS_GET(const Vector<std::string>&, Filters)
+  PS_GET(const Vector<std::string>&, FiltersLowered)
+
+  // Not using PS_GET, because only the "Controlled" interface of
+  // `mProfileBufferChunkManager` should be exposed here.
+  static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
+      PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    // The chunk manager pointer is dereferenced unconditionally, so it must
+    // still be non-null here (only asserted in debug builds).
+    MOZ_ASSERT(sInstance->mProfileBufferChunkManager);
+    return *sInstance->mProfileBufferChunkManager;
+  }
+
+  // Forwards to the chunk manager's FulfillChunkRequests(), if this instance
+  // still has a chunk manager; otherwise does nothing.
+  static void FulfillChunkRequests(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    if (sInstance->mProfileBufferChunkManager) {
+      sInstance->mProfileBufferChunkManager->FulfillChunkRequests();
+    }
+  }
+
+  // Returns the profile buffer of the active instance.
+  static ProfileBuffer& Buffer(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    return sInstance->mProfileBuffer;
+  }
+
+  // Read-only access to the list of profiled threads that are still
+  // registered.
+  static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    return sInstance->mLiveProfiledThreads;
+  }
+
+  struct ProfiledThreadListElement {
+    TimeStamp mRegisterTime;
+    JSContext* mJSContext;  // Null for unregistered threads.
+    ProfiledThreadData* mProfiledThreadData;
+  };
+  using ProfiledThreadList = Vector<ProfiledThreadListElement>;
+
+  // Returns a ProfiledThreadList with all threads that should be included in a
+  // profile, both for threads that are still registered, and for threads that
+  // have been unregistered but still have data in the buffer.
+  // The returned array is sorted by thread register time.
+  // Do not hold on to the return value past LockedRegistry.
+  static ProfiledThreadList ProfiledThreads(
+      ThreadRegistry::LockedRegistry& aLockedRegistry, PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+    ProfiledThreadList array;
+    // Reserve room for every live and dead profiled thread up front.
+    MOZ_RELEASE_ASSERT(
+        array.initCapacity(sInstance->mLiveProfiledThreads.length() +
+                           sInstance->mDeadProfiledThreads.length()));
+
+    // Live threads: walk the registry and keep those that have
+    // ProfiledThreadData attached.
+    for (ThreadRegistry::OffThreadRef offThreadRef : aLockedRegistry) {
+      ProfiledThreadData* profiledThreadData =
+          offThreadRef.UnlockedRWForLockedProfilerRef().GetProfiledThreadData(
+              aLock);
+      if (!profiledThreadData) {
+        // This thread was not profiled, continue with the next one.
+        continue;
+      }
+      // The JSContext is read through the per-thread locked accessor.
+      ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
+          offThreadRef.GetLockedRWFromAnyThread();
+      MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
+          profiledThreadData->Info().RegisterTime(),
+          lockedThreadData->GetJSContext(), profiledThreadData}));
+    }
+
+    // Dead (unregistered) threads: no JSContext is recorded for them.
+    for (auto& t : sInstance->mDeadProfiledThreads) {
+      MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{
+          t->Info().RegisterTime(), (JSContext*)nullptr, t.get()}));
+    }
+
+    // Order by ascending register time.
+    std::sort(array.begin(), array.end(),
+              [](const ProfiledThreadListElement& a,
+                 const ProfiledThreadListElement& b) {
+                return a.mRegisterTime < b.mRegisterTime;
+              });
+    return array;
+  }
+
+  // Returns all pages known to the profiler: the pages currently registered
+  // in CorePS, followed by the unregistered ("dead") pages kept by ActivePS.
+  static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+    Vector<RefPtr<PageInformation>> array;
+    for (auto& d : CorePS::RegisteredPages(aLock)) {
+      MOZ_RELEASE_ASSERT(array.append(d));
+    }
+    for (auto& d : sInstance->mDeadProfiledPages) {
+      MOZ_RELEASE_ASSERT(array.append(d));
+    }
+    // We don't need to sort the pages like threads since we won't show them
+    // as a list.
+    return array;
+  }
+
+  // Takes ownership of aProfiledThreadData by appending it to the list of
+  // live profiled threads, and returns a non-owning pointer to it.
+  static ProfiledThreadData* AddLiveProfiledThread(
+      PSLockRef, UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
+    MOZ_ASSERT(sInstance);
+    MOZ_RELEASE_ASSERT(sInstance->mLiveProfiledThreads.append(
+        LiveProfiledThreadData{std::move(aProfiledThreadData)}));
+
+    // Return a weak pointer to the ProfiledThreadData object.
+    return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
+  }
+
+  // Moves the ProfiledThreadData identified by aProfiledThreadData from the
+  // live list to the dead list, after recording the buffer position at which
+  // the thread was unregistered. Does nothing if it is not in the live list.
+  static void UnregisterThread(PSLockRef aLockRef,
+                               ProfiledThreadData* aProfiledThreadData) {
+    MOZ_ASSERT(sInstance);
+
+    // Drop dead threads whose data has already left the buffer.
+    DiscardExpiredDeadProfiledThreads(aLockRef);
+
+    auto& liveThreads = sInstance->mLiveProfiledThreads;
+    for (LiveProfiledThreadData& live : liveThreads) {
+      if (live.mProfiledThreadData.get() != aProfiledThreadData) {
+        continue;
+      }
+
+      live.mProfiledThreadData->NotifyUnregistered(
+          sInstance->mProfileBuffer.BufferRangeEnd());
+      MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
+          std::move(live.mProfiledThreadData)));
+      // Erasing invalidates the iteration, so leave the loop right away.
+      liveThreads.erase(&live);
+      return;
+    }
+  }
+
+  // This is a counter to collect process CPU utilization during profiling.
+  // It cannot be a raw `ProfilerCounter` because we need to manually add/remove
+  // it while the profiler lock is already held.
+  class ProcessCPUCounter final : public BaseProfilerCount {
+   public:
+    // Sets up the base counter with the "processCPU" name, pointing at
+    // mCounter, and immediately adds itself to the sampled counters.
+    explicit ProcessCPUCounter(PSLockRef aLock)
+        : BaseProfilerCount("processCPU", &mCounter, nullptr, "CPU",
+                            "Process CPU utilization") {
+      // Adding on construction, so it's ready before the sampler starts.
+      locked_profiler_add_sampled_counter(aLock, this);
+      // Note: Removed from ActivePS::Destroy, because a lock is needed.
+    }
+
+    // Adds aNumber to the counter value.
+    void Add(int64_t aNumber) { mCounter += aNumber; }
+
+   private:
+    // Storage for the counter value handed to BaseProfilerCount.
+    ProfilerAtomicSigned mCounter;
+  };
+  PS_GET(ProcessCPUCounter*, MaybeProcessCPUCounter);
+
+  PS_GET(PowerCounters*, MaybePowerCounters);
+
+  PS_GET_AND_SET(bool, IsPaused)
+
+  // True if sampling is paused (through generic `SetIsPaused()` or specific
+  // `SetIsSamplingPaused()`).
+  static bool IsSamplingPaused(PSLockRef lock) {
+    MOZ_ASSERT(sInstance);
+    // A full pause implies that sampling is paused as well.
+    return IsPaused(lock) || sInstance->mIsSamplingPaused;
+  }
+
+  // Sets the sampling-only pause flag; the full-pause flag is not touched.
+  static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
+    MOZ_ASSERT(sInstance);
+    sInstance->mIsSamplingPaused = aIsSamplingPaused;
+  }
+
+  // Removes from the dead-thread list every thread that was unregistered at
+  // a buffer position preceding the current start of the buffer range.
+  static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    const uint64_t oldestPosition =
+        sInstance->mProfileBuffer.BufferRangeStart();
+
+    // Every entry in the dead list must have an unregistration position.
+    auto unregisteredBeforeBufferStart =
+        [oldestPosition](const UniquePtr<ProfiledThreadData>& aDeadThread) {
+          const Maybe<uint64_t> unregisteredAt =
+              aDeadThread->BufferPositionWhenUnregistered();
+          MOZ_RELEASE_ASSERT(unregisteredAt,
+                             "should have unregistered this thread");
+          return *unregisteredAt < oldestPosition;
+        };
+
+    sInstance->mDeadProfiledThreads.eraseIf(unregisteredBeforeBufferStart);
+  }
+
+  // Moves every registered page whose inner window ID equals
+  // aRegisteredInnerWindowID from CorePS's registered pages to the dead-page
+  // list, after recording the buffer position at unregistration.
+  static void UnregisterPage(PSLockRef aLock,
+                             uint64_t aRegisteredInnerWindowID) {
+    MOZ_ASSERT(sInstance);
+    auto& registeredPages = CorePS::RegisteredPages(aLock);
+
+    size_t index = 0;
+    while (index < registeredPages.length()) {
+      RefPtr<PageInformation>& page = registeredPages[index];
+      if (page->InnerWindowID() != aRegisteredInnerWindowID) {
+        ++index;
+        continue;
+      }
+
+      page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
+      MOZ_RELEASE_ASSERT(
+          sInstance->mDeadProfiledPages.append(std::move(page)));
+      // After erasing, the next candidate sits at the same index, so the
+      // index is deliberately not advanced.
+      registeredPages.erase(&registeredPages[index]);
+    }
+  }
+
+  // Removes from the dead-page list every page whose unregistration position
+  // precedes the current start of the buffer range.
+  static void DiscardExpiredPages(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+    // Discard any dead pages that were unregistered before
+    // bufferRangeStart.
+    sInstance->mDeadProfiledPages.eraseIf(
+        [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
+          Maybe<uint64_t> bufferPosition =
+              aProfiledPage->BufferPositionWhenUnregistered();
+          // Every entry in the dead list must have an unregistration position.
+          MOZ_RELEASE_ASSERT(bufferPosition,
+                             "should have unregistered this page");
+          return *bufferPosition < bufferRangeStart;
+        });
+  }
+
+  // Empties the dead-page list unconditionally.
+  static void ClearUnregisteredPages(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    sInstance->mDeadProfiledPages.clear();
+  }
+
+  // Drops the base profile and any exit profiles that were recorded at a
+  // buffer position preceding the current start of the buffer range.
+  static void ClearExpiredExitProfiles(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+    // Discard exit profiles that were gathered before our buffer RangeStart.
+    // If we have started to overwrite our data from when the Base profile was
+    // added, we should get rid of that Base profile because it's now older than
+    // our oldest Gecko profile data.
+    //
+    // When adding: (In practice the starting buffer should be empty)
+    // v Start == End
+    // |                 <-- Buffer range, initially empty.
+    // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
+    //
+    // Later, still in range:
+    // v Start   v End
+    // |=========|       <-- Buffer range growing.
+    // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
+    //
+    // Even later, now out of range:
+    //       v Start      v End
+    //       |============|       <-- Buffer range full and sliding.
+    // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it
+    // Note: the base-profile check reads the range start from the core
+    // buffer's state, while the exit profiles below are compared against
+    // bufferRangeStart obtained from mProfileBuffer.
+    if (sInstance->mBaseProfileThreads &&
+        sInstance->mGeckoIndexWhenBaseProfileAdded
+                .ConvertToProfileBufferIndex() <
+            profiler_get_core_buffer().GetState().mRangeStart) {
+      DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
+                sInstance->mBaseProfileThreads.get());
+      sInstance->mBaseProfileThreads.reset();
+    }
+    sInstance->mExitProfiles.eraseIf(
+        [bufferRangeStart](const ExitProfile& aExitProfile) {
+          return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
+        });
+  }
+
+  // Stores aBaseProfileThreads (replacing any previous value) and records
+  // the core buffer's current range end, which ClearExpiredExitProfiles()
+  // later uses to decide when the base profile has expired.
+  static void AddBaseProfileThreads(PSLockRef aLock,
+                                    UniquePtr<char[]> aBaseProfileThreads) {
+    MOZ_ASSERT(sInstance);
+    DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get());
+    sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
+    sInstance->mGeckoIndexWhenBaseProfileAdded =
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            profiler_get_core_buffer().GetState().mRangeEnd);
+  }
+
+  // Transfers ownership of the stored base profile to the caller. Expired
+  // data is discarded first, so the result may be null.
+  static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+
+    ClearExpiredExitProfiles(aLock);
+
+    DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p",
+              sInstance->mBaseProfileThreads.get());
+    return std::move(sInstance->mBaseProfileThreads);
+  }
+
+  // Stores a copy of aExitProfile together with the buffer's current range
+  // end, after discarding exit profiles that have already expired.
+  static void AddExitProfile(PSLockRef aLock, const nsACString& aExitProfile) {
+    MOZ_ASSERT(sInstance);
+
+    ClearExpiredExitProfiles(aLock);
+
+    MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(ExitProfile{
+        nsCString(aExitProfile), sInstance->mProfileBuffer.BufferRangeEnd()}));
+  }
+
+  // Hands over the JSON of all non-expired exit profiles to the caller and
+  // leaves the stored exit-profile list empty.
+  static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+
+    ClearExpiredExitProfiles(aLock);
+
+    auto& storedProfiles = sInstance->mExitProfiles;
+    const size_t storedCount = storedProfiles.length();
+
+    Vector<nsCString> jsonProfiles;
+    MOZ_RELEASE_ASSERT(jsonProfiles.initCapacity(storedCount));
+    for (size_t index = 0; index != storedCount; ++index) {
+      MOZ_RELEASE_ASSERT(
+          jsonProfiles.append(std::move(storedProfiles[index].mJSON)));
+    }
+    storedProfiles.clear();
+    return jsonProfiles;
+  }
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  // Records aMemoryCounter as the memory counter. Unlike most ActivePS
+  // accessors, this one takes no PSLockRef; the member is an Atomic.
+  static void SetMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
+    MOZ_ASSERT(sInstance);
+
+    sInstance->mMemoryCounter = aMemoryCounter;
+  }
+
+  // True if aMemoryCounter is the pointer last stored by SetMemoryCounter().
+  static bool IsMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
+    MOZ_ASSERT(sInstance);
+
+    return sInstance->mMemoryCounter == aMemoryCounter;
+  }
+#endif
+
+ private:
+  // The singleton instance.
+  static ActivePS* sInstance;
+
+  const TimeStamp mProfilingStartTime;
+
+  // We need to track activity generations. If we didn't we could have the
+  // following scenario.
+  //
+  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
+  //   gPSMutex, deletes the SamplerThread (which does a join).
+  //
+  // - profiler_start() runs on a different thread, locks gPSMutex,
+  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
+  //   completes.
+  //
+  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
+  //   and continues as if the start/stop pair didn't occur. Also
+  //   profiler_stop() is stuck, unable to finish.
+  //
+  // By checking ActivePS *and* the generation, we can avoid this scenario.
+  // sNextGeneration is used to track the next generation number; it is static
+  // because it must persist across different ActivePS instantiations.
+  const uint32_t mGeneration;
+  static uint32_t sNextGeneration;
+
+  // The maximum number of entries in mProfileBuffer.
+  const PowerOfTwo32 mCapacity;
+
+  // The maximum duration of entries in mProfileBuffer, in seconds.
+  const Maybe<double> mDuration;
+
+  // The interval between samples, measured in milliseconds.
+  const double mInterval;
+
+  // The profile features that are enabled.
+  const uint32_t mFeatures;
+
+  // Substrings of names of threads we want to profile.
+  Vector<std::string> mFilters;
+  Vector<std::string> mFiltersLowered;
+
+  // ID of the active browser screen's active tab.
+  // It's being used to determine the profiled tab. It's "0" if we failed to
+  // get the ID.
+  const uint64_t mActiveTabID;
+
+  // The chunk manager used by `mProfileBuffer` below.
+  // May become null if it gets transferred out of the Gecko Profiler.
+  UniquePtr<ProfileBufferChunkManagerWithLocalLimit> mProfileBufferChunkManager;
+
+  // The buffer into which all samples are recorded.
+  ProfileBuffer mProfileBuffer;
+
+  // ProfiledThreadData objects for any threads that were profiled at any point
+  // during this run of the profiler:
+  //  - mLiveProfiledThreads contains all threads that are still registered, and
+  //  - mDeadProfiledThreads contains all threads that have already been
+  //    unregistered but for which there is still data in the profile buffer.
+  Vector<LiveProfiledThreadData> mLiveProfiledThreads;
+  Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
+
+  // Info on all the dead pages.
+  // Registered pages are being moved to this array after unregistration.
+  // We are keeping them in case we need them in the profile data.
+  // We are removing them when we ensure that we won't need them anymore.
+  Vector<RefPtr<PageInformation>> mDeadProfiledPages;
+
+  // Used to collect process CPU utilization values, if the feature is on.
+  ProcessCPUCounter* mMaybeProcessCPUCounter;
+
+  // Used to collect power use data, if the power feature is on.
+  PowerCounters* mMaybePowerCounters;
+
+  // The current sampler thread. This class is not responsible for destroying
+  // the SamplerThread object; the Destroy() method returns it so the caller
+  // can destroy it.
+  SamplerThread* const mSamplerThread;
+
+  // Is the profiler fully paused?
+  bool mIsPaused;
+
+  // Is the profiler periodic sampling paused?
+  bool mIsSamplingPaused;
+
+  // Optional startup profile thread array from BaseProfiler.
+  UniquePtr<char[]> mBaseProfileThreads;
+  ProfileBufferBlockIndex mGeckoIndexWhenBaseProfileAdded;
+
+  // One stored exit profile: its JSON text, and the end of the buffer range
+  // at the time it was added (used to expire it once the buffer has moved
+  // past that position).
+  struct ExitProfile {
+    nsCString mJSON;
+    uint64_t mBufferPositionAtGatherTime;
+  };
+  Vector<ExitProfile> mExitProfiles;
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  Atomic<const BaseProfilerCount*> mMemoryCounter;
+#endif
+};
+
+ActivePS* ActivePS::sInstance = nullptr;
+uint32_t ActivePS::sNextGeneration = 0;
+
+#undef PS_GET
+#undef PS_GET_LOCKLESS
+#undef PS_GET_AND_SET
+
+using ProfilerStateChangeMutex =
+    mozilla::baseprofiler::detail::BaseProfilerMutex;
+using ProfilerStateChangeLock =
+    mozilla::baseprofiler::detail::BaseProfilerAutoLock;
+static ProfilerStateChangeMutex gProfilerStateChangeMutex;
+
+// A registered profiling-state-change callback, together with the set of
+// states it is interested in and the identifier under which it was
+// registered (compared against when callbacks are removed).
+struct IdentifiedProfilingStateChangeCallback {
+  ProfilingStateSet mProfilingStateSet;
+  ProfilingStateChangeCallback mProfilingStateChangeCallback;
+  uintptr_t mUniqueIdentifier;
+
+  // aProfilingStateChangeCallback is consumed: it is moved into the member.
+  explicit IdentifiedProfilingStateChangeCallback(
+      ProfilingStateSet aProfilingStateSet,
+      ProfilingStateChangeCallback&& aProfilingStateChangeCallback,
+      uintptr_t aUniqueIdentifier)
+      : mProfilingStateSet(aProfilingStateSet),
+        // A named rvalue reference is an lvalue; without std::move the
+        // callback (and whatever it captured) would be copied here.
+        mProfilingStateChangeCallback(
+            std::move(aProfilingStateChangeCallback)),
+        mUniqueIdentifier(aUniqueIdentifier) {}
+};
+using IdentifiedProfilingStateChangeCallbackUPtr =
+    UniquePtr<IdentifiedProfilingStateChangeCallback>;
+
+// All currently registered state-change callbacks. Every function below that
+// touches this vector does so while holding gProfilerStateChangeMutex.
+// (Despite the "m" prefix, this is a file-level static, not a member.)
+static Vector<IdentifiedProfilingStateChangeCallbackUPtr>
+    mIdentifiedProfilingStateChangeCallbacks;
+
+// Register aCallback to be invoked for every state in aProfilingStateSet.
+// If the set contains AlreadyActive and the profiler is currently active, the
+// callback is invoked immediately with that state, before being stored.
+// aUniqueIdentifier (0 = none) is what profiler_remove_state_change_callback()
+// later matches against. A failure to store the callback is ignored.
+void profiler_add_state_change_callback(
+    ProfilingStateSet aProfilingStateSet,
+    ProfilingStateChangeCallback&& aCallback,
+    uintptr_t aUniqueIdentifier /* = 0 */) {
+  MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
+  ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
+
+#ifdef DEBUG
+  // A non-zero identifier must not be registered twice. This is only checked
+  // in DEBUG builds; other builds forgive it, and in the worst case the
+  // duplicates may get removed too early.
+  if (aUniqueIdentifier != 0) {
+    for (const auto& registered : mIdentifiedProfilingStateChangeCallbacks) {
+      MOZ_ASSERT(registered->mUniqueIdentifier != aUniqueIdentifier);
+    }
+  }
+#endif  // DEBUG
+
+  const bool wantsAlreadyActive =
+      aProfilingStateSet.contains(ProfilingState::AlreadyActive);
+  if (wantsAlreadyActive && profiler_is_active()) {
+    aCallback(ProfilingState::AlreadyActive);
+  }
+
+  IdentifiedProfilingStateChangeCallbackUPtr newEntry =
+      MakeUnique<IdentifiedProfilingStateChangeCallback>(
+          aProfilingStateSet, std::move(aCallback), aUniqueIdentifier);
+  (void)mIdentifiedProfilingStateChangeCallbacks.append(std::move(newEntry));
+}
+
+// Unregister every callback that was added with the given (non-zero)
+// identifier. A callback whose state set contains RemovingCallback is invoked
+// one last time with that state just before it is erased.
+void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) {
+  MOZ_ASSERT(aUniqueIdentifier != 0);
+  if (!aUniqueIdentifier) {
+    // Zero is asserted against above; non-DEBUG builds just ignore it.
+    return;
+  }
+
+  MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
+  ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
+
+  mIdentifiedProfilingStateChangeCallbacks.eraseIf(
+      [aUniqueIdentifier](
+          const IdentifiedProfilingStateChangeCallbackUPtr& aEntry) {
+        const bool isMatch = aEntry->mUniqueIdentifier == aUniqueIdentifier;
+        if (isMatch && aEntry->mProfilingStateSet.contains(
+                           ProfilingState::RemovingCallback)) {
+          aEntry->mProfilingStateChangeCallback(
+              ProfilingState::RemovingCallback);
+        }
+        return isMatch;
+      });
+}
+
+// Notify, in registration order, every registered callback whose state set
+// contains aProfilingState. Callbacks run while gProfilerStateChangeMutex is
+// held.
+static void invoke_profiler_state_change_callbacks(
+    ProfilingState aProfilingState) {
+  MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
+  ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
+
+  for (const auto& entry : mIdentifiedProfilingStateChangeCallbacks) {
+    if (!entry->mProfilingStateSet.contains(aProfilingState)) {
+      // Not interested in this state.
+      continue;
+    }
+    entry->mProfilingStateChangeCallback(aProfilingState);
+  }
+}
+
+// Definition of RacyFeatures' static atomic word, initially 0. It is declared
+// with relaxed memory ordering.
+Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
+
+// The name of the main thread.
+static const char* const kMainThreadName = "GeckoMain";
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN sampling/unwinding code
+
+// The registers used for stack unwinding and a few other sampling purposes.
+// The ctor only nulls the pointer fields; users are responsible for filling
+// them in with real values before use.
+class Registers {
+ public:
+  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}
+
+  // Zero the whole object (including, where present, the context pointer and
+  // the context storage).
+  void Clear() { memset(this, 0, sizeof(*this)); }
+
+  // These fields are filled in by
+  // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
+  // samples, and by REGISTERS_SYNC_POPULATE for synchronous samples.
+  Address mPC;  // Instruction pointer.
+  Address mSP;  // Stack pointer.
+  Address mFP;  // Frame pointer.
+  Address mLR;  // ARM link register.
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+  // This contains all the registers, which means it duplicates the four fields
+  // above. This is ok.
+  // Null until a context is attached, so that a freshly constructed object
+  // never carries an indeterminate pointer.
+  ucontext_t* mContext = nullptr;  // The context from the signal handler or
+                                   // below.
+  // Deliberately left uninitialized by the ctor.
+  ucontext_t mContextSyncStorage;  // Storage for sync stack unwinding.
+#endif
+};
+
+// Maximum number of native frames recorded for one stack walk; this is the
+// capacity of the NativeStack arrays.
+// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
+// looping on corrupted stacks.
+static const size_t MAX_NATIVE_FRAMES = 1024;
+
+// Fixed-capacity result of one native stack walk: parallel arrays of program
+// counters and stack pointers, of which the first mCount entries are in use.
+// A new object starts out empty, with both arrays zeroed.
+struct NativeStack {
+  void* mPCs[MAX_NATIVE_FRAMES] = {};
+  void* mSPs[MAX_NATIVE_FRAMES] = {};
+  size_t mCount = 0;  // Number of frames filled.
+
+  NativeStack() = default;
+};
+
+// True while some AutoWalkJSStack instance holds the permission to walk.
+Atomic<bool> WALKING_JS_STACK(false);
+
+// Scoped guard around WALKING_JS_STACK: the constructor tries to flip the flag
+// from false to true, and walkAllowed records whether that succeeded. Only an
+// instance that obtained the permission resets the flag on destruction.
+struct AutoWalkJSStack {
+  bool walkAllowed;
+
+  AutoWalkJSStack()
+      : walkAllowed(WALKING_JS_STACK.compareExchange(false, true)) {}
+
+  ~AutoWalkJSStack() {
+    if (!walkAllowed) {
+      return;
+    }
+    WALKING_JS_STACK = false;
+  }
+};
+
+// Collection of "resume points" that a native stack walker can use to pick up
+// walking again after it got lost. Only functional on the configurations
+// selected by the #if below; elsewhere scIsSupported is false and the member
+// functions are declared but never defined.
+class StackWalkControl {
+ public:
+  struct ResumePoint {
+    // Register values at which a lost stack walker should resume.
+    void* resumeSp;  // If null, stop the walker here, don't resume again.
+    void* resumeBp;
+    void* resumePc;
+  };
+
+#if ((defined(USE_MOZ_STACK_WALK) || defined(USE_FRAME_POINTER_STACK_WALK)) && \
+     defined(GP_ARCH_amd64))
+ public:
+  static constexpr bool scIsSupported = true;
+
+  // Forget all recorded resume points.
+  void Clear() { mResumePointCount = 0; }
+
+  size_t ResumePointCount() const { return mResumePointCount; }
+
+  static constexpr size_t MaxResumePointCount() {
+    return scMaxResumePointCount;
+  }
+
+  // Record one more resume point. Once MaxResumePointCount() points are
+  // stored, further additions are silently dropped; in practice this means
+  // that stack walking may still lose native frames.
+  void AddResumePoint(ResumePoint&& aResumePoint) {
+    // A null SP implies null BP and PC.
+    MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumeBp);
+    MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumePc);
+
+    // Conversely, a non-null BP or PC requires a non-null SP. (BP/PC may
+    // still be null when SP is not.)
+    MOZ_ASSERT_IF(aResumePoint.resumeBp, aResumePoint.resumeSp);
+    MOZ_ASSERT_IF(aResumePoint.resumePc, aResumePoint.resumeSp);
+
+    if (mResumePointCount >= scMaxResumePointCount) {
+      // Storage is full, drop this one.
+      return;
+    }
+    mResumePoint[mResumePointCount++] = std::move(aResumePoint);
+  }
+
+  // Read-only iteration over the stored resume points (range-for friendly).
+  const ResumePoint* begin() const { return mResumePoint; }
+  const ResumePoint* end() const { return mResumePoint + mResumePointCount; }
+
+  // Among the stored resume points with a non-null resumeSp strictly above
+  // aSp (i.e., potential callers of the function running at aSp), return the
+  // one with the lowest resumeSp; nullptr if there is none. On ties the
+  // earliest-added point wins.
+  const ResumePoint* GetResumePointCallingSp(void* aSp) const {
+    const ResumePoint* closest = nullptr;
+    for (const ResumePoint* candidate = begin(); candidate != end();
+         ++candidate) {
+      if (!candidate->resumeSp || !(candidate->resumeSp > aSp)) {
+        // Either a "stop here" marker, or not a caller of the given SP.
+        continue;
+      }
+      if (!closest || candidate->resumeSp < closest->resumeSp) {
+        // First candidate, or a closer one than the best so far.
+        closest = candidate;
+      }
+    }
+    return closest;
+  }
+
+ private:
+  size_t mResumePointCount = 0;
+  static constexpr size_t scMaxResumePointCount = 32;
+  ResumePoint mResumePoint[scMaxResumePointCount];
+
+#else
+ public:
+  static constexpr bool scIsSupported = false;
+  // Declarations only: code in discarded `if constexpr` branches is still
+  // checked by the compiler, so these names must exist even though they are
+  // never actually used on this configuration.
+  void Clear();
+  size_t ResumePointCount();
+  static constexpr size_t MaxResumePointCount();
+  void AddResumePoint(ResumePoint&& aResumePoint);
+  const ResumePoint* begin() const;
+  const ResumePoint* end() const;
+  const ResumePoint* GetResumePointCallingSp(void* aSp) const;
+#endif
+};
+
+// Make a copy of the JS stack into a JSFrame array, and return the number of
+// copied frames.
+// This copy is necessary since, like the native stack, the JS stack is iterated
+// youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks.
+//
+// - aIsSynchronous: selects how frames are extracted (see the loop below) and
+//   whether the collector's sample position is passed to the JS iterator.
+// - aThreadData: provides the JSContext of the thread being looked at.
+// - aRegs: PC/SP/LR/FP used to seed the JS profiling frame iterator.
+// - aJsFrames: output buffer, must not be null; the loop stops once
+//   MAX_JS_FRAMES entries have been written.
+// - aStackWalkControlIfSupported: if non-null (only allowed when
+//   StackWalkControl::scIsSupported), it receives one resume point for each
+//   iterated frame that reports C++ entry registers.
+//
+// Returns 0 without touching the buffer if the thread has no JSContext, if
+// profiling is not enabled for that context, or if the AutoWalkJSStack guard
+// did not grant permission to walk.
+static uint32_t ExtractJsFrames(
+    bool aIsSynchronous,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, ProfilerStackCollector& aCollector,
+    JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) {
+  MOZ_ASSERT(aJsFrames,
+             "ExtractJsFrames should only be called if there is a "
+             "JsFrameBuffer to fill.");
+
+  uint32_t jsFramesCount = 0;
+
+  // Only walk jit stack if profiling frame iterator is turned on.
+  JSContext* context = aThreadData.GetJSContext();
+  if (context && JS::IsProfilingEnabledForContext(context)) {
+    // Released automatically at the end of this scope.
+    AutoWalkJSStack autoWalkJSStack;
+
+    if (autoWalkJSStack.walkAllowed) {
+      JS::ProfilingFrameIterator::RegisterState registerState;
+      registerState.pc = aRegs.mPC;
+      registerState.sp = aRegs.mSP;
+      registerState.lr = aRegs.mLR;
+      registerState.fp = aRegs.mFP;
+
+      // Non-periodic sampling passes Nothing() as the buffer write position to
+      // ProfilingFrameIterator to avoid incorrectly resetting the buffer
+      // position of sampled JIT frames inside the JS engine.
+      Maybe<uint64_t> samplePosInBuffer;
+      if (!aIsSynchronous) {
+        // aCollector.SamplePositionInBuffer() will return Nothing() when
+        // profiler_suspend_and_sample_thread is called from the background hang
+        // reporter.
+        samplePosInBuffer = aCollector.SamplePositionInBuffer();
+      }
+
+      for (JS::ProfilingFrameIterator jsIter(context, registerState,
+                                             samplePosInBuffer);
+           !jsIter.done(); ++jsIter) {
+        if (aIsSynchronous || jsIter.isWasm()) {
+          // Synchronous samples and wasm frames go through extractStack(),
+          // which may add several entries at once.
+          jsFramesCount +=
+              jsIter.extractStack(aJsFrames, jsFramesCount, MAX_JS_FRAMES);
+          if (jsFramesCount == MAX_JS_FRAMES) {
+            // Buffer full. Note that this also skips the resume-point
+            // handling below for the current frame.
+            break;
+          }
+        } else {
+          // Otherwise store at most one label-less physical frame.
+          Maybe<JS::ProfilingFrameIterator::Frame> frame =
+              jsIter.getPhysicalFrameWithoutLabel();
+          if (frame.isSome()) {
+            aJsFrames[jsFramesCount++] = std::move(frame).ref();
+            if (jsFramesCount == MAX_JS_FRAMES) {
+              break;
+            }
+          }
+        }
+
+        if constexpr (StackWalkControl::scIsSupported) {
+          if (aStackWalkControlIfSupported) {
+            // If this frame reports C++ entry registers, record them as a
+            // resume point for the native stack walker.
+            jsIter.getCppEntryRegisters().apply(
+                [&](const JS::ProfilingFrameIterator::RegisterState&
+                        aCppEntry) {
+                  StackWalkControl::ResumePoint resumePoint;
+                  resumePoint.resumeSp = aCppEntry.sp;
+                  resumePoint.resumeBp = aCppEntry.fp;
+                  resumePoint.resumePc = aCppEntry.pc;
+                  aStackWalkControlIfSupported->AddResumePoint(
+                      std::move(resumePoint));
+                });
+          }
+        } else {
+          MOZ_ASSERT(!aStackWalkControlIfSupported,
+                     "aStackWalkControlIfSupported should be null when "
+                     "!StackWalkControl::scIsSupported");
+          // Silences unused-parameter warnings when the assertion compiles
+          // to nothing.
+          (void)aStackWalkControlIfSupported;
+        }
+      }
+    }
+  }
+
+  return jsFramesCount;
+}
+
+// Merges the profiling stack, native stack, and JS stack, outputting the
+// details to aCollector.
+//
+// - aFeatures: not read by this function's body.
+// - aIsSynchronous: true for synchronously captured stacks; changes how JS
+//   frames are reported, stops the merge at the first PROFILER-category label
+//   frame, and skips the final JS context update.
+// - aThreadData: source of the profiling (label) stack and of the JSContext.
+// - aRegs: not read by this function's body.
+// - aNativeStack: native frames, youngest first.
+// - aCollector: receives the merged frames, oldest first.
+// - aJsFrames / aJsFramesCount: JS frames, youngest first; aJsFrames may be
+//   null only if aJsFramesCount is 0.
+//
+// Frames from the three sources are ordered by comparing their stack
+// addresses: at each step the remaining frame with the greatest address is
+// treated as the oldest and emitted next.
+static void MergeStacks(
+    uint32_t aFeatures, bool aIsSynchronous,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, const NativeStack& aNativeStack,
+    ProfilerStackCollector& aCollector, JsFrame* aJsFrames,
+    uint32_t aJsFramesCount) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  MOZ_ASSERT_IF(!aJsFrames, aJsFramesCount == 0);
+
+  const ProfilingStack& profilingStack = aThreadData.ProfilingStackCRef();
+  const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
+  uint32_t profilingStackFrameCount = profilingStack.stackSize();
+
+  // While the profiling stack array is ordered oldest-to-youngest, the JS and
+  // native arrays are ordered youngest-to-oldest. We must add frames to
+  // aCollector oldest-to-youngest. Thus, iterate over the profiling stack
+  // forwards and the JS and native arrays backwards. Note: this means the
+  // terminating condition for jsIndex and nativeIndex is being < 0.
+  uint32_t profilingStackIndex = 0;
+  int32_t jsIndex = aJsFramesCount - 1;
+  int32_t nativeIndex = aNativeStack.mCount - 1;
+
+  // Stack address of the most recent label or SP-marker frame seen so far; it
+  // is used as the address of every profiling stack frame considered after it.
+  uint8_t* lastLabelFrameStackAddr = nullptr;
+  // End stack address of the most recently emitted JS frame, if any.
+  uint8_t* jitEndStackAddr = nullptr;
+
+  // Iterate as long as there is at least one frame remaining.
+  while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
+         nativeIndex >= 0) {
+    // There are 1 to 3 frames available. Find and add the oldest.
+    // A null address below means "no candidate from that source".
+    uint8_t* profilingStackAddr = nullptr;
+    uint8_t* jsStackAddr = nullptr;
+    uint8_t* nativeStackAddr = nullptr;
+    uint8_t* jsActivationAddr = nullptr;
+
+    if (profilingStackIndex != profilingStackFrameCount) {
+      const js::ProfilingStackFrame& profilingStackFrame =
+          profilingStackFrames[profilingStackIndex];
+
+      if (profilingStackFrame.isLabelFrame() ||
+          profilingStackFrame.isSpMarkerFrame()) {
+        lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
+      }
+
+      // Skip any JS_OSR frames. Such frames are used when the JS interpreter
+      // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
+      // To avoid both the profiling stack frame and jit frame being recorded
+      // (and showing up twice), the interpreter marks the interpreter
+      // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
+      if (profilingStackFrame.isOSRFrame()) {
+        profilingStackIndex++;
+        continue;
+      }
+
+      MOZ_ASSERT(lastLabelFrameStackAddr);
+      profilingStackAddr = lastLabelFrameStackAddr;
+    }
+
+    if (jsIndex >= 0) {
+      jsStackAddr = (uint8_t*)aJsFrames[jsIndex].stackAddress;
+      jsActivationAddr = (uint8_t*)aJsFrames[jsIndex].activation;
+    }
+
+    if (nativeIndex >= 0) {
+      nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
+    }
+
+    // If there's a native stack frame which has the same SP as a profiling
+    // stack frame, pretend we didn't see the native stack frame.  Ditto for a
+    // native stack frame which has the same SP as a JS stack frame.  In effect
+    // this means profiling stack frames or JS frames trump conflicting native
+    // frames.
+    if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
+                            jsStackAddr == nativeStackAddr)) {
+      nativeStackAddr = nullptr;
+      nativeIndex--;
+      MOZ_ASSERT(profilingStackAddr || jsStackAddr);
+    }
+
+    // Sanity checks.
+    MOZ_ASSERT_IF(profilingStackAddr,
+                  profilingStackAddr != jsStackAddr &&
+                      profilingStackAddr != nativeStackAddr);
+    MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr &&
+                                   jsStackAddr != nativeStackAddr);
+    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr &&
+                                       nativeStackAddr != jsStackAddr);
+
+    // Check to see if profiling stack frame is top-most.
+    if (profilingStackAddr > jsStackAddr &&
+        profilingStackAddr > nativeStackAddr) {
+      MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
+      const js::ProfilingStackFrame& profilingStackFrame =
+          profilingStackFrames[profilingStackIndex];
+
+      // Sp marker frames are just annotations and should not be recorded in
+      // the profile.
+      if (!profilingStackFrame.isSpMarkerFrame()) {
+        // The JIT only allows the top-most frame to have a nullptr pc.
+        MOZ_ASSERT_IF(
+            profilingStackFrame.isJsFrame() && profilingStackFrame.script() &&
+                !profilingStackFrame.pc(),
+            &profilingStackFrame ==
+                &profilingStack.frames[profilingStack.stackSize() - 1]);
+        if (aIsSynchronous && profilingStackFrame.categoryPair() ==
+                                  JS::ProfilingCategoryPair::PROFILER) {
+          // For stacks captured synchronously (ie. marker stacks), stop
+          // walking the stack as soon as we enter the profiler category,
+          // to avoid showing profiler internal code in marker stacks.
+          return;
+        }
+        aCollector.CollectProfilingStackFrame(profilingStackFrame);
+      }
+      profilingStackIndex++;
+      continue;
+    }
+
+    // Check to see if JS jit stack frame is top-most
+    if (jsStackAddr > nativeStackAddr) {
+      MOZ_ASSERT(jsIndex >= 0);
+      const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex];
+      jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress;
+      // Stringifying non-wasm JIT frames is delayed until streaming time. To
+      // re-lookup the entry in the JitcodeGlobalTable, we need to store the
+      // JIT code address (OptInfoAddr) in the circular buffer.
+      //
+      // Note that we cannot do this when we are synchronously sampling the
+      // current thread; that is, when called from profiler_get_backtrace. The
+      // captured backtrace is usually externally stored for an indeterminate
+      // amount of time, such as in nsRefreshDriver. Problematically, the
+      // stored backtrace may be alive across a GC during which the profiler
+      // itself is disabled. In that case, the JS engine is free to discard its
+      // JIT code. This means that if we inserted such OptInfoAddr entries into
+      // the buffer, nsRefreshDriver would now be holding on to a backtrace
+      // with stale JIT code return addresses.
+      if (aIsSynchronous ||
+          jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
+        aCollector.CollectWasmFrame(jsFrame.label);
+      } else if (jsFrame.kind ==
+                 JS::ProfilingFrameIterator::Frame_BaselineInterpreter) {
+        // Materialize a ProfilingStackFrame similar to the C++ Interpreter. We
+        // also set the IS_BLINTERP_FRAME flag to differentiate though.
+        JSScript* script = jsFrame.interpreterScript;
+        jsbytecode* pc = jsFrame.interpreterPC();
+        js::ProfilingStackFrame stackFrame;
+        constexpr uint32_t ExtraFlags =
+            uint32_t(js::ProfilingStackFrame::Flags::IS_BLINTERP_FRAME);
+        stackFrame.initJsFrame<JS::ProfilingCategoryPair::JS_BaselineInterpret,
+                               ExtraFlags>("", jsFrame.label, script, pc,
+                                           jsFrame.realmID);
+        aCollector.CollectProfilingStackFrame(stackFrame);
+      } else {
+        MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
+                   jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
+        aCollector.CollectJitReturnAddr(jsFrame.returnAddress());
+      }
+
+      jsIndex--;
+      continue;
+    }
+
+    // If we reach here, there must be a native stack frame and it must be the
+    // greatest frame.
+    if (nativeStackAddr &&
+        // If the latest JS frame was JIT, this could be the native frame that
+        // corresponds to it. In that case, skip the native frame, because
+        // there's no need for the same frame to be present twice in the stack.
+        // The JS frame can be considered the symbolicated version of the native
+        // frame.
+        (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) &&
+        // This might still be a JIT operation, check to make sure that is not
+        // in range of the NEXT JavaScript's stacks' activation address.
+        (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
+      MOZ_ASSERT(nativeIndex >= 0);
+      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
+      aCollector.CollectNativeLeafAddr(addr);
+    }
+    // Whether or not it was collected, this native frame is now consumed.
+    if (nativeIndex >= 0) {
+      nativeIndex--;
+    }
+  }
+
+  // Update the JS context with the current profile sample buffer generation.
+  //
+  // Only do this for periodic samples. We don't want to do this for
+  // synchronous samples, and we also don't want to do it for calls to
+  // profiler_suspend_and_sample_thread() from the background hang reporter -
+  // in that case, aCollector.BufferRangeStart() will return Nothing().
+  if (!aIsSynchronous) {
+    aCollector.BufferRangeStart().apply(
+        [&aThreadData](uint64_t aBufferRangeStart) {
+          JSContext* context = aThreadData.GetJSContext();
+          if (context) {
+            JS::SetJSContextProfilerSampleBufferRangeStart(context,
+                                                           aBufferRangeStart);
+          }
+        });
+  }
+}
+
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+// Per-frame callback given to the stack walkers: appends one (PC, SP) pair to
+// the NativeStack passed through aClosure and bumps its frame count.
+// aFrameNumber is ignored. The caller must make sure there is room left; this
+// is only asserted.
+static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
+                              void* aClosure) {
+  NativeStack& stack = *static_cast<NativeStack*>(aClosure);
+  MOZ_ASSERT(stack.mCount < MAX_NATIVE_FRAMES);
+  const size_t slot = stack.mCount++;
+  stack.mSPs[slot] = aSP;
+  stack.mPCs[slot] = aPC;
+}
+#endif
+
+#if defined(USE_FRAME_POINTER_STACK_WALK)
+// Fill aNativeStack by following the chain of frame pointers, starting from
+// the registers in aRegs.
+//
+// - aThreadData: provides the top of the sampled thread's stack, used as the
+//   upper bound for the walk.
+// - aRegs: PC/SP/FP at the sampled point; not modified (a local copy is).
+// - aNativeStack: receives the frames, youngest first, at most
+//   MAX_NATIVE_FRAMES of them.
+// - aStackWalkControlIfSupported: optional resume points. When the walk stops
+//   and StackWalkControl is supported, the closest resume point above the last
+//   recorded SP is recorded as a frame and the walk restarts from it.
+static void DoFramePointerBacktrace(
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, NativeStack& aNativeStack,
+    StackWalkControl* aStackWalkControlIfSupported) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  // Make a local copy of the Registers, to allow modifications.
+  Registers regs = aRegs;
+
+  // Start with the current function. We use 0 as the frame number here because
+  // the FramePointerStackWalk() call below will use 1..N. This is a bit weird
+  // but it doesn't matter because StackWalkCallback() doesn't use the frame
+  // number argument.
+  StackWalkCallback(/* frameNum */ 0, regs.mPC, regs.mSP, &aNativeStack);
+
+  const void* const stackEnd = aThreadData.StackTop();
+
+  // This is to check forward-progress after using a resume point.
+  void* previousResumeSp = nullptr;
+
+  for (;;) {
+    // Only walk if SP <= FP <= stack top, with a non-null SP.
+    if (!(regs.mSP && regs.mSP <= regs.mFP && regs.mFP <= stackEnd)) {
+      break;
+    }
+    FramePointerStackWalk(StackWalkCallback,
+                          uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
+                          &aNativeStack, reinterpret_cast<void**>(regs.mFP),
+                          const_cast<void*>(stackEnd));
+
+    if constexpr (!StackWalkControl::scIsSupported) {
+      break;
+    } else {
+      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
+        // No room to add more frames.
+        break;
+      }
+      if (!aStackWalkControlIfSupported ||
+          aStackWalkControlIfSupported->ResumePointCount() == 0) {
+        // No resume information.
+        break;
+      }
+      void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
+      if (previousResumeSp &&
+          ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
+        // No progress after the previous resume point.
+        break;
+      }
+      const StackWalkControl::ResumePoint* resumePoint =
+          aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
+      if (!resumePoint) {
+        break;
+      }
+      void* sp = resumePoint->resumeSp;
+      if (!sp) {
+        // Null SP in a resume point means we stop here.
+        break;
+      }
+      void* pc = resumePoint->resumePc;
+      // Record the resume point itself as a frame. StackWalkCallback() already
+      // advances aNativeStack.mCount, so it must not be incremented again
+      // here: doing so would leave an unfilled slot in the arrays and could
+      // push mCount past MAX_NATIVE_FRAMES.
+      StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
+                        &aNativeStack);
+      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
+        break;
+      }
+      // Prepare context to resume stack walking.
+      regs.mPC = (Address)pc;
+      regs.mSP = (Address)sp;
+      regs.mFP = (Address)resumePoint->resumeBp;
+
+      previousResumeSp = sp;
+    }
+  }
+}
+#endif
+
+#if defined(USE_MOZ_STACK_WALK)
+// Fill aNativeStack using MozStackWalkThread() on the sampled thread.
+//
+// - aThreadData: provides the handle of the thread to walk.
+// - aRegs: PC/SP/FP at the sampled point; they become the first frame and,
+//   when StackWalkControl is supported, seed the CONTEXT given to the walker.
+// - aNativeStack: receives the frames, youngest first, at most
+//   MAX_NATIVE_FRAMES of them.
+// - aStackWalkControlIfSupported: optional resume points. When the walk stops
+//   and StackWalkControl is supported, the closest resume point above the last
+//   recorded SP is recorded as a frame and the walk restarts from it.
+static void DoMozStackWalkBacktrace(
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, NativeStack& aNativeStack,
+    StackWalkControl* aStackWalkControlIfSupported) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  // Start with the current function. We use 0 as the frame number here because
+  // the MozStackWalkThread() call below will use 1..N. This is a bit weird but
+  // it doesn't matter because StackWalkCallback() doesn't use the frame number
+  // argument.
+  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
+
+  HANDLE thread = aThreadData.PlatformDataCRef().ProfiledThread();
+  MOZ_ASSERT(thread);
+
+  // An explicit CONTEXT is only built when resume points are supported;
+  // otherwise a null context is passed to the walker.
+  CONTEXT context_buf;
+  CONTEXT* context = nullptr;
+  if constexpr (StackWalkControl::scIsSupported) {
+    context = &context_buf;
+    memset(&context_buf, 0, sizeof(CONTEXT));
+    context_buf.ContextFlags = CONTEXT_FULL;
+#  if defined(_M_AMD64)
+    context_buf.Rsp = (DWORD64)aRegs.mSP;
+    context_buf.Rbp = (DWORD64)aRegs.mFP;
+    context_buf.Rip = (DWORD64)aRegs.mPC;
+#  else
+    static_assert(!StackWalkControl::scIsSupported,
+                  "Mismatched support between StackWalkControl and "
+                  "DoMozStackWalkBacktrace");
+#  endif
+  } else {
+    context = nullptr;
+  }
+
+  // This is to check forward-progress after using a resume point.
+  void* previousResumeSp = nullptr;
+
+  for (;;) {
+    MozStackWalkThread(StackWalkCallback,
+                       uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
+                       &aNativeStack, thread, context);
+
+    if constexpr (!StackWalkControl::scIsSupported) {
+      break;
+    } else {
+      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
+        // No room to add more frames.
+        break;
+      }
+      if (!aStackWalkControlIfSupported ||
+          aStackWalkControlIfSupported->ResumePointCount() == 0) {
+        // No resume information.
+        break;
+      }
+      void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
+      if (previousResumeSp &&
+          ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
+        // No progress after the previous resume point.
+        break;
+      }
+      const StackWalkControl::ResumePoint* resumePoint =
+          aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
+      if (!resumePoint) {
+        break;
+      }
+      void* sp = resumePoint->resumeSp;
+      if (!sp) {
+        // Null SP in a resume point means we stop here.
+        break;
+      }
+      void* pc = resumePoint->resumePc;
+      // Record the resume point itself as a frame. StackWalkCallback() already
+      // advances aNativeStack.mCount, so it must not be incremented again
+      // here: doing so would leave an unfilled slot in the arrays and could
+      // push mCount past MAX_NATIVE_FRAMES.
+      StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
+                        &aNativeStack);
+      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
+        break;
+      }
+      // Prepare context to resume stack walking.
+      memset(&context_buf, 0, sizeof(CONTEXT));
+      context_buf.ContextFlags = CONTEXT_FULL;
+#  if defined(_M_AMD64)
+      context_buf.Rsp = (DWORD64)sp;
+      context_buf.Rbp = (DWORD64)resumePoint->resumeBp;
+      context_buf.Rip = (DWORD64)pc;
+#  else
+      static_assert(!StackWalkControl::scIsSupported,
+                    "Mismatched support between StackWalkControl and "
+                    "DoMozStackWalkBacktrace");
+#  endif
+      previousResumeSp = sp;
+    }
+  }
+}
+#endif
+
+#ifdef USE_EHABI_STACKWALK
+// Fill aNativeStack with the result of EHABIStackWalk(), run on the machine
+// context in aRegs and bounded by the sampled thread's stack top. The frame
+// count is overwritten with the walker's return value. Resume points are not
+// used yet.
+static void DoEHABIBacktrace(
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, NativeStack& aNativeStack,
+    StackWalkControl* aStackWalkControlIfSupported) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  (void)aStackWalkControlIfSupported;  // TODO: Implement.
+
+  void* const stackTop = const_cast<void*>(aThreadData.StackTop());
+  aNativeStack.mCount =
+      EHABIStackWalk(aRegs.mContext->uc_mcontext, stackTop, aNativeStack.mSPs,
+                     aNativeStack.mPCs, MAX_NATIVE_FRAMES);
+}
+#endif
+
+#ifdef USE_LUL_STACKWALK
+
+// See the comment at the callsite for why this function is necessary.
+#  if defined(MOZ_HAVE_ASAN_IGNORE)
+// Copy aLen bytes from aSrc to aDst, one byte at a time.
+MOZ_ASAN_IGNORE static void ASAN_memcpy(void* aDst, const void* aSrc,
+                                        size_t aLen) {
+  // Calling memcpy() would be the natural choice, but it does not work here:
+  // ASAN_memcpy() itself is exempt from ASAN instrumentation, yet memcpy()
+  // is not, so the false positive would still show up. The copy therefore has
+  // to be spelled out by hand inside this function.
+  char* out = static_cast<char*>(aDst);
+  const char* in = static_cast<const char*>(aSrc);
+
+  for (size_t remaining = aLen; remaining != 0; --remaining) {
+    *out++ = *in++;
+  }
+}
+#  endif
+
+static void DoLULBacktrace(
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, NativeStack& aNativeStack,
+    StackWalkControl* aStackWalkControlIfSupported) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  (void)aStackWalkControlIfSupported;  // TODO: Implement.
+
+  const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
+
+  lul::UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+
+#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
+  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
+  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
+#  elif defined(GP_PLAT_amd64_freebsd)
+  startRegs.xip = lul::TaggedUWord(mc->mc_rip);
+  startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
+  startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
+#  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
+  startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
+  startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
+  startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
+  startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
+  startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
+#  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+  startRegs.pc = lul::TaggedUWord(mc->pc);
+  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
+  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
+  startRegs.sp = lul::TaggedUWord(mc->sp);
+#  elif defined(GP_PLAT_arm64_freebsd)
+  startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
+  startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
+  startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
+  startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
+#  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
+  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
+  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
+#  elif defined(GP_PLAT_mips64_linux)
+  startRegs.pc = lul::TaggedUWord(mc->pc);
+  startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
+  startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
+#  else
+#    error "Unknown plat"
+#  endif
+
+  // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
+  // stack's registered top point.  Do some basic validity checks too.  This
+  // assumes that the TaggedUWord holding the stack pointer value is valid, but
+  // it should be, since it was constructed that way in the code just above.
+
+  // We could construct |stackImg| so that LUL reads directly from the stack in
+  // question, rather than from a copy of it.  That would reduce overhead and
+  // space use a bit.  However, it gives a problem with dynamic analysis tools
+  // (ASan, TSan, Valgrind) which is that such tools will report invalid or
+  // racing memory accesses, and such accesses will be reported deep inside LUL.
+  // By taking a copy here, we can either sanitise the copy (for Valgrind) or
+  // copy it using an unchecked memcpy (for ASan, TSan).  That way we don't have
+  // to try and suppress errors inside LUL.
+  //
+  // N_STACK_BYTES is set to 160KB.  This is big enough to hold all stacks
+  // observed in some minutes of testing, whilst keeping the size of this
+  // function (DoNativeBacktrace)'s frame reasonable.  Most stacks observed in
+  // practice are small, 4KB or less, and so the copy costs are insignificant
+  // compared to other profiler overhead.
+  //
+  // |stackImg| is allocated on this (the sampling thread's) stack.  That
+  // implies that the frame for this function is at least N_STACK_BYTES large.
+  // In general it would be considered unacceptable to have such a large frame
+  // on a stack, but it only exists for the unwinder thread, and so is not
+  // expected to be a problem.  Allocating it on the heap is troublesome because
+  // this function runs whilst the sampled thread is suspended, so any heap
+  // allocation risks deadlock.  Allocating it as a global variable is not
+  // thread safe, which would be a problem if we ever allow multiple sampler
+  // threads.  Hence allocating it on the stack seems to be the least-worst
+  // option.
+
+  lul::StackImage stackImg;
+
+  {
+#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
+      defined(GP_PLAT_amd64_freebsd)
+    uintptr_t rEDZONE_SIZE = 128;
+    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
+      defined(GP_PLAT_arm64_freebsd)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_mips64_linux)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+#  else
+#    error "Unknown plat"
+#  endif
+    uintptr_t end = reinterpret_cast<uintptr_t>(aThreadData.StackTop());
+    uintptr_t ws = sizeof(void*);
+    start &= ~(ws - 1);
+    end &= ~(ws - 1);
+    uintptr_t nToCopy = 0;
+    if (start < end) {
+      nToCopy = end - start;
+      if (nToCopy >= 1024u * 1024u) {
+        // start is abnormally far from end, possibly due to some special code
+        // that uses a separate stack elsewhere (e.g.: rr). In this case we just
+        // give up on this sample.
+        nToCopy = 0;
+      } else if (nToCopy > lul::N_STACK_BYTES) {
+        nToCopy = lul::N_STACK_BYTES;
+      }
+    }
+    MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+    stackImg.mLen = nToCopy;
+    stackImg.mStartAvma = start;
+    if (nToCopy > 0) {
+      // If this is a vanilla memcpy(), ASAN makes the following complaint:
+      //
+      //   ERROR: AddressSanitizer: stack-buffer-underflow ...
+      //   ...
+      //   HINT: this may be a false positive if your program uses some custom
+      //   stack unwind mechanism or swapcontext
+      //
+      // This code is very much a custom stack unwind mechanism! So we use an
+      // alternative memcpy() implementation that is ignored by ASAN.
+#  if defined(MOZ_HAVE_ASAN_IGNORE)
+      ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+#  else
+      memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+#  endif
+      (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
+    }
+  }
+
+  size_t framePointerFramesAcquired = 0;
+  lul::LUL* lul = CorePS::Lul();
+  MOZ_RELEASE_ASSERT(lul);
+  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
+              reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
+              &aNativeStack.mCount, &framePointerFramesAcquired,
+              MAX_NATIVE_FRAMES, &startRegs, &stackImg);
+
+  // Update stats in the LUL stats object.  Unfortunately this requires
+  // three global memory operations.
+  lul->mStats.mContext += 1;
+  lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
+  lul->mStats.mFP += framePointerFramesAcquired;
+}
+
+#endif
+
+#ifdef HAVE_NATIVE_UNWIND
+// Captures a native backtrace into aNativeStack by forwarding all arguments,
+// unchanged, to the single stack walker that was selected at compile time.
+// Exactly one of the USE_*_STACKWALK macros below must be defined, otherwise
+// the build fails with "Invalid configuration".
+static void DoNativeBacktrace(
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, NativeStack& aNativeStack,
+    StackWalkControl* aStackWalkControlIfSupported) {
+  // This method determines which stackwalker is used for periodic and
+  // synchronous samples. (Backtrace samples are treated differently, see
+  // profiler_suspend_and_sample_thread() for details). The only part of the
+  // ordering that matters is that LUL must precede FRAME_POINTER, because on
+  // Linux they can both be present.
+#  if defined(USE_LUL_STACKWALK)
+  DoLULBacktrace(aThreadData, aRegs, aNativeStack,
+                 aStackWalkControlIfSupported);
+#  elif defined(USE_EHABI_STACKWALK)
+  DoEHABIBacktrace(aThreadData, aRegs, aNativeStack,
+                   aStackWalkControlIfSupported);
+#  elif defined(USE_FRAME_POINTER_STACK_WALK)
+  DoFramePointerBacktrace(aThreadData, aRegs, aNativeStack,
+                          aStackWalkControlIfSupported);
+#  elif defined(USE_MOZ_STACK_WALK)
+  DoMozStackWalkBacktrace(aThreadData, aRegs, aNativeStack,
+                          aStackWalkControlIfSupported);
+#  else
+#    error "Invalid configuration"
+#  endif
+}
+#endif
+
+// Writes some components shared by periodic and synchronous profiles to
+// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
+// and DoPeriodicSample().)
+//
+// The grammar for entry sequences is in a comment above
+// ProfileBuffer::StreamSamplesToJSON.
+//
+// Parameters:
+// - aIsSynchronous: forwarded to ExtractJsFrames() and MergeStacks().
+// - aFeatures: profiler feature bits; consulted for the stack-walk feature
+//   and forwarded to MergeStacks().
+// - aThreadData, aRegs: forwarded to the JS frame extraction, the native
+//   stack walker and MergeStacks(); aRegs.mPC is also used as the leaf
+//   address when no native walk is performed.
+// - aJsFrames: buffer handed to ExtractJsFrames(), or nullptr to skip JS
+//   frame extraction entirely (the JS frame count is then 0).
+// - aSamplePos, aBufferRangeStart: forwarded to the ProfileBufferCollector.
+// - aBuffer: destination buffer; asserted not to be thread-safe, because
+//   mutexes cannot be used in the critical section this runs in.
+// - aCaptureOptions: native frames, and the fallback leaf address, are only
+//   recorded when this is StackCaptureOptions::Full.
+static inline void DoSharedSample(
+    bool aIsSynchronous, uint32_t aFeatures,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    JsFrame* aJsFrames, const Registers& aRegs, uint64_t aSamplePos,
+    uint64_t aBufferRangeStart, ProfileBuffer& aBuffer,
+    StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(!aBuffer.IsThreadSafe(),
+             "Mutexes cannot be used inside this critical section");
+
+  ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart);
+  // Stays null unless native unwinding is compiled in, stack-walk control is
+  // supported, and a native capture was actually requested.
+  StackWalkControl* stackWalkControlIfSupported = nullptr;
+#if defined(HAVE_NATIVE_UNWIND)
+  const bool captureNative = ProfilerFeature::HasStackWalk(aFeatures) &&
+                             aCaptureOptions == StackCaptureOptions::Full;
+  StackWalkControl stackWalkControl;
+  if constexpr (StackWalkControl::scIsSupported) {
+    if (captureNative) {
+      stackWalkControlIfSupported = &stackWalkControl;
+    }
+  }
+#endif  // defined(HAVE_NATIVE_UNWIND)
+  // JS frames are extracted before the native walk; the same (possibly null)
+  // stack-walk control object is then handed on to the native walker.
+  const uint32_t jsFramesCount =
+      aJsFrames ? ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector,
+                                  aJsFrames, stackWalkControlIfSupported)
+                : 0;
+  NativeStack nativeStack;
+#if defined(HAVE_NATIVE_UNWIND)
+  if (captureNative) {
+    DoNativeBacktrace(aThreadData, aRegs, nativeStack,
+                      stackWalkControlIfSupported);
+
+    MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
+                collector, aJsFrames, jsFramesCount);
+  } else
+#endif
+  // Note: without HAVE_NATIVE_UNWIND the `if`/`else` above is compiled out
+  // and this block always runs, with a default-constructed nativeStack.
+  {
+    MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
+                collector, aJsFrames, jsFramesCount);
+
+    // We can't walk the whole native stack, but we can record the top frame.
+    if (aCaptureOptions == StackCaptureOptions::Full) {
+      aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
+    }
+  }
+}
+
+// Records one synchronous sample for the thread described by aThreadData into
+// aBuffer: a thread-id entry, a time entry relative to the process start, and
+// then the stack components written by DoSharedSample().
+//
+// aCaptureOptions must not be StackCaptureOptions::NoStack.
+static void DoSyncSample(
+    uint32_t aFeatures,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const TimeStamp& aNow, const Registers& aRegs, ProfileBuffer& aBuffer,
+    StackCaptureOptions aCaptureOptions) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack,
+             "DoSyncSample should not be called when no capture is needed");
+
+  const uint64_t rangeStart = aBuffer.BufferRangeStart();
+  const uint64_t threadIdEntryPos =
+      aBuffer.AddThreadIdEntry(aThreadData.Info().ThreadId());
+
+  const TimeDuration sinceProcessStart = aNow - CorePS::ProcessStartTime();
+  aBuffer.AddEntry(
+      ProfileBufferEntry::Time(sinceProcessStart.ToMilliseconds()));
+
+  // The remainder of the sample is written the same way in both cases below;
+  // only the JS frame buffer that is handed over differs.
+  auto writeSharedPart = [&](JsFrame* aJsFrames) {
+    DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData,
+                   aJsFrames, aRegs, threadIdEntryPos, rangeStart, aBuffer,
+                   aCaptureOptions);
+  };
+
+  if (!aThreadData.GetJSContext()) {
+    // No JSContext, there is no JS frame buffer (and no need for it).
+    writeSharedPart(/* aJsFrames = */ nullptr);
+    return;
+  }
+
+  // JSContext is present, we need to lock the thread data to access the JS
+  // frame buffer.
+  ThreadRegistration::WithOnThreadRef(
+      [&](ThreadRegistration::OnThreadRef aOnThreadRef) {
+        aOnThreadRef.WithConstLockedRWOnThread(
+            [&](const ThreadRegistration::LockedRWOnThread& aLockedData) {
+              writeSharedPart(aLockedData.GetJsFrameBuffer());
+            });
+      });
+}
+
+// Writes the stack components of a periodic sample to `aBuffer`.
+// The ThreadId entry for this sample has already been written to the main
+// ProfileBuffer at `aSamplePos`; `aBuffer` may be a different buffer.
+// Requires an active profiler session (release-asserted).
+static inline void DoPeriodicSample(
+    PSLockRef aLock,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
+    ProfileBuffer& aBuffer) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
+
+  // Periodic samples use the JS frame buffer owned by CorePS and the feature
+  // set of the active session.
+  JsFrameBuffer& sharedJsFrames = CorePS::JsFrames(aLock);
+  const uint32_t activeFeatures = ActivePS::Features(aLock);
+
+  DoSharedSample(/* aIsSynchronous = */ false, activeFeatures, aThreadData,
+                 sharedJsFrames, aRegs, aSamplePos, aBufferRangeStart,
+                 aBuffer);
+}
+
+// END sampling/unwinding code
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN saving/streaming code
+
+// Largest integer that a JavaScript number can represent exactly: 2^53 - 1.
+static const uint64_t kJS_MAX_SAFE_UINTEGER = 9007199254740991ULL;
+
+// Converts aValue to a signed integer that is safe to emit as a JSON number.
+// Values above kJS_MAX_SAFE_UINTEGER cannot be represented exactly on the
+// JavaScript side and are reported as -1 instead.
+static int64_t SafeJSInteger(uint64_t aValue) {
+  if (aValue > kJS_MAX_SAFE_UINTEGER) {
+    return -1;
+  }
+  return static_cast<int64_t>(aValue);
+}
+
+// Appends one JSON object element describing aLib to aWriter. The start, end
+// and offset values go through SafeJSInteger(), so a value too large for a
+// JavaScript number is written as -1. Module and debug names/paths are
+// converted from UTF-16 to UTF-8 before being written.
+static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
+                                         const SharedLibrary& aLib) {
+  // Gather everything that needs a conversion first...
+  const int64_t startAddress = SafeJSInteger(aLib.GetStart());
+  const int64_t endAddress = SafeJSInteger(aLib.GetEnd());
+  const int64_t offset = SafeJSInteger(aLib.GetOffset());
+  const NS_ConvertUTF16toUTF8 moduleName(aLib.GetModuleName());
+  const NS_ConvertUTF16toUTF8 modulePath(aLib.GetModulePath());
+  const NS_ConvertUTF16toUTF8 debugName(aLib.GetDebugName());
+  const NS_ConvertUTF16toUTF8 debugPath(aLib.GetDebugPath());
+
+  // ...then emit the properties in their established order.
+  aWriter.StartObjectElement();
+  aWriter.IntProperty("start", startAddress);
+  aWriter.IntProperty("end", endAddress);
+  aWriter.IntProperty("offset", offset);
+  aWriter.StringProperty("name", moduleName);
+  aWriter.StringProperty("path", modulePath);
+  aWriter.StringProperty("debugName", debugName);
+  aWriter.StringProperty("debugPath", debugPath);
+  aWriter.StringProperty("breakpadId", aLib.GetBreakpadId());
+  aWriter.StringProperty("codeId", aLib.GetCodeId());
+  aWriter.StringProperty("arch", aLib.GetArch());
+  aWriter.EndObject();
+}
+
+// Streams every entry of aInfo to aWriter, in index order, using
+// AddSharedLibraryInfoToStream() for each one.
+void AppendSharedLibraries(JSONWriter& aWriter,
+                           const SharedLibraryInfo& aInfo) {
+  const size_t libraryCount = aInfo.GetSize();
+  for (size_t index = 0; index != libraryCount; ++index) {
+    AddSharedLibraryInfoToStream(aWriter, aInfo.GetEntry(index));
+  }
+}
+
+// Streams one JSON object per profiling category into aWriter. The caller is
+// expected to have opened the enclosing array (presumably "categories" —
+// this function itself only writes the elements).
+static void StreamCategories(SpliceableJSONWriter& aWriter) {
+  // Same order as ProfilingCategory. Format:
+  // [
+  //   {
+  //     name: "Idle",
+  //     color: "transparent",
+  //     subcategories: ["Other"],
+  //   },
+  //   {
+  //     name: "Other",
+  //     color: "grey",
+  //     subcategories: [
+  //       "JSM loading",
+  //       "Subprocess launching",
+  //       "DLL loading"
+  //     ]
+  //   },
+  //   ...
+  // ]
+
+  // The three macros below are passed to MOZ_PROFILING_CATEGORY_LIST, which
+  // expands them once per category / subcategory:
+  // - BEGIN_CATEGORY opens the category object, writes its "name" and
+  //   "color", and opens the "subcategories" array;
+  // - SUBCATEGORY writes one subcategory label as an array element;
+  // - END_CATEGORY closes the "subcategories" array and the category object.
+#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
+  aWriter.Start();                                               \
+  aWriter.StringProperty("name", labelAsString);                 \
+  aWriter.StringProperty("color", color);                        \
+  aWriter.StartArrayProperty("subcategories");
+#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
+  aWriter.StringElement(labelAsString);
+#define CATEGORY_JSON_END_CATEGORY \
+  aWriter.EndArray();              \
+  aWriter.EndObject();
+
+  MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
+                              CATEGORY_JSON_SUBCATEGORY,
+                              CATEGORY_JSON_END_CATEGORY)
+
+  // Keep the helper macros local to this function.
+#undef CATEGORY_JSON_BEGIN_CATEGORY
+#undef CATEGORY_JSON_SUBCATEGORY
+#undef CATEGORY_JSON_END_CATEGORY
+}
+
+// Streams the display schema of every registered marker type into aWriter,
+// writing each distinct marker type name only once, and then lets the Rust
+// side stream its own schemas with the same de-duplication set.
+static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) {
+  // Array view over all registered marker-type-specific functions.
+  base_profiler_markers_detail::Streaming::LockedMarkerTypeFunctionsList
+      registeredMarkerTypes;
+  // Marker type names streamed so far. Duplicates may come from the same code
+  // potentially living in different libraries.
+  std::set<std::string> streamedNames;
+
+  for (const auto& typeFunctions : registeredMarkerTypes) {
+    auto typeName = typeFunctions.mMarkerTypeNameFunction();
+    // std::set::insert() returns a pair whose `second` is false when the
+    // element was already present, i.e. this schema has been streamed before.
+    const auto insertion =
+        streamedNames.insert(std::string(typeName.data(), typeName.size()));
+    if (!insertion.second) {
+      continue;
+    }
+    typeFunctions.mMarkerSchemaFunction().Stream(aWriter, typeName);
+  }
+
+  // Now stream the Rust marker schemas. The set of names is passed along as a
+  // void pointer so the Rust side can keep skipping schemas that were already
+  // added.
+  profiler::ffi::gecko_profiler_stream_marker_schemas(
+      &aWriter, static_cast<void*>(&streamedNames));
+}
+
+// Some meta information that is better recorded before streaming the profile.
+// This is *not* intended to be cached, as some values could change between
+// profiling sessions.
+//
+// Filled in by PreRecordMetaInformation() and consumed by
+// StreamMetaJSCustomObject(), which skips string fields that are empty and
+// CPU counts that are not positive.
+struct PreRecordedMetaInformation {
+  // Value of the "javascript.options.asyncstack" preference.
+  bool mAsyncStacks;
+
+  // This struct should only live on the stack, so it's fine to use Auto
+  // strings.
+
+  // Values related to the HTTP protocol handler; streamed as "platform",
+  // "oscpu" and "misc" respectively.
+  nsAutoCString mHttpPlatform;
+  nsAutoCString mHttpOscpu;
+  nsAutoCString mHttpMisc;
+
+  // Values from nsIXULRuntime; streamed as "abi" and "toolkit".
+  nsAutoCString mRuntimeABI;
+  nsAutoCString mRuntimeToolkit;
+
+  // Values from nsIXULAppInfo; streamed as "product", "appBuildID" and
+  // "sourceURL".
+  nsAutoCString mAppInfoProduct;
+  nsAutoCString mAppInfoAppBuildID;
+  nsAutoCString mAppInfoSourceURL;
+
+  // Values from CollectProcessInfo(); streamed as "logicalCPUs",
+  // "physicalCPUs" and "CPUName".
+  int32_t mProcessInfoCpuCount;
+  int32_t mProcessInfoCpuCores;
+  nsAutoCString mProcessInfoCpuName;
+};
+
+// This function should be called out of the profiler lock.
+// It gathers non-trivial data that doesn't require the profiler to stop, or for
+// which the request could theoretically deadlock if the profiler is locked.
+//
+// Returns a PreRecordedMetaInformation by value. Off the main thread, the
+// returned struct is left value-initialized (empty strings, zeroes, false).
+// On the main thread, each group of fields is only filled in when the
+// corresponding service or query succeeds.
+static PreRecordedMetaInformation PreRecordMetaInformation() {
+  MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
+
+  PreRecordedMetaInformation info = {};  // Aggregate-init all fields.
+
+  if (!NS_IsMainThread()) {
+    // Leave these properties out if we're not on the main thread.
+    // At the moment, the only case in which this function is called on a
+    // background thread is if we're in a content process and are going to
+    // send this profile to the parent process. In that case, the parent
+    // process profile's "meta" object already has the rest of the properties,
+    // and the parent process profile is dumped on that process's main thread.
+    return info;
+  }
+
+  info.mAsyncStacks = Preferences::GetBool("javascript.options.asyncstack");
+
+  nsresult res;
+
+  // HTTP-handler-derived fields are only recorded when the service lookup
+  // succeeded and returned a non-null handler.
+  if (nsCOMPtr<nsIHttpProtocolHandler> http =
+          do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res);
+      !NS_FAILED(res) && http) {
+    Unused << http->GetPlatform(info.mHttpPlatform);
+
+    // Note on the structure below: each platform-specific branch ends with a
+    // dangling `else` just before its `#endif`, so the generic GetOscpu()
+    // block at the end is the fallback when the platform-specific version
+    // query did not succeed, and the only code compiled on other platforms.
+#if defined(GP_OS_darwin)
+    // On Mac, the http "oscpu" is capped at 10.15, so we need to get the real
+    // OS version directly.
+    int major = 0;
+    int minor = 0;
+    int bugfix = 0;
+    nsCocoaFeatures::GetSystemVersion(major, minor, bugfix);
+    if (major != 0) {
+      info.mHttpOscpu.AppendLiteral("macOS ");
+      info.mHttpOscpu.AppendInt(major);
+      info.mHttpOscpu.AppendLiteral(".");
+      info.mHttpOscpu.AppendInt(minor);
+      info.mHttpOscpu.AppendLiteral(".");
+      info.mHttpOscpu.AppendInt(bugfix);
+    } else
+#endif
+#if defined(GP_OS_windows)
+      // On Windows, the http "oscpu" is capped at Windows 10, so we need to get
+      // the real OS version directly.
+      OSVERSIONINFO ovi = {sizeof(OSVERSIONINFO)};
+    if (GetVersionEx(&ovi)) {
+      info.mHttpOscpu.AppendLiteral("Windows ");
+      // The major version returned for Windows 11 is 10, but we can
+      // identify it from the build number.
+      info.mHttpOscpu.AppendInt(
+          ovi.dwBuildNumber >= 22000 ? 11 : int32_t(ovi.dwMajorVersion));
+      info.mHttpOscpu.AppendLiteral(".");
+      info.mHttpOscpu.AppendInt(int32_t(ovi.dwMinorVersion));
+#  if defined(_ARM64_)
+      info.mHttpOscpu.AppendLiteral(" Arm64");
+#  endif
+      info.mHttpOscpu.AppendLiteral("; build=");
+      info.mHttpOscpu.AppendInt(int32_t(ovi.dwBuildNumber));
+    } else
+#endif
+    {
+      Unused << http->GetOscpu(info.mHttpOscpu);
+    }
+
+    // Firefox version is capped to 109.0 in the http "misc" field due to some
+    // webcompat issues (Bug 1805967). We need to put the real version instead.
+    info.mHttpMisc.AssignLiteral("rv:");
+    info.mHttpMisc.AppendLiteral(MOZILLA_UAVERSION);
+  }
+
+  // XPCOM ABI and widget toolkit, when the runtime service is available.
+  if (nsCOMPtr<nsIXULRuntime> runtime =
+          do_GetService("@mozilla.org/xre/runtime;1");
+      runtime) {
+    Unused << runtime->GetXPCOMABI(info.mRuntimeABI);
+    Unused << runtime->GetWidgetToolkit(info.mRuntimeToolkit);
+  }
+
+  // Product name, build ID and source URL, when the app-info service is
+  // available.
+  if (nsCOMPtr<nsIXULAppInfo> appInfo =
+          do_GetService("@mozilla.org/xre/app-info;1");
+      appInfo) {
+    Unused << appInfo->GetName(info.mAppInfoProduct);
+    Unused << appInfo->GetAppBuildID(info.mAppInfoAppBuildID);
+    Unused << appInfo->GetSourceURL(info.mAppInfoSourceURL);
+  }
+
+  // CPU information is only copied over when CollectProcessInfo() succeeds;
+  // otherwise the corresponding fields keep their zero/empty initial values.
+  ProcessInfo processInfo = {};  // Aggregate-init all fields to false/zeroes.
+  if (NS_SUCCEEDED(CollectProcessInfo(processInfo))) {
+    info.mProcessInfoCpuCount = processInfo.cpuCount;
+    info.mProcessInfoCpuCores = processInfo.cpuCores;
+    info.mProcessInfoCpuName = processInfo.cpuName;
+  }
+
+  return info;
+}
+
+// Implemented in platform-specific cpps, to add object properties describing
+// the units of CPU measurements in samples.
+// Only declared here; it is expected to write properties (not elements) into
+// the JSON object that the caller has already opened on aWriter.
+static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
+                                          SpliceableJSONWriter& aWriter);
+
+// Writes the properties of the profile's "meta" information into aWriter
+// (the caller is expected to have opened the enclosing JSON object).
+//
+// Parameters:
+// - aLock: proof that the profiler lock is held; both CorePS and ActivePS
+//   must exist (release-asserted).
+// - aWriter: JSON writer receiving the properties.
+// - aIsShuttingDown: when true, "shutdownTime" is set to the profiling end
+//   time; otherwise it is written as null.
+// - aPreRecordedMetaInformation: values gathered beforehand, outside of the
+//   profiler lock; empty strings and non-positive CPU counts are omitted.
+//
+// Off the main thread, only the properties up to and including the active
+// configuration are written.
+static void StreamMetaJSCustomObject(
+    PSLockRef aLock, SpliceableJSONWriter& aWriter, bool aIsShuttingDown,
+    const PreRecordedMetaInformation& aPreRecordedMetaInformation) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  aWriter.IntProperty("version", 27);
+
+  // The "startTime" field holds the number of milliseconds since midnight
+  // January 1, 1970 GMT. This grotty code computes (Now - (Now -
+  // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
+  // Note: This is the only absolute time in the profile! All other timestamps
+  // are relative to this startTime.
+  TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime();
+  aWriter.DoubleProperty(
+      "startTime",
+      static_cast<double>(PR_Now() / 1000.0 - delta.ToMilliseconds()));
+
+  // Milliseconds between the process start and the start of profiling.
+  aWriter.DoubleProperty("profilingStartTime", (ActivePS::ProfilingStartTime() -
+                                                CorePS::ProcessStartTime())
+                                                   .ToMilliseconds());
+
+  // Start time of the earliest chunk still in the buffer, relative to the
+  // process start, or null when the buffer reports no such timestamp.
+  if (const TimeStamp contentEarliestTime =
+          ActivePS::Buffer(aLock)
+              .UnderlyingChunkedBuffer()
+              .GetEarliestChunkStartTimeStamp();
+      !contentEarliestTime.IsNull()) {
+    aWriter.DoubleProperty(
+        "contentEarliestTime",
+        (contentEarliestTime - CorePS::ProcessStartTime()).ToMilliseconds());
+  } else {
+    aWriter.NullProperty("contentEarliestTime");
+  }
+
+  // The same timestamp is used for "profilingEndTime" and, when shutting
+  // down, for "shutdownTime".
+  const double profilingEndTime = profiler_time();
+  aWriter.DoubleProperty("profilingEndTime", profilingEndTime);
+
+  if (aIsShuttingDown) {
+    aWriter.DoubleProperty("shutdownTime", profilingEndTime);
+  } else {
+    aWriter.NullProperty("shutdownTime");
+  }
+
+  aWriter.StartArrayProperty("categories");
+  StreamCategories(aWriter);
+  aWriter.EndArray();
+
+  aWriter.StartArrayProperty("markerSchema");
+  StreamMarkerSchema(aWriter);
+  aWriter.EndArray();
+
+  ActivePS::WriteActiveConfiguration(aLock, aWriter,
+                                     MakeStringSpan("configuration"));
+
+  if (!NS_IsMainThread()) {
+    // Leave the rest of the properties out if we're not on the main thread.
+    // At the moment, the only case in which this function is called on a
+    // background thread is if we're in a content process and are going to
+    // send this profile to the parent process. In that case, the parent
+    // process profile's "meta" object already has the rest of the properties,
+    // and the parent process profile is dumped on that process's main thread.
+    return;
+  }
+
+  aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
+  aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
+
+  // "debug" is 1 in DEBUG builds and 0 otherwise.
+#ifdef DEBUG
+  aWriter.IntProperty("debug", 1);
+#else
+  aWriter.IntProperty("debug", 0);
+#endif
+
+  aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 1 : 0);
+
+  aWriter.IntProperty("asyncstack", aPreRecordedMetaInformation.mAsyncStacks);
+
+  aWriter.IntProperty("processType", XRE_GetProcessType());
+
+  aWriter.StringProperty("updateChannel", MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL));
+
+  // Pre-recorded string values are only written when non-empty, so a missing
+  // property means that the value could not be recorded.
+  if (!aPreRecordedMetaInformation.mHttpPlatform.IsEmpty()) {
+    aWriter.StringProperty("platform",
+                           aPreRecordedMetaInformation.mHttpPlatform);
+  }
+  if (!aPreRecordedMetaInformation.mHttpOscpu.IsEmpty()) {
+    aWriter.StringProperty("oscpu", aPreRecordedMetaInformation.mHttpOscpu);
+  }
+  if (!aPreRecordedMetaInformation.mHttpMisc.IsEmpty()) {
+    aWriter.StringProperty("misc", aPreRecordedMetaInformation.mHttpMisc);
+  }
+
+  if (!aPreRecordedMetaInformation.mRuntimeABI.IsEmpty()) {
+    aWriter.StringProperty("abi", aPreRecordedMetaInformation.mRuntimeABI);
+  }
+  if (!aPreRecordedMetaInformation.mRuntimeToolkit.IsEmpty()) {
+    aWriter.StringProperty("toolkit",
+                           aPreRecordedMetaInformation.mRuntimeToolkit);
+  }
+
+  if (!aPreRecordedMetaInformation.mAppInfoProduct.IsEmpty()) {
+    aWriter.StringProperty("product",
+                           aPreRecordedMetaInformation.mAppInfoProduct);
+  }
+  if (!aPreRecordedMetaInformation.mAppInfoAppBuildID.IsEmpty()) {
+    aWriter.StringProperty("appBuildID",
+                           aPreRecordedMetaInformation.mAppInfoAppBuildID);
+  }
+  if (!aPreRecordedMetaInformation.mAppInfoSourceURL.IsEmpty()) {
+    aWriter.StringProperty("sourceURL",
+                           aPreRecordedMetaInformation.mAppInfoSourceURL);
+  }
+
+  // CPU name and counts; counts are only written when strictly positive.
+  if (!aPreRecordedMetaInformation.mProcessInfoCpuName.IsEmpty()) {
+    aWriter.StringProperty("CPUName",
+                           aPreRecordedMetaInformation.mProcessInfoCpuName);
+  }
+  if (aPreRecordedMetaInformation.mProcessInfoCpuCores > 0) {
+    aWriter.IntProperty("physicalCPUs",
+                        aPreRecordedMetaInformation.mProcessInfoCpuCores);
+  }
+  if (aPreRecordedMetaInformation.mProcessInfoCpuCount > 0) {
+    aWriter.IntProperty("logicalCPUs",
+                        aPreRecordedMetaInformation.mProcessInfoCpuCount);
+  }
+
+#if defined(GP_OS_android)
+  // NOTE(review): deviceInformation is dereferenced without a null check —
+  // confirm GetDeviceInformation() cannot return a null reference.
+  jni::String::LocalRef deviceInformation =
+      java::GeckoJavaSampler::GetDeviceInformation();
+  aWriter.StringProperty("device", deviceInformation->ToCString());
+#endif
+
+  // Units of the per-sample values; the platform-specific function adds the
+  // units of the CPU measurements.
+  aWriter.StartObjectProperty("sampleUnits");
+  {
+    aWriter.StringProperty("time", "ms");
+    aWriter.StringProperty("eventDelay", "ms");
+    StreamMetaPlatformSampleUnits(aLock, aWriter);
+  }
+  aWriter.EndObject();
+
+  // We should avoid collecting extension metadata for profiler when there is no
+  // observer service, since a ExtensionPolicyService could not be created then.
+  if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
+    aWriter.StartObjectProperty("extensions");
+    {
+      // The schema declares three columns per extension row: id, name and
+      // baseURL. The inner scope ends the schema writer before "data" starts.
+      {
+        JSONSchemaWriter schema(aWriter);
+        schema.WriteField("id");
+        schema.WriteField("name");
+        schema.WriteField("baseURL");
+      }
+
+      aWriter.StartArrayProperty("data");
+      {
+        nsTArray<RefPtr<WebExtensionPolicy>> exts;
+        ExtensionPolicyService::GetSingleton().GetAll(exts);
+
+        // One array element (row) per extension.
+        for (auto& ext : exts) {
+          aWriter.StartArrayElement();
+
+          nsAutoString id;
+          ext->GetId(id);
+          aWriter.StringElement(NS_ConvertUTF16toUTF8(id));
+
+          aWriter.StringElement(NS_ConvertUTF16toUTF8(ext->Name()));
+
+          // NOTE(review): when GetURL() fails, no third element is written,
+          // so this row has two elements although the schema declares three —
+          // confirm that consumers tolerate the shorter row.
+          auto url = ext->GetURL(u""_ns);
+          if (url.isOk()) {
+            aWriter.StringElement(NS_ConvertUTF16toUTF8(url.unwrap()));
+          }
+
+          aWriter.EndArray();
+        }
+      }
+      aWriter.EndArray();
+    }
+    aWriter.EndObject();
+  }
+}
+
+// Streams every page returned by ActivePS::ProfiledPages() into aWriter,
+// after first asking ActivePS to discard the pages that have expired.
+static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Drop expired pages first so that they are not part of the list below.
+  ActivePS::DiscardExpiredPages(aLock);
+
+  auto&& profiledPages = ActivePS::ProfiledPages(aLock);
+  for (const auto& profiledPage : profiledPages) {
+    profiledPage->StreamJSON(aWriter);
+  }
+}
+
+#if defined(GP_OS_android)
+// Returns true when `string` begins with the string literal `prefix`.
+//
+// N is the size of the literal's array, which includes the terminating null
+// character, so N - 1 characters are compared. The array bound is deduced as
+// size_t (rather than int) so that the length check compares two unsigned
+// values instead of mixing a signed int with the unsigned string length.
+template <size_t N>
+static bool StartsWith(const nsACString& string, const char (&prefix)[N]) {
+  constexpr size_t prefixLength = N - 1;
+  // A string shorter than the prefix cannot start with it; this also keeps
+  // the memcmp() below within the bounds of the string's data.
+  if (string.Length() < prefixLength) {
+    return false;
+  }
+  return memcmp(string.Data(), prefix, prefixLength) == 0;
+}
+
+// Infers a profiling category for a Java frame from its fully-qualified
+// name. Two exact method names are special-cased first (idle polling and
+// blocking wait); otherwise the category is chosen from the package prefix,
+// falling back to OTHER when no known prefix matches.
+//
+// The name is only inspected, never modified, so it is taken by const
+// reference; this also lets callers pass const strings and temporaries.
+static JS::ProfilingCategoryPair InferJavaCategory(const nsACString& aName) {
+  // Exact matches take priority over the package-prefix checks below (both
+  // names would otherwise match the "android." / "java." prefixes).
+  if (aName.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) {
+    return JS::ProfilingCategoryPair::IDLE;
+  }
+  if (aName.EqualsLiteral("java.lang.Object.wait()")) {
+    return JS::ProfilingCategoryPair::JAVA_BLOCKED;
+  }
+  if (StartsWith(aName, "android.") || StartsWith(aName, "com.android.")) {
+    return JS::ProfilingCategoryPair::JAVA_ANDROID;
+  }
+  if (StartsWith(aName, "mozilla.") || StartsWith(aName, "org.mozilla.")) {
+    return JS::ProfilingCategoryPair::JAVA_MOZILLA;
+  }
+  if (StartsWith(aName, "java.") || StartsWith(aName, "sun.") ||
+      StartsWith(aName, "com.sun.")) {
+    return JS::ProfilingCategoryPair::JAVA_LANGUAGE;
+  }
+  if (StartsWith(aName, "kotlin.") || StartsWith(aName, "kotlinx.")) {
+    return JS::ProfilingCategoryPair::JAVA_KOTLIN;
+  }
+  if (StartsWith(aName, "androidx.")) {
+    return JS::ProfilingCategoryPair::JAVA_ANDROIDX;
+  }
+  return JS::ProfilingCategoryPair::OTHER;
+}
+
+// Marker type for Java markers without any details.
+struct JavaMarker {
+  // Name under which this marker type is registered and streamed.
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("Java");
+  }
+  // This marker type carries no payload, so nothing is written.
+  static void StreamJSONMarkerData(
+      baseprofiler::SpliceableJSONWriter& aWriter) {}
+  // Display schema: shown in the timeline overview, the marker chart and the
+  // marker table, with the marker name used for all labels.
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart,
+              MS::Location::MarkerTable};
+    schema.SetAllLabels("{marker.name}");
+    return schema;
+  }
+};
+
+// Marker type for Java markers with a detail field.
+struct JavaMarkerWithDetails {
+  // Name under which this marker type is registered and streamed.
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("JavaWithDetails");
+  }
+  // Writes the detail text as the marker's single payload property.
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aText) {
+    // This (currently) needs to be called "name" to be searchable on the
+    // front-end.
+    aWriter.StringProperty("name", aText);
+  }
+  // Display schema: same three locations as JavaMarker, but the chart and
+  // table labels include the detail text, which is declared as a searchable
+  // string field labelled "Details".
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart,
+              MS::Location::MarkerTable};
+    schema.SetTooltipLabel("{marker.name}");
+    schema.SetChartLabel("{marker.data.name}");
+    schema.SetTableLabel("{marker.name} - {marker.data.name}");
+    schema.AddKeyLabelFormatSearchable("name", "Details", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    return schema;
+  }
+};
+
+// Transfers the data gathered by the Java sampler into native structures:
+// - appends the info of every registered Java thread to `javaThreads`;
+// - copies every Java sample (thread id, time, frames) into aProfileBuffer;
+// - polls all pending Java markers and adds them to aProfileBuffer.
+static void CollectJavaThreadProfileData(
+    nsTArray<java::GeckoJavaSampler::ThreadInfo::LocalRef>& javaThreads,
+    ProfileBuffer& aProfileBuffer) {
+  // Retrieve metadata about the threads.
+  const auto threadCount = java::GeckoJavaSampler::GetRegisteredThreadCount();
+  for (int i = 0; i < threadCount; i++) {
+    javaThreads.AppendElement(
+        java::GeckoJavaSampler::GetRegisteredThreadInfo(i));
+  }
+
+  // locked_profiler_start uses a sample count of 1000 for the Java thread.
+  // This entry size is enough for now, but we might have to estimate it if
+  // it ever becomes customizable.
+  // Pass the samples
+  int sampleId = 0;
+  while (true) {
+    const auto threadId = java::GeckoJavaSampler::GetThreadId(sampleId);
+    double sampleTime = java::GeckoJavaSampler::GetSampleTime(sampleId);
+    // A zero thread id or a zero sample time ends the sample loop.
+    if (threadId == 0 || sampleTime == 0.0) {
+      break;
+    }
+
+    aProfileBuffer.AddThreadIdEntry(ProfilerThreadId::FromNumber(threadId));
+    aProfileBuffer.AddEntry(ProfileBufferEntry::Time(sampleTime));
+    int frameId = 0;
+    while (true) {
+      jni::String::LocalRef frameName =
+          java::GeckoJavaSampler::GetFrameName(sampleId, frameId++);
+      // A null frame name ends the frames of this sample.
+      if (!frameName) {
+        break;
+      }
+      nsCString frameNameString = frameName->ToCString();
+
+      // Java frames carry no native label; the category is inferred from the
+      // frame name.
+      auto categoryPair = InferJavaCategory(frameNameString);
+      aProfileBuffer.CollectCodeLocation("", frameNameString.get(), 0, 0,
+                                         Nothing(), Nothing(),
+                                         Some(categoryPair));
+    }
+    sampleId++;
+  }
+
+  // Pass the markers now
+  while (true) {
+    // Gets the data from the Android UI thread only.
+    java::GeckoJavaSampler::Marker::LocalRef marker =
+        java::GeckoJavaSampler::PollNextMarker();
+    if (!marker) {
+      // All markers are transferred.
+      break;
+    }
+
+    // Get all the marker information from the Java thread using JNI.
+    // NOTE(review): GetMarkerName() is dereferenced without a null check —
+    // confirm that a marker always has a name.
+    const auto threadId = ProfilerThreadId::FromNumber(marker->GetThreadId());
+    nsCString markerName = marker->GetMarkerName()->ToCString();
+    jni::String::LocalRef text = marker->GetMarkerText();
+    // Marker times are millisecond offsets, converted here to TimeStamps
+    // relative to the process start time.
+    TimeStamp startTime =
+        CorePS::ProcessStartTime() +
+        TimeDuration::FromMilliseconds(marker->GetStartTime());
+
+    double endTimeMs = marker->GetEndTime();
+    // A marker can be either a duration with start and end, or a point in time
+    // with only startTime. If endTime is 0, this means it's a point in time.
+    TimeStamp endTime = endTimeMs == 0
+                            ? startTime
+                            : CorePS::ProcessStartTime() +
+                                  TimeDuration::FromMilliseconds(endTimeMs);
+    MarkerTiming timing = endTimeMs == 0
+                              ? MarkerTiming::InstantAt(startTime)
+                              : MarkerTiming::Interval(startTime, endTime);
+
+    // The marker type depends on whether a detail text is attached.
+    if (!text) {
+      // This marker doesn't have a text.
+      AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
+                        geckoprofiler::category::JAVA_ANDROID,
+                        {MarkerThreadId(threadId), std::move(timing)},
+                        JavaMarker{});
+    } else {
+      // This marker has a text.
+      AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
+                        geckoprofiler::category::JAVA_ANDROID,
+                        {MarkerThreadId(threadId), std::move(timing)},
+                        JavaMarkerWithDetails{}, text->ToCString());
+    }
+  }
+}
+#endif
+
+// Returns a new ProfilerCodeAddressService if pre-symbolication was requested
+// through the MOZ_PROFILER_SYMBOLICATE environment variable (i.e., it is set
+// to a non-empty value), or nullptr otherwise.
+// The environment variable is only read during the first call; the outcome is
+// remembered for all later calls.
+UniquePtr<ProfilerCodeAddressService>
+profiler_code_address_service_for_presymbolication() {
+  static const bool sShouldPreSymbolicate = [] {
+    const char* const envValue = getenv("MOZ_PROFILER_SYMBOLICATE");
+    if (!envValue) {
+      // Variable not present in the environment.
+      return false;
+    }
+    // Present: an empty value counts as "not set".
+    return envValue[0] != '\0';
+  }();
+
+  if (!sShouldPreSymbolicate) {
+    return nullptr;
+  }
+  return MakeUnique<ProfilerCodeAddressService>();
+}
+
+// Streams the profile of the current process into aWriter while the profiler
+// lock (aLock) is held. Release-asserts that both CorePS and ActivePS exist.
+//
+// Properties are written in this order: "libs", "meta", "pages", whatever the
+// buffer writes for profiler overheads and counters, "threads",
+// "pausedRanges", and "profilingLog".
+//
+// - aSinceTime is forwarded to the buffer/thread streaming functions.
+// - aPreRecordedMetaInformation and aIsShuttingDown are forwarded to
+//   StreamMetaJSCustomObject.
+// - aService is forwarded to the per-thread streaming code.
+// - aProgressLogger is updated as the different steps complete.
+//
+// Returns Err(ProfilerError::JsonGenerationFailed) as soon as a failure of
+// aWriter is noticed; otherwise returns the additional information (holding
+// the shared-library info that was gathered for "libs").
+static ProfilerResult<ProfileGenerationAdditionalInformation>
+locked_profiler_stream_json_for_this_process(
+    PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
+    const PreRecordedMetaInformation& aPreRecordedMetaInformation,
+    bool aIsShuttingDown, ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger) {
+  LOG("locked_profiler_stream_json_for_this_process");
+
+#ifdef DEBUG
+  // Debug-only test hooks, triggered by special thread filters in child
+  // processes, that artificially slow down the JSON generation.
+  PRIntervalTime slowWithSleeps = 0;
+  if (!XRE_IsParentProcess()) {
+    for (const auto& filter : ActivePS::Filters(aLock)) {
+      if (filter == "test-debug-child-slow-json") {
+        LOG("test-debug-child-slow-json");
+        // There are 10 slow-downs below, each will sleep 250ms, for a total of
+        // 2.5s, which should trigger the first progress request after 1s, and
+        // the next progress which will have advanced further, so this profile
+        // shouldn't get dropped.
+        slowWithSleeps = PR_MillisecondsToInterval(250);
+      } else if (filter == "test-debug-child-very-slow-json") {
+        LOG("test-debug-child-very-slow-json");
+        // Wait for more than 2s without any progress, which should get this
+        // profile discarded.
+        PR_Sleep(PR_SecondsToInterval(5));
+      }
+    }
+  }
+#  define SLOW_DOWN_FOR_TESTING()                                        \
+    if (slowWithSleeps != 0) {                                           \
+      DEBUG_LOG("progress=%.0f%%, sleep...",                             \
+                aProgressLogger.GetGlobalProgress().ToDouble() * 100.0); \
+      PR_Sleep(slowWithSleeps);                                          \
+    }
+#else                             // #ifdef DEBUG
+#  define SLOW_DOWN_FOR_TESTING() /* No slow-downs */
+#endif                            // #ifdef DEBUG #else
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  AUTO_PROFILER_STATS(locked_profiler_stream_json_for_this_process);
+
+  // Remembered so that the collection start/end can be recorded in the buffer
+  // at the end of this function.
+  const double collectionStartMs = profiler_time();
+
+  ProfileBuffer& buffer = ActivePS::Buffer(aLock);
+
+  aProgressLogger.SetLocalProgress(1_pc, "Locked profile buffer");
+
+  SLOW_DOWN_FOR_TESTING();
+
+  // If there is a set "Window length", discard older data.
+  Maybe<double> durationS = ActivePS::Duration(aLock);
+  if (durationS.isSome()) {
+    const double durationStartMs = collectionStartMs - *durationS * 1000;
+    buffer.DiscardSamplesBeforeTime(durationStartMs);
+  }
+  aProgressLogger.SetLocalProgress(2_pc, "Discarded old data");
+
+  if (aWriter.Failed()) {
+    return Err(ProfilerError::JsonGenerationFailed);
+  }
+  SLOW_DOWN_FOR_TESTING();
+
+#if defined(GP_OS_android)
+  // Java thread profile data should be collected before serializing the meta
+  // object. This is because Java thread adds some markers with marker schema
+  // objects. And these objects should be added before the serialization of the
+  // `profile.meta.markerSchema` array, so these marker schema objects can also
+  // be serialized properly. That's why java thread profile data needs to be
+  // done before everything.
+
+  // We are allocating it chunk by chunk. So this will not allocate 64 MiB
+  // at once. This size should be more than enough for java threads.
+  // This buffer is being created for each process but Android has
+  // relatively fewer processes compared to desktop, so it's okay here.
+  mozilla::ProfileBufferChunkManagerWithLocalLimit javaChunkManager(
+      64 * 1024 * 1024, 1024 * 1024);
+  ProfileChunkedBuffer javaBufferManager(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex, javaChunkManager);
+  ProfileBuffer javaBuffer(javaBufferManager);
+
+  nsTArray<java::GeckoJavaSampler::ThreadInfo::LocalRef> javaThreads;
+
+  if (ActivePS::FeatureJava(aLock)) {
+    CollectJavaThreadProfileData(javaThreads, javaBuffer);
+    aProgressLogger.SetLocalProgress(3_pc, "Collected Java thread");
+  }
+#endif
+
+  // Put shared library info
+  aWriter.StartArrayProperty("libs");
+  SharedLibraryInfo sharedLibraryInfo = SharedLibraryInfo::GetInfoForSelf();
+  sharedLibraryInfo.SortByAddress();
+  AppendSharedLibraries(aWriter, sharedLibraryInfo);
+  aWriter.EndArray();
+  aProgressLogger.SetLocalProgress(4_pc, "Wrote library information");
+
+  if (aWriter.Failed()) {
+    return Err(ProfilerError::JsonGenerationFailed);
+  }
+  SLOW_DOWN_FOR_TESTING();
+
+  // Put meta data
+  aWriter.StartObjectProperty("meta");
+  {
+    StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown,
+                             aPreRecordedMetaInformation);
+  }
+  aWriter.EndObject();
+  aProgressLogger.SetLocalProgress(5_pc, "Wrote profile metadata");
+
+  if (aWriter.Failed()) {
+    return Err(ProfilerError::JsonGenerationFailed);
+  }
+  SLOW_DOWN_FOR_TESTING();
+
+  // Put page data
+  aWriter.StartArrayProperty("pages");
+  { StreamPages(aLock, aWriter); }
+  aWriter.EndArray();
+  aProgressLogger.SetLocalProgress(6_pc, "Wrote pages");
+
+  buffer.StreamProfilerOverheadToJSON(
+      aWriter, CorePS::ProcessStartTime(), aSinceTime,
+      aProgressLogger.CreateSubLoggerTo(10_pc, "Wrote profiler overheads"));
+
+  buffer.StreamCountersToJSON(
+      aWriter, CorePS::ProcessStartTime(), aSinceTime,
+      aProgressLogger.CreateSubLoggerTo(14_pc, "Wrote counters"));
+
+  if (aWriter.Failed()) {
+    return Err(ProfilerError::JsonGenerationFailed);
+  }
+  SLOW_DOWN_FOR_TESTING();
+
+  // Lists the samples for each thread profile
+  aWriter.StartArrayProperty("threads");
+  {
+    ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
+    aProgressLogger.SetLocalProgress(15_pc, "Discarded expired profiles");
+
+    ThreadRegistry::LockedRegistry lockedRegistry;
+    ActivePS::ProfiledThreadList threads =
+        ActivePS::ProfiledThreads(lockedRegistry, aLock);
+
+    const uint32_t threadCount = uint32_t(threads.length());
+
+    if (aWriter.Failed()) {
+      return Err(ProfilerError::JsonGenerationFailed);
+    }
+    SLOW_DOWN_FOR_TESTING();
+
+    // Prepare the streaming context for each thread.
+    ProcessStreamingContext processStreamingContext(
+        threadCount, aWriter.SourceFailureLatch(), CorePS::ProcessStartTime(),
+        aSinceTime);
+    for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
+             20_pc, threadCount, "Preparing thread streaming contexts...")) {
+      ActivePS::ProfiledThreadListElement& thread = threads[i];
+      MOZ_RELEASE_ASSERT(thread.mProfiledThreadData);
+      processStreamingContext.AddThreadStreamingContext(
+          *thread.mProfiledThreadData, buffer, thread.mJSContext, aService,
+          std::move(progressLogger));
+      if (aWriter.Failed()) {
+        return Err(ProfilerError::JsonGenerationFailed);
+      }
+    }
+
+    SLOW_DOWN_FOR_TESTING();
+
+    // Read the buffer once, and extract all samples and markers that the
+    // context expects.
+    buffer.StreamSamplesAndMarkersToJSON(
+        processStreamingContext, aProgressLogger.CreateSubLoggerTo(
+                                     "Processing samples and markers...", 80_pc,
+                                     "Processed samples and markers"));
+
+    if (aWriter.Failed()) {
+      return Err(ProfilerError::JsonGenerationFailed);
+    }
+    SLOW_DOWN_FOR_TESTING();
+
+    // Stream each thread from the pre-filled context.
+    ThreadStreamingContext* const contextListBegin =
+        processStreamingContext.begin();
+    MOZ_ASSERT(uint32_t(processStreamingContext.end() - contextListBegin) ==
+               threadCount);
+    for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
+             92_pc, threadCount, "Streaming threads...")) {
+      ThreadStreamingContext& threadStreamingContext = contextListBegin[i];
+      threadStreamingContext.FinalizeWriter();
+      threadStreamingContext.mProfiledThreadData.StreamJSON(
+          std::move(threadStreamingContext), aWriter,
+          CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock),
+          CorePS::ProcessStartTime(), aService, std::move(progressLogger));
+      if (aWriter.Failed()) {
+        return Err(ProfilerError::JsonGenerationFailed);
+      }
+    }
+    aProgressLogger.SetLocalProgress(92_pc, "Wrote samples and markers");
+
+#if defined(GP_OS_android)
+    if (ActivePS::FeatureJava(aLock)) {
+      for (java::GeckoJavaSampler::ThreadInfo::LocalRef& threadInfo :
+           javaThreads) {
+        ProfiledThreadData threadData(ThreadRegistrationInfo{
+            threadInfo->GetName()->ToCString().BeginReading(),
+            ProfilerThreadId::FromNumber(threadInfo->GetId()), false,
+            CorePS::ProcessStartTime()});
+
+        threadData.StreamJSON(
+            javaBuffer, nullptr, aWriter, CorePS::ProcessName(aLock),
+            CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime,
+            nullptr,
+            aProgressLogger.CreateSubLoggerTo("Streaming Java thread...", 96_pc,
+                                              "Streamed Java thread"));
+        // Check after each Java thread (like the native-thread loops above),
+        // so that we stop streaming as soon as the writer has failed instead
+        // of processing the remaining Java threads for nothing.
+        if (aWriter.Failed()) {
+          return Err(ProfilerError::JsonGenerationFailed);
+        }
+      }
+    } else {
+      aProgressLogger.SetLocalProgress(96_pc, "No Java thread");
+    }
+#endif
+
+    // Threads that were recorded by the base profiler (if any) are spliced
+    // in as already-generated JSON.
+    UniquePtr<char[]> baseProfileThreads =
+        ActivePS::MoveBaseProfileThreads(aLock);
+    if (baseProfileThreads) {
+      aWriter.Splice(MakeStringSpan(baseProfileThreads.get()));
+      if (aWriter.Failed()) {
+        return Err(ProfilerError::JsonGenerationFailed);
+      }
+      aProgressLogger.SetLocalProgress(97_pc, "Wrote baseprofiler data");
+    } else {
+      aProgressLogger.SetLocalProgress(97_pc, "No baseprofiler data");
+    }
+  }
+  aWriter.EndArray();
+
+  SLOW_DOWN_FOR_TESTING();
+
+  aWriter.StartArrayProperty("pausedRanges");
+  {
+    buffer.StreamPausedRangesToJSON(
+        aWriter, aSinceTime,
+        aProgressLogger.CreateSubLoggerTo("Streaming pauses...", 99_pc,
+                                          "Streamed pauses"));
+  }
+  aWriter.EndArray();
+
+  if (aWriter.Failed()) {
+    return Err(ProfilerError::JsonGenerationFailed);
+  }
+
+  // Timestamp the end of the profiling log, then write the log under the
+  // "profilingLog" property, keyed by this process' id.
+  ProfilingLog::Access([&](Json::Value& aProfilingLogObject) {
+    aProfilingLogObject[Json::StaticString{
+        "profilingLogEnd" TIMESTAMP_JSON_SUFFIX}] = ProfilingLog::Timestamp();
+
+    aWriter.StartObjectProperty("profilingLog");
+    {
+      nsAutoCString pid;
+      pid.AppendInt(int64_t(profiler_current_process_id().ToNumber()));
+      Json::String logString = ToCompactString(aProfilingLogObject);
+      aWriter.SplicedJSONProperty(pid, logString);
+    }
+    aWriter.EndObject();
+  });
+
+  const double collectionEndMs = profiler_time();
+
+  // Record timestamps for the collection into the buffer, so that consumers
+  // know why we didn't collect any samples for its duration.
+  // We put these entries into the buffer after we've collected the profile,
+  // so they'll be visible for the *next* profile collection (if they haven't
+  // been overwritten due to buffer wraparound by then).
+  buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
+  buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
+
+#ifdef DEBUG
+  if (slowWithSleeps != 0) {
+    LOG("locked_profiler_stream_json_for_this_process done");
+  }
+#endif  // DEBUG
+
+  return ProfileGenerationAdditionalInformation{std::move(sharedLibraryInfo)};
+}
+
+// Streams the profile of this process into aWriter.
+// Meta information is pre-recorded, and (if the profiler is active) the
+// state-change callbacks are told that a profile is being generated, before
+// the profiler lock is taken; the streaming itself runs under that lock.
+// Returns ProfilerError::IsInactive if there is no ActivePS once locked, or
+// ProfilerError::JsonGenerationFailed if the writer failed.
+// Keep this internal function non-static, so it may be used by tests.
+ProfilerResult<ProfileGenerationAdditionalInformation>
+do_profiler_stream_json_for_this_process(
+    SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown,
+    ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger) {
+  LOG("profiler_stream_json_for_this_process");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Everything up to the PSAutoLock below runs without the profiler lock.
+  const auto metaInformation = PreRecordMetaInformation();
+  aProgressLogger.SetLocalProgress(2_pc, "PreRecordMetaInformation done");
+
+  if (profiler_is_active()) {
+    invoke_profiler_state_change_callbacks(ProfilingState::GeneratingProfile);
+  }
+
+  PSAutoLock psLock;
+  if (!ActivePS::Exists(psLock)) {
+    return Err(ProfilerError::IsInactive);
+  }
+
+  // The locked function gets the remaining 3%-100% progress range.
+  ProfileGenerationAdditionalInformation generationInfo;
+  MOZ_TRY_VAR(
+      generationInfo,
+      locked_profiler_stream_json_for_this_process(
+          psLock, aWriter, aSinceTime, metaInformation, aIsShuttingDown,
+          aService,
+          aProgressLogger.CreateSubLoggerFromTo(
+              3_pc, "locked_profiler_stream_json_for_this_process started",
+              100_pc, "locked_profiler_stream_json_for_this_process done")));
+
+  if (aWriter.Failed()) {
+    return Err(ProfilerError::JsonGenerationFailed);
+  }
+  return generationInfo;
+}
+
+// Public entry point to stream this process' profile into aWriter.
+// Release-asserts that, in the parent process, it is called on the main
+// thread, then hands everything over to
+// do_profiler_stream_json_for_this_process() and returns its result (either
+// the additional generation information, or the error) unchanged.
+ProfilerResult<ProfileGenerationAdditionalInformation>
+profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
+                                      double aSinceTime, bool aIsShuttingDown,
+                                      ProfilerCodeAddressService* aService,
+                                      mozilla::ProgressLogger aProgressLogger) {
+  MOZ_RELEASE_ASSERT(
+      !XRE_IsParentProcess() || NS_IsMainThread(),
+      "In the parent process, profiles should only be generated from the main "
+      "thread, otherwise they will be incomplete.");
+
+  return do_profiler_stream_json_for_this_process(
+      aWriter, aSinceTime, aIsShuttingDown, aService,
+      std::move(aProgressLogger));
+}
+
+// END saving/streaming code
+////////////////////////////////////////////////////////////////////////
+
+// Returns the one-letter category that PrintUsage() shows next to a feature:
+//   'D' / 'd': default feature, available / unavailable;
+//   'S' / 's': extra default feature for startup, available / unavailable;
+//   '-' / 'x': any other feature, available / unavailable.
+static char FeatureCategory(uint32_t aFeature) {
+  // Picks the first letter if the feature is available, the second otherwise.
+  const auto byAvailability = [aFeature](char aIfAvailable,
+                                         char aIfUnavailable) {
+    return (aFeature & AvailableFeatures()) ? aIfAvailable : aIfUnavailable;
+  };
+
+  if (aFeature & DefaultFeatures()) {
+    return byAvailability('D', 'd');
+  }
+
+  if (aFeature & StartupExtraDefaultFeatures()) {
+    return byAvailability('S', 's');
+  }
+
+  return byAvailability('-', 'x');
+}
+
+// Prints (to stdout) the help text describing the environment variables that
+// control the profiler, including the list of features with their category
+// letter (see FeatureCategory()), and whether this platform supports native
+// unwinding. Must be called on the main thread.
+static void PrintUsage() {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  // First part: everything up to the header of the features list. The
+  // numeric placeholders are filled in from the trailing arguments, in order.
+  printf(
+      "\n"
+      "Profiler environment variable usage:\n"
+      "\n"
+      "  MOZ_PROFILER_HELP\n"
+      "  If set to any value, prints this message.\n"
+      "  Use MOZ_BASE_PROFILER_HELP for BaseProfiler help.\n"
+      "\n"
+      "  MOZ_LOG\n"
+      "  Enables logging. The levels of logging available are\n"
+      "  'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP\n"
+      "  If set to any value other than '' or '0'/'N'/'n', starts the\n"
+      "  profiler immediately on start-up.\n"
+      "  Useful if you want to profile code that runs very early.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the number of entries per\n"
+      "  process in the profiler's circular buffer when the profiler is first\n"
+      "  started.\n"
+      "  If unset, the platform default is used:\n"
+      "  %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
+      "  (%u bytes per entry -> %u or %u total bytes per process)\n"
+      "  Optional units in bytes: KB, KiB, MB, MiB, GB, GiB\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the maximum life time of\n"
+      "  entries in the profiler's circular buffer when the profiler is\n"
+      "  first started, in seconds.\n"
+      "  If unset, the life time of the entries will only be restricted by\n"
+      "  MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
+      "  additional time duration restriction will be applied.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_INTERVAL=<1..%d>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
+      "  measured in milliseconds, when the profiler is first started.\n"
+      "  If unset, the platform default is used.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
+      "  the integer value of the features bitfield.\n"
+      "  If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
+      "  a comma-separated list of strings.\n"
+      "  Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
+      "  If unset, the platform default is used.\n"
+      "\n"
+      "    Features: (x=unavailable, D/d=default/unavailable,\n"
+      "               S/s=MOZ_PROFILER_STARTUP extra default/unavailable)\n",
+      unsigned(ActivePS::scMinimumBufferEntries),
+      unsigned(ActivePS::scMaximumBufferEntries),
+      unsigned(PROFILER_DEFAULT_ENTRIES.Value()),
+      unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
+      unsigned(scBytesPerEntry),
+      unsigned(PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
+      unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * scBytesPerEntry),
+      PROFILER_MAX_INTERVAL);
+
+  // One line per feature: category letter, bitfield value, name, description.
+#define PRINT_FEATURE(n_, str_, Name_, desc_)                                  \
+  printf("    %c %7u: \"%s\" (%s)\n", FeatureCategory(ProfilerFeature::Name_), \
+         ProfilerFeature::Name_, str_, desc_);
+
+  PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
+
+#undef PRINT_FEATURE
+
+  // Last part: remaining variables, and native unwinding support.
+  printf(
+      "    -          \"default\" (All above D+S defaults)\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n"
+      "  comma-separated list of strings. A given thread will be sampled if\n"
+      "  any of the filters is a case-insensitive substring of the thread\n"
+      "  name. If unset, a default is used.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID=<Number>\n"
+      "  This variable is used to propagate the activeTabID of\n"
+      "  the profiler init params to subprocesses.\n"
+      "\n"
+      "  MOZ_PROFILER_SHUTDOWN=<Filename>\n"
+      "  If set, the profiler saves a profile to the named file on shutdown.\n"
+      "  If the Filename contains \"%%p\", this will be replaced with the\n"
+      "  process id of the parent process.\n"
+      "\n"
+      "  MOZ_PROFILER_SYMBOLICATE\n"
+      "  If set, the profiler will pre-symbolicate profiles.\n"
+      "  *Note* This will add a significant pause when gathering data, and\n"
+      "  is intended mainly for local development.\n"
+      "\n"
+      "  MOZ_PROFILER_LUL_TEST\n"
+      "  If set to any value, runs LUL unit tests at startup.\n"
+      "\n"
+      "  This platform %s native unwinding.\n"
+      "\n",
+#if defined(HAVE_NATIVE_UNWIND)
+      "supports"
+#else
+      "does not support"
+#endif
+  );
+}
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler
+
+// Forward declaration for the static Sampler::sSigHandlerCoordinator pointer.
+// This guard lists the same platforms as the one around that member.
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+struct SigHandlerCoordinator;
+#endif
+
+// Sampler performs setup and teardown of the state required to sample with the
+// profiler. Sampler may exist when ActivePS is not present.
+//
+// SuspendAndSampleAndResumeThread must only be called from a single thread,
+// and must not sample the thread it is being called from. A separate Sampler
+// instance must be used for each thread which wants to capture samples.
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// With the exception of SamplerThread, all Sampler objects must be Disable-d
+// before releasing the lock which was used to create them. This avoids races
+// on linux with the SIGPROF signal handler.
+
+class Sampler {
+ public:
+  // Sets up the profiler such that it can begin sampling.
+  explicit Sampler(PSLockRef aLock);
+
+  // Disable the sampler, restoring it to its previous state. This must be
+  // called once, and only once, before the Sampler is destroyed.
+  void Disable(PSLockRef aLock);
+
+  // This method suspends and resumes the samplee thread. It calls the passed-in
+  // function-like object aProcessRegs (passing it a populated |const
+  // Registers&| arg) while the samplee thread is suspended.  Note that
+  // the aProcessRegs function must be very careful not to do anything that
+  // requires a lock, since we may have interrupted the thread at any point.
+  // As an example, you can't call TimeStamp::Now() since on windows it
+  // takes a lock on the performance counter.
+  //
+  // Func must be a function-like object of type `void()`.
+  // NOTE(review): `void()` does not match the description above, which says
+  // that aProcessRegs is passed a |const Registers&|; confirm the expected
+  // signature against the platform-specific implementations.
+  template <typename Func>
+  void SuspendAndSampleAndResumeThread(
+      PSLockRef aLock,
+      const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+      const TimeStamp& aNow, const Func& aProcessRegs);
+
+ private:
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+  // Used to restore the SIGPROF handler when ours is removed.
+  struct sigaction mOldSigprofHandler;
+
+  // This process' ID. Needed as an argument for tgkill in
+  // SuspendAndSampleAndResumeThread.
+  ProfilerProcessId mMyPid;
+
+  // The sampler thread's ID.  Used to assert that it is not sampling itself,
+  // which would lead to deadlock.
+  ProfilerThreadId mSamplerTid;
+
+ public:
+  // This is the one-and-only variable used to communicate between the sampler
+  // thread and the samplee thread's signal handler. It's static because the
+  // samplee thread's signal handler is static.
+  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
+#endif
+};
+
+// END Sampler
+////////////////////////////////////////////////////////////////////////
+
+// Platform-specific function that retrieves per-thread CPU measurements.
+static RunningTimes GetThreadRunningTimesDiff(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);
+// Platform-specific function that *may* discard CPU measurements since the
+// previous call to GetThreadRunningTimesDiff, if the way to suspend threads on
+// this platform may add running times to that thread.
+// No-op otherwise, i.e., when suspending a thread does not add to its running
+// times.
+static void DiscardSuspendedThreadRunningTimes(
+    PSLockRef aLock,
+    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);
+
+// Platform-specific function that retrieves process CPU measurements.
+static RunningTimes GetProcessRunningTimesDiff(
+    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated);
+
+// Template function to be used by `GetThreadRunningTimesDiff()` (unless some
+// platform has a better way to achieve this).
+// It helps perform CPU measurements and tie them to a timestamp, such that the
+// measurements and timestamp are very close together.
+// This is necessary, because the relative CPU usage is computed by dividing
+// consecutive CPU measurements by their timestamp difference; if there was an
+// unexpected big gap, it could skew this computation and produce impossible
+// spikes that would hide the rest of the data. See bug 1685938 for more info.
+// Note that this may call the measurement function more than once; it is
+// assumed to normally be fast.
+// This was verified experimentally, but there is currently no regression
+// testing for it; see follow-up bug 1687402.
+//
+// aGetCPURunningTimesFunction is called with a `RunningTimes&` to fill in.
+// The returned RunningTimes has its post-measurement timestamp set to the
+// TimeStamp::Now() taken right after the measurement that was kept.
+template <typename GetCPURunningTimesFunction>
+RunningTimes GetRunningTimesWithTightTimestamp(
+    GetCPURunningTimesFunction&& aGetCPURunningTimesFunction) {
+  // Threshold above which a measurement and its timestamp are considered too
+  // far apart. As a function-local static, it is only computed the first time
+  // this function (template instantiation) is run.
+  static const TimeDuration scMaxRunningTimesReadDuration = [&]() {
+    // Time a series of back-to-back "measurement + timestamp" operations.
+    constexpr int sampleCount = 128;
+    TimeDuration elapsed[sampleCount];
+    RunningTimes scratchTimes;
+    TimeStamp previous = TimeStamp::Now();
+    for (int sampleIndex = 0; sampleIndex < sampleCount; ++sampleIndex) {
+      AUTO_PROFILER_STATS(GetRunningTimes_MaxRunningTimesReadDuration);
+      aGetCPURunningTimesFunction(scratchTimes);
+      const TimeStamp current = TimeStamp::Now();
+      elapsed[sampleIndex] = current - previous;
+      previous = current;
+    }
+    // Partially sort, so that the median duration ends up in the middle slot.
+    TimeDuration* const median = elapsed + sampleCount / 2;
+    std::nth_element(elapsed, median, elapsed + sampleCount);
+    // The cut-off point is median*8.
+    // Typical durations should be around a microsecond, the cut-off should then
+    // be around 10 microseconds, well below the expected minimum inter-sample
+    // interval (observed as a few milliseconds), so overall this should keep
+    // cpu/interval spikes in check.
+    return *median * 8;
+  }();
+
+  // First attempt: one measurement, bracketed by two timestamps.
+  RunningTimes firstTimes;
+  const TimeStamp firstStart = TimeStamp::Now();
+  aGetCPURunningTimesFunction(firstTimes);
+  TimeStamp firstEnd = TimeStamp::Now();
+  const TimeDuration firstDuration = firstEnd - firstStart;
+
+  // That is normally quick enough. If it was not (e.g., because of an OS
+  // context switch), try exactly one more time:
+  if (MOZ_UNLIKELY(firstDuration > scMaxRunningTimesReadDuration)) {
+    AUTO_PROFILER_STATS(GetRunningTimes_REDO);
+    RunningTimes secondTimes;
+    aGetCPURunningTimesFunction(secondTimes);
+    TimeStamp secondEnd = TimeStamp::Now();
+    const TimeDuration secondDuration = secondEnd - firstEnd;
+    if (secondDuration < firstDuration) {
+      // The second attempt was faster, so keep it. (It may still be slower
+      // than the threshold; see below for why that is tolerated.)
+      // The timestamp must be set *after* the CPU measurements.
+      secondTimes.SetPostMeasurementTimeStamp(secondEnd);
+      return secondTimes;
+    }
+    // The second attempt was not faster, so fall back to the first results.
+    // Something bad happened twice in a row on this thread; more retries
+    // would be unlikely to do much better, and would cost more than the extra
+    // precision is worth.
+    // At worst, a spike of activity may be reported in the next time slice.
+    // But the cumulative work is conserved, so it should still show up at
+    // about the correct time in the graph.
+    AUTO_PROFILER_STATS(GetRunningTimes_RedoWasWorse);
+  }
+
+  // The timestamp must be set *after* the CPU measurements.
+  firstTimes.SetPostMeasurementTimeStamp(firstEnd);
+
+  return firstTimes;
+}
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread
+
+// The sampler thread controls sampling and runs whenever the profiler is
+// active. It periodically runs through all registered threads, finds those
+// that should be sampled, then pauses and samples them.
+
+class SamplerThread {
+ public:
+  // Creates a sampler thread, but doesn't start it.
+  SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                double aIntervalMilliseconds, uint32_t aFeatures);
+  ~SamplerThread();
+
+  // This runs on (is!) the sampler thread.
+  void Run();
+
+#if defined(GP_OS_windows)
+  // This runs on (is!) the thread to spy on unregistered threads.
+  void RunUnregisteredThreadSpy();
+#endif
+
+  // This runs on the main thread.
+  void Stop(PSLockRef aLock);
+
+  void AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&& aCallback) {
+    // We are under lock, so it's safe to just modify the list pointer.
+    // Also this means the sampler has not started its run yet, so any callback
+    // added now will be invoked at the end of the next loop; this guarantees
+    // that the callback will be invoked after at least one full sampling loop.
+    mPostSamplingCallbackList = MakeUnique<PostSamplingCallbackListItem>(
+        std::move(mPostSamplingCallbackList), std::move(aCallback));
+  }
+
+ private:
+  void SpyOnUnregisteredThreads();
+
+  // Item containing a post-sampling callback, and a tail-list of more items.
+  // Using a linked list means no need to move items when adding more, and
+  // "stealing" the whole list is one pointer move.
+  struct PostSamplingCallbackListItem {
+    UniquePtr<PostSamplingCallbackListItem> mPrev;
+    PostSamplingCallback mCallback;
+
+    PostSamplingCallbackListItem(UniquePtr<PostSamplingCallbackListItem> aPrev,
+                                 PostSamplingCallback&& aCallback)
+        : mPrev(std::move(aPrev)), mCallback(std::move(aCallback)) {}
+  };
+
+  [[nodiscard]] UniquePtr<PostSamplingCallbackListItem>
+  TakePostSamplingCallbacks(PSLockRef) {
+    return std::move(mPostSamplingCallbackList);
+  }
+
+  static void InvokePostSamplingCallbacks(
+      UniquePtr<PostSamplingCallbackListItem> aCallbacks,
+      SamplingState aSamplingState) {
+    if (!aCallbacks) {
+      return;
+    }
+    // We want to drill down to the last element in this list, which is the
+    // oldest one, so that we invoke them in FIFO order.
+    // We don't expect many callbacks, so it's safe to recurse. Note that we're
+    // moving-from the UniquePtr, so the tail will implicitly get destroyed.
+    InvokePostSamplingCallbacks(std::move(aCallbacks->mPrev), aSamplingState);
+    // We are going to destroy this item, so we can safely move-from the
+    // callback before calling it (in case it has an rvalue-ref-qualified call
+    // operator).
+    std::move(aCallbacks->mCallback)(aSamplingState);
+    // It may be tempting for a future maintainer to change aCallbacks into an
+    // rvalue reference; this will remind them not to do that!
+    static_assert(
+        std::is_same_v<decltype(aCallbacks),
+                       UniquePtr<PostSamplingCallbackListItem>>,
+        "We need to capture the list by-value, to implicitly destroy it");
+  }
+
+  // This suspends the calling thread for the given number of microseconds.
+  // Best effort timing.
+  void SleepMicro(uint32_t aMicroseconds);
+
+  // The sampler used to suspend and sample threads.
+  Sampler mSampler;
+
+  // The activity generation, for detecting when the sampler thread must stop.
+  const uint32_t mActivityGeneration;
+
+  // The interval between samples, measured in microseconds.
+  const int mIntervalMicroseconds;
+
+  // The OS-specific handle for the sampler thread.
+#if defined(GP_OS_windows)
+  HANDLE mThread;
+  HANDLE mUnregisteredThreadSpyThread = nullptr;
+  // State machine shared between the sampler, the "spy" thread and the main
+  // control. Going by the enumerator comments below: `Spy_*` values describe
+  // what the spy itself is doing, while `XToY_*` values are requests or
+  // notifications sent from X to Y.
+  enum class SpyingState {
+    // Value that mSpyingState is initialized with.
+    NoSpying,
+    // NOTE(review): presumably the spy thread has been created but has not
+    // reached Spy_Waiting yet -- confirm against the spy thread code.
+    Spy_Initializing,
+    // Spy is waiting for SamplerToSpy_Start or MainToSpy_Shutdown.
+    Spy_Waiting,
+    // Sampler requests spy to start working. May be pre-empted by
+    // MainToSpy_Shutdown.
+    SamplerToSpy_Start,
+    // Spy is currently working, cannot be interrupted, only the spy is allowed
+    // to change the state again.
+    Spy_Working,
+    // Main control requests spy to shut down.
+    MainToSpy_Shutdown,
+    // Spy notified main control that it's out of the loop, about to exit.
+    SpyToMain_ShuttingDown
+  };
+  SpyingState mSpyingState = SpyingState::NoSpying;
+  // The sampler will increment this while the spy is working, then while the
+  // spy is waiting the sampler will decrement it until <=0 before starting the
+  // spy. This will ensure that the work doesn't take more than 50% of a CPU
+  // core.
+  int mDelaySpyStart = 0;
+  Monitor mSpyingStateMonitor MOZ_UNANNOTATED{
+      "SamplerThread::mSpyingStateMonitor"};
+#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
+    defined(GP_OS_android) || defined(GP_OS_freebsd)
+  pthread_t mThread;
+#endif
+
+  // Post-sampling callbacks are kept in a simple linked list, which will be
+  // stolen by the sampler thread at the end of its next run.
+  UniquePtr<PostSamplingCallbackListItem> mPostSamplingCallbackList;
+
+#if defined(GP_OS_windows)
+  bool mNoTimerResolutionChange = true;
+#endif
+
+  // Record kept for one thread that is being spied on: the thread's id, its
+  // name, and a CPU time value (in nanoseconds, going by the member name).
+  struct SpiedThread {
+    base::ProcessId mThreadId;
+    nsCString mName;
+    uint64_t mCPUTimeNs;
+
+    SpiedThread(base::ProcessId aThreadId, const nsACString& aName,
+                uint64_t aCPUTimeNs)
+        : mThreadId(aThreadId), mName(aName), mCPUTimeNs(aCPUTimeNs) {}
+
+    // Equality against a bare thread id, so that an array of SpiedThread can
+    // be searched with nothing but the id. Only mThreadId takes part in the
+    // comparison.
+    friend bool operator==(const SpiedThread& aThread, base::ProcessId aId) {
+      return aThread.mThreadId == aId;
+    }
+    // Same comparison with the operands the other way around; defers to the
+    // overload above.
+    friend bool operator==(base::ProcessId aId, const SpiedThread& aThread) {
+      return aThread == aId;
+    }
+  };
+
+  // Time at which mSpiedThreads was previously updated. Null before 1st update.
+  TimeStamp mLastSpying;
+  // Unregistered threads that have been found, and are being spied on.
+  using SpiedThreads = AutoTArray<SpiedThread, 128>;
+  SpiedThreads mSpiedThreads;
+
+  SamplerThread(const SamplerThread&) = delete;
+  void operator=(const SamplerThread&) = delete;
+};
+
+// [[nodiscard]] static
+// Hands `aCallback` over to the sampler thread of `sInstance`. Returns true
+// once the callback has been forwarded. Returns false, without moving from
+// `aCallback`, when `sInstance` is null or has no sampler thread.
+bool ActivePS::AppendPostSamplingCallback(PSLockRef aLock,
+                                          PostSamplingCallback&& aCallback) {
+  if (sInstance && sInstance->mSamplerThread) {
+    sInstance->mSamplerThread->AppendPostSamplingCallback(
+        aLock, std::move(aCallback));
+    return true;
+  }
+  return false;
+}
+
+// This function is required because we need to create a SamplerThread within
+// ActivePS's constructor, but SamplerThread is defined after ActivePS. It
+// could probably be removed by moving some code around.
+//
+// All four arguments are forwarded unchanged to the SamplerThread
+// constructor. The object is allocated with plain `new` and returned as a raw
+// pointer, so ownership passes to the caller (presumably ActivePS -- confirm
+// where it gets deleted).
+static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+                                       double aInterval, uint32_t aFeatures) {
+  return new SamplerThread(aLock, aGeneration, aInterval, aFeatures);
+}
+
+// This function is the sampler thread.  This implementation is used for all
+// targets.
+void SamplerThread::Run() {
+  PR_SetCurrentThreadName("SamplerThread");
+
+  // Features won't change during this SamplerThread's lifetime, so we can read
+  // them once and store them locally.
+  const uint32_t features = []() -> uint32_t {
+    PSAutoLock lock;
+    if (!ActivePS::Exists(lock)) {
+      // If there is no active profiler, it doesn't matter what we return,
+      // because this thread will exit before any feature is used.
+      return 0;
+    }
+    return ActivePS::Features(lock);
+  }();
+
+  // Not *no*-stack-sampling means we do want stack sampling.
+  const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
+
+  const bool cpuUtilization = ProfilerFeature::HasCPUUtilization(features);
+
+  // Use local ProfileBuffer and underlying buffer to capture the stack.
+  // (This is to avoid touching the core buffer lock while a thread is
+  // suspended, because that thread could be working with the core buffer as
+  // well.)
+  mozilla::ProfileBufferChunkManagerSingle localChunkManager(
+      ProfileBufferChunkManager::scExpectedMaximumStackSize);
+  ProfileChunkedBuffer localBuffer(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
+  ProfileBuffer localProfileBuffer(localBuffer);
+
+  // Will be kept between collections, to know what each collection does.
+  auto previousState = localBuffer.GetState();
+
+  // This will be filled at every loop, to be used by the next loop to compute
+  // the CPU utilization between samples.
+  RunningTimes processRunningTimes;
+
+  // This will be set inside the loop, from inside the lock scope, to capture
+  // all callbacks added before that, but none after the lock is released.
+  UniquePtr<PostSamplingCallbackListItem> postSamplingCallbacks;
+  // This will be set inside the loop, before invoking callbacks outside.
+  SamplingState samplingState{};
+
+  const TimeDuration sampleInterval =
+      TimeDuration::FromMicroseconds(mIntervalMicroseconds);
+  const uint32_t minimumIntervalSleepUs =
+      static_cast<uint32_t>(mIntervalMicroseconds / 4);
+
+  // This is the scheduled time at which each sampling loop should start.
+  // It will determine the ideal next sampling start by adding the expected
+  // interval, unless when sampling runs late -- See end of while() loop.
+  TimeStamp scheduledSampleStart = TimeStamp::Now();
+
+  while (true) {
+    const TimeStamp sampleStart = TimeStamp::Now();
+
+    // This scope is for |lock|. It ends before we sleep below.
+    {
+      // There should be no local callbacks left from a previous loop.
+      MOZ_ASSERT(!postSamplingCallbacks);
+
+      PSAutoLock lock;
+      TimeStamp lockAcquired = TimeStamp::Now();
+
+      // Move all the post-sampling callbacks locally, so that new ones cannot
+      // sneak in between the end of the lock scope and the invocation after it.
+      postSamplingCallbacks = TakePostSamplingCallbacks(lock);
+
+      if (!ActivePS::Exists(lock)) {
+        // Exit the `while` loop, including the lock scope, before invoking
+        // callbacks and returning.
+        samplingState = SamplingState::JustStopped;
+        break;
+      }
+
+      // At this point profiler_stop() might have been called, and
+      // profiler_start() might have been called on another thread. If this
+      // happens the generation won't match.
+      if (ActivePS::Generation(lock) != mActivityGeneration) {
+        samplingState = SamplingState::JustStopped;
+        // Exit the `while` loop, including the lock scope, before invoking
+        // callbacks and returning.
+        break;
+      }
+
+      ActivePS::ClearExpiredExitProfiles(lock);
+
+      TimeStamp expiredMarkersCleaned = TimeStamp::Now();
+
+      if (int(gSkipSampling) <= 0 && !ActivePS::IsSamplingPaused(lock)) {
+        double sampleStartDeltaMs =
+            (sampleStart - CorePS::ProcessStartTime()).ToMilliseconds();
+        ProfileBuffer& buffer = ActivePS::Buffer(lock);
+
+        // Before sampling counters, update the process CPU counter if active.
+        if (ActivePS::ProcessCPUCounter* processCPUCounter =
+                ActivePS::MaybeProcessCPUCounter(lock);
+            processCPUCounter) {
+          RunningTimes processRunningTimesDiff =
+              GetProcessRunningTimesDiff(lock, processRunningTimes);
+          Maybe<uint64_t> cpu = processRunningTimesDiff.GetJsonThreadCPUDelta();
+          if (cpu) {
+            processCPUCounter->Add(static_cast<int64_t>(*cpu));
+          }
+        }
+
+        if (PowerCounters* powerCounters = ActivePS::MaybePowerCounters(lock);
+            powerCounters) {
+          powerCounters->Sample();
+        }
+
+        // handle per-process generic counters
+        const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
+        for (auto& counter : counters) {
+          if (auto sample = counter->Sample(); sample.isSampleNew) {
+            // create Buffer entries for each counter
+            buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
+            buffer.AddEntry(ProfileBufferEntry::Time(sampleStartDeltaMs));
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+            if (ActivePS::IsMemoryCounter(counter)) {
+              // For the memory counter, subtract the size of our buffer to
+              // avoid giving the misleading impression that the memory use
+              // keeps on growing when it's just the profiler session that's
+              // using a larger buffer as it gets longer.
+              sample.count -= static_cast<int64_t>(
+                  ActivePS::ControlledChunkManager(lock).TotalSize());
+            }
+#endif
+            // In the future, we may support keyed counters - for example,
+            // counters with a key which is a thread ID. For "simple" counters
+            // we'll just use a key of 0.
+            buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
+            buffer.AddEntry(ProfileBufferEntry::Count(sample.count));
+            if (sample.number) {
+              buffer.AddEntry(ProfileBufferEntry::Number(sample.number));
+            }
+          }
+        }
+        TimeStamp countersSampled = TimeStamp::Now();
+
+        if (stackSampling || cpuUtilization) {
+          samplingState = SamplingState::SamplingCompleted;
+
+          // Prevent threads from ending (or starting) and allow access to all
+          // OffThreadRef's.
+          ThreadRegistry::LockedRegistry lockedRegistry;
+
+          for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
+            ThreadRegistration::UnlockedRWForLockedProfiler&
+                unlockedThreadData =
+                    offThreadRef.UnlockedRWForLockedProfilerRef();
+            ProfiledThreadData* profiledThreadData =
+                unlockedThreadData.GetProfiledThreadData(lock);
+            if (!profiledThreadData) {
+              // This thread is not being profiled, continue with the next one.
+              continue;
+            }
+
+            const ThreadProfilingFeatures whatToProfile =
+                unlockedThreadData.ProfilingFeatures();
+            const bool threadCPUUtilization =
+                cpuUtilization &&
+                DoFeaturesIntersect(whatToProfile,
+                                    ThreadProfilingFeatures::CPUUtilization);
+            const bool threadStackSampling =
+                stackSampling &&
+                DoFeaturesIntersect(whatToProfile,
+                                    ThreadProfilingFeatures::Sampling);
+            if (!threadCPUUtilization && !threadStackSampling) {
+              // Nothing to profile on this thread, continue with the next one.
+              continue;
+            }
+
+            const ProfilerThreadId threadId =
+                unlockedThreadData.Info().ThreadId();
+
+            const RunningTimes runningTimesDiff = [&]() {
+              if (!threadCPUUtilization) {
+                // If we don't need CPU measurements, we only need a timestamp.
+                return RunningTimes(TimeStamp::Now());
+              }
+              return GetThreadRunningTimesDiff(lock, unlockedThreadData);
+            }();
+
+            const TimeStamp& now = runningTimesDiff.PostMeasurementTimeStamp();
+            double threadSampleDeltaMs =
+                (now - CorePS::ProcessStartTime()).ToMilliseconds();
+
+            // If the thread is asleep and has been sampled before in the same
+            // sleep episode, or otherwise(*) if there was zero CPU activity
+            // since the previous sampling, find and copy the previous sample,
+            // as that's cheaper than taking a new sample.
+            // (*) Tech note: The asleep check is done first and always, because
+            //     it is more reliable, and knows if it's the first asleep
+            //     sample, which cannot be duplicated; if the test was the other
+            //     way around, it could find zero CPU and then short-circuit
+            //     that state-changing second-asleep-check operation, which
+            //     could result in an unneeded sample.
+            // However we're using current running times (instead of copying the
+            // old ones) because some work could have happened.
+            if (threadStackSampling &&
+                (unlockedThreadData.CanDuplicateLastSampleDueToSleep() ||
+                 runningTimesDiff.GetThreadCPUDelta() == Some(uint64_t(0)))) {
+              const bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
+                  threadId, threadSampleDeltaMs,
+                  profiledThreadData->LastSample(), runningTimesDiff);
+              if (dup_ok) {
+                continue;
+              }
+            }
+
+            AUTO_PROFILER_STATS(gecko_SamplerThread_Run_DoPeriodicSample);
+
+            // Record the global profiler buffer's range start now, before
+            // adding the first entry for this thread's sample.
+            const uint64_t bufferRangeStart = buffer.BufferRangeStart();
+
+            // Add the thread ID now, so we know its position in the main
+            // buffer, which is used by some JS data.
+            // (DoPeriodicSample only knows about the temporary local buffer.)
+            const uint64_t samplePos = buffer.AddThreadIdEntry(threadId);
+            profiledThreadData->LastSample() = Some(samplePos);
+
+            // Also add the time, so it's always there after the thread ID, as
+            // expected by the parser. (Other stack data is optional.)
+            buffer.AddEntry(ProfileBufferEntry::TimeBeforeCompactStack(
+                threadSampleDeltaMs));
+
+            Maybe<double> unresponsiveDuration_ms;
+
+            // If we have RunningTimes data, store it before the CompactStack.
+            // Note: It is not stored inside the CompactStack so that it doesn't
+            // get incorrectly duplicated when the thread is sleeping.
+            if (!runningTimesDiff.IsEmpty()) {
+              profiler_get_core_buffer().PutObjects(
+                  ProfileBufferEntry::Kind::RunningTimes, runningTimesDiff);
+            }
+
+            if (threadStackSampling) {
+              ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock
+                  lockedThreadData = offThreadRef.GetLockedRWFromAnyThread();
+              // Suspend the thread and collect its stack data in the local
+              // buffer.
+              mSampler.SuspendAndSampleAndResumeThread(
+                  lock, lockedThreadData.DataCRef(), now,
+                  [&](const Registers& aRegs, const TimeStamp& aNow) {
+                    DoPeriodicSample(lock, lockedThreadData.DataCRef(), aRegs,
+                                     samplePos, bufferRangeStart,
+                                     localProfileBuffer);
+
+                    // For "eventDelay", we want the input delay - but if
+                    // there are no events in the input queue (or even if there
+                    // are), we're interested in how long the delay *would* be
+                    // for an input event now, which would be the time to finish
+                    // the current event + the delay caused by any events
+                    // already in the input queue (plus any High priority
+                    // events).  Events at lower priorities (in a
+                    // PrioritizedEventQueue) than Input count for input delay
+                    // only for the duration that they're running, since when
+                    // they finish, any queued input event would run.
+                    //
+                    // Unless we record the time state of all events and queue
+                    // states at all times, this is hard to precisely calculate,
+                    // but we can approximate it well in post-processing with
+                    // RunningEventDelay and RunningEventStart.
+                    //
+                    // RunningEventDelay is the time duration the event was
+                    // queued before starting execution.  RunningEventStart is
+                    // the time the event started. (Note: since we care about
+                    // Input event delays on MainThread, for
+                    // PrioritizedEventQueues we return 0 for RunningEventDelay
+                    // if the currently running event has a lower priority than
+                    // Input (since Input events won't queue behind them).)
+                    //
+                    // To directly measure this we would need to record the time
+                    // at which the newest event currently in each queue at time
+                    // X (the sample time) finishes running.  This of course
+                    // would require looking into the future, or recording all
+                    // this state and then post-processing it later. If we were
+                    // to trace every event start and end we could do this, but
+                    // it would have significant overhead to do so (and buffer
+                    // usage).  From a recording of RunningEventDelays and
+                    // RunningEventStarts we can infer the actual delay:
+                    //
+                    // clang-format off
+                    // Event queue: <tail> D  :  C  :  B  : A <head>
+                    // Time inserted (ms): 40 :  20 : 10  : 0
+                    // Run Time (ms):      30 : 100 : 40  : 30
+                    //
+                    // 0    10   20   30   40   50   60   70   80   90  100  110  120  130  140  150  160  170
+                    // [A||||||||||||]
+                    //      ----------[B|||||||||||||||||]
+                    //           -------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
+                    //                     -----------------------------------------------------------------[D|||||||||...]
+                    //
+                    // Calculate the delay of a new event added at time t: (run every sample)
+                    //    TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
+                    //    effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
+                    //    delta = (now - last_sample_time);
+                    //    last_sample_time = now;
+                    //    for (t=effective_submission to now) {
+                    //       delay[t] += delta;
+                    //    }
+                    //
+                    // Can be reduced in overhead by:
+                    //    TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
+                    //    effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
+                    //    if (effective_submission != last_submission) {
+                    //      delta = (now - last_submission);
+                    //      // this loop should be made to match each sample point in the range
+                    //      // instead of assuming 1ms sampling as this pseudocode does
+                    //      for (t=last_submission to effective_submission-1) {
+                    //         delay[t] += delta;
+                    //         delta -= 1; // assumes 1ms; adjust as needed to match for()
+                    //      }
+                    //      last_submission = effective_submission;
+                    //    }
+                    //
+                    // Time  Head of queue   Running Event  RunningEventDelay  Delay of       Effective     Started    Calc (submission->now add 10ms)  Final
+                    //                                                         hypothetical   Submission    Running @                                   result
+                    //                                                         event E
+                    // 0        Empty            A                0                30              0           0       @0=10                             30
+                    // 10         B              A                0                60              0           0       @0=20, @10=10                     60
+                    // 20         B              A                0               150              0           0       @0=30, @10=20, @20=10            150
+                    // 30         C              B               20               140             10          30       @10=20, @20=10, @30=0            140
+                    // 40         C              B               20               160                                  @10=30, @20=20...                160
+                    // 50         C              B               20               150                                                                   150
+                    // 60         C              B               20               140                                  @10=50, @20=40...                140
+                    // 70         D              C               50               130             20          70       @20=50, @30=40...                130
+                    // ...
+                    // 160        D              C               50                40                                  @20=140, @30=130...               40
+                    // 170      <empty>          D              140                30             40                   @40=140, @50=130... (rounding)    30
+                    // 180      <empty>          D              140                20             40                   @40=150                           20
+                    // 190      <empty>          D              140                10             40                   @40=160                           10
+                    // 200      <empty>        <empty>            0                 0             NA                                                      0
+                    //
+                    // Function Delay(t) = the time between t and the time at which a hypothetical
+                    // event e would start executing, if e was enqueued at time t.
+                    //
+                    // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
+                    //               // instantly.
+                    // Delay(0) = 30 // The hypothetical event e got enqueued just after A got
+                    //               // enqueued. It can start running at 30, when A is done.
+                    // Delay(5) = 25
+                    // Delay(10) = 60 // Can start running at 70, after both A and B are done.
+                    // Delay(19) = 51
+                    // Delay(20) = 150 // Can start running at 170, after A, B & C.
+                    // Delay(25) = 145
+                    // Delay(30) = 170 // Can start running at 200, after A, B, C & D.
+                    // Delay(120) = 80
+                    // Delay(200) = 0 // (assuming nothing was enqueued after D)
+                    //
+                    // For every event that gets enqueued, the Delay time will go up by the
+                    // event's running time at the time at which the event is enqueued.
+                    // The Delay function will be a sawtooth of the following shape:
+                    //
+                    //             |\           |...
+                    //             | \          |
+                    //        |\   |  \         |
+                    //        | \  |   \        |
+                    //     |\ |  \ |    \       |
+                    //  |\ | \|   \|     \      |
+                    //  | \|              \     |
+                    // _|                  \____|
+                    //
+                    //
+                    // A more complex example with a PrioritizedEventQueue:
+                    //
+                    // Event queue: <tail> D  :  C  :  B  : A <head>
+                    // Time inserted (ms): 40 :  20 : 10  : 0
+                    // Run Time (ms):      30 : 100 : 40  : 30
+                    // Priority:         Input: Norm: Norm: Norm
+                    //
+                    // 0    10   20   30   40   50   60   70   80   90  100  110  120  130  140  150  160  170
+                    // [A||||||||||||]
+                    //      ----------[B|||||||||||||||||]
+                    //           ----------------------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
+                    //                     ---------------[D||||||||||||]
+                    //
+                    //
+                    // Time  Head of queue   Running Event  RunningEventDelay  Delay of       Effective   Started    Calc (submission->now add 10ms)   Final
+                    //                                                         hypothetical   Submission  Running @                                    result
+                    //                                                         event
+                    // 0        Empty            A                0                30              0           0       @0=10                             30
+                    // 10         B              A                0                20              0           0       @0=20, @10=10                     20
+                    // 20         B              A                0                10              0           0       @0=30, @10=20, @20=10             10
+                    // 30         C              B                0                40             30          30       @30=10                            40
+                    // 40         C              B                0                60             30                   @40=10, @30=20                    60
+                    // 50         C              B                0                50             30                   @50=10, @40=20, @30=30            50
+                    // 60         C              B                0                40             30                   @60=10, @50=20, @40=30, @30=40    40
+                    // 70         C              D               30                30             40          70       @60=20, @50=30, @40=40            30
+                    // 80         C              D               30                20             40          70       ...@50=40, @40=50                 20
+                    // 90         C              D               30                10             40          70       ...@60=40, @50=50, @40=60         10
+                    // 100      <empty>          C                0               100             100        100       @100=10                          100
+                    // 110      <empty>          C                0                90             100        100       @110=10, @100=20                  90
+
+                    //
+                    // For PrioritizedEventQueue, the definition of the Delay(t) function is adjusted: the hypothetical event e has Input priority.
+                    // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
+                    //               // instantly.
+                    // Delay(0) = 30 // The hypothetical input event e got enqueued just after A got
+                    //               // enqueued. It can start running at 30, when A is done.
+                    // Delay(5) = 25
+                    // Delay(10) = 20
+                    // Delay(25) = 5 // B has been queued, but e does not need to wait for B because e has Input priority and B does not.
+                    //               // So e can start running at 30, when A is done.
+                    // Delay(30) = 40 // Can start running at 70, after B is done.
+                    // Delay(40) = 60 // Can start at 100, after B and D are done (D is Input Priority)
+                    // Delay(80) = 20
+                    // Delay(100) = 100 // Wait for C to finish
+
+                    // clang-format on
+                    //
+                    // Alternatively we could insert (recycled instead of
+                    // allocated/freed) input events at every sample period
+                    // (1ms...), and use them to back-calculate the delay.  This
+                    // might also be somewhat expensive, and would require
+                    // guessing at the maximum delay, which would likely be in
+                    // the seconds, and so you'd need 1000's of pre-allocated
+                    // events per queue per thread - so there would be a memory
+                    // impact as well.
+
+                    TimeDuration currentEventDelay;
+                    TimeDuration currentEventRunning;
+                    lockedThreadData->GetRunningEventDelay(
+                        aNow, currentEventDelay, currentEventRunning);
+
+                    // Note: eventDelay is a different definition of
+                    // responsiveness than the 16ms event injection.
+
+                    // Don't suppress 0's for now; that can be a future
+                    // optimization.  We probably want one zero to be stored
+                    // before we start suppressing, which would be more
+                    // complex.
+                    unresponsiveDuration_ms =
+                        Some(currentEventDelay.ToMilliseconds() +
+                             currentEventRunning.ToMilliseconds());
+                  });
+
+              if (cpuUtilization) {
+                // Suspending the thread for sampling could have added some
+                // running time to it, discard any since the call to
+                // GetThreadRunningTimesDiff above.
+                DiscardSuspendedThreadRunningTimes(lock, unlockedThreadData);
+              }
+
+              // If we got eventDelay data, store it before the CompactStack.
+              // Note: It is not stored inside the CompactStack so that it
+              // doesn't get incorrectly duplicated when the thread is sleeping.
+              if (unresponsiveDuration_ms.isSome()) {
+                profiler_get_core_buffer().PutObjects(
+                    ProfileBufferEntry::Kind::UnresponsiveDurationMs,
+                    *unresponsiveDuration_ms);
+              }
+            }
+
+            // There *must* be a CompactStack after a TimeBeforeCompactStack;
+            // but note that other entries may have been concurrently inserted
+            // between the TimeBeforeCompactStack above and now. If the captured
+            // sample from `DoPeriodicSample` is complete, copy it into the
+            // global buffer, otherwise add an empty one to satisfy the parser
+            // that expects one.
+            auto state = localBuffer.GetState();
+            if (NS_WARN_IF(state.mFailedPutBytes !=
+                           previousState.mFailedPutBytes)) {
+              LOG("Stack sample too big for local storage, failed to store %u "
+                  "bytes",
+                  unsigned(state.mFailedPutBytes -
+                           previousState.mFailedPutBytes));
+              // There *must* be a CompactStack after a TimeBeforeCompactStack,
+              // even an empty one.
+              profiler_get_core_buffer().PutObjects(
+                  ProfileBufferEntry::Kind::CompactStack,
+                  UniquePtr<ProfileChunkedBuffer>(nullptr));
+            } else if (state.mRangeEnd - previousState.mRangeEnd >=
+                       *profiler_get_core_buffer().BufferLength()) {
+              LOG("Stack sample too big for profiler storage, needed %u bytes",
+                  unsigned(state.mRangeEnd - previousState.mRangeEnd));
+              // There *must* be a CompactStack after a TimeBeforeCompactStack,
+              // even an empty one.
+              profiler_get_core_buffer().PutObjects(
+                  ProfileBufferEntry::Kind::CompactStack,
+                  UniquePtr<ProfileChunkedBuffer>(nullptr));
+            } else {
+              profiler_get_core_buffer().PutObjects(
+                  ProfileBufferEntry::Kind::CompactStack, localBuffer);
+            }
+
+            // Clean up for the next run.
+            localBuffer.Clear();
+            previousState = localBuffer.GetState();
+          }
+        } else {
+          samplingState = SamplingState::NoStackSamplingCompleted;
+        }
+
+#if defined(USE_LUL_STACKWALK)
+        // The LUL unwind object accumulates frame statistics. Periodically we
+        // should poke it to give it a chance to print those statistics.  This
+        // involves doing I/O (fprintf, __android_log_print, etc.) and so
+        // can't safely be done from the critical section inside
+        // SuspendAndSampleAndResumeThread, which is why it is done here.
+        lul::LUL* lul = CorePS::Lul();
+        if (lul) {
+          lul->MaybeShowStats();
+        }
+#endif
+        TimeStamp threadsSampled = TimeStamp::Now();
+
+        {
+          AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
+          ActivePS::FulfillChunkRequests(lock);
+        }
+
+        buffer.CollectOverheadStats(sampleStartDeltaMs,
+                                    lockAcquired - sampleStart,
+                                    expiredMarkersCleaned - lockAcquired,
+                                    countersSampled - expiredMarkersCleaned,
+                                    threadsSampled - countersSampled);
+      } else {
+        samplingState = SamplingState::SamplingPaused;
+      }
+    }
+    // gPSMutex is not held after this point.
+
+    // Invoke end-of-sampling callbacks outside of the locked scope.
+    InvokePostSamplingCallbacks(std::move(postSamplingCallbacks),
+                                samplingState);
+
+    ProfilerChild::ProcessPendingUpdate();
+
+    if (ProfilerFeature::HasUnregisteredThreads(features)) {
+#if defined(GP_OS_windows)
+      {
+        MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
+        switch (mSpyingState) {
+          case SpyingState::SamplerToSpy_Start:
+          case SpyingState::Spy_Working:
+            // If the spy is working (or about to work), record this loop
+            // iteration to delay the next start.
+            ++mDelaySpyStart;
+            break;
+          case SpyingState::Spy_Waiting:
+            // The Spy is idle, waiting for instructions. Should we delay?
+            if (--mDelaySpyStart <= 0) {
+              mDelaySpyStart = 0;
+              mSpyingState = SpyingState::SamplerToSpy_Start;
+              mSpyingStateMonitor.NotifyAll();
+            }
+            break;
+          default:
+            // Otherwise the spy should be initializing or shutting down.
+            MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing ||
+                       mSpyingState == SpyingState::MainToSpy_Shutdown ||
+                       mSpyingState == SpyingState::SpyToMain_ShuttingDown);
+            break;
+        }
+      }
+#else
+      // On non-Windows platforms, this is fast enough to run in this thread,
+      // each sampling loop.
+      SpyOnUnregisteredThreads();
+#endif
+    }
+
+    // We expect the next sampling loop to start `sampleInterval` after this
+    // loop here was scheduled to start.
+    scheduledSampleStart += sampleInterval;
+
+    // Try to sleep until we reach that next scheduled time.
+    const TimeStamp beforeSleep = TimeStamp::Now();
+    if (scheduledSampleStart >= beforeSleep) {
+      // There is still time before the next scheduled sample time.
+      const uint32_t sleepTimeUs = static_cast<uint32_t>(
+          (scheduledSampleStart - beforeSleep).ToMicroseconds());
+      if (sleepTimeUs >= minimumIntervalSleepUs) {
+        SleepMicro(sleepTimeUs);
+      } else {
+        // If we're too close to that time, sleep the minimum amount of time.
+        // Note that the next scheduled start is not shifted, so at the end of
+        // the next loop, sleep may again be adjusted to get closer to schedule.
+        SleepMicro(minimumIntervalSleepUs);
+      }
+    } else {
+      // This sampling loop ended after the next sampling should have started!
+      // There is little point to try and keep up to schedule now, it would
+      // require more work, while it's likely we're late because the system is
+      // already busy. Try and restart a normal schedule from now.
+      scheduledSampleStart = beforeSleep + sampleInterval;
+      SleepMicro(static_cast<uint32_t>(sampleInterval.ToMicroseconds()));
+    }
+  }
+
+  // End of `while` loop. We can only be here from a `break` inside the loop.
+  InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), samplingState);
+}
+
+namespace geckoprofiler::markers {
+
+// Marker type recording the observed lifetime of a thread that has not
+// registered itself with the profiler.
+struct UnregisteredThreadLifetimeMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("UnregisteredThreadLifetime");
+  }
+
+  // Streams the thread id, the thread name (a placeholder is written when the
+  // name is empty), and the end event when one was given.
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   base::ProcessId aThreadId,
+                                   const ProfilerString8View& aName,
+                                   const ProfilerString8View& aEndEvent) {
+    aWriter.IntProperty("Thread Id", aThreadId);
+
+    if (aName.Length() == 0) {
+      aWriter.StringProperty("Thread Name", MakeStringSpan("~Unnamed~"));
+    } else {
+      aWriter.StringProperty("Thread Name", aName.AsSpan());
+    }
+
+    // "End Event" is optional: nothing is written for an empty string.
+    if (aEndEvent.Length() == 0) {
+      return;
+    }
+    aWriter.StringProperty("End Event", aEndEvent);
+  }
+
+  // Describes where and how markers of this type are displayed.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema display{MarkerSchema::Location::MarkerChart,
+                         MarkerSchema::Location::MarkerTable};
+    display.AddKeyFormatSearchable("Thread Id", MarkerSchema::Format::Integer,
+                                   MarkerSchema::Searchable::Searchable);
+    display.AddKeyFormatSearchable("Thread Name", MarkerSchema::Format::String,
+                                   MarkerSchema::Searchable::Searchable);
+    display.AddKeyFormat("End Event", MarkerSchema::Format::String);
+    display.AddStaticLabelValue(
+        "Note",
+        "Start and end are approximate, based on first and last appearances.");
+    display.SetChartLabel(
+        "{marker.data.Thread Name} (tid {marker.data.Thread Id})");
+    display.SetTableLabel("{marker.name} lifetime");
+    return display;
+  }
+};
+
+// Marker type recording how much CPU time an unregistered thread consumed
+// between two consecutive observations.
+struct UnregisteredThreadCPUMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("UnregisteredThreadCPU");
+  }
+
+  // Streams the thread id, the CPU time difference (in nanoseconds), and the
+  // CPU utilization, i.e. that difference divided by the wall-clock duration
+  // between aStart and aEnd.
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   base::ProcessId aThreadId,
+                                   int64_t aCPUDiffNs, const TimeStamp& aStart,
+                                   const TimeStamp& aEnd) {
+    aWriter.IntProperty("Thread Id", aThreadId);
+    aWriter.IntProperty("CPU Time", aCPUDiffNs);
+
+    // Only emit the ratio when the interval has a positive length: dividing
+    // by zero would yield an infinity or a NaN, neither of which can be
+    // represented in JSON.
+    const double durationNs = (aEnd - aStart).ToMicroseconds() * 1000.0;
+    if (durationNs > 0.0) {
+      aWriter.DoubleProperty("CPU Utilization",
+                             double(aCPUDiffNs) / durationNs);
+    }
+  }
+
+  // Describes where and how markers of this type are displayed.
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+    schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer,
+                                  MS::Searchable::Searchable);
+    schema.AddKeyFormat("CPU Time", MS::Format::Nanoseconds);
+    schema.AddKeyFormat("CPU Utilization", MS::Format::Percentage);
+    schema.SetChartLabel("{marker.data.CPU Utilization}");
+    schema.SetTableLabel(
+        "{marker.name} - Activity: {marker.data.CPU Utilization}");
+    return schema;
+  }
+};
+
+}  // namespace geckoprofiler::markers
+
+// Returns true when one of the registrations found in the thread registry has
+// a thread id equal to aThreadId.
+static bool IsThreadIdRegistered(ProfilerThreadId aThreadId) {
+  // lockedRegistry stays alive for the whole scan.
+  ThreadRegistry::LockedRegistry lockedRegistry;
+  for (const ThreadRegistry::OffThreadRef& offThreadRef : lockedRegistry) {
+    if (offThreadRef.UnlockedConstReaderCRef().Info().ThreadId() ==
+        aThreadId) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Builds the marker name "tid <id>", followed by a space and aName when aName
+// is not empty.
+static nsAutoCString MakeThreadInfoMarkerName(base::ProcessId aThreadId,
+                                              const nsACString& aName) {
+  nsAutoCString label{"tid "};
+  label.AppendInt(int64_t(aThreadId));
+  if (aName.IsEmpty()) {
+    return label;
+  }
+  label.AppendLiteral(" ");
+  label.Append(aName);
+  return label;
+}
+
+// Looks at the threads of the current process (as reported by
+// GetProcInfoSync) and records markers about those that are not registered
+// with the profiler:
+// - an UnregisteredThreadLifetimeMarker interval start when an unregistered
+//   thread is seen for the first time,
+// - an UnregisteredThreadCPUMarker when the CPU time of an already-spied
+//   thread has changed since the previous call,
+// - an UnregisteredThreadLifetimeMarker interval end when a spied thread is no
+//   longer reported, or is now registered.
+// The spied threads are remembered in mSpiedThreads between calls, and
+// mLastSpying holds the time of the latest call. All markers are recorded with
+// MarkerThreadId::MainThread().
+void SamplerThread::SpyOnUnregisteredThreads() {
+  const TimeStamp unregisteredThreadSearchStart = TimeStamp::Now();
+
+  // Build a single request, about the current process only.
+  const base::ProcessId currentProcessId =
+      base::ProcessId(profiler_current_process_id().ToNumber());
+  nsTArray<ProcInfoRequest> request(1);
+  request.EmplaceBack(
+      /* aPid = */ currentProcessId,
+      /* aProcessType = */ ProcType::Unknown,
+      /* aOrigin = */ ""_ns,
+      /* aWindowInfo = */ nsTArray<WindowInfo>{},
+      /* aUtilityInfo = */ nsTArray<UtilityInfo>{},
+      /* aChild = */ 0
+#ifdef XP_MACOSX
+      ,
+      /* aChildTask = */ MACH_PORT_NULL
+#endif  // XP_MACOSX
+  );
+
+  const ProcInfoPromise::ResolveOrRejectValue procInfoOrError =
+      GetProcInfoSync(std::move(request));
+
+  // Without process information there is nothing to examine; record the
+  // failure as a marker and give up for this call.
+  if (!procInfoOrError.IsResolve()) {
+    PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER,
+                         MarkerOptions(MarkerThreadId::MainThread(),
+                                       MarkerTiming::IntervalUntilNowFrom(
+                                           unregisteredThreadSearchStart)),
+                         "Could not retrieve any process information");
+    return;
+  }
+
+  const auto& procInfoHashMap = procInfoOrError.ResolveValue();
+  // Expecting the requested (current) process information to be present in the
+  // hashmap.
+  const auto& procInfoPtr =
+      procInfoHashMap.readonlyThreadsafeLookup(currentProcessId);
+  if (!procInfoPtr) {
+    PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER,
+                         MarkerOptions(MarkerThreadId::MainThread(),
+                                       MarkerTiming::IntervalUntilNowFrom(
+                                           unregisteredThreadSearchStart)),
+                         "Could not retrieve information about this process");
+    return;
+  }
+
+  // Record the time spent so far, which is OS-bound...
+  PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER,
+                       MarkerOptions(MarkerThreadId::MainThread(),
+                                     MarkerTiming::IntervalUntilNowFrom(
+                                         unregisteredThreadSearchStart)),
+                       "Work to discover threads");
+
+  // ... and record the time needed to process the data, which we can control.
+  AUTO_PROFILER_MARKER_TEXT(
+      "Unregistered thread search", PROFILER,
+      MarkerOptions(MarkerThreadId::MainThread()),
+      "Work to process discovered threads and record unregistered ones"_ns);
+
+  const Span<const mozilla::ThreadInfo> threads = procInfoPtr->value().threads;
+
+  // mLastSpying timestamp should be null only at the beginning of a session,
+  // when mSpiedThreads is still empty.
+  MOZ_ASSERT_IF(mLastSpying.IsNull(), mSpiedThreads.IsEmpty());
+
+  // Stamp this call in mLastSpying, keeping the previous stamp to compute the
+  // interval between the two calls.
+  const TimeStamp previousSpying = std::exchange(mLastSpying, TimeStamp::Now());
+
+  // Find threads that were spied on but are not present anymore.
+  // The array is walked from its last element to its first, so that removing
+  // the current element does not shift the elements still to be visited.
+  const auto threadsBegin = threads.begin();
+  const auto threadsEnd = threads.end();
+  for (size_t spiedThreadIndexPlus1 = mSpiedThreads.Length();
+       spiedThreadIndexPlus1 != 0; --spiedThreadIndexPlus1) {
+    const SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndexPlus1 - 1];
+    if (std::find_if(threadsBegin, threadsEnd,
+                     [spiedTid = spiedThread.mThreadId](
+                         const mozilla::ThreadInfo& aThreadInfo) {
+                       return aThreadInfo.tid == spiedTid;
+                     }) == threadsEnd) {
+      // This spied thread is gone.
+      PROFILER_MARKER(
+          MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName),
+          PROFILER,
+          MarkerOptions(
+              MarkerThreadId::MainThread(),
+              // Place the end between this update and the previous one.
+              MarkerTiming::IntervalEnd(previousSpying +
+                                        (mLastSpying - previousSpying) /
+                                            int64_t(2))),
+          UnregisteredThreadLifetimeMarker, spiedThread.mThreadId,
+          spiedThread.mName, "Thread disappeared");
+
+      // Don't spy on it anymore, assuming it won't come back.
+      // (spiedThread refers to the removed element and is not used after this.)
+      mSpiedThreads.RemoveElementAt(spiedThreadIndexPlus1 - 1);
+    }
+  }
+
+  // Now go through the threads that were reported for this process.
+  for (const mozilla::ThreadInfo& threadInfo : threads) {
+    // Index of this encountered thread in mSpiedThreads, or NoIndex.
+    size_t spiedThreadIndex = mSpiedThreads.IndexOf(threadInfo.tid);
+    if (IsThreadIdRegistered(ProfilerThreadId::FromNumber(threadInfo.tid))) {
+      // This thread id is already officially registered.
+      if (spiedThreadIndex != SpiedThreads::NoIndex) {
+        // This now-registered thread was previously being spied.
+        SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex];
+        PROFILER_MARKER(
+            MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName),
+            PROFILER,
+            MarkerOptions(
+                MarkerThreadId::MainThread(),
+                // Place the end between this update and the previous one.
+                // TODO: Find the real time from the thread registration?
+                MarkerTiming::IntervalEnd(previousSpying +
+                                          (mLastSpying - previousSpying) /
+                                              int64_t(2))),
+            UnregisteredThreadLifetimeMarker, spiedThread.mThreadId,
+            spiedThread.mName, "Thread registered itself");
+
+        // Remove from mSpiedThreads, since it can be profiled normally.
+        mSpiedThreads.RemoveElement(threadInfo.tid);
+      }
+    } else {
+      // This thread id is not registered.
+      if (spiedThreadIndex == SpiedThreads::NoIndex) {
+        // This unregistered thread has not been spied yet, store it now.
+        NS_ConvertUTF16toUTF8 name(threadInfo.name);
+        mSpiedThreads.EmplaceBack(threadInfo.tid, name, threadInfo.cpuTime);
+
+        PROFILER_MARKER(
+            MakeThreadInfoMarkerName(threadInfo.tid, name), PROFILER,
+            MarkerOptions(
+                MarkerThreadId::MainThread(),
+                // Place the start between this update and the previous one (or
+                // the start of this search if it's the first one).
+                MarkerTiming::IntervalStart(
+                    mLastSpying -
+                    (mLastSpying - (previousSpying.IsNull()
+                                        ? unregisteredThreadSearchStart
+                                        : previousSpying)) /
+                        int64_t(2))),
+            UnregisteredThreadLifetimeMarker, threadInfo.tid, name,
+            /* aEndEvent */ "");
+      } else {
+        // This unregistered thread was already being spied, record its work.
+        SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex];
+        int64_t diffCPUTimeNs =
+            int64_t(threadInfo.cpuTime) - int64_t(spiedThread.mCPUTimeNs);
+        // Remember the new CPU time as the baseline for the next call.
+        spiedThread.mCPUTimeNs = threadInfo.cpuTime;
+        // No marker is recorded when the CPU time has not changed.
+        if (diffCPUTimeNs != 0) {
+          PROFILER_MARKER(
+              MakeThreadInfoMarkerName(threadInfo.tid, spiedThread.mName),
+              PROFILER,
+              MarkerOptions(
+                  MarkerThreadId::MainThread(),
+                  MarkerTiming::Interval(previousSpying, mLastSpying)),
+              UnregisteredThreadCPUMarker, threadInfo.tid, diffCPUTimeNs,
+              previousSpying, mLastSpying);
+        }
+      }
+    }
+  }
+
+  // Finally, record the total time taken by this whole function.
+  PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER,
+                       MarkerOptions(MarkerThreadId::MainThread(),
+                                     MarkerTiming::IntervalUntilNowFrom(
+                                         unregisteredThreadSearchStart)),
+                       "Work to discover and record unregistered threads");
+}
+
+// We #include these files directly because it means those files can use
+// declarations from this file trivially.  These provide target-specific
+// implementations of all SamplerThread methods except Run().
+#if defined(GP_OS_windows)
+#  include "platform-win32.cpp"
+#elif defined(GP_OS_darwin)
+#  include "platform-macos.cpp"
+#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+#  include "platform-linux-android.cpp"
+#else
+#  error "bad platform"
+#endif
+
+// END SamplerThread
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN externally visible functions
+
+// Defines GeckoProfilerMallocSizeOf, the measuring function handed to
+// CorePS::AddSizeOf() and ActivePS::SizeOf() in CollectReports().
+MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)
+
+// Memory reporter entry point (main thread only): measures the profiler state
+// and reports it under "explicit/profiler/...".
+NS_IMETHODIMP
+GeckoProfilerReporter::CollectReports(nsIHandleReportCallback* aHandleReport,
+                                      nsISupports* aData, bool aAnonymize) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  size_t profilerStateBytes = 0;
+  size_t lulBytes = 0;
+
+  {
+    // Measurements are taken while `lock` is held; the reports below are made
+    // after it has gone out of scope.
+    PSAutoLock lock;
+
+    if (CorePS::Exists()) {
+      CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profilerStateBytes,
+                        lulBytes);
+    }
+
+    if (ActivePS::Exists(lock)) {
+      const size_t activeStateBytes =
+          ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
+      profilerStateBytes += activeStateBytes;
+    }
+  }
+
+  MOZ_COLLECT_REPORT(
+      "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES,
+      profilerStateBytes,
+      "Memory used by the Gecko Profiler's global state (excluding memory used "
+      "by LUL).");
+
+#if defined(USE_LUL_STACKWALK)
+  MOZ_COLLECT_REPORT(
+      "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulBytes,
+      "Memory used by LUL, a stack unwinder used by the Gecko Profiler.");
+#endif
+
+  return NS_OK;
+}
+
+NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter)
+
+// Translates one feature name into its feature bit(s).
+// "default" expands to the default features (plus the startup extras when
+// aIsStartup is true), restricted to the available features. An unknown name
+// is reported on stdout together with the usage text, and yields 0.
+static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
+  if (strcmp(aFeature, "default") == 0) {
+    uint32_t defaults = DefaultFeatures();
+    if (aIsStartup) {
+      defaults |= StartupExtraDefaultFeatures();
+    }
+    return defaults & AvailableFeatures();
+  }
+
+  // One name comparison is generated for each known feature.
+#define RETURN_IF_FEATURE_NAME_MATCHES(index_, name_, Feature_, text_) \
+  if (strcmp(aFeature, name_) == 0) {                                  \
+    return ProfilerFeature::Feature_;                                  \
+  }
+
+  PROFILER_FOR_EACH_FEATURE(RETURN_IF_FEATURE_NAME_MATCHES)
+
+#undef RETURN_IF_FEATURE_NAME_MATCHES
+
+  printf("\nUnrecognized feature \"%s\".\n\n", aFeature);
+  // The name could be an old feature that is not implemented anymore, so this
+  // is not treated as fatal.
+  PrintUsage();
+  return 0;
+}
+
+// Returns the union of the feature bits obtained by parsing each of the
+// aFeatureCount names stored in aFeatures.
+uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+                                      uint32_t aFeatureCount,
+                                      bool aIsStartup /* = false */) {
+  uint32_t combined = 0;
+  const char** const namesEnd = aFeatures + aFeatureCount;
+  for (const char** name = aFeatures; name != namesEnd; ++name) {
+    combined |= ParseFeature(*name, aIsStartup);
+  }
+  return combined;
+}
+
+// Profiler-side handling of a thread registration, performed with the
+// profiler lock (aLock) held. When a profiling session is active and the
+// thread should be profiled, a ProfiledThreadData is created for it and JS
+// sampling is started if the JS feature is on. Returns the address of the
+// thread's ProfilingStack.
+static ProfilingStack* locked_register_thread(
+    PSLockRef aLock, ThreadRegistry::OffThreadRef aOffThreadRef) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  VTUNE_REGISTER_THREAD(aOffThreadRef.UnlockedConstReaderCRef().Info().Name());
+
+  if (ActivePS::Exists(aLock)) {
+    const ThreadProfilingFeatures featuresForThread =
+        ActivePS::ProfilingFeaturesForThread(
+            aLock, aOffThreadRef.UnlockedConstReaderCRef().Info());
+
+    if (featuresForThread != ThreadProfilingFeatures::NotProfiled) {
+      ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
+          aOffThreadRef.GetLockedRWFromAnyThread();
+
+      // Hand a new ProfiledThreadData over to ActivePS, and attach it (with
+      // the selected features) to the thread registration.
+      ProfiledThreadData* const liveThreadData =
+          ActivePS::AddLiveProfiledThread(
+              aLock, MakeUnique<ProfiledThreadData>(
+                         aOffThreadRef.UnlockedConstReaderCRef().Info()));
+      lockedThreadData->SetProfilingFeaturesAndData(featuresForThread,
+                                                    liveThreadData, aLock);
+
+      if (ActivePS::FeatureJS(aLock)) {
+        lockedThreadData->StartJSSampling(ActivePS::JSFlags(aLock));
+
+        if (ThreadRegistration::LockedRWOnThread* onThreadData =
+                lockedThreadData.GetLockedRWOnThread()) {
+          // The current thread can be polled manually, so that it starts
+          // sampling immediately.
+          onThreadData->PollJSSampling();
+        }
+
+        if (lockedThreadData->GetJSContext()) {
+          liveThreadData->NotifyReceivedJSContext(
+              ActivePS::Buffer(aLock).BufferRangeEnd());
+        }
+      }
+    }
+  }
+
+  return &aOffThreadRef.UnlockedConstReaderAndAtomicRWRef().ProfilingStackRef();
+}
+
+// Notifies the observer service of aTopic (with aSubject as the subject).
+// When called off the main thread, the notification is re-dispatched to the
+// main thread instead of being sent directly.
+static void NotifyObservers(const char* aTopic,
+                            nsISupports* aSubject = nullptr) {
+  if (NS_IsMainThread()) {
+    nsCOMPtr<nsIObserverService> observerService =
+        services::GetObserverService();
+    if (observerService) {
+      observerService->NotifyObservers(aSubject, aTopic, nullptr);
+    }
+    return;
+  }
+
+  // Off the main thread: post a task that performs the notification there.
+  // Calls made both on and off the main thread within a short time may
+  // therefore be observed in a different order than they were issued.
+  // Exact ordering isn't that important at the moment, because these
+  // notifications are only observed in the parent process, where the
+  // profiler_* functions are currently only called on the main thread.
+  nsCOMPtr<nsISupports> subject = aSubject;
+  NS_DispatchToMainThread(NS_NewRunnableFunction(
+      "NotifyObservers",
+      [aTopic, subject] { NotifyObservers(aTopic, subject); }));
+}
+
+// Packages the start settings into an nsIProfilerStartParams, passes it to
+// ProfilerParent::ProfilerStarted(), then sends the "profiler-started"
+// notification. Returns the promise obtained from ProfilerStarted().
+[[nodiscard]] static RefPtr<GenericPromise> NotifyProfilerStarted(
+    const PowerOfTwo32& aCapacity, const Maybe<double>& aDuration,
+    double aInterval, uint32_t aFeatures, const char** aFilters,
+    uint32_t aFilterCount, uint64_t aActiveTabID) {
+  // Copy the C-string filters into owned strings.
+  nsTArray<nsCString> ownedFilters;
+  for (size_t index = 0; index != aFilterCount; ++index) {
+    ownedFilters.AppendElement(aFilters[index]);
+  }
+
+  nsCOMPtr<nsIProfilerStartParams> startParams = new nsProfilerStartParams(
+      aCapacity.Value(), aDuration, aInterval, aFeatures,
+      std::move(ownedFilters), aActiveTabID);
+
+  RefPtr<GenericPromise> promise =
+      ProfilerParent::ProfilerStarted(startParams);
+  NotifyObservers("profiler-started", startParams);
+  return promise;
+}
+
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+                                  double aInterval, uint32_t aFeatures,
+                                  const char** aFilters, uint32_t aFilterCount,
+                                  uint64_t aActiveTabID,
+                                  const Maybe<double>& aDuration);
+
+// Label-entry hook; this basically duplicates AutoProfilerLabel's constructor.
+// Pushes a label frame on the current thread's ProfilingStack and returns that
+// stack, or returns nullptr when no on-thread registration is found.
+static void* MozGlueLabelEnter(const char* aLabel, const char* aDynamicString,
+                               void* aSp) {
+  ThreadRegistration::OnThreadPtr registration =
+      ThreadRegistration::GetOnThreadPtr();
+  if (!registration) {
+    return nullptr;
+  }
+
+  ProfilingStack* const stack =
+      &registration->UnlockedConstReaderAndAtomicRWRef().ProfilingStackRef();
+  stack->pushLabelFrame(aLabel, aDynamicString, aSp,
+                        JS::ProfilingCategoryPair::OTHER);
+  return stack;
+}
+
+// Label-exit hook; this basically duplicates AutoProfilerLabel's destructor.
+// aProfilingStack is the value returned by MozGlueLabelEnter; nothing is
+// popped when it is null.
+static void MozGlueLabelExit(void* aProfilingStack) {
+  if (!aProfilingStack) {
+    return;
+  }
+  static_cast<ProfilingStack*>(aProfilingStack)->pop();
+}
+
+// Splits aString at commas and returns pointers to the non-empty pieces.
+// The pieces live in aStorage, which receives a copy of aString where every
+// comma has been replaced with a null character; the returned pointers are
+// therefore only usable for as long as aStorage keeps that buffer.
+static Vector<const char*> SplitAtCommas(const char* aString,
+                                         UniquePtr<char[]>& aStorage) {
+  const size_t length = strlen(aString);
+  aStorage = MakeUnique<char[]>(length + 1);
+  PodCopy(aStorage.get(), aString, length + 1);
+
+  Vector<const char*> pieces;
+  size_t pieceStart = 0;
+  // `position` goes up to and including `length`, so that the terminating
+  // null character closes the last piece.
+  for (size_t position = 0; position <= length; ++position) {
+    char& current = aStorage[position];
+    if (current != ',' && current != '\0') {
+      continue;
+    }
+
+    // End of a piece: terminate it in place.
+    current = '\0';
+    // Empty pieces are skipped, otherwise ParseFeatures would later complain
+    // about unrecognized features.
+    if (pieceStart != position) {
+      MOZ_RELEASE_ASSERT(pieces.append(&aStorage[pieceStart]));
+    }
+    pieceStart = position + 1;
+  }
+  return pieces;
+}
+
+// If the calling thread's registration does not have an event target yet,
+// resets its main-thread information using NS_GetCurrentThreadNoCreate().
+void profiler_init_threadmanager() {
+  LOG("profiler_init_threadmanager");
+
+  ThreadRegistration::WithOnThreadRef(
+      [](ThreadRegistration::OnThreadRef aRegistration) {
+        aRegistration.WithLockedRWOnThread(
+            [](ThreadRegistration::LockedRWOnThread& aLockedData) {
+              if (aLockedData.GetEventTarget()) {
+                // An event target is already set, nothing to do.
+                return;
+              }
+              aLockedData.ResetMainThread(NS_GetCurrentThreadNoCreate());
+            });
+      });
+}
+
+// Returns a pointer to the first character of `str` that is not a decimal
+// digit, i.e., to the unit suffix following a leading number (or to the
+// terminating null character when there is no suffix).
+// Note that a leading sign or whitespace is not skipped.
+static const char* get_size_suffix(const char* str) {
+  const char* ptr = str;
+
+  // isdigit() must be given a value representable as `unsigned char`; a plain
+  // `char` can be negative (e.g., for non-ASCII bytes), which is undefined
+  // behavior.
+  while (isdigit(static_cast<unsigned char>(*ptr))) {
+    ptr++;
+  }
+
+  return ptr;
+}
+
+void profiler_init(void* aStackTop) {
+  LOG("profiler_init");
+
+  profiler_init_main_thread_id();
+
+  VTUNE_INIT();
+
+  MOZ_RELEASE_ASSERT(!CorePS::Exists());
+
+  if (getenv("MOZ_PROFILER_HELP")) {
+    PrintUsage();
+    exit(0);
+  }
+
+  SharedLibraryInfo::Initialize();
+
+  uint32_t features = DefaultFeatures() & AvailableFeatures();
+
+  UniquePtr<char[]> filterStorage;
+
+  Vector<const char*> filters;
+  MOZ_RELEASE_ASSERT(filters.append("GeckoMain"));
+  MOZ_RELEASE_ASSERT(filters.append("Compositor"));
+  MOZ_RELEASE_ASSERT(filters.append("Renderer"));
+  MOZ_RELEASE_ASSERT(filters.append("DOM Worker"));
+
+  PowerOfTwo32 capacity = PROFILER_DEFAULT_ENTRIES;
+  Maybe<double> duration = Nothing();
+  double interval = PROFILER_DEFAULT_INTERVAL;
+  uint64_t activeTabID = PROFILER_DEFAULT_ACTIVE_TAB_ID;
+
+  ThreadRegistration::RegisterThread(kMainThreadName, aStackTop);
+
+  {
+    PSAutoLock lock;
+
+    // We've passed the possible failure point. Instantiate CorePS, which
+    // indicates that the profiler has initialized successfully.
+    CorePS::Create(lock);
+
+    // Make sure threads already in the ThreadRegistry (like the main thread)
+    // get registered in CorePS as well.
+    {
+      ThreadRegistry::LockedRegistry lockedRegistry;
+      for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
+        locked_register_thread(lock, offThreadRef);
+      }
+    }
+
+    // Platform-specific initialization.
+    PlatformInit(lock);
+
+#if defined(GP_OS_android)
+    if (jni::IsAvailable()) {
+      GeckoJavaSampler::Init();
+    }
+#endif
+
+    // (Linux-only) We could create CorePS::mLul and read unwind info into it
+    // at this point. That would match the lifetime implied by destruction of
+    // it in profiler_shutdown() just below. However, that gives a big delay on
+    // startup, even if no profiling is actually to be done. So, instead, it is
+    // created on demand at the first call to PlatformStart().
+
+    const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
+    if (!startupEnv || startupEnv[0] == '\0' ||
+        ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
+          startupEnv[0] == 'n') &&
+         startupEnv[1] == '\0')) {
+      return;
+    }
+
+    LOG("- MOZ_PROFILER_STARTUP is set");
+
+    // Startup default capacity may be different.
+    capacity = PROFILER_DEFAULT_STARTUP_ENTRIES;
+
+    const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
+    if (startupCapacity && startupCapacity[0] != '\0') {
+      errno = 0;
+      long capacityLong = strtol(startupCapacity, nullptr, 10);
+      std::string_view sizeSuffix = get_size_suffix(startupCapacity);
+
+      if (sizeSuffix == "KB") {
+        capacityLong *= 1000 / scBytesPerEntry;
+      } else if (sizeSuffix == "KiB") {
+        capacityLong *= 1024 / scBytesPerEntry;
+      } else if (sizeSuffix == "MB") {
+        capacityLong *= (1000 * 1000) / scBytesPerEntry;
+      } else if (sizeSuffix == "MiB") {
+        capacityLong *= (1024 * 1024) / scBytesPerEntry;
+      } else if (sizeSuffix == "GB") {
+        capacityLong *= (1000 * 1000 * 1000) / scBytesPerEntry;
+      } else if (sizeSuffix == "GiB") {
+        capacityLong *= (1024 * 1024 * 1024) / scBytesPerEntry;
+      } else if (!sizeSuffix.empty()) {
+        LOG("- MOZ_PROFILER_STARTUP_ENTRIES unit must be one of the "
+            "following: KB, KiB, MB, MiB, GB, GiB");
+        PrintUsage();
+        exit(1);
+      }
+
+      // `long` could be 32 or 64 bits, so we force a 64-bit comparison with
+      // the maximum 32-bit signed number (as more than that is clamped down to
+      // 2^31 anyway).
+      if (errno == 0 && capacityLong > 0 &&
+          static_cast<uint64_t>(capacityLong) <=
+              static_cast<uint64_t>(INT32_MAX)) {
+        capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries(
+            static_cast<uint32_t>(capacityLong)));
+        LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
+      } else {
+        LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
+            startupCapacity);
+        PrintUsage();
+        exit(1);
+      }
+    }
+
+    const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
+    if (startupDuration && startupDuration[0] != '\0') {
+      errno = 0;
+      double durationVal = PR_strtod(startupDuration, nullptr);
+      if (errno == 0 && durationVal >= 0.0) {
+        if (durationVal > 0.0) {
+          duration = Some(durationVal);
+        }
+        LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", durationVal);
+      } else {
+        LOG("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
+            startupDuration);
+        PrintUsage();
+        exit(1);
+      }
+    }
+
+    const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
+    if (startupInterval && startupInterval[0] != '\0') {
+      errno = 0;
+      interval = PR_strtod(startupInterval, nullptr);
+      if (errno == 0 && interval > 0.0 && interval <= PROFILER_MAX_INTERVAL) {
+        LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
+      } else {
+        LOG("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
+            startupInterval);
+        PrintUsage();
+        exit(1);
+      }
+    }
+
+    features |= StartupExtraDefaultFeatures() & AvailableFeatures();
+
+    const char* startupFeaturesBitfield =
+        getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
+    if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
+      errno = 0;
+      features = strtol(startupFeaturesBitfield, nullptr, 10);
+      if (errno == 0) {
+        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
+      } else {
+        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
+            startupFeaturesBitfield);
+        PrintUsage();
+        exit(1);
+      }
+    } else {
+      const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
+      if (startupFeatures) {
+        // Interpret startupFeatures as a list of feature strings, separated by
+        // commas.
+        UniquePtr<char[]> featureStringStorage;
+        Vector<const char*> featureStringArray =
+            SplitAtCommas(startupFeatures, featureStringStorage);
+        features = ParseFeaturesFromStringArray(featureStringArray.begin(),
+                                                featureStringArray.length(),
+                                                /* aIsStartup */ true);
+        LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
+      }
+    }
+
+    const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
+    if (startupFilters && startupFilters[0] != '\0') {
+      filters = SplitAtCommas(startupFilters, filterStorage);
+      LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
+
+      if (mozilla::profiler::detail::FiltersExcludePid(filters)) {
+        LOG(" -> This process is excluded and won't be profiled");
+        return;
+      }
+    }
+
+    const char* startupActiveTabID =
+        getenv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID");
+    if (startupActiveTabID && startupActiveTabID[0] != '\0') {
+      std::istringstream iss(startupActiveTabID);
+      iss >> activeTabID;
+      if (!iss.fail()) {
+        LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID = %" PRIu64, activeTabID);
+      } else {
+        LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID not a valid "
+            "uint64_t: %s",
+            startupActiveTabID);
+        PrintUsage();
+        exit(1);
+      }
+    }
+
+    locked_profiler_start(lock, capacity, interval, features, filters.begin(),
+                          filters.length(), activeTabID, duration);
+  }
+
+  // The GeckoMain thread registration happened too early to record a marker,
+  // so let's record it again now.
+  profiler_mark_thread_awake();
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  // Start counting memory allocations (outside of lock because this may call
+  // profiler_add_sampled_counter which would attempt to take the lock.)
+  ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
+#endif
+
+  invoke_profiler_state_change_callbacks(ProfilingState::Started);
+
+  // We do this with gPSMutex unlocked. The comment in profiler_stop() explains
+  // why.
+  Unused << NotifyProfilerStarted(capacity, duration, interval, features,
+                                  filters.begin(), filters.length(), 0);
+}
+
+static void locked_profiler_save_profile_to_file(
+    PSLockRef aLock, const char* aFilename,
+    const PreRecordedMetaInformation& aPreRecordedMetaInformation,
+    bool aIsShuttingDown);
+
+static SamplerThread* locked_profiler_stop(PSLockRef aLock);
+
+// Shuts the profiler down at process exit.
+//
+// Release-asserts that it runs on the main thread and that CorePS exists.
+// If the profiler is active and MOZ_PROFILER_SHUTDOWN names a file, the
+// profile is saved there first. With IsFastShutdown::Yes the function then
+// returns immediately: the profiler is not stopped and CorePS is not
+// destroyed. Otherwise the profiler is stopped (if active), CorePS is
+// destroyed, and the main thread registration is undone.
+void profiler_shutdown(IsFastShutdown aIsFastShutdown) {
+  LOG("profiler_shutdown");
+
+  VTUNE_SHUTDOWN();
+
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // State-change callbacks are invoked before taking the lock below.
+  if (profiler_is_active()) {
+    invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
+  }
+  invoke_profiler_state_change_callbacks(ProfilingState::ShuttingDown);
+
+  // Gathered before the lock is taken; only consumed if a profile is saved.
+  const auto preRecordedMetaInformation = PreRecordMetaInformation();
+
+  ProfilerParent::ProfilerWillStopIfStarted();
+
+  // If the profiler is active we must get a handle to the SamplerThread before
+  // ActivePS is destroyed, in order to delete it.
+  SamplerThread* samplerThread = nullptr;
+  {
+    PSAutoLock lock;
+
+    // Save the profile on shutdown if requested.
+    if (ActivePS::Exists(lock)) {
+      const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
+      if (filename && filename[0] != '\0') {
+        locked_profiler_save_profile_to_file(lock, filename,
+                                             preRecordedMetaInformation,
+                                             /* aIsShuttingDown */ true);
+      }
+      // Fast shutdown: skip all teardown once the optional save is done.
+      if (aIsFastShutdown == IsFastShutdown::Yes) {
+        return;
+      }
+
+      samplerThread = locked_profiler_stop(lock);
+    } else if (aIsFastShutdown == IsFastShutdown::Yes) {
+      return;
+    }
+
+    CorePS::Destroy(lock);
+  }
+
+  // We do these operations with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why.
+  if (samplerThread) {
+    Unused << ProfilerParent::ProfilerStopped();
+    NotifyObservers("profiler-stopped");
+    delete samplerThread;
+  }
+
+  // Reverse the registration done in profiler_init.
+  ThreadRegistration::UnregisterThread();
+}
+
+// Streams this process's profile into aWriter as one complete JSON document.
+//
+// The document consists of the output of
+// profiler_stream_json_for_this_process() followed by an empty "processes"
+// array. Returns false if streaming reported an error (in which case the
+// writer is left un-ended) or if the writer has failed; true otherwise.
+// The whole 0%-100% range of aProgressLogger is delegated to the streaming
+// call.
+static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
+                                     double aSinceTime, bool aIsShuttingDown,
+                                     ProfilerCodeAddressService* aService,
+                                     mozilla::ProgressLogger aProgressLogger) {
+  LOG("WriteProfileToJSONWriter");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  aWriter.Start();
+  {
+    auto rv = profiler_stream_json_for_this_process(
+        aWriter, aSinceTime, aIsShuttingDown, aService,
+        aProgressLogger.CreateSubLoggerFromTo(
+            0_pc,
+            "WriteProfileToJSONWriter: "
+            "profiler_stream_json_for_this_process started",
+            100_pc,
+            "WriteProfileToJSONWriter: "
+            "profiler_stream_json_for_this_process done"));
+
+    // Bail out without closing the document when streaming failed.
+    if (rv.isErr()) {
+      return false;
+    }
+
+    // Don't include profiles from other processes because this is a
+    // synchronous function.
+    aWriter.StartArrayProperty("processes");
+    aWriter.EndArray();
+  }
+  aWriter.End();
+  return !aWriter.Failed();
+}
+
+// Records the process name in CorePS and, when aETLDplus1 is non-null, the
+// eTLD+1 as well. Both stores happen under the profiler lock.
+void profiler_set_process_name(const nsACString& aProcessName,
+                               const nsACString* aETLDplus1) {
+  const bool hasETLDplus1 = aETLDplus1 != nullptr;
+  LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.Data(),
+      hasETLDplus1 ? aETLDplus1->Data() : "<none>");
+
+  PSAutoLock lock;
+  CorePS::SetProcessName(lock, aProcessName);
+  if (!hasETLDplus1) {
+    return;
+  }
+  CorePS::SetETLDplus1(lock, *aETLDplus1);
+}
+
+// Serializes this process's profile to JSON and returns a copy of the text,
+// or nullptr when WriteProfileToJSONWriter() reports failure.
+UniquePtr<char[]> profiler_get_profile(double aSinceTime,
+                                       bool aIsShuttingDown) {
+  LOG("profiler_get_profile");
+
+  UniquePtr<ProfilerCodeAddressService> codeAddressService =
+      profiler_code_address_service_for_presymbolication();
+
+  FailureLatchSource latch;
+  SpliceableChunkedJSONWriter writer{latch};
+  const bool written =
+      WriteProfileToJSONWriter(writer, aSinceTime, aIsShuttingDown,
+                               codeAddressService.get(), ProgressLogger{});
+  if (written) {
+    return writer.ChunkedWriteFunc().CopyData();
+  }
+  return nullptr;
+}
+
+// Streams this process's profile into the caller-provided JSON writer.
+//
+// Returns the result of WriteProfileToJSONWriter(): false on failure, true
+// otherwise. The 0.1%-99.9% range of aProgressLogger is delegated to that
+// call.
+[[nodiscard]] bool profiler_get_profile_json(
+    SpliceableChunkedJSONWriter& aSpliceableChunkedJSONWriter,
+    double aSinceTime, bool aIsShuttingDown,
+    mozilla::ProgressLogger aProgressLogger) {
+  LOG("profiler_get_profile_json");
+
+  // Kept alive for the duration of the streaming call below.
+  UniquePtr<ProfilerCodeAddressService> service =
+      profiler_code_address_service_for_presymbolication();
+
+  return WriteProfileToJSONWriter(
+      aSpliceableChunkedJSONWriter, aSinceTime, aIsShuttingDown, service.get(),
+      aProgressLogger.CreateSubLoggerFromTo(
+          0.1_pc, "profiler_get_profile_json: WriteProfileToJSONWriter started",
+          99.9_pc, "profiler_get_profile_json: WriteProfileToJSONWriter done"));
+}
+
+// Reports the settings of the current profiling session through the given
+// out-pointers.
+//
+// All out-pointers must be non-null; if any of them is null, a warning is
+// emitted and nothing is written. When the profiler is not active, the
+// numeric outputs are set to 0, *aDuration to Nothing() and *aFilters is
+// cleared. When it is active, the outputs mirror the ActivePS values.
+//
+// The pointers stored in *aFilters refer to strings owned by ActivePS.
+// NOTE(review): the lock is released on return, so callers presumably must
+// not use these pointers after the profiler is stopped or restarted --
+// confirm against callers.
+void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
+                               double* aInterval, uint32_t* aFeatures,
+                               Vector<const char*>* aFilters,
+                               uint64_t* aActiveTabID) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Every out-pointer is dereferenced below, so validate all of them,
+  // including aActiveTabID.
+  if (NS_WARN_IF(!aCapacity) || NS_WARN_IF(!aDuration) ||
+      NS_WARN_IF(!aInterval) || NS_WARN_IF(!aFeatures) ||
+      NS_WARN_IF(!aFilters) || NS_WARN_IF(!aActiveTabID)) {
+    return;
+  }
+
+  PSAutoLock lock;
+
+  if (!ActivePS::Exists(lock)) {
+    *aCapacity = 0;
+    *aDuration = Nothing();
+    *aInterval = 0;
+    *aFeatures = 0;
+    *aActiveTabID = 0;
+    aFilters->clear();
+    return;
+  }
+
+  *aCapacity = ActivePS::Capacity(lock).Value();
+  *aDuration = ActivePS::Duration(lock);
+  *aInterval = ActivePS::Interval(lock);
+  *aFeatures = ActivePS::Features(lock);
+  *aActiveTabID = ActivePS::ActiveTabID(lock);
+
+  const Vector<std::string>& filters = ActivePS::Filters(lock);
+  MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
+  for (uint32_t i = 0; i < filters.length(); ++i) {
+    (*aFilters)[i] = filters[i].c_str();
+  }
+}
+
+// Returns the active session's controlled chunk manager, or nullptr (with a
+// warning) when the profiler is not active.
+ProfileBufferControlledChunkManager* profiler_get_controlled_chunk_manager() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  return NS_WARN_IF(!ActivePS::Exists(lock))
+             ? nullptr
+             : &ActivePS::ControlledChunkManager(lock);
+}
+
+namespace mozilla {
+
+// Hands aSetEnv the MOZ_PROFILER_* key/value pairs that describe the current
+// profiling session, one call per variable.
+//
+// When the profiler is inactive only MOZ_PROFILER_STARTUP="" is reported.
+// Otherwise MOZ_PROFILER_STARTUP="1" is reported, followed by the entries,
+// interval, features bitfield, filters and active tab ID of the session.
+void GetProfilerEnvVarsForChildProcess(
+    std::function<void(const char* key, const char* value)>&& aSetEnv) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  const bool profilerIsActive = ActivePS::Exists(lock);
+  aSetEnv("MOZ_PROFILER_STARTUP", profilerIsActive ? "1" : "");
+  if (!profilerIsActive) {
+    return;
+  }
+
+  // A defined MOZ_PROFILER_SHUTDOWN is blanked out for children, so that they
+  // don't attempt to write over that file.
+  if (getenv("MOZ_PROFILER_SHUTDOWN") != nullptr) {
+    aSetEnv("MOZ_PROFILER_SHUTDOWN", "");
+  }
+
+  // Hidden option to stop Base Profiler, mostly due to Talos intermittents,
+  // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3
+  // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325.
+  if (getenv("MOZ_PROFILER_STARTUP_NO_BASE") != nullptr) {
+    aSetEnv("MOZ_PROFILER_STARTUP_NO_BASE", "1");
+  }
+
+  const unsigned entryCount = unsigned(ActivePS::Capacity(lock).Value());
+  auto entriesText = Smprintf("%u", entryCount);
+  aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", entriesText.get());
+
+  // The interval goes through AppendFloat rather than Smprintf("%f"): %f uses
+  // a locale-dependent decimal separator, whereas the reader parses the value
+  // with strtod, which only accepts the period character. AppendFloat always
+  // uses the period character.
+  nsCString intervalText;
+  intervalText.AppendFloat(ActivePS::Interval(lock));
+  aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalText.get());
+
+  auto featuresText = Smprintf("%d", ActivePS::Features(lock));
+  aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresText.get());
+
+  // Join the filters with commas, without a leading or trailing separator.
+  std::string joinedFilters;
+  bool needsSeparator = false;
+  for (const std::string& filter : ActivePS::Filters(lock)) {
+    if (needsSeparator) {
+      joinedFilters += ",";
+    }
+    joinedFilters += filter;
+    needsSeparator = true;
+  }
+  aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", joinedFilters.c_str());
+
+  auto activeTabIDText = Smprintf("%" PRIu64, ActivePS::ActiveTabID(lock));
+  aSetEnv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID", activeTabIDText.get());
+}
+
+}  // namespace mozilla
+
+// Stores an exit profile in the active session. Must run on the main thread;
+// the profile is dropped when the profiler is not active.
+void profiler_received_exit_profile(const nsACString& aExitProfile) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    ActivePS::AddExitProfile(lock, aExitProfile);
+  }
+}
+
+// Moves the exit profiles out of the active session and returns them; yields
+// an empty vector when the profiler is not active.
+Vector<nsCString> profiler_move_exit_profiles() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (!ActivePS::Exists(lock)) {
+    return Vector<nsCString>();
+  }
+  return ActivePS::MoveExitProfiles(lock);
+}
+
+// Writes the active session's profile, as JSON, to the file named aFilename.
+//
+// The first "%p" in aFilename, if any, is replaced with the current process
+// id. The output contains this process's profile followed by a "processes"
+// array holding the exit profiles moved out of ActivePS (empty ones and those
+// starting with '*' are left out). Requires the profiler to be active
+// (release-asserted). If the file cannot be opened, the failure is logged,
+// nothing is written, and the exit profiles are left in place.
+static void locked_profiler_save_profile_to_file(
+    PSLockRef aLock, const char* aFilename,
+    const PreRecordedMetaInformation& aPreRecordedMetaInformation,
+    bool aIsShuttingDown = false) {
+  nsAutoCString processedFilename(aFilename);
+  const auto processInsertionIndex = processedFilename.Find("%p");
+  if (processInsertionIndex != kNotFound) {
+    // Replace "%p" with the process id.
+    nsAutoCString process;
+    process.AppendInt(profiler_current_process_id().ToNumber());
+    processedFilename.Replace(processInsertionIndex, 2, process);
+    LOG("locked_profiler_save_profile_to_file(\"%s\" -> \"%s\")", aFilename,
+        processedFilename.get());
+  } else {
+    LOG("locked_profiler_save_profile_to_file(\"%s\")", aFilename);
+  }
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  std::ofstream stream;
+  stream.open(processedFilename.get());
+  if (!stream.is_open()) {
+    // Report the failure in the log rather than silently producing no file.
+    LOG("locked_profiler_save_profile_to_file: could not open \"%s\"",
+        processedFilename.get());
+    return;
+  }
+
+  {
+    // The writers live in their own scope so that they are gone before the
+    // stream is closed.
+    OStreamJSONWriteFunc sw(stream);
+    SpliceableJSONWriter w(sw, FailureLatchInfallibleSource::Singleton());
+    w.Start();
+    {
+      Unused << locked_profiler_stream_json_for_this_process(
+          aLock, w, /* sinceTime */ 0, aPreRecordedMetaInformation,
+          aIsShuttingDown, nullptr, ProgressLogger{});
+
+      w.StartArrayProperty("processes");
+      Vector<nsCString> exitProfiles = ActivePS::MoveExitProfiles(aLock);
+      for (auto& exitProfile : exitProfiles) {
+        // Skip empty profiles and those flagged with a leading '*'.
+        if (!exitProfile.IsEmpty() && exitProfile[0] != '*') {
+          w.Splice(exitProfile);
+        }
+      }
+      w.EndArray();
+    }
+    w.End();
+  }
+
+  stream.close();
+}
+
+// Saves the current profile to aFilename. Nothing is written when the
+// profiler is not active.
+void profiler_save_profile_to_file(const char* aFilename) {
+  LOG("profiler_save_profile_to_file(%s)", aFilename);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Collected before the profiler lock is taken.
+  const auto metaInformation = PreRecordMetaInformation();
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    locked_profiler_save_profile_to_file(lock, aFilename, metaInformation);
+  }
+}
+
+// Returns the bitfield produced by AvailableFeatures(). Requires CorePS to
+// exist (release-asserted).
+uint32_t profiler_get_available_features() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  const uint32_t availableFeatures = AvailableFeatures();
+  return availableFeatures;
+}
+
+// Returns information about the active session's buffer, or Nothing() when
+// the profiler is not active.
+Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
+  }
+
+  return Nothing();
+}
+
+// Calls PollJSSampling() on the calling thread's thread data, reached through
+// ThreadRegistration::WithOnThreadRef() and WithLockedRWOnThread().
+// NOTE(review): presumably a no-op when the calling thread is not registered
+// with the profiler -- confirm in ThreadRegistration.
+static void PollJSSamplingForCurrentThread() {
+  ThreadRegistration::WithOnThreadRef(
+      [](ThreadRegistration::OnThreadRef aOnThreadRef) {
+        aOnThreadRef.WithLockedRWOnThread(
+            [](ThreadRegistration::LockedRWOnThread& aThreadData) {
+              aThreadData.PollJSSampling();
+            });
+      });
+}
+
+// Asks the main thread to run PollJSSamplingForCurrentThread() soon.
+//
+// A profiler started from a background thread cannot synchronously call
+// PollJSSampling on the main thread's ThreadInfo, and the main thread would
+// otherwise only poll at its next JS interrupt callback. All JS executed
+// between profiler_start() and that interrupt would then run with JS sampling
+// disabled, leaving no JS function information for that period.
+// Dispatching a runnable to the main thread gets JS sampling going as early
+// as possible. The runnable may lose the race with the next JS interrupt;
+// that's fine, because PollJSSamplingForCurrentThread() is immune to
+// redundant calls.
+static void TriggerPollJSSamplingOnMainThread() {
+  nsCOMPtr<nsIThread> mainThread;
+  const nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread));
+  if (NS_FAILED(rv) || !mainThread) {
+    // No main thread to dispatch to; nothing to do.
+    return;
+  }
+
+  nsCOMPtr<nsIRunnable> pollTask =
+      NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread",
+                             []() { PollJSSamplingForCurrentThread(); });
+  SchedulerGroup::Dispatch(TaskCategory::Other, pollTask.forget());
+}
+
+// Starts a profiling session; the caller must hold the profiler lock (aLock).
+//
+// Requires CorePS to exist and ActivePS not to exist (release-asserted).
+// In order, this: takes over the Base Profiler's chunk manager and start time
+// if the Base Profiler is active, then stops it; sanitizes capacity, duration
+// and interval; creates ActivePS; sets up profiling for every registered
+// thread that should be profiled; registers the mozglue label hooks; starts
+// the optional Java sampler, native allocation tracking and audio callback
+// tracing; and finally activates RacyFeatures.
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+                                  double aInterval, uint32_t aFeatures,
+                                  const char** aFilters, uint32_t aFilterCount,
+                                  uint64_t aActiveTabID,
+                                  const Maybe<double>& aDuration) {
+  TimeStamp profilingStartTime = TimeStamp::Now();
+
+  if (LOG_TEST) {
+    LOG("locked_profiler_start");
+    LOG("- capacity  = %u", unsigned(aCapacity.Value()));
+    LOG("- duration  = %.2f", aDuration ? *aDuration : -1);
+    LOG("- interval = %.2f", aInterval);
+    LOG("- tab ID = %" PRIu64, aActiveTabID);
+
+#define LOG_FEATURE(n_, str_, Name_, desc_)     \
+  if (ProfilerFeature::Has##Name_(aFeatures)) { \
+    LOG("- feature  = %s", str_);               \
+  }
+
+    PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)
+
+#undef LOG_FEATURE
+
+    for (uint32_t i = 0; i < aFilterCount; i++) {
+      LOG("- threads  = %s", aFilters[i]);
+    }
+  }
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));
+
+  // Do this before the Base Profiler is stopped, to keep the existing buffer
+  // (if any) alive for our use.
+  if (NS_IsMainThread()) {
+    mozilla::base_profiler_markers_detail::EnsureBufferForMainThreadAddMarker();
+  } else {
+    NS_DispatchToMainThread(
+        NS_NewRunnableFunction("EnsureBufferForMainThreadAddMarker",
+                               &mozilla::base_profiler_markers_detail::
+                                   EnsureBufferForMainThreadAddMarker));
+  }
+
+  UniquePtr<ProfileBufferChunkManagerWithLocalLimit> baseChunkManager;
+  bool profilersHandOver = false;
+  if (baseprofiler::profiler_is_active()) {
+    // Note that we still hold the lock, so the sampler cannot run yet and
+    // interact negatively with the still-active BaseProfiler sampler.
+    // Assume that Base Profiler is active because of MOZ_PROFILER_STARTUP.
+
+    // Take ownership of the chunk manager from the Base Profiler, to extend its
+    // lifetime during the new Gecko Profiler session. Since we're using the
+    // same core buffer, all the base profiler data remains.
+    baseChunkManager = baseprofiler::detail::ExtractBaseProfilerChunkManager();
+
+    if (baseChunkManager) {
+      profilersHandOver = true;
+      if (const TimeStamp baseProfilingStartTime =
+              baseprofiler::detail::GetProfilingStartTime();
+          !baseProfilingStartTime.IsNull()) {
+        profilingStartTime = baseProfilingStartTime;
+      }
+
+      BASE_PROFILER_MARKER_TEXT(
+          "Profilers handover", PROFILER, MarkerTiming::IntervalStart(),
+          "Transition from Base to Gecko Profiler, some data may be missing");
+    }
+
+    // Now stop Base Profiler (BP), as further recording will be ignored anyway,
+    // and so that it won't clash with Gecko Profiler (GP) sampling starting
+    // after the lock is dropped.
+    // On Linux this is especially important to do before creating the GP
+    // sampler, because the BP sampler may send a signal (to stop threads to be
+    // sampled), which the GP would intercept before its own initialization is
+    // complete and ready to handle such signals.
+    // Note that even though `profiler_stop()` doesn't immediately destroy and
+    // join the sampler thread, it safely deactivates it in such a way that the
+    // thread will soon exit without doing any actual work.
+    // TODO: Allow non-sampling profiling to continue.
+    // TODO: Re-start BP after GP shutdown, to capture post-XPCOM shutdown.
+    baseprofiler::profiler_stop();
+  }
+
+#if defined(GP_PLAT_amd64_windows)
+  InitializeWin64ProfilerHooks();
+#endif
+
+  // Fall back to the default values if the passed-in values are unreasonable.
+  // We want to be able to store at least one full stack.
+  PowerOfTwo32 capacity =
+      (aCapacity.Value() >=
+       ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry)
+          ? aCapacity
+          : PROFILER_DEFAULT_ENTRIES;
+  Maybe<double> duration = aDuration;
+
+  // A non-positive duration means "no duration limit".
+  if (aDuration && *aDuration <= 0) {
+    duration = Nothing();
+  }
+
+  double interval = aInterval > 0 ? aInterval : PROFILER_DEFAULT_INTERVAL;
+
+  ActivePS::Create(aLock, profilingStartTime, capacity, interval, aFeatures,
+                   aFilters, aFilterCount, aActiveTabID, duration,
+                   std::move(baseChunkManager));
+
+  // ActivePS::Create can only succeed or crash.
+  MOZ_ASSERT(ActivePS::Exists(aLock));
+
+  // Set up profiling for each registered thread, if appropriate.
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  bool isMainThreadBeingProfiled = false;
+#endif
+  ThreadRegistry::LockedRegistry lockedRegistry;
+  for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
+    const ThreadRegistrationInfo& info =
+        offThreadRef.UnlockedConstReaderCRef().Info();
+
+    ThreadProfilingFeatures threadProfilingFeatures =
+        ActivePS::ProfilingFeaturesForThread(aLock, info);
+    if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) {
+      ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
+          offThreadRef.GetLockedRWFromAnyThread();
+      ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
+          aLock, MakeUnique<ProfiledThreadData>(info));
+      lockedThreadData->SetProfilingFeaturesAndData(threadProfilingFeatures,
+                                                    profiledThreadData, aLock);
+      lockedThreadData->GetNewCpuTimeInNs();
+      if (ActivePS::FeatureJS(aLock)) {
+        lockedThreadData->StartJSSampling(ActivePS::JSFlags(aLock));
+        if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread =
+                lockedThreadData.GetLockedRWOnThread();
+            lockedRWOnThread) {
+          // We can manually poll the current thread so it starts sampling
+          // immediately.
+          lockedRWOnThread->PollJSSampling();
+        } else if (info.IsMainThread()) {
+          // Dispatch a runnable to the main thread to call
+          // PollJSSampling(), so that we don't have to wait for the next JS
+          // interrupt callback in order to start profiling JS.
+          TriggerPollJSSamplingOnMainThread();
+        }
+      }
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+      if (info.IsMainThread()) {
+        isMainThreadBeingProfiled = true;
+      }
+#endif
+      lockedThreadData->ReinitializeOnResume();
+      if (ActivePS::FeatureJS(aLock) && lockedThreadData->GetJSContext()) {
+        profiledThreadData->NotifyReceivedJSContext(0);
+      }
+    }
+  }
+
+  // Setup support for pushing/popping labels in mozglue.
+  RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit);
+
+#if defined(GP_OS_android)
+  if (ActivePS::FeatureJava(aLock)) {
+    int javaInterval = interval;
+    // Java sampling doesn't accurately keep up with the sampling rate that is
+    // lower than 1ms.
+    if (javaInterval < 1) {
+      javaInterval = 1;
+    }
+
+    JNIEnv* env = jni::GetEnvForThread();
+    const auto& filters = ActivePS::Filters(aLock);
+    jni::ObjectArray::LocalRef javaFilters =
+        jni::ObjectArray::New<jni::String>(filters.length());
+    for (size_t i = 0; i < filters.length(); i++) {
+      javaFilters->SetElement(i, jni::StringParam(filters[i].data(), env));
+    }
+
+    // Send the interval-relative entry count, but we have 100000 hard cap in
+    // the java code, it can't be more than that.
+    java::GeckoJavaSampler::Start(
+        javaFilters, javaInterval,
+        std::round((double)(capacity.Value()) * interval /
+                   (double)(javaInterval)));
+  }
+#endif
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  if (ActivePS::FeatureNativeAllocations(aLock)) {
+    if (isMainThreadBeingProfiled) {
+      mozilla::profiler::enable_native_allocations();
+    } else {
+      NS_WARNING(
+          "The nativeallocations feature is turned on, but the main thread is "
+          "not being profiled. The allocations are only stored on the main "
+          "thread.");
+    }
+  }
+#endif
+
+  if (ProfilerFeature::HasAudioCallbackTracing(aFeatures)) {
+    StartAudioCallbackTracing();
+  }
+
+  // At the very end, set up RacyFeatures.
+  RacyFeatures::SetActive(ActivePS::Features(aLock));
+
+  // Close the "Profilers handover" interval opened above.
+  if (profilersHandOver) {
+    PROFILER_MARKER_UNTYPED("Profilers handover", PROFILER,
+                            MarkerTiming::IntervalEnd());
+  }
+}
+
+// Starts the profiler with the given settings, first stopping and discarding
+// any session that is already running.
+//
+// Returns the promise produced by NotifyProfilerStarted(). Notifications for
+// the stopped session (if any) and deletion of its SamplerThread happen after
+// the lock has been released.
+RefPtr<GenericPromise> profiler_start(PowerOfTwo32 aCapacity, double aInterval,
+                                      uint32_t aFeatures, const char** aFilters,
+                                      uint32_t aFilterCount,
+                                      uint64_t aActiveTabID,
+                                      const Maybe<double>& aDuration) {
+  LOG("profiler_start");
+
+  ProfilerParent::ProfilerWillStopIfStarted();
+
+  // Non-null only if a previous session had to be stopped.
+  SamplerThread* samplerThread = nullptr;
+  {
+    PSAutoLock lock;
+
+    // Initialize if necessary.
+    // NOTE(review): profiler_init() is invoked here while `lock` is held --
+    // confirm that it cannot try to re-acquire gPSMutex.
+    if (!CorePS::Exists()) {
+      profiler_init(nullptr);
+    }
+
+    // Reset the current state if the profiler is running.
+    if (ActivePS::Exists(lock)) {
+      // Note: Not invoking callbacks with ProfilingState::Stopping, because
+      // we're under lock, and also it would not be useful: Any profiling data
+      // will be discarded, and we're immediately restarting the profiler below
+      // and then notifying ProfilingState::Started.
+      samplerThread = locked_profiler_stop(lock);
+    }
+
+    locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                          aFilterCount, aActiveTabID, aDuration);
+  }
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  // Start counting memory allocations (outside of lock because this may call
+  // profiler_add_sampled_counter which would attempt to take the lock.)
+  ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
+#endif
+
+  invoke_profiler_state_change_callbacks(ProfilingState::Started);
+
+  // We do these operations with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why.
+  if (samplerThread) {
+    Unused << ProfilerParent::ProfilerStopped();
+    NotifyObservers("profiler-stopped");
+    delete samplerThread;
+  }
+  return NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures,
+                               aFilters, aFilterCount, aActiveTabID);
+}
+
+// Makes sure the profiler is running with exactly the given settings.
+//
+// If it is already active with equal settings (per ActivePS::Equals), nothing
+// is restarted and no "started" notification is sent. If it is active with
+// different settings, it is stopped and restarted; if it is stopped, it is
+// started.
+// NOTE(review): unlike profiler_start(), this path does not call
+// install_memory_hooks() -- confirm whether that is intentional.
+void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
+                             uint32_t aFeatures, const char** aFilters,
+                             uint32_t aFilterCount, uint64_t aActiveTabID,
+                             const Maybe<double>& aDuration) {
+  LOG("profiler_ensure_started");
+
+  ProfilerParent::ProfilerWillStopIfStarted();
+
+  // Whether a new session was started, i.e. "started" must be notified.
+  bool startedProfiler = false;
+  // Non-null only if a previous session had to be stopped.
+  SamplerThread* samplerThread = nullptr;
+  {
+    PSAutoLock lock;
+
+    // Initialize if necessary.
+    if (!CorePS::Exists()) {
+      profiler_init(nullptr);
+    }
+
+    if (ActivePS::Exists(lock)) {
+      // The profiler is active.
+      if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
+                            aFilters, aFilterCount, aActiveTabID)) {
+        // Stop and restart with different settings.
+        // Note: Not invoking callbacks with ProfilingState::Stopping, because
+        // we're under lock, and also it would not be useful: Any profiling data
+        // will be discarded, and we're immediately restarting the profiler
+        // below and then notifying ProfilingState::Started.
+        samplerThread = locked_profiler_stop(lock);
+        locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                              aFilterCount, aActiveTabID, aDuration);
+        startedProfiler = true;
+      }
+    } else {
+      // The profiler is stopped.
+      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                            aFilterCount, aActiveTabID, aDuration);
+      startedProfiler = true;
+    }
+  }
+
+  // We do these operations with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why.
+  if (samplerThread) {
+    Unused << ProfilerParent::ProfilerStopped();
+    NotifyObservers("profiler-stopped");
+    delete samplerThread;
+  }
+
+  if (startedProfiler) {
+    invoke_profiler_state_change_callbacks(ProfilingState::Started);
+
+    Unused << NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures,
+                                    aFilters, aFilterCount, aActiveTabID);
+  }
+}
+
+// Stops the active profiling session; the caller must hold the profiler lock
+// (aLock) and the profiler must be active (release-asserted).
+//
+// Returns the session's SamplerThread. The caller is responsible for deleting
+// it, which the callers in this file do after releasing the lock.
+[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
+  LOG("locked_profiler_stop");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  // At the very start, clear RacyFeatures.
+  RacyFeatures::SetInactive();
+
+  if (ActivePS::FeatureAudioCallbackTracing(aLock)) {
+    StopAudioCallbackTracing();
+  }
+
+#if defined(GP_OS_android)
+  if (ActivePS::FeatureJava(aLock)) {
+    java::GeckoJavaSampler::Stop();
+  }
+#endif
+
+  // Remove support for pushing/popping labels in mozglue.
+  RegisterProfilerLabelEnterExit(nullptr, nullptr);
+
+  // Stop sampling live threads.
+  ThreadRegistry::LockedRegistry lockedRegistry;
+  for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) {
+    // Threads that were not being profiled need no cleanup.
+    if (offThreadRef.UnlockedRWForLockedProfilerRef().ProfilingFeatures() ==
+        ThreadProfilingFeatures::NotProfiled) {
+      continue;
+    }
+
+    ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData =
+        offThreadRef.GetLockedRWFromAnyThread();
+
+    lockedThreadData->ClearProfilingFeaturesAndData(aLock);
+
+    if (ActivePS::FeatureJS(aLock)) {
+      lockedThreadData->StopJSSampling();
+      if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread =
+              lockedThreadData.GetLockedRWOnThread();
+          lockedRWOnThread) {
+        // We are on the thread, we can manually poll the current thread so it
+        // stops profiling immediately.
+        lockedRWOnThread->PollJSSampling();
+      } else if (lockedThreadData->Info().IsMainThread()) {
+        // Dispatch a runnable to the main thread to call PollJSSampling(),
+        // so that we don't have to wait for the next JS interrupt callback in
+        // order to stop profiling JS.
+        TriggerPollJSSamplingOnMainThread();
+      }
+    }
+  }
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  if (ActivePS::FeatureNativeAllocations(aLock)) {
+    mozilla::profiler::disable_native_allocations();
+  }
+#endif
+
+  // The Stop() call doesn't actually stop Run(); that happens in this
+  // function's caller when the sampler thread is destroyed. Stop() just gives
+  // the SamplerThread a chance to do some cleanup with gPSMutex locked.
+  SamplerThread* samplerThread = ActivePS::Destroy(aLock);
+  samplerThread->Stop(aLock);
+
+  // Release the main-thread marker buffer on the main thread: directly when
+  // already there, otherwise through a dispatched runnable.
+  if (NS_IsMainThread()) {
+    mozilla::base_profiler_markers_detail::
+        ReleaseBufferForMainThreadAddMarker();
+  } else {
+    NS_DispatchToMainThread(
+        NS_NewRunnableFunction("ReleaseBufferForMainThreadAddMarker",
+                               &mozilla::base_profiler_markers_detail::
+                                   ReleaseBufferForMainThreadAddMarker));
+  }
+
+  return samplerThread;
+}
+
+// Stops the profiler if it is active.
+//
+// Returns an already-resolved promise when the profiler was not active;
+// otherwise returns the promise produced by ProfilerParent::ProfilerStopped().
+// Observers are notified and the SamplerThread is deleted only after the lock
+// has been released.
+RefPtr<GenericPromise> profiler_stop() {
+  LOG("profiler_stop");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (profiler_is_active()) {
+    invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
+  }
+
+  ProfilerParent::ProfilerWillStopIfStarted();
+
+#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  // Remove the hooks early, as native allocations (if they are on) can be
+  // quite expensive.
+  mozilla::profiler::remove_memory_hooks();
+#endif
+
+  // Always assigned inside the locked scope below before it is used.
+  SamplerThread* samplerThread;
+  {
+    PSAutoLock lock;
+
+    if (!ActivePS::Exists(lock)) {
+      return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
+    }
+
+    samplerThread = locked_profiler_stop(lock);
+  }
+
+  // We notify observers with gPSMutex unlocked. Otherwise we might get a
+  // deadlock, if code run by these functions calls a profiler function that
+  // locks gPSMutex, for example when it wants to insert a marker.
+  // (This has been seen in practise in bug 1346356, when we were still firing
+  // these notifications synchronously.)
+  RefPtr<GenericPromise> promise = ProfilerParent::ProfilerStopped();
+  NotifyObservers("profiler-stopped");
+
+  // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
+  // would be waiting here with gPSMutex locked for SamplerThread::Run() to
+  // return so the join operation within the destructor can complete, but Run()
+  // needs to lock gPSMutex to return.
+  //
+  // Because this call occurs with gPSMutex unlocked, it -- including the final
+  // iteration of Run()'s loop -- must be able to detect deactivation and
+  // return in a way that's safe with respect to other gPSMutex-locking
+  // operations that may have occurred in the meantime.
+  delete samplerThread;
+
+  return promise;
+}
+
+// Returns true only when a profiler is active and ActivePS reports it as
+// paused.
+bool profiler_is_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock psLock;
+
+  return ActivePS::Exists(psLock) && ActivePS::IsPaused(psLock);
+}
+
+// Hands `aCallback` over to ActivePS::AppendPostSamplingCallback() while
+// holding the profiler lock, and forwards that function's result.
+/* [[nodiscard]] */ bool profiler_callback_after_sampling(
+    PostSamplingCallback&& aCallback) {
+  LOG("profiler_callback_after_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock psLock;
+
+  const bool appended =
+      ActivePS::AppendPostSamplingCallback(psLock, std::move(aCallback));
+  return appended;
+}
+
+// Pauses the active profiler and records a Pause entry in its buffer.
+// Returns the promise from ProfilerParent::ProfilerPaused(), or an
+// already-resolved promise when no profiler is active.
+RefPtr<GenericPromise> profiler_pause() {
+  LOG("profiler_pause");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  invoke_profiler_state_change_callbacks(ProfilingState::Pausing);
+
+  {
+    PSAutoLock psLock;
+
+    if (!ActivePS::Exists(psLock)) {
+      return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
+    }
+
+#if defined(GP_OS_android)
+    const bool javaNeedsPause =
+        ActivePS::FeatureJava(psLock) && !ActivePS::IsSamplingPaused(psLock);
+    if (javaNeedsPause) {
+      // Sampling was not paused before, so this is the first pause: tell the
+      // Java sampler.
+      // TODO: Distinguish Pause and PauseSampling in Java.
+      java::GeckoJavaSampler::PauseSampling();
+    }
+#endif
+
+    RacyFeatures::SetPaused();
+    ActivePS::SetIsPaused(psLock, true);
+    ActivePS::Buffer(psLock).AddEntry(
+        ProfileBufferEntry::Pause(profiler_time()));
+  }
+
+  // Notifications happen with gPSMutex unlocked, to avoid potential deadlocks.
+  RefPtr<GenericPromise> pausedPromise = ProfilerParent::ProfilerPaused();
+  NotifyObservers("profiler-paused");
+  return pausedPromise;
+}
+
+// Resumes a paused profiler and records a Resume entry in its buffer.
+// Returns the promise from ProfilerParent::ProfilerResumed(), or an
+// already-resolved promise when no profiler is active.
+RefPtr<GenericPromise> profiler_resume() {
+  LOG("profiler_resume");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  {
+    PSAutoLock psLock;
+
+    if (!ActivePS::Exists(psLock)) {
+      return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
+    }
+
+    ActivePS::Buffer(psLock).AddEntry(
+        ProfileBufferEntry::Resume(profiler_time()));
+    ActivePS::SetIsPaused(psLock, false);
+    RacyFeatures::SetUnpaused();
+
+#if defined(GP_OS_android)
+    const bool javaMayResume =
+        ActivePS::FeatureJava(psLock) && !ActivePS::IsSamplingPaused(psLock);
+    if (javaMayResume) {
+      // Sampling is no longer paused, so this is the last unpause: tell the
+      // Java sampler.
+      // TODO: Distinguish Unpause and UnpauseSampling in Java.
+      java::GeckoJavaSampler::UnpauseSampling();
+    }
+#endif
+  }
+
+  // Notifications happen with gPSMutex unlocked, to avoid potential deadlocks.
+  RefPtr<GenericPromise> resumedPromise = ProfilerParent::ProfilerResumed();
+  NotifyObservers("profiler-resumed");
+
+  invoke_profiler_state_change_callbacks(ProfilingState::Resumed);
+
+  return resumedPromise;
+}
+
+// Returns true only when a profiler is active and ActivePS reports its
+// sampling as paused.
+bool profiler_is_sampling_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock psLock;
+
+  return ActivePS::Exists(psLock) && ActivePS::IsSamplingPaused(psLock);
+}
+
+// Pauses sampling in the active profiler and records a PauseSampling entry
+// in its buffer. Returns the promise from
+// ProfilerParent::ProfilerPausedSampling(), or an already-resolved promise
+// when no profiler is active.
+RefPtr<GenericPromise> profiler_pause_sampling() {
+  LOG("profiler_pause_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  {
+    PSAutoLock psLock;
+
+    if (!ActivePS::Exists(psLock)) {
+      return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
+    }
+
+#if defined(GP_OS_android)
+    const bool javaNeedsPause =
+        ActivePS::FeatureJava(psLock) && !ActivePS::IsSamplingPaused(psLock);
+    if (javaNeedsPause) {
+      // Sampling was not paused before, so this is the first pause: tell the
+      // Java sampler.
+      // TODO: Distinguish Pause and PauseSampling in Java.
+      java::GeckoJavaSampler::PauseSampling();
+    }
+#endif
+
+    RacyFeatures::SetSamplingPaused();
+    ActivePS::SetIsSamplingPaused(psLock, true);
+    ActivePS::Buffer(psLock).AddEntry(
+        ProfileBufferEntry::PauseSampling(profiler_time()));
+  }
+
+  // Notifications happen with gPSMutex unlocked, to avoid potential deadlocks.
+  RefPtr<GenericPromise> pausedPromise =
+      ProfilerParent::ProfilerPausedSampling();
+  NotifyObservers("profiler-paused-sampling");
+  return pausedPromise;
+}
+
+// Resumes sampling in the active profiler and records a ResumeSampling entry
+// in its buffer. Returns the promise from
+// ProfilerParent::ProfilerResumedSampling(), or an already-resolved promise
+// when no profiler is active.
+RefPtr<GenericPromise> profiler_resume_sampling() {
+  LOG("profiler_resume_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  {
+    PSAutoLock psLock;
+
+    if (!ActivePS::Exists(psLock)) {
+      return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
+    }
+
+    ActivePS::Buffer(psLock).AddEntry(
+        ProfileBufferEntry::ResumeSampling(profiler_time()));
+    ActivePS::SetIsSamplingPaused(psLock, false);
+    RacyFeatures::SetSamplingUnpaused();
+
+#if defined(GP_OS_android)
+    const bool javaMayResume =
+        ActivePS::FeatureJava(psLock) && !ActivePS::IsSamplingPaused(psLock);
+    if (javaMayResume) {
+      // Sampling is no longer paused, so this is the last unpause: tell the
+      // Java sampler.
+      // TODO: Distinguish Unpause and UnpauseSampling in Java.
+      java::GeckoJavaSampler::UnpauseSampling();
+    }
+#endif
+  }
+
+  // Notifications happen with gPSMutex unlocked, to avoid potential deadlocks.
+  RefPtr<GenericPromise> resumedPromise =
+      ProfilerParent::ProfilerResumedSampling();
+  NotifyObservers("profiler-resumed-sampling");
+  return resumedPromise;
+}
+
+// Returns the result of RacyFeatures::IsActiveWithFeature(aFeature).
+// Requires CorePS to exist (release-asserted).
+bool profiler_feature_active(uint32_t aFeature) {
+  // This function runs both on and off the main thread.
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // This function is hot enough that we use RacyFeatures, not ActivePS.
+  return RacyFeatures::IsActiveWithFeature(aFeature);
+}
+
+// Returns the result of RacyFeatures::IsActiveWithoutFeature(aFeature).
+// Unlike profiler_feature_active(), this does not assert that CorePS exists.
+bool profiler_active_without_feature(uint32_t aFeature) {
+  // This function runs both on and off the main thread.
+
+  // This function is hot enough that we use RacyFeatures, not ActivePS.
+  return RacyFeatures::IsActiveWithoutFeature(aFeature);
+}
+
+// Has ActivePS write the active configuration into `aWriter`, under the
+// profiler lock.
+void profiler_write_active_configuration(JSONWriter& aWriter) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock psLock;
+  ActivePS::WriteActiveConfiguration(psLock, aWriter);
+}
+
+// Locks the profiler and forwards `aCounter` to
+// locked_profiler_add_sampled_counter(). `aCounter` must be non-null: its
+// label is read for logging.
+void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
+  DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
+
+  PSAutoLock psLock;
+  locked_profiler_add_sampled_counter(psLock, aCounter);
+}
+
+// Locks the profiler and forwards `aCounter` to
+// locked_profiler_remove_sampled_counter(). `aCounter` must be non-null: its
+// label is read for logging.
+void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
+  DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
+
+  PSAutoLock psLock;
+  locked_profiler_remove_sampled_counter(psLock, aCounter);
+}
+
+// Registers the current thread under `aName` by delegating to
+// ThreadRegistration::RegisterThread(), and returns whatever ProfilingStack
+// pointer that call produces.
+ProfilingStack* profiler_register_thread(const char* aName,
+                                         void* aGuessStackTop) {
+  DEBUG_LOG("profiler_register_thread(%s)", aName);
+
+  // This will call `ThreadRegistry::Register()` (see below).
+  return ThreadRegistration::RegisterThread(aName, aGuessStackTop);
+}
+
+// Adds the given thread to the registry container and, if CorePS already
+// exists, registers it with the profiler as well.
+/* static */
+void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) {
+  // The main thread's name is controlled elsewhere (and influences the process
+  // name on some systems like Linux), so only other threads are named here.
+  const bool isMainThread =
+      aOnThreadRef.UnlockedConstReaderCRef().Info().IsMainThread();
+  if (!isMainThread) {
+    // Ensure there is an nsThread wrapper for the current thread, and that
+    // NSPR knows the thread's name.
+    (void)NS_GetCurrentThread();
+    NS_SetCurrentThreadName(
+        aOnThreadRef.UnlockedConstReaderCRef().Info().Name());
+  }
+
+  PSAutoLock psLock;
+
+  {
+    RegistryLockExclusive registryLock{sRegistryMutex};
+    MOZ_RELEASE_ASSERT(sRegistryContainer.append(OffThreadRef{aOnThreadRef}));
+  }
+
+  // When CorePS has not been created yet, nothing more is done here: if and
+  // when it gets created, it will handle already-registered threads then.
+  if (CorePS::Exists()) {
+    (void)locked_register_thread(psLock, OffThreadRef{aOnThreadRef});
+  }
+}
+
+// Unregisters the current thread by delegating to
+// ThreadRegistration::UnregisterThread().
+void profiler_unregister_thread() {
+  // This will call `ThreadRegistry::Unregister()` (see below).
+  ThreadRegistration::UnregisterThread();
+}
+
+// Clears the profiling features and data of the thread being unregistered
+// and, if a profiler is active and the thread had ProfiledThreadData, tells
+// ActivePS that the thread is gone. Must run on the thread being unregistered
+// (its thread id is release-asserted to match the current one).
+static void locked_unregister_thread(
+    PSLockRef lock, ThreadRegistration::OnThreadRef aOnThreadRef) {
+  // This function can be called after the main thread has already shut down,
+  // in which case there is nothing to do.
+  if (!CorePS::Exists()) {
+    return;
+  }
+
+  // StopJSSampling() is not called here; there's no point doing that for a JS
+  // thread that is in the process of disappearing.
+
+  ThreadRegistration::OnThreadRef::RWOnThreadWithLock threadData =
+      aOnThreadRef.GetLockedRWOnThread();
+
+  // Fetch the pointer before the thread's profiling data gets cleared.
+  ProfiledThreadData* const profiled = threadData->GetProfiledThreadData(lock);
+  threadData->ClearProfilingFeaturesAndData(lock);
+
+  MOZ_RELEASE_ASSERT(
+      threadData->Info().ThreadId() == profiler_current_thread_id(),
+      "Thread being unregistered has changed its TID");
+
+  DEBUG_LOG("profiler_unregister_thread: %s", threadData->Info().Name());
+
+  if (!profiled || !ActivePS::Exists(lock)) {
+    return;
+  }
+
+  ActivePS::UnregisterThread(lock, profiled);
+}
+
+// Unregisters the given thread from the profiler, then removes its entry
+// from the registry container.
+/* static */
+void ThreadRegistry::Unregister(ThreadRegistration::OnThreadRef aOnThreadRef) {
+  PSAutoLock psLock;
+  locked_unregister_thread(psLock, aOnThreadRef);
+
+  RegistryLockExclusive registryLock{sRegistryMutex};
+  for (OffThreadRef& registered : sRegistryContainer) {
+    if (!registered.IsPointingAt(*aOnThreadRef.mThreadRegistration)) {
+      continue;
+    }
+    // Found the entry for this thread; erase it and stop iterating.
+    sRegistryContainer.erase(&registered);
+    break;
+  }
+}
+
+// Records a new page (identified by tab and inner-window IDs) in CorePS and,
+// when a profiler is active, lets ActivePS discard expired pages.
+void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID,
+                            const nsCString& aUrl,
+                            uint64_t aEmbedderInnerWindowID,
+                            bool aIsPrivateBrowsing) {
+  DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64
+            ", %s)",
+            aTabID, aInnerWindowID, aUrl.get(), aEmbedderInnerWindowID,
+            aIsPrivateBrowsing ? "true" : "false");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock psLock;
+
+  // The first URL loaded in a new browsing context is about:blank. Because of
+  // that, this call keeps the first non-about:blank registration of a window
+  // and discards the previous one.
+  RefPtr<PageInformation> newPage = new PageInformation(
+      aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID, aIsPrivateBrowsing);
+  CorePS::AppendRegisteredPage(psLock, std::move(newPage));
+
+  // Now that the page has been appended to CorePS, drop expired pages, if
+  // there are any.
+  if (ActivePS::Exists(psLock)) {
+    ActivePS::DiscardExpiredPages(psLock);
+  }
+}
+
+// Unregisters the page with the given inner-window ID.
+void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
+  PSAutoLock psLock;
+
+  // This function can be called after the main thread has already shut down.
+  if (!CorePS::Exists()) {
+    return;
+  }
+
+  // While the profiler is active, the page information has to be kept because
+  // markers may be associated with the page; ActivePS handles that case.
+  if (ActivePS::Exists(psLock)) {
+    ActivePS::UnregisterPage(psLock, aRegisteredInnerWindowID);
+    return;
+  }
+
+  // With no active profiler there can't be any marker associated with the
+  // page, so there is no reason to keep its information.
+  CorePS::RemoveRegisteredPage(psLock, aRegisteredInnerWindowID);
+}
+
+// Clears all registered pages in CorePS (and the unregistered pages kept by
+// ActivePS, if a profiler is active), then notifies ProfilerParent.
+void profiler_clear_all_pages() {
+  {
+    PSAutoLock psLock;
+
+    // This function can be called after the main thread has already shut
+    // down.
+    if (!CorePS::Exists()) {
+      return;
+    }
+
+    CorePS::ClearRegisteredPages(psLock);
+
+    const bool profilerActive = ActivePS::Exists(psLock);
+    if (profilerActive) {
+      ActivePS::ClearUnregisteredPages(psLock);
+    }
+  }
+
+  // Notification happens with gPSMutex unlocked, to avoid potential deadlocks.
+  ProfilerParent::ClearAllPages();
+}
+
+namespace geckoprofiler::markers::detail {
+
+// Returns the ID of the current inner window of `aDocshell`'s window, or
+// Nothing() if the docshell, its window, or the current inner window is null.
+Maybe<uint64_t> profiler_get_inner_window_id_from_docshell(
+    nsIDocShell* aDocshell) {
+  if (!aDocshell) {
+    return Nothing();
+  }
+
+  auto outer = aDocshell->GetWindow();
+  if (!outer) {
+    return Nothing();
+  }
+
+  auto inner = outer->GetCurrentInnerWindow();
+  if (!inner) {
+    return Nothing();
+  }
+
+  return Some(inner->WindowID());
+}
+
+}  // namespace geckoprofiler::markers::detail
+
+namespace geckoprofiler::markers {
+
+// Marker type "Awake". The payload parameters of StreamJSONMarkerData() are
+// platform-dependent: a CPU id everywhere, plus a QoS class on Darwin, or
+// three thread priorities on Windows.
+struct CPUAwakeMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("Awake");
+  }
+  // Writes the payload as JSON properties. The parameter list must stay in
+  // sync with the arguments passed where markers of this type are added.
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   int64_t aCPUId
+#ifdef GP_OS_darwin
+                                   ,
+                                   uint32_t aQoS
+#endif
+#ifdef GP_OS_windows
+                                   ,
+                                   int32_t aAbsolutePriority,
+                                   int32_t aRelativePriority,
+                                   int32_t aCurrentPriority
+#endif
+  ) {
+    // The CPU id is not written on arm64 Darwin.
+#ifndef GP_PLAT_arm64_darwin
+    aWriter.IntProperty("CPU Id", aCPUId);
+#endif
+#ifdef GP_OS_windows
+    // The absolute and current priorities are only written when non-zero; the
+    // relative priority is always written.
+    if (aAbsolutePriority) {
+      aWriter.IntProperty("absPriority", aAbsolutePriority);
+    }
+    if (aCurrentPriority) {
+      aWriter.IntProperty("curPriority", aCurrentPriority);
+    }
+    aWriter.IntProperty("priority", aRelativePriority);
+#endif
+#ifdef GP_OS_darwin
+    // Translate the QoS class value into a human-readable name; any value
+    // not listed below is reported as "Unspecified".
+    const char* QoS = "";
+    switch (aQoS) {
+      case QOS_CLASS_USER_INTERACTIVE:
+        QoS = "User Interactive";
+        break;
+      case QOS_CLASS_USER_INITIATED:
+        QoS = "User Initiated";
+        break;
+      case QOS_CLASS_DEFAULT:
+        QoS = "Default";
+        break;
+      case QOS_CLASS_UTILITY:
+        QoS = "Utility";
+        break;
+      case QOS_CLASS_BACKGROUND:
+        QoS = "Background";
+        break;
+      default:
+        QoS = "Unspecified";
+    }
+
+    aWriter.StringProperty("QoS",
+                           ProfilerString8View::WrapNullTerminatedString(QoS));
+#endif
+  }
+
+  // Describes how the payload properties are displayed. The "CPU Time" key is
+  // declared here although it is not written by StreamJSONMarkerData() above;
+  // it is written by the derived CPUAwakeMarkerEnd.
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+    schema.AddKeyFormat("CPU Time", MS::Format::Duration);
+#ifndef GP_PLAT_arm64_darwin
+    schema.AddKeyFormat("CPU Id", MS::Format::Integer);
+    schema.SetTableLabel("Awake - CPU Id = {marker.data.CPU Id}");
+#endif
+#ifdef GP_OS_windows
+    schema.AddKeyLabelFormat("priority", "Relative Thread Priority",
+                             MS::Format::Integer);
+    schema.AddKeyLabelFormat("absPriority", "Base Thread Priority",
+                             MS::Format::Integer);
+    schema.AddKeyLabelFormat("curPriority", "Current Thread Priority",
+                             MS::Format::Integer);
+#endif
+#ifdef GP_OS_darwin
+    schema.AddKeyLabelFormat("QoS", "Quality of Service", MS::Format::String);
+#endif
+    return schema;
+  }
+};
+
+// Marker type "AwakeEnd". It inherits its display schema from CPUAwakeMarker
+// and only carries a CPU time, given in nanoseconds.
+struct CPUAwakeMarkerEnd : public CPUAwakeMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("AwakeEnd");
+  }
+  // Writes "CPU Time" in milliseconds; nothing is written for a zero value.
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   int64_t aCPUTimeNs) {
+    if (aCPUTimeNs == 0) {
+      return;
+    }
+    constexpr double kNsPerMs = 1'000'000;
+    const double cpuTimeMs = static_cast<double>(aCPUTimeNs) / kNsPerMs;
+    aWriter.DoubleProperty("CPU Time", cpuTimeMs);
+  }
+};
+
+}  // namespace geckoprofiler::markers
+
+// Adds the interval-end "Awake" marker for the current thread, if the thread
+// is being profiled for markers. The payload is the value returned by the
+// thread's GetNewCpuTimeInNs(), or 0 when the thread is not registered.
+void profiler_mark_thread_asleep() {
+  const bool wantsMarkers = profiler_thread_is_being_profiled_for_markers();
+  if (!wantsMarkers) {
+    return;
+  }
+
+  uint64_t newCpuTimeNs = ThreadRegistration::WithOnThreadRefOr(
+      [](ThreadRegistration::OnThreadRef aThreadRef) {
+        return aThreadRef.UnlockedConstReaderAndAtomicRWRef()
+            .GetNewCpuTimeInNs();
+      },
+      0);
+  PROFILER_MARKER("Awake", OTHER, MarkerTiming::IntervalEnd(),
+                  CPUAwakeMarkerEnd, newCpuTimeNs);
+}
+
+// Emits the "thread asleep" marker, then flags the current thread (if it is
+// registered) as sleeping.
+void profiler_thread_sleep() {
+  profiler_mark_thread_asleep();
+
+  ThreadRegistration::WithOnThreadRef(
+      [](ThreadRegistration::OnThreadRef aThreadRef) {
+        aThreadRef.UnlockedConstReaderAndAtomicRWRef().SetSleeping();
+      });
+}
+
+// Windows-only local declarations used by profiler_mark_thread_awake() when
+// it queries thread information through NtQueryInformationThread.
+#if defined(GP_OS_windows)
+// NOTE(review): presumably MinGW's headers already declare
+// ThreadBasicInformation, hence the exclusion -- confirm.
+#  if !defined(__MINGW32__)
+enum {
+  ThreadBasicInformation,
+};
+#  endif
+
+// Output structure filled in by the ThreadBasicInformation query. The field
+// order and types must not be changed.
+struct THREAD_BASIC_INFORMATION {
+  NTSTATUS ExitStatus;
+  PVOID TebBaseAddress;
+  CLIENT_ID ClientId;
+  KAFFINITY AffMask;
+  DWORD Priority;
+  DWORD BasePriority;
+};
+#endif
+
+// Running count of thread wake-ups: incremented by
+// profiler_mark_thread_awake() and read by profiler_record_wakeup_count().
+// Accesses use relaxed memory ordering.
+static mozilla::Atomic<uint64_t, mozilla::MemoryOrdering::Relaxed> gWakeCount(
+    0);
+
+namespace geckoprofiler::markers {
+// Marker type "WakeUpCount": reports a number of thread wake-ups together
+// with a label (callers in this file pass the process type).
+struct WakeUpCountMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("WakeUpCount");
+  }
+  // Writes the payload: the count under "Count", and the label under "label".
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   int32_t aCount,
+                                   const ProfilerString8View& aType) {
+    aWriter.IntProperty("Count", aCount);
+    aWriter.StringProperty("label", aType);
+  }
+  // Describes how the payload is displayed in the marker chart and table.
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+    schema.AddKeyFormat("Count", MS::Format::Integer);
+    schema.SetTooltipLabel("{marker.name} - {marker.data.label}");
+    // The placeholder has to spell the key exactly as it is streamed and
+    // declared above ("Count"); with a lowercase "count" the lookup would not
+    // find the property (assuming keys are matched case-sensitively).
+    schema.SetTableLabel(
+        "{marker.name} - {marker.data.label}: {marker.data.Count}");
+    return schema;
+  }
+};
+}  // namespace geckoprofiler::markers
+
+// Reports the thread wake-ups counted in gWakeCount since the previous call:
+// they are added to the Glean power metrics and recorded in a
+// "Thread Wake-ups" marker. On Nightly builds, every registered thread is
+// additionally asked to record its own wake count.
+void profiler_record_wakeup_count(const nsACString& aProcessType) {
+  // Value of gWakeCount that has already been accounted for.
+  static uint64_t sReportedWakeCount = 0;
+
+  uint64_t pendingWakeups = gWakeCount - sReportedWakeCount;
+  if (pendingWakeups > 0) {
+    // Only report deltas that fit in the int32_t taken by the metrics and the
+    // marker; larger deltas are counted as reported without being sent.
+    if (pendingWakeups < std::numeric_limits<int32_t>::max()) {
+      int32_t pendingWakeups32 = static_cast<int32_t>(pendingWakeups);
+      mozilla::glean::power::total_thread_wakeups.Add(pendingWakeups32);
+      mozilla::glean::power::wakeups_per_process_type.Get(aProcessType)
+          .Add(pendingWakeups32);
+      PROFILER_MARKER("Thread Wake-ups", OTHER, {}, WakeUpCountMarker,
+                      pendingWakeups32, aProcessType);
+    }
+
+    sReportedWakeCount += pendingWakeups;
+  }
+
+#ifdef NIGHTLY_BUILD
+  ThreadRegistry::LockedRegistry lockedRegistry;
+  for (ThreadRegistry::OffThreadRef threadRef : lockedRegistry) {
+    const ThreadRegistry::UnlockedConstReaderAndAtomicRW& perThreadData =
+        threadRef.UnlockedConstReaderAndAtomicRWRef();
+    perThreadData.RecordWakeCount();
+  }
+#endif
+}
+
+// Counts a thread wake-up in gWakeCount and, if the current thread is being
+// profiled for markers, adds the interval-start "Awake" marker with
+// platform-specific payload (CPU id, plus QoS class on Darwin or thread
+// priorities on Windows).
+void profiler_mark_thread_awake() {
+  // The wake-up is counted even when no marker ends up being recorded.
+  ++gWakeCount;
+  if (!profiler_thread_is_being_profiled_for_markers()) {
+    return;
+  }
+
+  // Identify the CPU this thread is running on. On Darwin this is only
+  // attempted on amd64; on other Darwin platforms cpuId stays 0.
+  int64_t cpuId = 0;
+#if defined(GP_OS_windows)
+  cpuId = GetCurrentProcessorNumber();
+#elif defined(GP_OS_darwin)
+#  ifdef GP_PLAT_amd64_darwin
+  unsigned int eax, ebx, ecx, edx;
+  __cpuid_count(1, 0, eax, ebx, ecx, edx);
+  // Check if we have an APIC.
+  if ((edx & (1 << 9))) {
+    // APIC ID is bits 24-31 of EBX
+    cpuId = ebx >> 24;
+  }
+#  endif
+#else
+  cpuId = sched_getcpu();
+#endif
+
+#if defined(GP_OS_windows)
+  // GetThreadInformation is looked up dynamically, once; if it is missing or
+  // the query fails, the absolute priority is reported as 0.
+  LONG priority;
+  static const auto get_thread_information_fn =
+      reinterpret_cast<decltype(&::GetThreadInformation)>(::GetProcAddress(
+          ::GetModuleHandle(L"Kernel32.dll"), "GetThreadInformation"));
+
+  if (!get_thread_information_fn ||
+      !get_thread_information_fn(GetCurrentThread(), ThreadAbsoluteCpuPriority,
+                                 &priority, sizeof(priority))) {
+    priority = 0;
+  }
+
+  // NtQueryInformationThread is looked up dynamically as well, once.
+  static const auto nt_query_information_thread_fn =
+      reinterpret_cast<decltype(&::NtQueryInformationThread)>(::GetProcAddress(
+          ::GetModuleHandle(L"ntdll.dll"), "NtQueryInformationThread"));
+
+  // The current priority stays 0 if the function is missing or the query
+  // fails.
+  LONG currentPriority = 0;
+  if (nt_query_information_thread_fn) {
+    THREAD_BASIC_INFORMATION threadInfo;
+    auto status = (*nt_query_information_thread_fn)(
+        GetCurrentThread(), (THREADINFOCLASS)ThreadBasicInformation,
+        &threadInfo, sizeof(threadInfo), NULL);
+    if (NT_SUCCESS(status)) {
+      currentPriority = threadInfo.Priority;
+    }
+  }
+#endif
+  // The extra arguments must match the platform-dependent parameter list of
+  // CPUAwakeMarker::StreamJSONMarkerData().
+  PROFILER_MARKER(
+      "Awake", OTHER, MarkerTiming::IntervalStart(), CPUAwakeMarker, cpuId
+#if defined(GP_OS_darwin)
+      ,
+      qos_class_self()
+#endif
+#if defined(GP_OS_windows)
+          ,
+      priority, GetThreadPriority(GetCurrentThread()), currentPriority
+#endif
+  );
+}
+
+// Emits the "thread awake" marker, then flags the current thread (if it is
+// registered) as awake.
+void profiler_thread_wake() {
+  profiler_mark_thread_awake();
+
+  ThreadRegistration::WithOnThreadRef(
+      [](ThreadRegistration::OnThreadRef aThreadRef) {
+        aThreadRef.UnlockedConstReaderAndAtomicRWRef().SetAwake();
+      });
+}
+
+// Interrupt callback for JS threads: delegates to
+// PollJSSamplingForCurrentThread().
+void profiler_js_interrupt_callback() {
+  // This function runs on JS threads being sampled.
+  PollJSSamplingForCurrentThread();
+}
+
+// Returns the time elapsed since CorePS::ProcessStartTime(), in milliseconds.
+double profiler_time() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  const TimeDuration sinceProcessStart =
+      TimeStamp::Now() - CorePS::ProcessStartTime();
+  return sinceProcessStart.ToMilliseconds();
+}
+
+// Captures a synchronous sample of the current thread into `aChunkedBuffer`.
+// Returns true if a sample was taken; false if the profiler is inactive or
+// paused, if `aCaptureOptions` is NoStack, or if the calling thread is not
+// registered.
+bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer,
+                                     StackCaptureOptions aCaptureOptions) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (!profiler_is_active()) {
+    return false;
+  }
+  if (aCaptureOptions == StackCaptureOptions::NoStack) {
+    return false;
+  }
+
+  return ThreadRegistration::WithOnThreadRefOr(
+      [&](ThreadRegistration::OnThreadRef aThreadRef) {
+        mozilla::Maybe<uint32_t> activeFeatures =
+            RacyFeatures::FeaturesIfActiveAndUnpaused();
+        if (!activeFeatures) {
+          return false;
+        }
+
+        ProfileBuffer sampleBuffer(aChunkedBuffer);
+
+        Registers regs;
+#if defined(HAVE_NATIVE_UNWIND)
+        REGISTERS_SYNC_POPULATE(regs);
+#else
+        regs.Clear();
+#endif
+
+        DoSyncSample(*activeFeatures,
+                     aThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef(),
+                     TimeStamp::Now(), regs, sampleBuffer, aCaptureOptions);
+
+        return true;
+      },
+      // Fallback for callers on a non-registered thread (which can happen
+      // from a memory hook): return false and do no more work.
+      false);
+}
+
+// Captures a full backtrace of the current thread into a newly allocated
+// buffer. Returns nullptr if the profiler is inactive, if the NoMarkerStacks
+// feature is set, or if the capture itself fails.
+UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  AUTO_PROFILER_LABEL("profiler_capture_backtrace", PROFILER);
+
+  // Cheap is-active and feature check first, so that no buffer is allocated
+  // when no backtrace is wanted (inactive profiler, or NoMarkerStacks set).
+  if (!profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)) {
+    return nullptr;
+  }
+
+  auto chunkManager = MakeUnique<ProfileBufferChunkManagerSingle>(
+      ProfileBufferChunkManager::scExpectedMaximumStackSize);
+  auto backtraceBuffer = MakeUnique<ProfileChunkedBuffer>(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+      std::move(chunkManager));
+
+  const bool captured = profiler_capture_backtrace_into(
+      *backtraceBuffer, StackCaptureOptions::Full);
+  if (!captured) {
+    return nullptr;
+  }
+
+  return backtraceBuffer;
+}
+
+// Captures a backtrace of the current thread and wraps it in a
+// ProfilerBacktrace named "SyncProfile"; returns nullptr if nothing was
+// captured.
+UniqueProfilerBacktrace profiler_get_backtrace() {
+  if (UniquePtr<ProfileChunkedBuffer> captured =
+          profiler_capture_backtrace()) {
+    return UniqueProfilerBacktrace(
+        new ProfilerBacktrace("SyncProfile", std::move(captured)));
+  }
+
+  return nullptr;
+}
+
+// Deleter call operator: destroys `aBacktrace` with `delete`.
+void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
+  delete aBacktrace;
+}
+
+// Tells whether the current thread holds any profiler-related lock.
+//
+// Callers use this to avoid invoking `profiler_...` functions while such a
+// lock is held, which could lead to a 2nd recursive lock (i.e., a crash or a
+// never-ending wait) or to a deadlock between any two mutexes. Each check
+// below covers one of these locks; they are evaluated in order and the first
+// positive one wins.
+bool profiler_is_locked_on_current_thread() {
+  // The main profiler mutex, used by most functions.
+  if (PSAutoLock::IsLockedOnCurrentThread()) {
+    return true;
+  }
+  // The thread registry mutex.
+  if (ThreadRegistry::IsRegistryMutexLockedOnCurrentThread()) {
+    return true;
+  }
+  // A thread registration's data mutex.
+  if (ThreadRegistration::IsDataMutexLockedOnCurrentThread()) {
+    return true;
+  }
+  // The buffer mutex, used directly in some functions without locking the
+  // main mutex, e.g., marker-related functions.
+  if (profiler_get_core_buffer().IsThreadSafeAndLockedOnCurrentThread()) {
+    return true;
+  }
+  // The ProfilerParent or ProfilerChild mutex, used to store and process
+  // buffer chunk updates.
+  if (ProfilerParent::IsLockedOnCurrentThread()) {
+    return true;
+  }
+  return ProfilerChild::IsLockedOnCurrentThread();
+}
+
+// Stores `aCx` as the JSContext of the current thread (if it is registered)
+// and, when a profiler with the JS feature is active, starts JS sampling on
+// this thread right away.
+void profiler_set_js_context(JSContext* aCx) {
+  MOZ_ASSERT(aCx);
+  ThreadRegistration::WithOnThreadRef(
+      [&](ThreadRegistration::OnThreadRef aThreadRef) {
+        // Lock order: the profiler mutex first, then the ThreadRegistration's.
+        PSAutoLock psLock;
+        aThreadRef.WithLockedRWOnThread(
+            [&](ThreadRegistration::LockedRWOnThread& aLockedData) {
+              aLockedData.SetJSContext(aCx);
+
+              const bool jsProfilingActive =
+                  ActivePS::Exists(psLock) && ActivePS::FeatureJS(psLock);
+              if (!jsProfilingActive) {
+                return;
+              }
+
+              // We are on the thread itself, so PollJSSampling() can be
+              // called to start JS sampling immediately.
+              aLockedData.PollJSSampling();
+
+              ProfiledThreadData* profiled =
+                  aLockedData.GetProfiledThreadData(psLock);
+              if (profiled) {
+                profiled->NotifyReceivedJSContext(
+                    ActivePS::Buffer(psLock).BufferRangeEnd());
+              }
+            });
+      });
+}
+
+// Clears the JSContext pointer of the current thread (if the thread is
+// registered and has one). When the thread is being profiled with the JS
+// feature, the context is first notified that profiling has stopped, and JS
+// sampling is re-requested for a possible future JSContext.
+void profiler_clear_js_context() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  ThreadRegistration::WithOnThreadRef(
+      [](ThreadRegistration::OnThreadRef aOnThreadRef) {
+        // Nothing to do when the thread has no JSContext; this check is done
+        // before any lock is taken.
+        JSContext* cx =
+            aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef().GetJSContext();
+        if (!cx) {
+          return;
+        }
+
+        // The profiler mutex must be locked before the ThreadRegistration's.
+        PSAutoLock lock;
+        ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData =
+            aOnThreadRef.GetLockedRWOnThread();
+
+        if (ProfiledThreadData* profiledThreadData =
+                lockedThreadData->GetProfiledThreadData(lock);
+            profiledThreadData && ActivePS::Exists(lock) &&
+            ActivePS::FeatureJS(lock)) {
+          profiledThreadData->NotifyAboutToLoseJSContext(
+              cx, CorePS::ProcessStartTime(), ActivePS::Buffer(lock));
+
+          // Notify the JS context that profiling for this context has
+          // stopped. Do this by calling StopJSSampling and PollJSSampling
+          // before nulling out the JSContext.
+          lockedThreadData->StopJSSampling();
+          lockedThreadData->PollJSSampling();
+
+          lockedThreadData->ClearJSContext();
+
+          // Tell the thread that we'd like to have JS sampling on this
+          // thread again, once it gets a new JSContext (if ever).
+          lockedThreadData->StartJSSampling(ActivePS::JSFlags(lock));
+        } else {
+          // This thread is not being profiled or JS profiling is off, we only
+          // need to clear the context pointer.
+          lockedThreadData->ClearJSContext();
+        }
+      });
+}
+
+// Shared implementation behind the public profiler_suspend_and_sample_thread().
+//
+// - aLockIfAsynchronousSampling: null when the current thread samples itself
+//   (no suspension); otherwise the profiler lock, which is used to suspend,
+//   sample and resume the target thread through a Sampler.
+// - aThreadData: registration data of the thread being sampled.
+// - aJsFrames: buffer receiving the extracted JS frames, or null to skip JS
+//   frame extraction.
+// - aFeatures: feature flags, forwarded to MergeStacks().
+// - aCollector: receives the collected stack.
+// - aSampleNative: whether a native stack walk is requested; it is only
+//   performed when HAVE_FASTINIT_NATIVE_UNWIND is defined.
+static void profiler_suspend_and_sample_thread(
+    const PSAutoLock* aLockIfAsynchronousSampling,
+    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
+    JsFrame* aJsFrames, uint32_t aFeatures, ProfilerStackCollector& aCollector,
+    bool aSampleNative) {
+  const ThreadRegistrationInfo& info = aThreadData.Info();
+
+  if (info.IsMainThread()) {
+    aCollector.SetIsMainThread();
+  }
+
+  // Allocate the space for the native stack
+  NativeStack nativeStack;
+
+  // Does the actual collection from the given register state. It is invoked
+  // either directly (synchronous case) or by the Sampler (asynchronous case).
+  auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) {
+    // The target thread is now suspended. Collect a native backtrace,
+    // and call the callback.
+    StackWalkControl* stackWalkControlIfSupported = nullptr;
+#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
+    StackWalkControl stackWalkControl;
+    if constexpr (StackWalkControl::scIsSupported) {
+      if (aSampleNative) {
+        stackWalkControlIfSupported = &stackWalkControl;
+      }
+    }
+#endif
+    const uint32_t jsFramesCount =
+        aJsFrames ? ExtractJsFrames(!aLockIfAsynchronousSampling, aThreadData,
+                                    aRegs, aCollector, aJsFrames,
+                                    stackWalkControlIfSupported)
+                  : 0;
+
+#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
+    if (aSampleNative) {
+      // We can only use FramePointerStackWalk or MozStackWalk from
+      // suspend_and_sample_thread as other stackwalking methods may not be
+      // initialized.
+#  if defined(USE_FRAME_POINTER_STACK_WALK)
+      DoFramePointerBacktrace(aThreadData, aRegs, nativeStack,
+                              stackWalkControlIfSupported);
+#  elif defined(USE_MOZ_STACK_WALK)
+      DoMozStackWalkBacktrace(aThreadData, aRegs, nativeStack,
+                              stackWalkControlIfSupported);
+#  else
+#    error "Invalid configuration"
+#  endif
+
+      MergeStacks(aFeatures, !aLockIfAsynchronousSampling, aThreadData, aRegs,
+                  nativeStack, aCollector, aJsFrames, jsFramesCount);
+    } else
+#endif
+    // Without a native stack walk, the native stack stays as initialized
+    // above and only the leaf address (the sampled PC) is reported. Note that
+    // this block is the `else` branch of the `if` above when
+    // HAVE_FASTINIT_NATIVE_UNWIND is defined, and unconditional otherwise.
+    {
+      MergeStacks(aFeatures, !aLockIfAsynchronousSampling, aThreadData, aRegs,
+                  nativeStack, aCollector, aJsFrames, jsFramesCount);
+
+      aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
+    }
+  };
+
+  if (!aLockIfAsynchronousSampling) {
+    // Sampling the current thread, do NOT suspend it!
+    Registers regs;
+#if defined(HAVE_NATIVE_UNWIND)
+    REGISTERS_SYNC_POPULATE(regs);
+#else
+    regs.Clear();
+#endif
+    collectStack(regs, TimeStamp::Now());
+  } else {
+    // Suspend, sample, and then resume the target thread.
+    Sampler sampler(*aLockIfAsynchronousSampling);
+    TimeStamp now = TimeStamp::Now();
+    sampler.SuspendAndSampleAndResumeThread(*aLockIfAsynchronousSampling,
+                                            aThreadData, now, collectStack);
+
+    // NOTE: Make sure to disable the sampler before it is destroyed, in
+    // case the profiler is running at the same time.
+    sampler.Disable(*aLockIfAsynchronousSampling);
+  }
+}
+
+// Samples the stack of the thread identified by `aThreadId` and reports it
+// through `aCollector`. An unspecified thread id, or the id of the calling
+// thread, means "sample the current thread", which is done synchronously
+// without suspending it; any other thread is suspended, sampled and resumed
+// while the profiler lock is held. Nothing is sampled if the requested
+// thread is not found in the registration data.
+//
+// NOTE: aCollector's methods will be called while the target thread is paused.
+// Doing things in those methods like allocating -- which may try to claim
+// locks -- is a surefire way to deadlock.
+void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId,
+                                        uint32_t aFeatures,
+                                        ProfilerStackCollector& aCollector,
+                                        bool aSampleNative /* = true */) {
+  if (!aThreadId.IsSpecified() || aThreadId == profiler_current_thread_id()) {
+    // Sampling the current thread. Get its information from the TLS (no locking
+    // required.)
+    ThreadRegistration::WithOnThreadRef(
+        [&](ThreadRegistration::OnThreadRef aOnThreadRef) {
+          aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread(
+              [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
+                      aThreadData) {
+                if (!aThreadData.GetJSContext()) {
+                  // No JSContext, there is no JS frame buffer (and no need for
+                  // it).
+                  profiler_suspend_and_sample_thread(
+                      /* aLockIfAsynchronousSampling = */ nullptr, aThreadData,
+                      /* aJsFrames = */ nullptr, aFeatures, aCollector,
+                      aSampleNative);
+                } else {
+                  // JSContext is present, we need to lock the thread data to
+                  // access the JS frame buffer.
+                  aOnThreadRef.WithConstLockedRWOnThread(
+                      [&](const ThreadRegistration::LockedRWOnThread&
+                              aLockedThreadData) {
+                        profiler_suspend_and_sample_thread(
+                            /* aLockIfAsynchronousSampling = */ nullptr,
+                            aThreadData, aLockedThreadData.GetJsFrameBuffer(),
+                            aFeatures, aCollector, aSampleNative);
+                      });
+                }
+              });
+        });
+  } else {
+    // Lock the profiler before accessing the ThreadRegistry.
+    PSAutoLock lock;
+    ThreadRegistry::WithOffThreadRef(
+        aThreadId, [&](ThreadRegistry::OffThreadRef aOffThreadRef) {
+          aOffThreadRef.WithLockedRWFromAnyThread(
+              [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
+                      aThreadData) {
+                // When sampling another thread, the JS frame buffer owned by
+                // CorePS is used; the profiler lock is passed along so that
+                // the target thread gets suspended for the sample.
+                JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
+                profiler_suspend_and_sample_thread(&lock, aThreadData, jsFrames,
+                                                   aFeatures, aCollector,
+                                                   aSampleNative);
+              });
+        });
+  }
+}
+
+// END externally visible functions
+////////////////////////////////////////////////////////////////////////
diff --git a/tools/profiler/core/platform.h b/tools/profiler/core/platform.h
new file mode 100644
index 0000000000..59d2c7ff42
--- /dev/null
+++ b/tools/profiler/core/platform.h
@@ -0,0 +1,381 @@
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#ifndef TOOLS_PLATFORM_H_
+#define TOOLS_PLATFORM_H_
+
+#include "PlatformMacros.h"
+
+#include "json/json.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/Logging.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/ProfileJSONWriter.h"
+#include "mozilla/ProfilerUtils.h"
+#include "mozilla/ProgressLogger.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "nsString.h"
+#include "shared-libraries.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+
+class ProfilerCodeAddressService;
+
+namespace mozilla {
+struct SymbolTable;
+}
+
+extern mozilla::LazyLogModule gProfilerLog;
+
+// These are for MOZ_LOG="prof:3" or higher. It's the default logging level for
+// the profiler, and should be used sparingly.
+#define LOG_TEST MOZ_LOG_TEST(gProfilerLog, mozilla::LogLevel::Info)
+#define LOG(arg, ...)                            \
+  MOZ_LOG(gProfilerLog, mozilla::LogLevel::Info, \
+          ("[%" PRIu64 "] " arg,                 \
+           uint64_t(profiler_current_process_id().ToNumber()), ##__VA_ARGS__))
+
+// These are for MOZ_LOG="prof:4" or higher. It should be used for logging that
+// is somewhat more verbose than LOG.
+#define DEBUG_LOG_TEST MOZ_LOG_TEST(gProfilerLog, mozilla::LogLevel::Debug)
+#define DEBUG_LOG(arg, ...)                       \
+  MOZ_LOG(gProfilerLog, mozilla::LogLevel::Debug, \
+          ("[%" PRIu64 "] " arg,                  \
+           uint64_t(profiler_current_process_id().ToNumber()), ##__VA_ARGS__))
+
+typedef uint8_t* Address;
+
+// Stringify the given JSON value, in the most compact format.
+// Note: Numbers are limited to a precision of 6 decimal digits, so that
+// timestamps in ms have a precision in ns.
+Json::String ToCompactString(const Json::Value& aJsonValue);
+
+// Profiling log stored in a Json::Value. The actual log only exists while the
+// profiler is running, and will be inserted at the end of the JSON profile.
+class ProfilingLog {
+ public:
+  // Lifetime hooks, invoked by ActivePS as the profiler starts and stops.
+  static void Init();
+  static void Destroy();
+
+  // Give `aF` mutable access to the profiling log JSON object.
+  // `aF` is only invoked if the log exists, i.e., if the profiler is active.
+  // Thread-safe. But `aF` must not call other locking profiler functions.
+  // This is intended to capture some internal logging that doesn't belong in
+  // other places like markers. The log is accessible through the JS console on
+  // profiler.firefox.com, in the `profile.profilingLog` object; the data format
+  // is intentionally not defined, and not intended to be shown in the
+  // front-end. Please use caution not to output too much data.
+  template <typename F>
+  static void Access(F&& aF) {
+    mozilla::baseprofiler::detail::BaseProfilerAutoLock autoLock{gMutex};
+    if (!gLog) {
+      return;
+    }
+    std::forward<F>(aF)(*gLog);
+  }
+
+#define DURATION_JSON_SUFFIX "_ms"
+
+  // Express a TimeDuration as the number of milliseconds stored in the log.
+  // Use DURATION_JSON_SUFFIX as suffix in the property name.
+  static Json::Value Duration(const mozilla::TimeDuration& aDuration) {
+    return Json::Value{aDuration.ToMilliseconds()};
+  }
+
+#define TIMESTAMP_JSON_SUFFIX "_TSms"
+
+  // Express a TimeStamp as time elapsed since process creation (0.0 if null).
+  // Use TIMESTAMP_JSON_SUFFIX as suffix in the property name.
+  static Json::Value Timestamp(
+      const mozilla::TimeStamp& aTimestamp = mozilla::TimeStamp::Now()) {
+    if (!aTimestamp.IsNull()) {
+      return Duration(aTimestamp - mozilla::TimeStamp::ProcessCreation());
+    }
+    return Json::Value{0.0};
+  }
+
+  static bool IsLockedOnCurrentThread();
+
+ private:
+  static mozilla::baseprofiler::detail::BaseProfilerMutex gMutex;
+  static mozilla::UniquePtr<Json::Value> gLog;
+};
+
+// ----------------------------------------------------------------------------
+// Miscellaneous
+
+// If positive, skip stack-sampling in the sampler thread loop.
+// Users should increment it atomically when samplings should be avoided, and
+// later decrement it back. Multiple uses can overlap.
+// There could be a sampling in progress when this is first incremented, so if
+// it is critical to prevent any sampling, lock the profiler mutex instead.
+// Relaxed ordering, because it's used to request that the profiler pause
+// future sampling; this is not time critical, nor dependent on anything else.
+extern mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed> gSkipSampling;
+
+void AppendSharedLibraries(mozilla::JSONWriter& aWriter,
+                           const SharedLibraryInfo& aInfo);
+
+// Convert the array of strings to a bitfield.
+uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+                                      uint32_t aFeatureCount,
+                                      bool aIsStartup = false);
+
+// Add the begin/end 'Awake' markers for the thread.
+void profiler_mark_thread_awake();
+
+void profiler_mark_thread_asleep();
+
+[[nodiscard]] bool profiler_get_profile_json(
+    SpliceableChunkedJSONWriter& aSpliceableChunkedJSONWriter,
+    double aSinceTime, bool aIsShuttingDown,
+    mozilla::ProgressLogger aProgressLogger);
+
+// Flags to conveniently track various JS instrumentations.
+enum class JSInstrumentationFlags {
+  StackSampling = 1 << 0,
+  Allocations = 1 << 1,
+};
+
+// Write out the information of the active profiling configuration.
+void profiler_write_active_configuration(mozilla::JSONWriter& aWriter);
+
+// Extract all received exit profiles that have not yet expired (i.e., they
+// still intersect with this process' buffer range).
+mozilla::Vector<nsCString> profiler_move_exit_profiles();
+
+// If the "MOZ_PROFILER_SYMBOLICATE" env-var is set, we return a new
+// ProfilerCodeAddressService object to use for local symbolication of profiles.
+// This is off by default, and mainly intended for local development.
+mozilla::UniquePtr<ProfilerCodeAddressService>
+profiler_code_address_service_for_presymbolication();
+
+extern "C" {
+// This function is defined in the profiler rust module at
+// tools/profiler/rust-helper. mozilla::SymbolTable and CompactSymbolTable
+// have identical memory layout.
+bool profiler_get_symbol_table(const char* debug_path, const char* breakpad_id,
+                               mozilla::SymbolTable* symbol_table);
+
+bool profiler_demangle_rust(const char* mangled, char* buffer, size_t len);
+}
+
+// For each running times value, call MACRO(index, name, unit, jsonProperty)
+#define PROFILER_FOR_EACH_RUNNING_TIME(MACRO) \
+  MACRO(0, ThreadCPU, Delta, threadCPUDelta)
+
+// This class contains all "running times" such as CPU usage measurements.
+// All measurements are listed in `PROFILER_FOR_EACH_RUNNING_TIME` above.
+// Each measurement is optional and only takes a value when explicitly set.
+// Two RunningTimes objects may be subtracted, to get the difference between
+// known values.
+class RunningTimes {
+ public:
+  constexpr RunningTimes() = default;
+
+  // Constructor with only a timestamp, useful when no measurements will be
+  // taken.
+  constexpr explicit RunningTimes(const mozilla::TimeStamp& aTimeStamp)
+      : mPostMeasurementTimeStamp(aTimeStamp) {}
+
+  constexpr void Clear() { *this = RunningTimes{}; }  // Back to defaults.
+
+  constexpr bool IsEmpty() const { return mKnownBits == 0; }  // Nothing known.
+
+  // This should be called right after CPU measurements have been taken.
+  void SetPostMeasurementTimeStamp(const mozilla::TimeStamp& aTimeStamp) {
+    mPostMeasurementTimeStamp = aTimeStamp;
+  }
+
+  const mozilla::TimeStamp& PostMeasurementTimeStamp() const {
+    return mPostMeasurementTimeStamp;
+  }
+
+  // Should be filled for any registered thread.
+  // Generates Is/Clear/Reset/Set/Get/GetJson accessors for each measurement.
+#define RUNNING_TIME_MEMBER(index, name, unit, jsonProperty)          \
+  constexpr bool Is##name##unit##Known() const {                      \
+    return (mKnownBits & mGot##name##unit) != 0;                      \
+  }                                                                   \
+                                                                      \
+  constexpr void Clear##name##unit() {                                \
+    m##name##unit = 0;                                                \
+    mKnownBits &= ~mGot##name##unit;                                  \
+  }                                                                   \
+                                                                      \
+  constexpr void Reset##name##unit(uint64_t a##name##unit) {          \
+    m##name##unit = a##name##unit;                                    \
+    mKnownBits |= mGot##name##unit;                                   \
+  }                                                                   \
+                                                                      \
+  constexpr void Set##name##unit(uint64_t a##name##unit) {            \
+    MOZ_ASSERT(!Is##name##unit##Known(), #name #unit " already set"); \
+    Reset##name##unit(a##name##unit);                                 \
+  }                                                                   \
+                                                                      \
+  constexpr mozilla::Maybe<uint64_t> Get##name##unit() const {        \
+    if (Is##name##unit##Known()) {                                    \
+      return mozilla::Some(m##name##unit);                            \
+    }                                                                 \
+    return mozilla::Nothing{};                                        \
+  }                                                                   \
+                                                                      \
+  constexpr mozilla::Maybe<uint64_t> GetJson##name##unit() const {    \
+    if (Is##name##unit##Known()) {                                    \
+      return mozilla::Some(ConvertRawToJson(m##name##unit));          \
+    }                                                                 \
+    return mozilla::Nothing{};                                        \
+  }
+
+  PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_MEMBER)
+
+#undef RUNNING_TIME_MEMBER
+
+  // Take the known values from `aOther`, leaving it with no known values.
+  RunningTimes& TakeFrom(RunningTimes& aOther) {
+    if (!aOther.IsEmpty()) {
+#define RUNNING_TIME_TAKE(index, name, unit, jsonProperty)   \
+  if (aOther.Is##name##unit##Known()) {                      \
+    Set##name##unit(std::exchange(aOther.m##name##unit, 0)); \
+  }
+
+      PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_TAKE)
+
+#undef RUNNING_TIME_TAKE
+
+      aOther.mKnownBits = 0;
+    }
+    return *this;
+  }
+
+  // Difference from `aBefore` to `this`. Any unknown makes the result unknown.
+  // PostMeasurementTimeStamp set to `this` PostMeasurementTimeStamp, to keep
+  // the most recent timestamp associated with the end of the interval over
+  // which the difference applies.
+  RunningTimes operator-(const RunningTimes& aBefore) const {
+    RunningTimes diff;
+    diff.mPostMeasurementTimeStamp = mPostMeasurementTimeStamp;
+#define RUNNING_TIME_SUB(index, name, unit, jsonProperty)           \
+  if (Is##name##unit##Known() && aBefore.Is##name##unit##Known()) { \
+    diff.Set##name##unit(m##name##unit - aBefore.m##name##unit);    \
+  }
+
+    PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SUB)
+
+#undef RUNNING_TIME_SUB
+    return diff;
+  }
+
+ private:
+  friend mozilla::ProfileBufferEntryWriter::Serializer<RunningTimes>;
+  friend mozilla::ProfileBufferEntryReader::Deserializer<RunningTimes>;
+
+  // Platform-dependent.
+  static uint64_t ConvertRawToJson(uint64_t aRawValue);
+
+  mozilla::TimeStamp mPostMeasurementTimeStamp;
+
+  uint32_t mKnownBits = 0u;  // Bitfield of mGot* flags for known values.
+
+#define RUNNING_TIME_MEMBER(index, name, unit, jsonProperty) \
+  static constexpr uint32_t mGot##name##unit = 1u << index;  \
+  uint64_t m##name##unit = 0;
+
+  PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_MEMBER)
+
+#undef RUNNING_TIME_MEMBER
+};
+
+template <>
+struct mozilla::ProfileBufferEntryWriter::Serializer<RunningTimes> {
+  // Size of the serialized form: known-bits mask plus each known value.
+  static Length Bytes(const RunningTimes& aRunningTimes) {
+    Length total = ULEB128Size(aRunningTimes.mKnownBits);
+#define RUNNING_TIME_SERIALIZATION_BYTES(index, name, unit, jsonProperty) \
+  if (aRunningTimes.Is##name##unit##Known()) {                            \
+    total += ULEB128Size(aRunningTimes.m##name##unit);                    \
+  }
+
+    PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SERIALIZATION_BYTES)
+
+#undef RUNNING_TIME_SERIALIZATION_BYTES
+    return total;
+  }
+
+  // Write the known-bits mask first, followed by each value flagged as known.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const RunningTimes& aRunningTimes) {
+    aEW.WriteULEB128(aRunningTimes.mKnownBits);
+#define RUNNING_TIME_SERIALIZE(index, name, unit, jsonProperty) \
+  if (aRunningTimes.Is##name##unit##Known()) {                  \
+    aEW.WriteULEB128(aRunningTimes.m##name##unit);              \
+  }
+
+    PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SERIALIZE)
+
+#undef RUNNING_TIME_SERIALIZE
+  }
+};
+
+template <>
+struct mozilla::ProfileBufferEntryReader::Deserializer<RunningTimes> {
+  // Overwrite `aRunningTimes` with the next serialized RunningTimes.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       RunningTimes& aRunningTimes) {
+    aRunningTimes = Read(aER);
+  }
+
+  static RunningTimes Read(ProfileBufferEntryReader& aER) {
+    // Default-constructed: no known values yet.
+    RunningTimes result;
+
+    // The serialized mask is taken as-is, it already has all the known bits.
+    result.mKnownBits = aER.ReadULEB128<decltype(result.mKnownBits)>();
+
+    // Values follow the mask, but only for the members flagged as known.
+#define RUNNING_TIME_DESERIALIZE(index, name, unit, jsonProperty) \
+  if (result.Is##name##unit##Known()) {                           \
+    result.m##name##unit =                                        \
+        aER.ReadULEB128<decltype(result.m##name##unit)>();        \
+  }
+
+    PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_DESERIALIZE)
+
+#undef RUNNING_TIME_DESERIALIZE
+    return result;
+  }
+};
+
+#endif /* ndef TOOLS_PLATFORM_H_ */
diff --git a/tools/profiler/core/shared-libraries-linux.cc b/tools/profiler/core/shared-libraries-linux.cc
new file mode 100644
index 0000000000..2991e64909
--- /dev/null
+++ b/tools/profiler/core/shared-libraries-linux.cc
@@ -0,0 +1,280 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "shared-libraries.h"
+
+#define PATH_MAX_TOSTRING(x) #x
+#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x)
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fstream>
+#include "platform.h"
+#include "shared-libraries.h"
+#include "GeckoProfiler.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/Unused.h"
+#include "nsDebug.h"
+#include "nsNativeCharsetUtils.h"
+#include <nsTArray.h>
+
+#include "common/linux/file_id.h"
+#include <algorithm>
+#include <dlfcn.h>
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+#  include <features.h>
+#endif
+#include <sys/types.h>
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+#  include <link.h>  // dl_phdr_info
+#else
+#  error "Unexpected configuration"
+#endif
+
+#if defined(GP_OS_android)
+extern "C" MOZ_EXPORT __attribute__((weak)) int dl_iterate_phdr(
+    int (*callback)(struct dl_phdr_info* info, size_t size, void* data),
+    void* data);
+#endif
+
+struct LoadedLibraryInfo {
+  LoadedLibraryInfo(const char* aName, unsigned long aBaseAddress,
+                    unsigned long aFirstMappingStart,
+                    unsigned long aLastMappingEnd)
+      : mName(aName),
+        mBaseAddress(aBaseAddress),
+        mFirstMappingStart(aFirstMappingStart),
+        mLastMappingEnd(aLastMappingEnd) {}
+
+  nsCString mName;                   // dlpi_name from dl_iterate_phdr.
+  unsigned long mBaseAddress;        // dlpi_addr from dl_iterate_phdr.
+  unsigned long mFirstMappingStart;  // Lowest PT_LOAD segment start.
+  unsigned long mLastMappingEnd;     // Highest PT_LOAD segment end.
+};
+
+static nsCString IDtoUUIDString(
+    const google_breakpad::wasteful_vector<uint8_t>& aIdentifier) {
+  // Breakpad id = UUID-formatted identifier followed by the age digit.
+  const std::string uuidText =
+      google_breakpad::FileID::ConvertIdentifierToUUIDString(aIdentifier);
+
+  nsCString breakpadId;
+  breakpadId.Append(uuidText.c_str(), uuidText.size());
+  breakpadId.Append('0');  // The age is the character '0', not '\0'.
+  return breakpadId;
+}
+
+// Return the raw Build ID as a hex string (no breakpad age suffix).
+static nsCString IDtoString(
+    const google_breakpad::wasteful_vector<uint8_t>& aIdentifier) {
+  const std::string hexText =
+      google_breakpad::FileID::ConvertIdentifierToString(aIdentifier);
+
+  nsCString codeId;
+  codeId.Append(hexText.c_str(), hexText.size());
+  return codeId;
+}
+
+// Get the breakpad Id for the binary file pointed to by bin_name, or an empty
+// string if its ELF identifier cannot be obtained.
+static nsCString getBreakpadId(const char* bin_name) {
+  using namespace google_breakpad;
+
+  PageAllocator pageAllocator;
+  auto_wasteful_vector<uint8_t, kDefaultBuildIdSize> elfId(&pageAllocator);
+
+  FileID binary(bin_name);
+  if (!binary.ElfFileIdentifier(elfId)) {
+    return ""_ns;
+  }
+  return IDtoUUIDString(elfId);
+}
+
+// Get the code Id for the binary file pointed to by bin_name, or an empty
+// string if its ELF identifier cannot be obtained.
+static nsCString getCodeId(const char* bin_name) {
+  using namespace google_breakpad;
+
+  PageAllocator pageAllocator;
+  auto_wasteful_vector<uint8_t, kDefaultBuildIdSize> elfId(&pageAllocator);
+
+  FileID binary(bin_name);
+  if (!binary.ElfFileIdentifier(elfId)) {
+    return ""_ns;
+  }
+  return IDtoString(elfId);
+}
+
+static SharedLibrary SharedLibraryAtPath(const char* path,
+                                         unsigned long libStart,
+                                         unsigned long libEnd,
+                                         unsigned long offset = 0) {
+  nsAutoString pathStr;
+  mozilla::Unused << NS_WARN_IF(
+      NS_FAILED(NS_CopyNativeToUnicode(nsDependentCString(path), pathStr)));
+  // The library name is whatever follows the last '/' of the path, if any.
+  nsAutoString leafName(pathStr);
+  const int32_t lastSlash = leafName.RFindChar('/');
+  if (lastSlash != kNotFound) {
+    leafName.Cut(0, lastSlash + 1);
+  }
+  const nsCString breakpadId = getBreakpadId(path);
+  const nsCString codeId = getCodeId(path);
+  return SharedLibrary(libStart, libEnd, offset, breakpadId, codeId, leafName,
+                       pathStr, leafName, pathStr, ""_ns, "");
+}
+
+// dl_iterate_phdr callback: records the name, base address and PT_LOAD
+// address range of one loaded object in the nsTArray<LoadedLibraryInfo> that
+// `data` points to.
+static int dl_iterate_callback(struct dl_phdr_info* dl_info, size_t size,
+                               void* data) {
+  // Objects without program headers are skipped.
+  if (dl_info->dlpi_phnum <= 0) {
+    return 0;
+  }
+
+  // Bounds of the address range covered by the object's PT_LOAD segments.
+  unsigned long lowestStart = ~0UL;
+  unsigned long highestEnd = 0;
+  for (size_t idx = 0; idx != dl_info->dlpi_phnum; ++idx) {
+    const auto& phdr = dl_info->dlpi_phdr[idx];
+    if (phdr.p_type == PT_LOAD) {
+      const unsigned long segStart = dl_info->dlpi_addr + phdr.p_vaddr;
+      const unsigned long segEnd = segStart + phdr.p_memsz;
+      lowestStart = std::min(lowestStart, segStart);
+      highestEnd = std::max(highestEnd, segEnd);
+    }
+  }
+
+  auto* loadedLibs = static_cast<nsTArray<LoadedLibraryInfo>*>(data);
+  loadedLibs->AppendElement(LoadedLibraryInfo(
+      dl_info->dlpi_name, dl_info->dlpi_addr, lowestStart, highestEnd));
+
+  return 0;
+}
+
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibraryInfo info;
+
+#if defined(GP_OS_linux)
+  // We need to find the name of the executable (exeName, exeNameLen) and the
+  // address of its executable section (exeExeAddr) in the running image.
+  char exeName[PATH_MAX];
+  memset(exeName, 0, sizeof(exeName));
+
+  ssize_t exeNameLen = readlink("/proc/self/exe", exeName, sizeof(exeName) - 1);
+  if (exeNameLen == -1) {
+    // readlink failed for whatever reason.  Note this, but keep going.
+    exeName[0] = '\0';
+    exeNameLen = 0;
+    LOG("SharedLibraryInfo::GetInfoForSelf(): readlink failed");
+  } else {
+    // Assert no buffer overflow.
+    MOZ_RELEASE_ASSERT(exeNameLen >= 0 &&
+                       exeNameLen < static_cast<ssize_t>(sizeof(exeName)));
+  }
+
+  unsigned long exeExeAddr = 0;
+#endif
+
+#if defined(GP_OS_android)
+  // If dl_iterate_phdr doesn't exist, we give up immediately.
+  if (!dl_iterate_phdr) {
+    // On ARM Android, dl_iterate_phdr is provided by the custom linker.
+    // So if libxul was loaded by the system linker (e.g. as part of
+    // xpcshell when running tests), it won't be available and we should
+    // not call it.
+    return info;
+  }
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+  // Read info from /proc/<pid>/maps for our own pid. We ignore most of it.
+  pid_t pid = profiler_current_process_id().ToNumber();
+  char path[PATH_MAX];
+  SprintfLiteral(path, "/proc/%d/maps", pid);
+  std::ifstream maps(path);
+  std::string line;
+  while (std::getline(maps, line)) {
+    int ret;
+    unsigned long start;
+    unsigned long end;
+    char perm[6 + 1] = "";
+    unsigned long offset;
+    char modulePath[PATH_MAX + 1] = "";
+    ret = sscanf(line.c_str(),
+                 "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n",
+                 &start, &end, perm, &offset, modulePath);
+    if (!strchr(perm, 'x')) {
+      // Ignore non executable entries (or lines where perm wasn't parsed)
+      continue;
+    }
+    if (ret != 5 && ret != 4) {  // 4: the path field was not converted.
+      LOG("SharedLibraryInfo::GetInfoForSelf(): "
+          "reading /proc/self/maps failed");
+      continue;
+    }
+
+#  if defined(GP_OS_linux)
+    // Try to establish the main executable's load address.
+    if (exeNameLen > 0 && strcmp(modulePath, exeName) == 0) {
+      exeExeAddr = start;
+    }
+#  elif defined(GP_OS_android)
+    // Use /proc/pid/maps to get the dalvik-jit section since it has no
+    // associated phdrs.
+    if (0 == strcmp(modulePath, "/dev/ashmem/dalvik-jit-code-cache")) {
+      info.AddSharedLibrary(
+          SharedLibraryAtPath(modulePath, start, end, offset));
+      if (info.GetSize() > 10000) {
+        LOG("SharedLibraryInfo::GetInfoForSelf(): "
+            "implausibly large number of mappings acquired");
+        break;
+      }
+    }
+#  endif
+  }
+#endif
+
+  nsTArray<LoadedLibraryInfo> libInfoList;
+
+  // We collect the bulk of the library info using dl_iterate_phdr.
+  dl_iterate_phdr(dl_iterate_callback, &libInfoList);
+
+  for (const auto& libInfo : libInfoList) {
+    info.AddSharedLibrary(
+        SharedLibraryAtPath(libInfo.mName.get(), libInfo.mFirstMappingStart,
+                            libInfo.mLastMappingEnd,
+                            libInfo.mFirstMappingStart - libInfo.mBaseAddress));
+  }
+
+#if defined(GP_OS_linux)
+  // Make another pass over the information we just harvested from
+  // dl_iterate_phdr.  If we see a nameless object mapped at what we earlier
+  // established to be the main executable's load address, attach the
+  // executable's name to that entry.
+  for (size_t i = 0; i < info.GetSize(); i++) {
+    SharedLibrary& lib = info.GetMutableEntry(i);
+    if (lib.GetStart() <= exeExeAddr && exeExeAddr <= lib.GetEnd() &&
+        lib.GetNativeDebugPath().empty()) {
+      lib = SharedLibraryAtPath(exeName, lib.GetStart(), lib.GetEnd(),
+                                lib.GetOffset());
+
+      // We only expect to see one such entry.
+      break;
+    }
+  }
+#endif
+
+  return info;
+}
+
+// Nothing to prepare here: GetInfoForSelf() gathers everything on demand.
+void SharedLibraryInfo::Initialize() {}
diff --git a/tools/profiler/core/shared-libraries-macos.cc b/tools/profiler/core/shared-libraries-macos.cc
new file mode 100644
index 0000000000..415fda3633
--- /dev/null
+++ b/tools/profiler/core/shared-libraries-macos.cc
@@ -0,0 +1,211 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "shared-libraries.h"
+
+#include "ClearOnShutdown.h"
+#include "mozilla/StaticMutex.h"
+#include "mozilla/Unused.h"
+#include "nsNativeCharsetUtils.h"
+#include <AvailabilityMacros.h>
+
+#include <dlfcn.h>
+#include <mach-o/arch.h>
+#include <mach-o/dyld_images.h>
+#include <mach-o/dyld.h>
+#include <mach-o/loader.h>
+#include <mach/mach_init.h>
+#include <mach/mach_traps.h>
+#include <mach/task_info.h>
+#include <mach/task.h>
+#include <sstream>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+
+// Architecture specific abstraction.
+#if defined(GP_ARCH_x86)
+typedef mach_header platform_mach_header;
+typedef segment_command mach_segment_command_type;
+#  define MACHO_MAGIC_NUMBER MH_MAGIC
+#  define CMD_SEGMENT LC_SEGMENT
+#  define seg_size uint32_t
+#else
+typedef mach_header_64 platform_mach_header;
+typedef segment_command_64 mach_segment_command_type;
+#  define MACHO_MAGIC_NUMBER MH_MAGIC_64
+#  define CMD_SEGMENT LC_SEGMENT_64
+#  define seg_size uint64_t
+#endif
+
+struct NativeSharedLibrary {
+  const platform_mach_header* header;  // Mach-O header of the loaded image.
+  std::string path;  // dli_fname reported by dladdr() for that header.
+};
+static std::vector<NativeSharedLibrary>* sSharedLibrariesList = nullptr;
+static mozilla::StaticMutex sSharedLibrariesMutex MOZ_UNANNOTATED;
+
+static void SharedLibraryAddImage(const struct mach_header* mh,
+                                  intptr_t vmaddr_slide) {
+  // NOTE: dyld hands us a mach_header pointer even on 64-bit, where the image
+  // really starts with a mach_header_64 (presumably for backwards
+  // compatibility), so reinterpret it as the platform's header type.
+  const auto* imageHeader = reinterpret_cast<const platform_mach_header*>(mh);
+
+  // Look up the file the image was loaded from; give up if dladdr fails.
+  Dl_info imageInfo;
+  if (dladdr(imageHeader, &imageInfo) == 0) {
+    return;
+  }
+
+  // There is nothing to record into until Initialize() has created the list.
+  mozilla::StaticMutexAutoLock autoLock(sSharedLibrariesMutex);
+  if (sSharedLibrariesList) {
+    sSharedLibrariesList->push_back(
+        NativeSharedLibrary{imageHeader, imageInfo.dli_fname});
+  }
+}
+
+static void SharedLibraryRemoveImage(const struct mach_header* mh,
+                                     intptr_t vmaddr_slide) {
+  // NOTE: As with the add-image callback, the header pointer is typed as a
+  // mach_header even on 64-bit; reinterpret it as the platform's header type.
+  const auto* imageHeader = reinterpret_cast<const platform_mach_header*>(mh);
+
+  mozilla::StaticMutexAutoLock autoLock(sSharedLibrariesMutex);
+  if (!sSharedLibrariesList) {
+    return;
+  }
+
+  // Drop the first entry recorded for this image, if there is one.
+  auto& libs = *sSharedLibrariesList;
+  for (auto it = libs.begin(); it != libs.end(); ++it) {
+    if (it->header == imageHeader) {
+      libs.erase(it);
+      return;
+    }
+  }
+}
+
+void SharedLibraryInfo::Initialize() {
+  // NOTE: We intentionally leak this memory here. We're allocating dynamically
+  // in order to avoid static initializers.
+  sSharedLibrariesList = new std::vector<NativeSharedLibrary>();
+  // The list must exist first: both callbacks ignore events while it is null.
+  _dyld_register_func_for_add_image(SharedLibraryAddImage);
+  _dyld_register_func_for_remove_image(SharedLibraryRemoveImage);
+}
+
+// Describe the Mach-O image whose header is `header` (loaded from `path`)
+// and append the resulting SharedLibrary to `info`.
+// The entry starts at the header address and spans the size of the __TEXT
+// segment. The LC_UUID load command, when present, provides both the code id
+// and the breakpad id.
+static void addSharedLibrary(const platform_mach_header* header,
+                             const char* path, SharedLibraryInfo& info) {
+  // The image is reported as starting at its Mach-O header.
+  const unsigned long long imageStart =
+      reinterpret_cast<unsigned long long>(header);
+
+  // Walk the load commands, which start right after the header, until both
+  // the __TEXT segment size and the UUID are known, or until all `ncmds`
+  // commands have been visited.
+  seg_size textSize = 0;
+  const uint8_t* uuidBytes = nullptr;
+  const struct load_command* cmd =
+      reinterpret_cast<const struct load_command*>(header + 1);
+  for (unsigned int visited = 0;
+       cmd && visited < header->ncmds && !(uuidBytes && textSize != 0);
+       ++visited) {
+    switch (cmd->cmd) {
+      case CMD_SEGMENT: {
+        // Only the __TEXT segment matters: its size becomes the entry size.
+        const auto* segment =
+            reinterpret_cast<const mach_segment_command_type*>(cmd);
+        if (strcmp(segment->segname, "__TEXT") == 0) {
+          textSize = segment->vmsize;
+        }
+        break;
+      }
+      case LC_UUID:
+        // Remember where the UUID bytes live inside the image.
+        uuidBytes = reinterpret_cast<const uuid_command*>(cmd)->uuid;
+        break;
+      default:
+        break;
+    }
+
+    // Commands are variable-sized: `cmdsize` is the distance to the next one.
+    cmd = reinterpret_cast<const struct load_command*>(
+        reinterpret_cast<const char*>(cmd) + cmd->cmdsize);
+  }
+
+  // The code id is the UUID printed as 32 uppercase hex digits. The breakpad
+  // id is the same as the uuid but with the additional trailing 0 for the
+  // breakpad id age. Both stay empty when no LC_UUID command was found.
+  constexpr size_t kUuidByteCount = 16;
+  nsAutoCString uuid;
+  nsAutoCString breakpadId;
+  if (uuidBytes) {
+    for (size_t byteIdx = 0; byteIdx < kUuidByteCount; ++byteIdx) {
+      uuid.AppendPrintf("%02X", uuidBytes[byteIdx]);
+    }
+    breakpadId.AppendPrintf("%s0", uuid.get());
+  }
+
+  // Convert the path from the native charset; a failure only emits a warning.
+  nsAutoString pathStr;
+  mozilla::Unused << NS_WARN_IF(
+      NS_FAILED(NS_CopyNativeToUnicode(nsDependentCString(path), pathStr)));
+
+  // The library name is whatever follows the last '/' of the path, if any.
+  nsAutoString leafName(pathStr);
+  const int32_t lastSlash = leafName.RFindChar('/');
+  if (lastSlash != kNotFound) {
+    leafName.Cut(0, lastSlash + 1);
+  }
+
+  // Architecture name for the image's CPU type; empty when it is not known.
+  const NXArchInfo* archInfo =
+      NXGetArchInfoFromCpuType(header->cputype, header->cpusubtype);
+  const char* archName = archInfo ? archInfo->name : "";
+
+  // The third argument (0) sits where the Linux code passes the file offset.
+  info.AddSharedLibrary(SharedLibrary(imageStart, imageStart + textSize, 0,
+                                      breakpadId, uuid, leafName, pathStr,
+                                      leafName, pathStr, ""_ns, archName));
+}
+
+// Translate the statically stored sSharedLibrariesList information into a
+// SharedLibraryInfo object (empty if Initialize() has not created the list).
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  mozilla::StaticMutexAutoLock lock(sSharedLibrariesMutex);
+  SharedLibraryInfo sharedLibraryInfo;
+  if (!sSharedLibrariesList) {
+    return sharedLibraryInfo;
+  }
+
+  for (const auto& info : *sSharedLibrariesList) {
+    addSharedLibrary(info.header, info.path.c_str(), sharedLibraryInfo);
+  }
+
+  // Add the entry for dyld itself, if its header and path are available. We
+  // only support macOS 10.12+, i.e. dyld version 15+, which added dyldPath.
+  task_dyld_info_data_t task_dyld_info;
+  mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
+  if (task_info(mach_task_self(), TASK_DYLD_INFO, (task_info_t)&task_dyld_info,
+                &count) != KERN_SUCCESS) {
+    return sharedLibraryInfo;
+  }
+
+  struct dyld_all_image_infos* aii =
+      (struct dyld_all_image_infos*)task_dyld_info.all_image_info_addr;
+  if (aii && aii->version >= 15 && aii->dyldImageLoadAddress && aii->dyldPath) {
+    addSharedLibrary(reinterpret_cast<const platform_mach_header*>(
+                         aii->dyldImageLoadAddress),
+                     aii->dyldPath, sharedLibraryInfo);
+  }
+  return sharedLibraryInfo;
+}
diff --git a/tools/profiler/core/shared-libraries-win32.cc b/tools/profiler/core/shared-libraries-win32.cc
new file mode 100644
index 0000000000..cb0bcd1f41
--- /dev/null
+++ b/tools/profiler/core/shared-libraries-win32.cc
@@ -0,0 +1,167 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+
+#include "shared-libraries.h"
+#include "nsWindowsHelpers.h"
+#include "mozilla/NativeNt.h"
+#include "mozilla/WindowsEnumProcessModules.h"
+#include "mozilla/WindowsProcessMitigations.h"
+#include "mozilla/WindowsVersion.h"
+#include "nsPrintfCString.h"
+
+static bool IsModuleUnsafeToLoad(const nsAString& aModuleName) {
+#if defined(_M_AMD64) || defined(_M_IX86)
+  // Workaround for Bug 1607574.  nvd3d9wrap[x].dll, Nvidia's shim driver,
+  // detours LoadLibraryExW, which leads to an AV when all of these hold:
+  //   1. LoadLibraryExW was called for "detoured.dll"
+  //   2. nvinit[x].dll was unloaded
+  //   3. OS version is older than 6.2
+#  if defined(_M_AMD64)
+  const wchar_t* const shimDriverName = L"nvd3d9wrapx.dll";
+  const wchar_t* const initDriverName = L"nvinitx.dll";
+#  elif defined(_M_IX86)
+  const wchar_t* const shimDriverName = L"nvd3d9wrap.dll";
+  const wchar_t* const initDriverName = L"nvinit.dll";
+#  endif
+  if (aModuleName.LowerCaseEqualsLiteral("detoured.dll") &&
+      !mozilla::IsWin8OrLater()) {
+    const bool shimLoaded = ::GetModuleHandleW(shimDriverName) != nullptr;
+    if (shimLoaded && !::GetModuleHandleW(initDriverName)) {
+      return true;
+    }
+  }
+#endif  // defined(_M_AMD64) || defined(_M_IX86)
+
+  // Workaround for Bug 1723868.  Microsoft's VP9 Video Decoder module cannot
+  // be safely kept from unloading, because mfplat.dll may have posted more
+  // than one task to unload it in the work queue without calling
+  // LoadLibrary.
+  const bool isVp9DecoderModule =
+      aModuleName.LowerCaseEqualsLiteral("msvp9dec_store.dll");
+  return isVp9DecoderModule;
+}
+
+// Appends an entry for the module at |aModulePath| to |sharedLibraryInfo|,
+// reported at |aModule|'s address when one is given. Modules that are skipped
+// for safety, or that cannot be loaded or parsed, add nothing.
+void AddSharedLibraryFromModuleInfo(SharedLibraryInfo& sharedLibraryInfo,
+                                    const wchar_t* aModulePath,
+                                    mozilla::Maybe<HMODULE> aModule) {
+  nsDependentSubstring leafName(
+      mozilla::nt::GetLeafName(nsDependentString(aModulePath)));
+
+  // Modules known to be unsafe to pass to LoadLibraryEx are not recorded.
+  if (IsModuleUnsafeToLoad(leafName)) {
+    return;
+  }
+
+  // Parsing ntdll's PE header crashes while EAF+ is enabled, so skip it.
+  if (mozilla::IsEafPlusEnabled() &&
+      leafName.LowerCaseEqualsLiteral("ntdll.dll")) {
+    return;
+  }
+
+  // Load the module again (or for the first time, given only a path) so its
+  // handle stays valid while PDB information is read; for a valid |aModule|
+  // this just increments its refcount. LOAD_LIBRARY_AS_IMAGE_RESOURCE skips
+  // the module's DllMain but, unlike loading as a data file, has the loader
+  // relocate the sections (not PE headers), keeping RVA computations easy.
+  // Otherwise GetPdbInfo() and/or GetVersionInfo() can cause a crash.
+  nsModuleHandle loadedModule(
+      ::LoadLibraryExW(aModulePath, nullptr, LOAD_LIBRARY_AS_IMAGE_RESOURCE));
+  if (!loadedModule) {
+    return;
+  }
+
+  mozilla::nt::PEHeaders peHeaders(loadedModule.get());
+  if (!peHeaders) {
+    return;
+  }
+
+  auto imageBounds = peHeaders.GetBounds();
+  if (!imageBounds) {
+    return;
+  }
+
+  // The original |aModule| is what gets reported in the SharedLibrary, but
+  // debug info is read via |loadedModule| as |aModule| might be inaccessible.
+  const HMODULE reportedModule =
+      aModule.isSome() ? *aModule : loadedModule.get();
+  const uintptr_t startAddr = reinterpret_cast<uintptr_t>(reportedModule);
+  const uintptr_t endAddr = startAddr + imageBounds->length();
+
+  nsAutoCString breakpadId;
+  nsAutoString pdbPath;
+  if (const auto* pdbInfo = peHeaders.GetPdbInfo()) {
+    MOZ_ASSERT(breakpadId.IsEmpty());
+    const GUID& sig = pdbInfo->pdbSignature;
+    breakpadId.AppendPrintf(
+        "%08lX"                             // Data1
+        "%04X%04X"                          // Data2, Data3
+        "%02X%02X%02X%02X%02X%02X%02X%02X"  // Data4[0..7]
+        "%X",                               // pdbAge
+        sig.Data1, sig.Data2, sig.Data3, sig.Data4[0], sig.Data4[1],
+        sig.Data4[2], sig.Data4[3], sig.Data4[4], sig.Data4[5], sig.Data4[6],
+        sig.Data4[7], pdbInfo->pdbAge);
+
+    // The PDB file name can differ from the module file name, so report
+    // both: e.g. the PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb
+    pdbPath = NS_ConvertUTF8toUTF16(pdbInfo->pdbFileName);
+  }
+
+  nsAutoCString codeId;
+  DWORD peTimestamp;
+  DWORD peImageSize;
+  if (peHeaders.GetTimeStamp(peTimestamp) &&
+      peHeaders.GetImageSize(peImageSize)) {
+    codeId.AppendPrintf(
+        "%08lX"  // Timestamp: 8 uppercase hex digits, zero-padded.
+        "%lx",   // Image size: lowercase hex.
+        peTimestamp, peImageSize);
+  }
+
+  nsAutoCString versionStr;
+  uint64_t packedVersion;
+  if (peHeaders.GetVersionInfo(packedVersion)) {
+    // Four 16-bit fields, printed from the most significant downwards.
+    const auto field = [packedVersion](unsigned aShift) {
+      return static_cast<uint32_t>((packedVersion >> aShift) & 0xFFFFu);
+    };
+    versionStr.AppendPrintf("%u.%u.%u.%u", field(48), field(32), field(16),
+                            field(0));
+  }
+
+  const nsString& pdbLeafName =
+      PromiseFlatString(mozilla::nt::GetLeafName(pdbPath));
+  sharedLibraryInfo.AddSharedLibrary(SharedLibrary(
+      startAddr, endAddr,
+      0,  // DLLs are always mapped at offset 0 on Windows
+      breakpadId, codeId, PromiseFlatString(leafName),
+      nsDependentString(aModulePath), pdbLeafName, pdbPath, versionStr, ""));
+}
+
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibraryInfo result;
+
+  // Records one module; handed to the process-module enumeration below.
+  auto recordModule = [&result](const wchar_t* aModulePath,
+                                HMODULE aModule) {
+    AddSharedLibraryFromModuleInfo(result, aModulePath, mozilla::Some(aModule));
+  };
+
+  mozilla::EnumerateProcessModules(recordModule);
+  return result;
+}
+
+SharedLibraryInfo SharedLibraryInfo::GetInfoFromPath(const wchar_t* aPath) {
+  SharedLibraryInfo libraryInfo;  // no module handle: load from the path
+  AddSharedLibraryFromModuleInfo(libraryInfo, aPath, mozilla::Nothing());
+  return libraryInfo;
+}
+
+// Intentionally empty: this implementation has nothing to initialize.
+void SharedLibraryInfo::Initialize() {}
diff --git a/tools/profiler/core/vtune/ittnotify.h b/tools/profiler/core/vtune/ittnotify.h
new file mode 100644
index 0000000000..f1d65b3328
--- /dev/null
+++ b/tools/profiler/core/vtune/ittnotify.h
@@ -0,0 +1,4123 @@
+/* <copyright>
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+
+  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+  The full GNU General Public License is included in this distribution
+  in the file called LICENSE.GPL.
+
+  Contact Information:
+  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
+
+  BSD LICENSE
+
+  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+</copyright> */
+#ifndef _ITTNOTIFY_H_
+#define _ITTNOTIFY_H_
+
+/**
+@file
+@brief Public User API functions and types
+@mainpage
+
+The ITT API is used to annotate a user's program with additional information
+that can be used by correctness and performance tools. The user inserts
+calls in their program. Those calls generate information that is collected
+at runtime, and used by Intel(R) Threading Tools.
+
+@section API Concepts
+The following general concepts are used throughout the API.
+
+@subsection Unicode Support
+Many API functions take character string arguments. On Windows, there
+are two versions of each such function. The function name is suffixed
+by W if Unicode support is enabled, and by A otherwise. Any API function
+that takes a character string argument adheres to this convention.
+
+@subsection Conditional Compilation
+Many users prefer having an option to modify ITT API code when linking it
+inside their runtimes. ITT API header file provides a mechanism to replace
+ITT API function names inside your code with empty strings. To do this,
+define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the
+static library from the linker script.
+
+@subsection Domains
+[see domains]
+Domains provide a way to separate notification for different modules or
+libraries in a program. Domains are specified by dotted character strings,
+e.g. TBB.Internal.Control.
+
+A mechanism (to be specified) is provided to enable and disable
+domains. By default, all domains are enabled.
+@subsection Named Entities and Instances
+Named entities (frames, regions, tasks, and markers) communicate
+information about the program to the analysis tools. A named entity often
+refers to a section of program code, or to some set of logical concepts
+that the programmer wants to group together.
+
+Named entities relate to the programmer's static view of the program. When
+the program actually executes, many instances of a given named entity
+may be created.
+
+The API annotations denote instances of named entities. The actual
+named entities are displayed using the analysis tools. In other words,
+the named entities come into existence when instances are created.
+
+Instances of named entities may have instance identifiers (IDs). Some
+API calls use instance identifiers to create relationships between
+different instances of named entities. Other API calls associate data
+with instances of named entities.
+
+Some named entities must always have instance IDs. In particular, regions
+and frames always have IDs. Task and markers need IDs only if the ID is
+needed in another API call (such as adding a relation or metadata).
+
+The lifetime of instance IDs is distinct from the lifetime of
+instances. This allows various relationships to be specified separate
+from the actual execution of instances. This flexibility comes at the
+expense of extra API calls.
+
+The same ID may not be reused for different instances, unless a previous
+[ref] __itt_id_destroy call for that ID has been issued.
+*/
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+#  define ITT_OS_WIN   1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+#  define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+#  define ITT_OS_MAC   3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS_FREEBSD
+#  define ITT_OS_FREEBSD   4
+#endif /* ITT_OS_FREEBSD */
+
+#ifndef ITT_OS
+#  if defined WIN32 || defined _WIN32
+#    define ITT_OS ITT_OS_WIN
+#  elif defined( __APPLE__ ) && defined( __MACH__ )
+#    define ITT_OS ITT_OS_MAC
+#  elif defined( __FreeBSD__ )
+#    define ITT_OS ITT_OS_FREEBSD
+#  else
+#    define ITT_OS ITT_OS_LINUX
+#  endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+#  define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+#  define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+#  define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM_FREEBSD
+#  define ITT_PLATFORM_FREEBSD 4
+#endif /* ITT_PLATFORM_FREEBSD */
+
+#ifndef ITT_PLATFORM
+#  if ITT_OS==ITT_OS_WIN
+#    define ITT_PLATFORM ITT_PLATFORM_WIN
+#  elif ITT_OS==ITT_OS_MAC
+#    define ITT_PLATFORM ITT_PLATFORM_MAC
+#  elif ITT_OS==ITT_OS_FREEBSD
+#    define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+#  else
+#    define ITT_PLATFORM ITT_PLATFORM_POSIX
+#  endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef ITTAPI_CDECL
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    define ITTAPI_CDECL __cdecl
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    if defined _M_IX86 || defined __i386__
+#      define ITTAPI_CDECL __attribute__ ((cdecl))
+#    else  /* _M_IX86 || __i386__ */
+#      define ITTAPI_CDECL /* actual only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* ITTAPI_CDECL */
+
+#ifndef STDCALL
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    define STDCALL __stdcall
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    if defined _M_IX86 || defined __i386__
+#      define STDCALL __attribute__ ((stdcall))
+#    else  /* _M_IX86 || __i386__ */
+#      define STDCALL /* supported only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI    ITTAPI_CDECL
+#define LIBITTAPI ITTAPI_CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL    ITTAPI_CDECL
+#define LIBITTAPI_CALL ITTAPI_CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE           __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE           static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else  /* __STRICT_ANSI__ */
+#define ITT_INLINE           static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro")
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro"
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#  include "vtune/legacy/ittnotify.h"
+#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n
+#define ITT_JOIN(p,n)     ITT_JOIN_AUX(p,n)
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR     3
+#define ITT_MINOR     0
+
+/* Appends the API version to a token: x becomes x_<ITT_MAJOR>_<ITT_MINOR> (x_3_0 with the values above) */
+#define ITT_VERSIONIZE(x)    \
+    ITT_JOIN(x,              \
+    ITT_JOIN(_,              \
+    ITT_JOIN(ITT_MAJOR,      \
+    ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+#  define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+#  define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n)     ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
+/* ITTNOTIFY_VOID/DATA: yield (void)0 or 0 when the ITTNOTIFY_NAME(n) pointer is null, otherwise the pointer itself (the caller's argument list follows it). */
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)
+/* _D0.._D6 variants: first yield (void)0 or 0 when (d)->flags is zero, then apply the same null-pointer check and call with d plus 0..6 further arguments. */
+#define ITTNOTIFY_VOID_D0(n,d)       (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d)       (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x)     (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y)   (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a)     (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
+#ifdef ITT_STUB
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args)                             \
+    typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args;   \
+    extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
+#define ITT_STUB ITT_STUBV
+/** @endcond */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup public Public API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup control Collection Control
+ * @ingroup public
+ * General behavior: application continues to run, but no profiling information is being collected
+ *
+ * Pausing applies not only to the current thread but to the whole process, as well as to spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ *   - Does not analyze or report errors that involve memory access.
+ *   - Other errors are reported as usual. Pausing data collection in
+ *     Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ *     only pauses tracing and analyzing memory access.
+ *     It does not pause tracing or analyzing threading APIs.
+ *   .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ *   - Does continue to record when new threads are started.
+ *   .
+ * - Other effects:
+ *   - Possible reduction of runtime overhead.
+ *   .
+ * @{
+ */
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+/** @brief Detach collection */
+void ITTAPI __itt_detach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, pause,  (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+ITT_STUBV(ITTAPI, void, detach, (void))
+#define __itt_pause      ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr  ITTNOTIFY_NAME(pause)
+#define __itt_resume     ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#define __itt_detach     ITTNOTIFY_VOID(detach)
+#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_pause()
+#define __itt_pause_ptr  0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#define __itt_detach()
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_pause_ptr  0
+#define __itt_resume_ptr 0
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} control group */
+/** @endcond */
+
+/**
+ * @defgroup threads Threads
+ * @ingroup public
+ * Give names to threads
+ * @{
+ */
+/**
+ * @brief Sets thread name of calling thread
+ * @param[in] name - name of thread
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_thread_set_nameA(const char    *name);
+void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_thread_set_name     __itt_thread_set_nameW
+#  define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
+#else /* UNICODE */
+#  define __itt_thread_set_name     __itt_thread_set_nameA
+#  define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_thread_set_name(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char    *name))
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name,  (const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA     ITTNOTIFY_VOID(thread_set_nameA)
+#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA)
+#define __itt_thread_set_nameW     ITTNOTIFY_VOID(thread_set_nameW)
+#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name     ITTNOTIFY_VOID(thread_set_name)
+#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA(name)
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW(name)
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name(name)
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void ITTAPI __itt_thread_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, thread_ignore, (void))
+#define __itt_thread_ignore     ITTNOTIFY_VOID(thread_ignore)
+#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thread_ignore()
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} threads group */
+
+/**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push     ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop     ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_suppress_mode
+ * @brief Selects whether a memory range is marked for suppression or unsuppression
+ */
+typedef enum __itt_suppress_mode {
+    __itt_unsuppress_range,
+    __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory (address, size) for error suppression or unsuppression (mode) for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+/* With INTEL_NO_ITTNOTIFY_API defined, calls expand to nothing; the stub macro accepts the same four arguments as the function. */
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range     ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_mark_range(mode, mask, address, size)
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range (same mode, mask, address and size).
+ *        If no matching call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+/* With INTEL_NO_ITTNOTIFY_API defined, calls expand to nothing; the stub macro accepts the same four arguments as the function. */
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range     ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_clear_range(mode, mask, address, size)
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+/** @} suppress group */
+
+/**
+ * @defgroup sync Synchronization
+ * @ingroup public
+ * Indicate user-written synchronization code
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_mutex   2
+
+/**
+@brief Name a synchronization object
+@param[in] addr       Handle for the synchronization object. You should
+use a real address to uniquely identify the synchronization object.
+@param[in] objtype    null-terminated object type string. If NULL is
+passed, the name will be "User Synchronization".
+@param[in] objname    null-terminated object name string. If NULL,
+no name will be assigned to the object.
+@param[in] attribute  one of [#__itt_attr_barrier, #__itt_attr_mutex]
+
+On Windows the entry point comes in an A (char) and a W (wchar_t) flavour;
+__itt_sync_create resolves to the W flavour when UNICODE or _UNICODE is
+defined and to the A flavour otherwise.
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_createA(void *addr, const char    *objtype, const char    *objname, int attribute);
+void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_sync_create     __itt_sync_createW
+#  define __itt_sync_create_ptr __itt_sync_createW_ptr
+#else /* UNICODE */
+#  define __itt_sync_create     __itt_sync_createA
+#  define __itt_sync_create_ptr __itt_sync_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/* Enabled build: each name maps to ITTNOTIFY_VOID(...) and each _ptr name to
+ * ITTNOTIFY_NAME(...).
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and every _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char    *objtype, const char    *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create,  (void *addr, const char*    objtype, const char*    objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA     ITTNOTIFY_VOID(sync_createA)
+#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA)
+#define __itt_sync_createW     ITTNOTIFY_VOID(sync_createW)
+#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create     ITTNOTIFY_VOID(sync_create)
+#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA(addr, objtype, objname, attribute)
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW(addr, objtype, objname, attribute)
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create(addr, objtype, objname, attribute)
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+@brief Rename a synchronization object
+
+You can use the rename call to assign or reassign a name to a given
+synchronization object.
+@param[in] addr  handle for the synchronization object.
+@param[in] name  null-terminated object name string.
+
+On Windows the entry point comes in an A (char) and a W (wchar_t) flavour;
+__itt_sync_rename resolves to the W flavour when UNICODE or _UNICODE is
+defined and to the A flavour otherwise.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_renameA(void *addr, const char    *name);
+void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_sync_rename     __itt_sync_renameW
+#  define __itt_sync_rename_ptr __itt_sync_renameW_ptr
+#else /* UNICODE */
+#  define __itt_sync_rename     __itt_sync_renameA
+#  define __itt_sync_rename_ptr __itt_sync_renameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_rename(void *addr, const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/* Enabled build: each name maps to ITTNOTIFY_VOID(...) and each _ptr name to
+ * ITTNOTIFY_NAME(...).
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and every _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char    *name))
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_rename,  (void *addr, const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA     ITTNOTIFY_VOID(sync_renameA)
+#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA)
+#define __itt_sync_renameW     ITTNOTIFY_VOID(sync_renameW)
+#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename     ITTNOTIFY_VOID(sync_rename)
+#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA(addr, name)
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW(addr, name)
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename(addr, name)
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ @brief Destroy a synchronization object.
+ @param addr Handle for the synchronization object.
+ */
+void ITTAPI __itt_sync_destroy(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_sync_destroy maps to ITTNOTIFY_VOID(sync_destroy) and
+ * __itt_sync_destroy_ptr to ITTNOTIFY_NAME(sync_destroy).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
+#define __itt_sync_destroy     ITTNOTIFY_VOID(sync_destroy)
+#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_destroy(addr)
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/*****************************************************************//**
+ * @name group of functions is used for performance measurement tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @brief Enter spin loop on user-defined sync object
+ * @param[in] addr  Handle (address) identifying the user-defined sync object.
+ */
+void ITTAPI __itt_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_sync_prepare maps to ITTNOTIFY_VOID(sync_prepare) and
+ * __itt_sync_prepare_ptr to ITTNOTIFY_NAME(sync_prepare).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
+#define __itt_sync_prepare     ITTNOTIFY_VOID(sync_prepare)
+#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_prepare(addr)
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Quit spin loop without acquiring spin object
+ * @param[in] addr  Handle (address) identifying the sync object.
+ */
+void ITTAPI __itt_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_sync_cancel maps to ITTNOTIFY_VOID(sync_cancel) and
+ * __itt_sync_cancel_ptr to ITTNOTIFY_NAME(sync_cancel).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
+#define __itt_sync_cancel     ITTNOTIFY_VOID(sync_cancel)
+#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_cancel(addr)
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Successful spin loop completion (sync object acquired)
+ * @param[in] addr  Handle (address) identifying the sync object.
+ */
+void ITTAPI __itt_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_sync_acquired maps to ITTNOTIFY_VOID(sync_acquired) and
+ * __itt_sync_acquired_ptr to ITTNOTIFY_NAME(sync_acquired).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
+#define __itt_sync_acquired     ITTNOTIFY_VOID(sync_acquired)
+#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_acquired(addr)
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Start sync object releasing code. Is called before the lock release call.
+ * @param[in] addr  Handle (address) identifying the sync object.
+ */
+void ITTAPI __itt_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_sync_releasing maps to ITTNOTIFY_VOID(sync_releasing)
+ * and __itt_sync_releasing_ptr to ITTNOTIFY_NAME(sync_releasing).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
+#define __itt_sync_releasing     ITTNOTIFY_VOID(sync_releasing)
+#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_releasing(addr)
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/** @} sync group */
+
+/**************************************************************//**
+ * @name group of functions is used for correctness checking tools
+ ******************************************************************/
+/** @{ */
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ *   in dynamic library.
+ * @see void __itt_sync_prepare(void* addr);
+ */
+void ITTAPI __itt_fsync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_fsync_prepare maps to ITTNOTIFY_VOID(fsync_prepare) and
+ * __itt_fsync_prepare_ptr to ITTNOTIFY_NAME(fsync_prepare).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
+#define __itt_fsync_prepare     ITTNOTIFY_VOID(fsync_prepare)
+#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_prepare(addr)
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ *   in dynamic library.
+ * @see void __itt_sync_cancel(void *addr);
+ */
+void ITTAPI __itt_fsync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_fsync_cancel maps to ITTNOTIFY_VOID(fsync_cancel) and
+ * __itt_fsync_cancel_ptr to ITTNOTIFY_NAME(fsync_cancel).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
+#define __itt_fsync_cancel     ITTNOTIFY_VOID(fsync_cancel)
+#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_cancel(addr)
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ *   in dynamic library.
+ * @see void __itt_sync_acquired(void *addr);
+ */
+void ITTAPI __itt_fsync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_fsync_acquired maps to ITTNOTIFY_VOID(fsync_acquired)
+ * and __itt_fsync_acquired_ptr to ITTNOTIFY_NAME(fsync_acquired).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
+#define __itt_fsync_acquired     ITTNOTIFY_VOID(fsync_acquired)
+#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_acquired(addr)
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ *   in dynamic library.
+ * @see void __itt_sync_releasing(void* addr);
+ */
+void ITTAPI __itt_fsync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_fsync_releasing maps to ITTNOTIFY_VOID(fsync_releasing)
+ * and __itt_fsync_releasing_ptr to ITTNOTIFY_NAME(fsync_releasing).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
+#define __itt_fsync_releasing     ITTNOTIFY_VOID(fsync_releasing)
+#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_releasing(addr)
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/**
+ * @defgroup model Modeling by Intel(R) Parallel Advisor
+ * @ingroup public
+ * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
+ * This API is called ONLY using annotate.h, by "Annotation" macros
+ * the user places in their sources during the parallelism modeling steps.
+ *
+ * site_begin/end and task_begin/end take the address of handle variables,
+ * which are writeable by the API.  Handles must be 0 initialized prior
+ * to the first call to begin, or may cause a run-time failure.
+ * The handles are initialized in a multi-thread safe way by the API if
+ * the handle is 0.  The commonly expected idiom is one static handle to
+ * identify a site or task.  If a site or task of the same name has already
+ * been started during this collection, the same handle MAY be returned,
+ * but is not required to be - it is unspecified if data merging is done
+ * based on name.  These routines also take an instance variable.  Like
+ * the lexical instance, these must be 0 initialized.  Unlike the lexical
+ * instance, this is used to track a single dynamic instance.
+ *
+ * API used by the Intel(R) Parallel Advisor to describe potential concurrency
+ * and related activities. User-added source annotations expand to calls
+ * to these procedures to enable modeling of a hypothetical concurrent
+ * execution serially.
+ * @{
+ */
+/* The handle typedefs and the enum below are skipped when _ADVISOR_ANNOTATE_H_
+ * is already defined, unless ANNOTATE_EXPAND_NULL is defined as well. */
+#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
+
+typedef void* __itt_model_site;             /*!< @brief handle for lexical site     */
+typedef void* __itt_model_site_instance;    /*!< @brief handle for dynamic instance */
+typedef void* __itt_model_task;             /*!< @brief handle for lexical task     */
+typedef void* __itt_model_task_instance;    /*!< @brief handle for dynamic instance */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ *
+ * Passed to __itt_model_disable_push() to select what gets disabled.
+ */
+typedef enum {
+    __itt_model_disable_observation,  /*!< disable observations */
+    __itt_model_disable_collection    /*!< disable collection   */
+} __itt_model_disable;
+
+#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
+
+/**
+ * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
+ *
+ * site_begin/end model a potential concurrency site.
+ * site instances may be recursively nested with themselves.
+ * site_end exits the most recently started but unended site for the current
+ * thread.  The handle passed to end may be used to validate structure.
+ * Instances of a site encountered on different threads concurrently
+ * are considered completely distinct. If the site name for two different
+ * lexical sites match, it is unspecified whether they are treated as the
+ * same or different for data presentation.
+ *
+ * The W (wchar_t) entry point is declared only on Windows.
+ */
+void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_site_beginW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_site_beginA(const char *name);
+void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
+void ITTAPI __itt_model_site_end  (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: each name maps to ITTNOTIFY_VOID(...) and each _ptr name to
+ * ITTNOTIFY_NAME(...).
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and every _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_site_begin,  (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW,  (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_site_beginA,  (const char *name))
+ITT_STUBV(ITTAPI, void, model_site_beginAL,  (const char *name, size_t siteNameLen))
+ITT_STUBV(ITTAPI, void, model_site_end,    (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2,  (void))
+#define __itt_model_site_begin      ITTNOTIFY_VOID(model_site_begin)
+#define __itt_model_site_begin_ptr  ITTNOTIFY_NAME(model_site_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW      ITTNOTIFY_VOID(model_site_beginW)
+#define __itt_model_site_beginW_ptr  ITTNOTIFY_NAME(model_site_beginW)
+#endif
+#define __itt_model_site_beginA      ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr  ITTNOTIFY_NAME(model_site_beginA)
+#define __itt_model_site_beginAL      ITTNOTIFY_VOID(model_site_beginAL)
+#define __itt_model_site_beginAL_ptr  ITTNOTIFY_NAME(model_site_beginAL)
+#define __itt_model_site_end        ITTNOTIFY_VOID(model_site_end)
+#define __itt_model_site_end_ptr    ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2        ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr    ITTNOTIFY_NAME(model_site_end_2)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_site_begin(site, instance, name)
+#define __itt_model_site_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW(name)
+#define __itt_model_site_beginW_ptr  0
+#endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr  0
+#define __itt_model_site_beginAL(name, siteNameLen)
+#define __itt_model_site_beginAL_ptr  0
+#define __itt_model_site_end(site, instance)
+#define __itt_model_site_end_ptr    0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr    0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_site_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW_ptr  0
+#endif
+#define __itt_model_site_beginA_ptr  0
+#define __itt_model_site_beginAL_ptr  0
+#define __itt_model_site_end_ptr    0
+#define __itt_model_site_end_2_ptr    0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support
+ *
+ * task_begin/end model a potential task, which is contained within the most
+ * closely enclosing dynamic site.  task_end exits the most recently started
+ * but unended task.  The handle passed to end may be used to validate
+ * structure.  It is unspecified if bad dynamic nesting is detected.  If it
+ * is, it should be encoded in the resulting data collection.  The collector
+ * should not fail due to construct nesting issues, nor attempt to directly
+ * indicate the problem.
+ *
+ * The W (wchar_t) entry points are declared only on Windows.
+ */
+void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_task_beginA(const char *name);
+void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_task_end  (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: each name maps to ITTNOTIFY_VOID(...) and each _ptr name to
+ * ITTNOTIFY_NAME(...).
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and every _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0.
+ * Every entry point declared above, including both Windows-only W variants,
+ * must be covered in all three configurations. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_task_begin,  (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_task_beginW,  (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_task_beginA,  (const char *name))
+ITT_STUBV(ITTAPI, void, model_task_beginAL,  (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA,  (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL,  (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_task_end,    (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2,  (void))
+#define __itt_model_task_begin      ITTNOTIFY_VOID(model_task_begin)
+#define __itt_model_task_begin_ptr  ITTNOTIFY_NAME(model_task_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW     ITTNOTIFY_VOID(model_task_beginW)
+#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW     ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
+#endif
+#define __itt_model_task_beginA    ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
+#define __itt_model_task_beginAL    ITTNOTIFY_VOID(model_task_beginAL)
+#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA    ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL    ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
+#define __itt_model_task_end        ITTNOTIFY_VOID(model_task_end)
+#define __itt_model_task_end_ptr    ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2        ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr    ITTNOTIFY_NAME(model_task_end_2)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_task_begin(task, instance, name)
+#define __itt_model_task_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW(name)
+#define __itt_model_task_beginW_ptr  0
+/* No-op stubs for the Windows-only iteration_taskW entry point. */
+#define __itt_model_iteration_taskW(name)
+#define __itt_model_iteration_taskW_ptr  0
+#endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr  0
+#define __itt_model_task_beginAL(name, taskNameLen)
+#define __itt_model_task_beginAL_ptr  0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr  0
+#define __itt_model_iteration_taskAL(name, taskNameLen)
+#define __itt_model_iteration_taskAL_ptr  0
+#define __itt_model_task_end(task, instance)
+#define __itt_model_task_end_ptr    0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr    0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_task_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW_ptr 0
+#define __itt_model_iteration_taskW_ptr 0
+#endif
+#define __itt_model_task_beginA_ptr  0
+#define __itt_model_task_beginAL_ptr  0
+#define __itt_model_iteration_taskA_ptr    0
+#define __itt_model_iteration_taskAL_ptr    0
+#define __itt_model_task_end_ptr    0
+#define __itt_model_task_end_2_ptr    0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support
+ *
+ * lock_acquire/release model a potential lock for both lockset and
+ * performance modeling.  Each unique address is modeled as a separate
+ * lock, with invalid addresses being valid lock IDs.  Specifically:
+ * no storage is accessed by the API at the specified address - it is only
+ * used for lock identification.  Lock acquires may be self-nested and are
+ * unlocked by a corresponding number of releases.
+ * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing,
+ * but may not have identical semantics.)
+ */
+void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
+void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: each name maps to ITTNOTIFY_VOID(...) and each _ptr name to
+ * ITTNOTIFY_NAME(...).
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and every _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
+#define __itt_model_lock_acquire     ITTNOTIFY_VOID(model_lock_acquire)
+#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2     ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
+#define __itt_model_lock_release     ITTNOTIFY_VOID(model_lock_release)
+#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2     ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_lock_acquire(lock)
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release(lock)
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support
+ *
+ * record_allocation/deallocation describe user-defined memory allocator
+ * behavior, which may be required for correctness modeling to understand
+ * when storage is not expected to be actually reused across threads.
+ */
+void ITTAPI __itt_model_record_allocation  (void *addr, size_t size);
+void ITTAPI __itt_model_record_deallocation(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: each name maps to ITTNOTIFY_VOID(...) and each _ptr name to
+ * ITTNOTIFY_NAME(...).
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and every _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_record_allocation,   (void *addr, size_t size))
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr))
+#define __itt_model_record_allocation       ITTNOTIFY_VOID(model_record_allocation)
+#define __itt_model_record_allocation_ptr   ITTNOTIFY_NAME(model_record_allocation)
+#define __itt_model_record_deallocation     ITTNOTIFY_VOID(model_record_deallocation)
+#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_record_allocation(addr, size)
+#define __itt_model_record_allocation_ptr   0
+#define __itt_model_record_deallocation(addr)
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_record_allocation_ptr   0
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_INDUCTION_USES support
+ *
+ * Note particular storage is inductive through the end of the current site
+ *
+ * @param[in] addr  address of the storage being described
+ * @param[in] size  size of that storage (presumably in bytes - confirm)
+ */
+void ITTAPI __itt_model_induction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_model_induction_uses maps to
+ * ITTNOTIFY_VOID(model_induction_uses) and the _ptr name to
+ * ITTNOTIFY_NAME(model_induction_uses).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size))
+#define __itt_model_induction_uses     ITTNOTIFY_VOID(model_induction_uses)
+#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_induction_uses(addr, size)
+#define __itt_model_induction_uses_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_induction_uses_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_REDUCTION_USES support
+ *
+ * Note particular storage is used for reduction through the end
+ * of the current site
+ *
+ * @param[in] addr  address of the storage being described
+ * @param[in] size  size of that storage (presumably in bytes - confirm)
+ */
+void ITTAPI __itt_model_reduction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_model_reduction_uses maps to
+ * ITTNOTIFY_VOID(model_reduction_uses) and the _ptr name to
+ * ITTNOTIFY_NAME(model_reduction_uses).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size))
+#define __itt_model_reduction_uses     ITTNOTIFY_VOID(model_reduction_uses)
+#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_reduction_uses(addr, size)
+#define __itt_model_reduction_uses_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_reduction_uses_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_OBSERVE_USES support
+ *
+ * Have correctness modeling record observations about uses of storage
+ * through the end of the current site
+ *
+ * @param[in] addr  address of the storage being described
+ * @param[in] size  size of that storage (presumably in bytes - confirm)
+ */
+void ITTAPI __itt_model_observe_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_model_observe_uses maps to
+ * ITTNOTIFY_VOID(model_observe_uses) and the _ptr name to
+ * ITTNOTIFY_NAME(model_observe_uses).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size))
+#define __itt_model_observe_uses     ITTNOTIFY_VOID(model_observe_uses)
+#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_observe_uses(addr, size)
+#define __itt_model_observe_uses_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_observe_uses_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_CLEAR_USES support
+ *
+ * Clear the special handling of a piece of storage related to induction,
+ * reduction or observe_uses
+ *
+ * @param[in] addr  address of the storage whose special handling is cleared
+ */
+void ITTAPI __itt_model_clear_uses(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: __itt_model_clear_uses maps to
+ * ITTNOTIFY_VOID(model_clear_uses) and the _ptr name to
+ * ITTNOTIFY_NAME(model_clear_uses).
+ * INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr))
+#define __itt_model_clear_uses     ITTNOTIFY_VOID(model_clear_uses)
+#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_clear_uses(addr)
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support
+ *
+ * disable_push/disable_pop push and pop disabling based on a parameter.
+ * Disabling observations stops processing of memory references during
+ * correctness modeling, and all annotations that occur in the disabled
+ * region.  This allows description of code that is expected to be handled
+ * specially during conversion to parallelism or that is not recognized
+ * by tools (e.g. some kinds of synchronization operations.)
+ * This mechanism causes all annotations in the disabled region, other
+ * than disable_push and disable_pop, to be ignored.  (For example, this
+ * might validly be used to disable an entire parallel site and the contained
+ * tasks and locking in it for data collection purposes.)
+ * The disable for collection is a more expensive operation, but reduces
+ * collector overhead significantly.  This applies to BOTH correctness data
+ * collection and performance data collection.  For example, a site
+ * containing a task might only enable data collection for the first 10
+ * iterations.  Both performance and correctness data should reflect this,
+ * and the program should run as close to full speed as possible when
+ * collection is disabled.
+ *
+ * NOTE(review): __itt_model_aggregate_task is declared in this group but is
+ * not described by the text above; its semantics are not visible here.
+ */
+void ITTAPI __itt_model_disable_push(__itt_model_disable x);
+void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
+
+/** @cond exclude_from_documentation */
+/* Enabled build: each name maps to ITTNOTIFY_VOID(...) and each _ptr name to
+ * ITTNOTIFY_NAME(...).
+ * INTEL_NO_ITTNOTIFY_API: calls expand to nothing and every _ptr is 0.
+ * INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
+ITT_STUBV(ITTAPI, void, model_disable_pop,  (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
+#define __itt_model_disable_push     ITTNOTIFY_VOID(model_disable_push)
+#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
+#define __itt_model_disable_pop      ITTNOTIFY_VOID(model_disable_pop)
+#define __itt_model_disable_pop_ptr  ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task      ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr  ITTNOTIFY_NAME(model_aggregate_task)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_disable_push(x)
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop()
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} model group */
+
+/**
+ * @defgroup heap Heap
+ * @ingroup public
+ * Heap group
+ * @{
+ */
+
+typedef void* __itt_heap_function; /* opaque handle: returned by __itt_heap_function_create and passed as `h` to the heap calls below */
+
+/**
+ * @brief Create an identification for heap function
+ * On Windows both an ANSI entry point (suffix A, const char*) and a wide
+ * entry point (suffix W, const wchar_t*) are declared, and
+ * __itt_heap_function_create selects the W variant when UNICODE or _UNICODE
+ * is defined and the A variant otherwise. On other platforms only the
+ * const char* form exists.
+ * @param[in] name   name string (presumably the heap function's name - confirm)
+ * @param[in] domain domain string (exact semantics not visible here - confirm)
+ * @return non-zero identifier or NULL
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_heap_function ITTAPI __itt_heap_function_createA(const char*    name, const char*    domain);
+__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_heap_function_create     __itt_heap_function_createW
+#  define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr
+#else
+#  define __itt_heap_function_create     __itt_heap_function_createA
+#  define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char*    name, const char*    domain))
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create,  (const char*    name, const char*    domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA     ITTNOTIFY_DATA(heap_function_createA)
+#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA)
+#define __itt_heap_function_createW     ITTNOTIFY_DATA(heap_function_createW)
+#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create      ITTNOTIFY_DATA(heap_function_create)
+#define __itt_heap_function_create_ptr  ITTNOTIFY_NAME(heap_function_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: calls evaluate to a null handle, the _ptr macros are 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create(name, domain)  (__itt_heap_function)0
+#define __itt_heap_function_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation begin occurrence.
+ * @param[in] h           heap function handle (type __itt_heap_function)
+ * @param[in] size        allocation size (presumably the number of bytes requested - confirm)
+ * @param[in] initialized integer flag (presumably non-zero when the memory is initialized - confirm)
+ * When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
+#define __itt_heap_allocate_begin     ITTNOTIFY_VOID(heap_allocate_begin)
+#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_begin(h, size, initialized)
+#define __itt_heap_allocate_begin_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_begin_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation end occurrence.
+ * @param[in] h           heap function handle (type __itt_heap_function)
+ * @param[in] addr        pointer to the pointer variable for the block (presumably where the allocator stored its result - confirm)
+ * @param[in] size        allocation size (presumably bytes - confirm)
+ * @param[in] initialized integer flag (presumably non-zero when the memory is initialized - confirm)
+ * When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
+#define __itt_heap_allocate_end     ITTNOTIFY_VOID(heap_allocate_end)
+#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_end(h, addr, size, initialized)
+#define __itt_heap_allocate_end_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_end_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free begin occurrence.
+ * @param[in] h    heap function handle (type __itt_heap_function)
+ * @param[in] addr address passed by value (presumably the block being freed - confirm)
+ * When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_begin     ITTNOTIFY_VOID(heap_free_begin)
+#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_begin(h, addr)
+#define __itt_heap_free_begin_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_begin_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free end occurrence.
+ * @param[in] h    heap function handle (type __itt_heap_function)
+ * @param[in] addr address passed by value (presumably the block that was freed - confirm)
+ * When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_end     ITTNOTIFY_VOID(heap_free_end)
+#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_end(h, addr)
+#define __itt_heap_free_end_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_end_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation begin occurrence.
+ * @param[in] h           heap function handle (type __itt_heap_function)
+ * @param[in] addr        address passed by value (presumably the block being reallocated - confirm)
+ * @param[in] new_size    requested new size (presumably bytes - confirm)
+ * @param[in] initialized integer flag (presumably non-zero when the memory is initialized - confirm)
+ * When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_begin     ITTNOTIFY_VOID(heap_reallocate_begin)
+#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
+#define __itt_heap_reallocate_begin_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_begin_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation end occurrence.
+ * @param[in] h           heap function handle (type __itt_heap_function)
+ * @param[in] addr        address passed by value (presumably the original block - confirm)
+ * @param[in] new_addr    pointer to the pointer variable for the new block (presumably the reallocation result - confirm)
+ * @param[in] new_size    new size (presumably bytes - confirm)
+ * @param[in] initialized integer flag (presumably non-zero when the memory is initialized - confirm)
+ * When INTEL_NO_ITTNOTIFY_API is defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_end     ITTNOTIFY_VOID(heap_reallocate_end)
+#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
+#define __itt_heap_reallocate_end_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_end_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief internal access begin
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_internal_access_end. When INTEL_NO_ITTNOTIFY_API is defined the
+ * call expands to nothing.
+ */
+void ITTAPI __itt_heap_internal_access_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin,  (void))
+#define __itt_heap_internal_access_begin      ITTNOTIFY_VOID(heap_internal_access_begin)
+#define __itt_heap_internal_access_begin_ptr  ITTNOTIFY_NAME(heap_internal_access_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_begin()
+#define __itt_heap_internal_access_begin_ptr  0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_begin_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief internal access end
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_internal_access_begin. When INTEL_NO_ITTNOTIFY_API is defined
+ * the call expands to nothing.
+ */
+void ITTAPI __itt_heap_internal_access_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
+#define __itt_heap_internal_access_end     ITTNOTIFY_VOID(heap_internal_access_end)
+#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_end()
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief record memory growth begin
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_record_memory_growth_end. When INTEL_NO_ITTNOTIFY_API is
+ * defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin,  (void))
+#define __itt_heap_record_memory_growth_begin      ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#define __itt_heap_record_memory_growth_begin_ptr  ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_begin()
+#define __itt_heap_record_memory_growth_begin_ptr  0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_begin_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief record memory growth end
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_record_memory_growth_begin. When INTEL_NO_ITTNOTIFY_API is
+ * defined the call expands to nothing.
+ */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#define __itt_heap_record_memory_growth_end     ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_end()
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify.
+ * The two values below are distinct single bits (0x1 and 0x2); presumably
+ * they are OR-ed together to form the unsigned int mask arguments of
+ * __itt_heap_reset_detection and __itt_heap_record - confirm.
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks.
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth.
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/**
+ * @brief heap reset detection
+ * @param[in] reset_mask mask of detection types to reset (presumably a
+ *            combination of __itt_heap_leaks and __itt_heap_growth - confirm)
+ */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection,  (unsigned int reset_mask))
+#define __itt_heap_reset_detection      ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr  ITTNOTIFY_NAME(heap_reset_detection)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+/* The no-op stub must accept the same single argument as the real function;
+ * a zero-parameter macro would make __itt_heap_reset_detection(mask) fail to
+ * preprocess when the API is compiled out. */
+#define __itt_heap_reset_detection(reset_mask)
+#define __itt_heap_reset_detection_ptr  0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reset_detection_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief report
+ * @param[in] record_mask mask of report types to record (presumably a
+ *            combination of __itt_heap_leaks and __itt_heap_growth - confirm)
+ */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record     ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+/* The no-op stub must accept the same single argument as the real function;
+ * a zero-parameter macro would make __itt_heap_record(mask) fail to
+ * preprocess when the API is compiled out. */
+#define __itt_heap_record(record_mask)
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} heap group */
+/** @endcond */
+/* ========================================================================== */
+
+/**
+ * @defgroup domains Domains
+ * @ingroup public
+ * Domains group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_domain
+{
+    volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */
+    const char* nameA;  /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+    const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else  /* UNICODE || _UNICODE */
+    void* nameW; /*!< Same member, declared as an untyped pointer when neither UNICODE nor _UNICODE is defined. */
+#endif /* UNICODE || _UNICODE */
+    int   extra1; /*!< Reserved to the runtime */
+    void* extra2; /*!< Reserved to the runtime */
+    struct ___itt_domain* next; /*!< Pointer to another domain record (presumably the next entry of a runtime-kept list - confirm) */
+} __itt_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup domains
+ * @brief Create a domain.
+ * Create domain using some domain name: the URI naming style is recommended.
+ * Because the set of domains is expected to be static over the application's
+ * execution time, there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of
+ * which thread created the domain. This call is thread-safe.
+ * On Windows both an ANSI (suffix A) and a wide (suffix W) entry point are
+ * declared, and __itt_domain_create selects the W variant when UNICODE or
+ * _UNICODE is defined and the A variant otherwise.
+ * @param[in] name name of domain
+ * @return pointer to the __itt_domain; the macro form evaluates to
+ *         (__itt_domain*)0 when INTEL_NO_ITTNOTIFY_API is defined
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_domain* ITTAPI __itt_domain_createA(const char    *name);
+__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_domain_create     __itt_domain_createW
+#  define __itt_domain_create_ptr __itt_domain_createW_ptr
+#else /* UNICODE */
+#  define __itt_domain_create     __itt_domain_createA
+#  define __itt_domain_create_ptr __itt_domain_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_domain* ITTAPI __itt_domain_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char    *name))
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create,  (const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA     ITTNOTIFY_DATA(domain_createA)
+#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA)
+#define __itt_domain_createW     ITTNOTIFY_DATA(domain_createW)
+#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create     ITTNOTIFY_DATA(domain_create)
+#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA(name) (__itt_domain*)0
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW(name) (__itt_domain*)0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create(name)  (__itt_domain*)0
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} domains group */
+
+/**
+ * @defgroup ids IDs
+ * @ingroup public
+ * IDs group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_id
+{
+    unsigned long long d1, d2, d3; /*!< As filled by __itt_id_make: d1 = address, d2 = extra value, d3 = reserved (zero) */
+} __itt_id;
+
+#pragma pack(pop)
+/** @endcond */
+
+/* All-zero ID value. Declared `static` so it has internal linkage in C as
+ * well as C++: a plain `const` object defined in a header has external
+ * linkage in C, which yields duplicate definitions of __itt_null as soon as
+ * two C translation units include this header. */
+static const __itt_id __itt_null = { 0, 0, 0 };
+
+/**
+ * @ingroup ids
+ * @brief Convenience initializer for an __itt_id structure, without domain control.
+ * The function only fills in the structure and does not affect the collector
+ * runtime in any way. An ID made here must still be created with
+ * __itt_id_create before it is used to identify a named entity.
+ * @param[in] addr The address of object; high QWORD of the ID value.
+ * @param[in] extra The extra data to unique identify object; low QWORD of the ID value.
+ * @return the populated ID (d1 = addr, d2 = extra, d3 = 0)
+ */
+
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+{
+    __itt_id result = __itt_null;
+    result.d3 = (unsigned long long)0; /* Reserved. Must be zero */
+    result.d2 = (unsigned long long)extra;
+    result.d1 = (unsigned long long)((uintptr_t)addr);
+    return result;
+}
+
+/**
+ * @ingroup ids
+ * @brief Create an instance of identifier.
+ * This establishes the beginning of the lifetime of an instance of
+ * the given ID in the trace. Once this lifetime starts, the ID
+ * can be used to tag named entity instances in calls such as
+ * __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * Instance IDs are not domain specific!
+ * The macro form forwards both arguments through ITTNOTIFY_VOID_D1 and
+ * expands to nothing when INTEL_NO_ITTNOTIFY_API is defined.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x)
+#define __itt_id_create_ptr  ITTNOTIFY_NAME(id_create)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create(domain,id)
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup ids
+ * @brief Destroy an instance of identifier.
+ * This ends the lifetime of the current instance of the given ID value in the trace.
+ * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * The macro form forwards both arguments through ITTNOTIFY_VOID_D1 and
+ * expands to nothing when INTEL_NO_ITTNOTIFY_API is defined.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x)
+#define __itt_id_destroy_ptr  ITTNOTIFY_NAME(id_destroy)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_destroy(domain,id)
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} ids group */
+
+/**
+ * @defgroup handles String Handles
+ * @ingroup public
+ * String Handles group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_string_handle
+{
+    const char* strA; /*!< Copy of original string in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+    const wchar_t* strW; /*!< Copy of original string in UNICODE. */
+#else  /* UNICODE || _UNICODE */
+    void* strW; /*!< Same member, declared as an untyped pointer when neither UNICODE nor _UNICODE is defined. */
+#endif /* UNICODE || _UNICODE */
+    int   extra1; /*!< Reserved. Must be zero   */
+    void* extra2; /*!< Reserved. Must be zero   */
+    struct ___itt_string_handle* next; /*!< Pointer to another handle record (presumably the next entry of a runtime-kept list - confirm) */
+} __itt_string_handle;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup handles
+ * @brief Create a string handle.
+ * Create and return handle value that can be associated with a string.
+ * Consecutive calls to __itt_string_handle_create with the same name
+ * return the same value. Because the set of string handles is expected to remain
+ * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created
+ * the string handle. This call is thread-safe.
+ * On Windows both an ANSI (suffix A) and a wide (suffix W) entry point are
+ * declared, and __itt_string_handle_create selects the W variant when UNICODE
+ * or _UNICODE is defined and the A variant otherwise.
+ * @param[in] name The input string
+ * @return pointer to the __itt_string_handle; the macro form evaluates to
+ *         (__itt_string_handle*)0 when INTEL_NO_ITTNOTIFY_API is defined
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_string_handle* ITTAPI __itt_string_handle_createA(const char    *name);
+__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_string_handle_create     __itt_string_handle_createW
+#  define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr
+#else /* UNICODE */
+#  define __itt_string_handle_create     __itt_string_handle_createA
+#  define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char    *name))
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create,  (const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA     ITTNOTIFY_DATA(string_handle_createA)
+#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA)
+#define __itt_string_handle_createW     ITTNOTIFY_DATA(string_handle_createW)
+#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create     ITTNOTIFY_DATA(string_handle_create)
+#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA(name) (__itt_string_handle*)0
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW(name) (__itt_string_handle*)0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create(name)  (__itt_string_handle*)0
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} handles group */
+
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp; /* unsigned 64-bit-or-wider tick value; units are platform specific */
+/** @endcond */
+
+#define __itt_timestamp_none ((__itt_timestamp)-1LL) /* largest __itt_timestamp value, used as the "no timestamp" sentinel */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to the current moment.
+ * This returns the timestamp in the format that is the most relevant for the current
+ * host or platform (RDTSC, QPC, and others). You can use the "<" operator to
+ * compare __itt_timestamp values.
+ * @return the current timestamp; the macro form evaluates to
+ *         (__itt_timestamp)0 when INTEL_NO_ITTNOTIFY_API is defined
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp      ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr  ITTNOTIFY_NAME(get_timestamp)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+/* The stub must still be an expression of the return type: an empty
+ * expansion would turn `t = __itt_get_timestamp();` into a syntax error.
+ * Zero matches the (type)0 convention of the other value-returning stubs. */
+#define __itt_get_timestamp() ((__itt_timestamp)0)
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @defgroup regions Regions
+ * @ingroup public
+ * Regions group
+ * @{
+ */
+/**
+ * @ingroup regions
+ * @brief Begin of region instance.
+ * Successive calls to __itt_region_begin with the same ID are ignored
+ * until a call to __itt_region_end with the same ID.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance. Must not be __itt_null
+ * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null
+ * @param[in] name The name of this region
+ */
+void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup regions
+ * @brief End of region instance.
+ * The first call to __itt_region_end with a given ID ends the
+ * region. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance
+ */
+void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, region_end,   (const __itt_domain *domain, __itt_id id))
+#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z)
+#define __itt_region_begin_ptr      ITTNOTIFY_NAME(region_begin)
+#define __itt_region_end(d,x)       ITTNOTIFY_VOID_D1(region_end,d,x)
+#define __itt_region_end_ptr        ITTNOTIFY_NAME(region_end)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, the _ptr macros are 0 */
+#define __itt_region_begin(d,x,y,z)
+#define __itt_region_begin_ptr 0
+#define __itt_region_end(d,x)
+#define __itt_region_end_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_region_begin_ptr 0
+#define __itt_region_end_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} regions group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup public
+ * Frames are similar to regions, but are intended to be easier to use and to implement.
+ * In particular:
+ * - Frames always represent periods of elapsed time
+ * - By default, frames have no nesting relationships
+ * @{
+ */
+
+/**
+ * @ingroup frames
+ * @brief Begin a frame instance.
+ * Successive calls to __itt_frame_begin_v3 with the
+ * same ID are ignored until a call to __itt_frame_end_v3 with the same ID.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ */
+void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief End a frame instance.
+ * The first call to __itt_frame_end_v3 with a given ID
+ * ends the frame. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_frame_begin_v3 call.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL for current
+ */
+void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief Submits a frame instance.
+ * Successive calls to __itt_frame_begin_v3 or __itt_frame_submit_v3 with the
+ * same ID are ignored until a call to __itt_frame_end_v3 or __itt_frame_submit_v3
+ * with the same ID.
+ * Passing special __itt_timestamp_none value as "end" argument means
+ * take the current timestamp as the end timestamp.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+    __itt_timestamp begin, __itt_timestamp end);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, frame_begin_v3,  (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_end_v3,    (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
+#define __itt_frame_begin_v3(d,x)      ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
+#define __itt_frame_begin_v3_ptr       ITTNOTIFY_NAME(frame_begin_v3)
+#define __itt_frame_end_v3(d,x)        ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
+#define __itt_frame_end_v3_ptr         ITTNOTIFY_NAME(frame_end_v3)
+#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#define __itt_frame_submit_v3_ptr      ITTNOTIFY_NAME(frame_submit_v3)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, the _ptr macros are 0 */
+#define __itt_frame_begin_v3(domain,id)
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3(domain,id)
+#define __itt_frame_end_v3_ptr   0
+#define __itt_frame_submit_v3(domain,id,begin,end)
+#define __itt_frame_submit_v3_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3_ptr   0
+#define __itt_frame_submit_v3_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} frames group */
+/** @endcond */
+
+/**
+ * @defgroup taskgroup Task Group
+ * @ingroup public
+ * Task Group
+ * @{
+ */
+/**
+ * @ingroup taskgroup
+ * @brief Denotes a task_group instance.
+ * Successive calls to __itt_task_group with the same ID are ignored.
+ * The macro form forwards its arguments through ITTNOTIFY_VOID_D3 and
+ * expands to nothing when INTEL_NO_ITTNOTIFY_API is defined.
+ * @param[in] domain The domain for this task_group instance
+ * @param[in] id The instance ID for this task_group instance. Must not be __itt_null.
+ * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null.
+ * @param[in] name The name of this task_group
+ */
+void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
+#define __itt_task_group_ptr      ITTNOTIFY_NAME(task_group)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_group(d,x,y,z)
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} taskgroup group */
+
+/**
+ * @defgroup tasks Tasks
+ * @ingroup public
+ * A task instance represents a piece of work performed by a particular
+ * thread for a period of time. A call to __itt_task_begin creates a
+ * task instance. This becomes the current instance for that task on that
+ * thread. A following call to __itt_task_end on the same thread ends the
+ * instance. There may be multiple simultaneous instances of tasks with the
+ * same name on different threads. If an ID is specified, the task instance
+ * receives that ID. Nested tasks are allowed.
+ *
+ * Note: The task is defined by the bracketing of __itt_task_begin and
+ * __itt_task_end on the same thread. If some scheduling mechanism causes
+ * task switching (the thread executes a different user task) or task
+ * migration (the user task switches to a different thread) then this breaks
+ * the notion of the current instance. Additional API calls are required to
+ * deal with that possibility.
+ * @{
+ */
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The identifier for this task instance (may be 0)
+ * @param[in] parentid The parent of this task (may be 0)
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup tasks
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ */
+void ITTAPI __itt_task_end(const __itt_domain *domain);
+
+/**
+ * @ingroup tasks
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup tasks
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin,    (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end,      (const __itt_domain *domain))
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped,   (const __itt_domain *domain, __itt_id taskid))
+#define __itt_task_begin(d,x,y,z)    ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
+#define __itt_task_begin_ptr         ITTNOTIFY_NAME(task_begin)
+#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
+#define __itt_task_begin_fn_ptr      ITTNOTIFY_NAME(task_begin_fn)
+#define __itt_task_end(d)            ITTNOTIFY_VOID_D0(task_end,d)
+#define __itt_task_end_ptr           ITTNOTIFY_NAME(task_end)
+#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
+#define __itt_task_begin_overlapped_ptr      ITTNOTIFY_NAME(task_begin_overlapped)
+#define __itt_task_end_overlapped(d,x)       ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
+#define __itt_task_end_overlapped_ptr        ITTNOTIFY_NAME(task_end_overlapped)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin(domain,id,parentid,name)
+#define __itt_task_begin_ptr    0
+#define __itt_task_begin_fn(domain,id,parentid,fn)
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end(domain)
+#define __itt_task_end_ptr      0
+#define __itt_task_begin_overlapped(domain,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ptr         0
+#define __itt_task_end_overlapped(domain,taskid)
+#define __itt_task_end_overlapped_ptr           0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ptr    0
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end_ptr      0
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} tasks group */
+
+
+/**
+ * @defgroup markers Markers
+ * Markers represent a single discrete event in time. Markers have a scope,
+ * described by an enumerated type __itt_scope. Markers are created by
+ * the API call __itt_marker. A marker instance can be given an ID for use in
+ * adding metadata.
+ * @{
+ */
+
+/**
+ * @brief Describes the scope of an event object in the trace.
+ */
+typedef enum
+{
+    __itt_scope_unknown = 0,  /**< Zero value; aliased as __itt_marker_scope_unknown */
+    __itt_scope_global,       /**< Aliased as __itt_marker_scope_global */
+    __itt_scope_track_group,  /**< Aliased as __itt_marker_scope_process */
+    __itt_scope_track,        /**< Aliased as __itt_marker_scope_thread */
+    __itt_scope_task,         /**< Aliased as __itt_marker_scope_task */
+    __itt_scope_marker        /**< Has no __itt_marker_scope_* alias among those defined right after this enum */
+} __itt_scope;
+
+/** @cond exclude_from_documentation */
+#define __itt_marker_scope_unknown  __itt_scope_unknown
+#define __itt_marker_scope_global   __itt_scope_global
+#define __itt_marker_scope_process  __itt_scope_track_group
+#define __itt_marker_scope_thread   __itt_scope_track
+#define __itt_marker_scope_task     __itt_scope_task
+/** @endcond */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance
+ * @param[in] domain The domain for this marker
+ * @param[in] id The instance ID for this marker or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
+#define __itt_marker_ptr      ITTNOTIFY_NAME(marker)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker(domain,id,name,scope)
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} markers group */
+
+/**
+ * @defgroup metadata Metadata
+ * The metadata API is used to attach extra information to named
+ * entities. Metadata can be attached to an identified named entity by ID,
+ * or to the current entity (which is always a task).
+ *
+ * Conceptually metadata has a type (what kind of metadata), a key (the
+ * name of the metadata), and a value (the actual data). The encoding of
+ * the value depends on the type of the metadata.
+ *
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
+ * @{
+ */
+
+/**
+ * @ingroup parameters
+ * @brief describes the type of metadata
+ */
+typedef enum {
+    __itt_metadata_unknown = 0,
+    __itt_metadata_u64,     /**< Unsigned 64-bit integer */
+    __itt_metadata_s64,     /**< Signed 64-bit integer */
+    __itt_metadata_u32,     /**< Unsigned 32-bit integer */
+    __itt_metadata_s32,     /**< Signed 32-bit integer */
+    __itt_metadata_u16,     /**< Unsigned 16-bit integer */
+    __itt_metadata_s16,     /**< Signed 16-bit integer */
+    __itt_metadata_float,   /**< Signed 32-bit floating-point */
+    __itt_metadata_double   /**< Signed 64-bit floating-point */
+} __itt_metadata_type;
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b)
+#define __itt_metadata_add_ptr          ITTNOTIFY_NAME(metadata_add)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add(d,x,y,z,a,b)
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_metadata_str_add     __itt_metadata_str_addW
+#  define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr
+#else /* UNICODE */
+#  define __itt_metadata_str_add     __itt_metadata_str_addA
+#  define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr        ITTNOTIFY_NAME(metadata_str_addA)
+#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr        ITTNOTIFY_NAME(metadata_str_addW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)  ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a)
+#define __itt_metadata_str_add_ptr         ITTNOTIFY_NAME(metadata_str_add)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW(d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+
+ * @note This variant takes no id parameter; the target instance is identified by scope rather than by an instance ID.
+
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr          ITTNOTIFY_NAME(metadata_add_with_scope)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+
+ * @note This variant takes no id parameter; the target instance is identified by scope rather than by an instance ID.
+
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_metadata_str_add_with_scope     __itt_metadata_str_add_with_scopeW
+#  define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr
+#else /* UNICODE */
+#  define __itt_metadata_str_add_with_scope     __itt_metadata_str_add_with_scopeA
+#  define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr        ITTNOTIFY_NAME(metadata_str_add_with_scopeA)
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr        ITTNOTIFY_NAME(metadata_str_add_with_scopeW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)  ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr         ITTNOTIFY_NAME(metadata_str_add_with_scope)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr  0
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr  0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr   0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA_ptr  0
+#define __itt_metadata_str_add_with_scopeW_ptr  0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope_ptr   0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} metadata group */
+
+/**
+ * @defgroup relations Relations
+ * Instances of named entities can be explicitly associated with other
+ * instances using instance IDs and the relationship API calls.
+ *
+ * @{
+ */
+
+/**
+ * @ingroup relations
+ * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation.
+ * Relations between instances can be added with an API call. The relation
+ * API uses instance IDs. Relations can be added before or after the actual
+ * instances are created and persist independently of the instances. This
+ * is the motivation for having different lifetimes for instance IDs and
+ * the actual instances.
+ */
+typedef enum
+{
+    __itt_relation_is_unknown = 0,
+    __itt_relation_is_dependent_on,         /**< "A is dependent on B" means that A cannot start until B completes */
+    __itt_relation_is_sibling_of,           /**< "A is sibling of B" means that A and B were created as a group */
+    __itt_relation_is_parent_of,            /**< "A is parent of B" means that A created B */
+    __itt_relation_is_continuation_of,      /**< "A is continuation of B" means that A assumes the dependencies of B */
+    __itt_relation_is_child_of,             /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
+    __itt_relation_is_continued_by,         /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
+    __itt_relation_is_predecessor_to        /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
+} __itt_relation;
+
+/**
+ * @ingroup relations
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup relations
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add,            (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y)
+#define __itt_relation_add_to_current_ptr    ITTNOTIFY_NAME(relation_add_to_current)
+#define __itt_relation_add(d,x,y,z)          ITTNOTIFY_VOID_D3(relation_add,d,x,y,z)
+#define __itt_relation_add_ptr               ITTNOTIFY_NAME(relation_add)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current(d,x,y)
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add(d,x,y,z)
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} relations group */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_info
+{
+    unsigned long long clock_freq; /*!< Clock domain frequency */
+    unsigned long long clock_base; /*!< Clock domain base timestamp */
+} __itt_clock_info;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_domain
+{
+    __itt_clock_info info;      /*!< Most recent clock domain info */
+    __itt_get_clock_info_fn fn; /*!< Callback function pointer */
+    void* fn_data;              /*!< Input argument for the callback function */
+    int   extra1;               /*!< Reserved. Must be zero */
+    void* extra2;               /*!< Reserved. Must be zero */
+    struct ___itt_clock_domain* next; /*!< Link to another clock domain record (presumably the next in a list; confirm) */
+} __itt_clock_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Create a clock domain.
+ * Certain applications require the capability to trace their application using
+ * a clock domain different than the CPU, for instance the instrumentation of events
+ * that occur on a GPU.
+ * Because the set of domains is expected to be static over the application's execution time,
+ * there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of which thread created
+ * the domain. This call is thread-safe.
+ * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps
+ * @param[in] fn_data Argument for a callback function; may be NULL
+ */
+__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data))
+#define __itt_clock_domain_create     ITTNOTIFY_DATA(clock_domain_create)
+#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Recalculate clock domain frequencies and clock base timestamps.
+ */
+void ITTAPI __itt_clock_domain_reset(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void))
+#define __itt_clock_domain_reset     ITTNOTIFY_VOID(clock_domain_reset)
+#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_reset()
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Create an instance of identifier. This establishes the beginning of the lifetime of
+ * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to
+ * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/**
+ * @ingroup clockdomain
+ * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the
+ * given ID value in the trace. Any relationships that are established after this lifetime ends are
+ * invalid. This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create_ex,  (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+#define __itt_id_create_ex(d,x,y,z)  ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z)
+#define __itt_id_create_ex_ptr       ITTNOTIFY_NAME(id_create_ex)
+#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z)
+#define __itt_id_destroy_ex_ptr      ITTNOTIFY_NAME(id_destroy_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_create_ex_ptr    0
+#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ex_ptr    0
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, or __itt_null
+ * @param[in] parentid The parent of this task, or __itt_null
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup clockdomain
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ */
+void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_ex,        (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex,     (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end_ex,          (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp))
+#define __itt_task_begin_ex(d,x,y,z,a,b)      ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b)
+#define __itt_task_begin_ex_ptr               ITTNOTIFY_NAME(task_begin_ex)
+#define __itt_task_begin_fn_ex(d,x,y,z,a,b)   ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b)
+#define __itt_task_begin_fn_ex_ptr            ITTNOTIFY_NAME(task_begin_fn_ex)
+#define __itt_task_end_ex(d,x,y)              ITTNOTIFY_VOID_D2(task_end_ex,d,x,y)
+#define __itt_task_end_ex_ptr                 ITTNOTIFY_NAME(task_end_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name)
+#define __itt_task_begin_ex_ptr          0
+#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn)
+#define __itt_task_begin_fn_ex_ptr       0
+#define __itt_task_end_ex(domain,clock_domain,timestamp)
+#define __itt_task_end_ex_ptr            0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ex_ptr          0
+#define __itt_task_begin_fn_ex_ptr       0
+#define __itt_task_end_ex_ptr            0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup counters Counters
+ * @ingroup public
+ * Counters are user-defined objects with a monotonically increasing
+ * value. Counter values are 64-bit unsigned integers.
+ * Counters have names that can be displayed in
+ * the tools.
+ * @{
+ */
+
+/**
+ * @brief opaque structure for counter identification
+ */
+/** @cond exclude_from_documentation */
+
+typedef struct ___itt_counter* __itt_counter;
+
+/**
+ * @brief Create an unsigned 64-bit integer counter with the given name/domain
+ *
+ * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64-bit integer
+ *
+ * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64)
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_createA(const char    *name, const char    *domain);
+__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_counter_create     __itt_counter_createW
+#  define __itt_counter_create_ptr __itt_counter_createW_ptr
+#else /* UNICODE */
+#  define __itt_counter_create     __itt_counter_createA
+#  define __itt_counter_create_ptr __itt_counter_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char    *name, const char    *domain))
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create,  (const char *name, const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA     ITTNOTIFY_DATA(counter_createA)
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA)
+#define __itt_counter_createW     ITTNOTIFY_DATA(counter_createW)
+#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create     ITTNOTIFY_DATA(counter_create)
+#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA(name, domain)
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW(name, domain)
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create(name, domain)
+#define __itt_counter_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Increment the unsigned 64-bit integer counter value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_inc(__itt_counter id);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id))
+#define __itt_counter_inc     ITTNOTIFY_VOID(counter_inc)
+#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc(id)
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Increment the unsigned 64-bit integer counter value by @p value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_inc_delta     ITTNOTIFY_VOID(counter_inc_delta)
+#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_delta(id, value)
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_dec(__itt_counter id);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id))
+#define __itt_counter_dec     ITTNOTIFY_VOID(counter_dec)
+#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec(id)
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value by @p value
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer counter has no effect
+ */
+void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_dec_delta     ITTNOTIFY_VOID(counter_dec_delta)
+#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec_delta(id, value)
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls increment the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to increment the counter
+ */
+void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* macro args: d = domain, x = name, y = delta */
+ITT_STUBV(ITTAPI, void, counter_inc_v3,       (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_inc_v3(d,x)         ITTNOTIFY_VOID_D1(counter_inc_v3,d,x)
+#define __itt_counter_inc_v3_ptr          ITTNOTIFY_NAME(counter_inc_v3)
+#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y)
+#define __itt_counter_inc_delta_v3_ptr    ITTNOTIFY_NAME(counter_inc_delta_v3)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_v3(domain,name)
+#define __itt_counter_inc_v3_ptr       0
+#define __itt_counter_inc_delta_v3(domain,name,delta)
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_v3_ptr       0
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls decrement the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to decrement the counter
+ */
+void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* macro args: d = domain, x = name, y = delta */
+ITT_STUBV(ITTAPI, void, counter_dec_v3,       (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_dec_v3(d,x)         ITTNOTIFY_VOID_D1(counter_dec_v3,d,x)
+#define __itt_counter_dec_v3_ptr          ITTNOTIFY_NAME(counter_dec_v3)
+#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y)
+#define __itt_counter_dec_delta_v3_ptr    ITTNOTIFY_NAME(counter_dec_delta_v3)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec_v3(domain,name)
+#define __itt_counter_dec_v3_ptr       0
+#define __itt_counter_dec_delta_v3(domain,name,delta)
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_v3_ptr       0
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
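+/* The counters group is not closed here: more counter APIs follow, and the group's closing marker comes after __itt_counter_destroy. */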
+
+
+/**
+ * @brief Set the counter value from the data pointed to by value_ptr (a pointer to an unsigned 64-bit integer for counters made by __itt_counter_create())
+ */
+void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr))
+#define __itt_counter_set_value     ITTNOTIFY_VOID(counter_set_value)
+#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_set_value(id, value_ptr)
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the counter value; variant of __itt_counter_set_value() that additionally takes a clock domain and a timestamp
+ */
+void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr))
+#define __itt_counter_set_value_ex     ITTNOTIFY_VOID(counter_set_value_ex)
+#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create a typed counter with the given name/domain
+ *
+ * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_create_typedA(const char    *name, const char    *domain, __itt_metadata_type type);
+__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_counter_create_typed     __itt_counter_create_typedW
+#  define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr
+#else /* UNICODE */
+#  define __itt_counter_create_typed     __itt_counter_create_typedA
+#  define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char    *name, const char    *domain, __itt_metadata_type type))
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typed,  (const char *name, const char *domain, __itt_metadata_type type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA     ITTNOTIFY_DATA(counter_create_typedA)
+#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA)
+#define __itt_counter_create_typedW     ITTNOTIFY_DATA(counter_create_typedW)
+#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed     ITTNOTIFY_DATA(counter_create_typed)
+#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: stubs yield a null counter so the result can still be assigned */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typed_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the counter identified by the handle previously returned by __itt_counter_create() or
+ * __itt_counter_create_typed()
+ */
+void ITTAPI __itt_counter_destroy(__itt_counter id);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id))
+#define __itt_counter_destroy     ITTNOTIFY_VOID(counter_destroy)
+#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_destroy(id)
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} counters group */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance.
+ * @param[in] domain The domain for this marker
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user-defined timestamp.
+ * @param[in] id The instance ID for this marker, or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker_ex(const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* macro args d,x,y,z,a,b map positionally to domain, clock_domain, timestamp, id, name, scope */
+ITT_STUBV(ITTAPI, void, marker_ex,    (const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker_ex(d,x,y,z,a,b)    ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b)
+#define __itt_marker_ex_ptr             ITTNOTIFY_NAME(marker_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope)
+#define __itt_marker_ex_ptr    0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ex_ptr    0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user-defined timestamp.
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user-defined timestamp.
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_ex(const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* macro args map positionally to the parameters of the functions declared above */
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add_ex,            (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a)
+#define __itt_relation_add_to_current_ex_ptr        ITTNOTIFY_NAME(relation_add_to_current_ex)
+#define __itt_relation_add_ex(d,x,y,z,a,b)          ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
+#define __itt_relation_add_ex_ptr                   ITTNOTIFY_NAME(relation_add_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail)
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum ___itt_track_group_type
+{
+    __itt_track_group_type_normal = 0 /*!< The only track group type defined here */
+} __itt_track_group_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+/* Descriptor of a track group; member packing is fixed at 8 for this structure and restored by the pop below. */
+typedef struct ___itt_track_group
+{
+    __itt_string_handle* name;     /*!< Name of the track group */
+    struct ___itt_track* track;    /*!< List of child tracks    */
+    __itt_track_group_type tgtype; /*!< Type of the track group */
+    int   extra1;                  /*!< Reserved. Must be zero  */
+    void* extra2;                  /*!< Reserved. Must be zero  */
+    struct ___itt_track_group* next; /*!< Next track group; presumably a linked-list link (TODO confirm) */
+} __itt_track_group;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Placeholder for custom track types. Currently, the "normal" custom track
+ * is the only publicly available track type.
+ */
+typedef enum ___itt_track_type
+{
+    __itt_track_type_normal = 0 /*!< Normal custom track */
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+    , __itt_track_type_queue /* declared only when INTEL_ITTNOTIFY_API_PRIVATE is defined */
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+} __itt_track_type;
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+/* Descriptor of a single track; member packing is fixed at 8 for this structure and restored by the pop below. */
+typedef struct ___itt_track
+{
+    __itt_string_handle* name; /*!< Name of the track       */
+    __itt_track_group* group;  /*!< Parent group to a track */
+    __itt_track_type ttype;    /*!< Type of the track       */
+    int   extra1;              /*!< Reserved. Must be zero  */
+    void* extra2;              /*!< Reserved. Must be zero  */
+    struct ___itt_track* next; /*!< Next track; presumably a linked-list link (TODO confirm) */
+} __itt_track;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create a logical track group with the given name and type.
+ */
+__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type))
+#define __itt_track_group_create     ITTNOTIFY_DATA(track_group_create)
+#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create)
+#else  /* INTEL_NO_ITTNOTIFY_API: stub takes the same two arguments as the real API and yields a null group */
+#define __itt_track_group_create(name, track_group_type)  (__itt_track_group*)0
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create a logical track with the given name and type in the given track group.
+ */
+__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type))
+#define __itt_track_create     ITTNOTIFY_DATA(track_create)
+#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create)
+#else  /* INTEL_NO_ITTNOTIFY_API: stub yields a null track */
+#define __itt_track_create(track_group,name,track_type)  (__itt_track*)0
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the logical track. NOTE(review): presumably selects the track that later calls are attributed to -- confirm.
+ */
+void ITTAPI __itt_set_track(__itt_track* track);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track))
+#define __itt_set_track     ITTNOTIFY_VOID(set_track)
+#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_set_track(track)
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/* ========================================================================== */
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup events Events
+ * @ingroup public
+ * Events group
+ * @{
+ */
+/** @brief User event type: the integer identifier returned by __itt_event_create() */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @note Creation fails if name or namelen is null, if name and namelen do not match, or if the user event feature is not enabled
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char    *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_event_create     __itt_event_createW
+#  define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+#  define __itt_event_create     __itt_event_createA
+#  define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char    *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create,  (const char    *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA     ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW     ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create      ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr  ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: stubs evaluate to event id 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen)  (__itt_event)0
+#define __itt_event_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record the occurrence (start) of an event.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start     ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else  /* INTEL_NO_ITTNOTIFY_API: stub evaluates to 0 */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record the end of an event occurrence.
+ * @note Calling it is optional for events that do not have durations.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end     ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else  /* INTEL_NO_ITTNOTIFY_API: stub evaluates to 0 */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} events group */
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines the element types of array data (for C/C++ intrinsic types)
+ */
+typedef enum
+{
+    __itt_e_first = 0, /* alias of the first value (__itt_e_char) */
+    __itt_e_char = 0,  /* 1-byte integer */
+    __itt_e_uchar,     /* 1-byte unsigned integer */
+    __itt_e_int16,     /* 2-byte integer */
+    __itt_e_uint16,    /* 2-byte unsigned integer  */
+    __itt_e_int32,     /* 4-byte integer */
+    __itt_e_uint32,    /* 4-byte unsigned integer */
+    __itt_e_int64,     /* 8-byte integer */
+    __itt_e_uint64,    /* 8-byte unsigned integer */
+    __itt_e_float,     /* 4-byte floating point */
+    __itt_e_double,    /* 8-byte floating point */
+    __itt_e_last = __itt_e_double /* alias of the last value */
+} __itt_av_data_type;
+
+/**
+ * @brief Save array data to a file.
+ * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in the linear memory.
+ * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C).
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_av_save     __itt_av_saveW
+#  define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+#  define __itt_av_save     __itt_av_saveA
+#  define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save,  (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA     ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW     ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save     ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: stubs accept the same six arguments as the real API and evaluate to 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/* NOTE(review): no documentation is given for this call; presumably it lets a collector attach to the running process -- confirm. */
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach     ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+/**
+ * @brief Module load info
+ * This API is used to report necessary information in case of module relocation
+ * @param[in] start_addr - relocated module start address
+ * @param[in] end_addr - relocated module end address
+ * @param[in] path - file system path to the module
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path);
+void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_module_load     __itt_module_loadW
+#  define __itt_module_load_ptr __itt_module_loadW_ptr
+#else /* UNICODE */
+#  define __itt_module_load     __itt_module_loadA
+#  define __itt_module_load_ptr __itt_module_loadA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path))
+ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, void, module_load,  (void *start_addr, void *end_addr, const char *path))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA     ITTNOTIFY_VOID(module_loadA)
+#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA)
+#define __itt_module_loadW     ITTNOTIFY_VOID(module_loadW)
+#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load     ITTNOTIFY_VOID(module_load)
+#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA(start_addr, end_addr, path)
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW(start_addr, end_addr, path)
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load(start_addr, end_addr, path)
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_H_ */
+
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+
+#ifndef _ITTNOTIFY_PRIVATE_
+#define _ITTNOTIFY_PRIVATE_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex,       (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex,         (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid))
+#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b)     ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b)
+#define __itt_task_begin_overlapped_ex_ptr              ITTNOTIFY_NAME(task_begin_overlapped_ex)
+#define __itt_task_end_overlapped_ex(d,x,y,z)           ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z)
+#define __itt_task_end_overlapped_ex_ptr                ITTNOTIFY_NAME(task_end_overlapped_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ex_ptr      0
+#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid)
+#define __itt_task_end_overlapped_ex_ptr        0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_overlapped_ex_ptr      0
+#define __itt_task_end_overlapped_ptr           0
+#define __itt_task_end_overlapped_ex_ptr        0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup marks_internal Marks
+ * @ingroup internal
+ * Marks group
+ * @warning Internal API:
+ *   - It is not shipped to outside of Intel
+ *   - It is delivered to internal Intel teams using e-mail or SVN access only
+ * @{
+ */
+/** @brief user mark type */
+typedef int __itt_mark_type;
+
+/**
+ * @brief Creates a user mark type with the specified name using char or Unicode string.
+ * @param[in] name - name of mark to create
+ * @return Returns a handle to the mark type
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_mark_type ITTAPI __itt_mark_createA(const char    *name);
+__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_mark_create     __itt_mark_createW
+#  define __itt_mark_create_ptr __itt_mark_createW_ptr
+#else /* UNICODE */
+#  define __itt_mark_create     __itt_mark_createA
+#  define __itt_mark_create_ptr __itt_mark_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_mark_type ITTAPI __itt_mark_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char    *name))
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create,  (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA     ITTNOTIFY_DATA(mark_createA)
+#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
+#define __itt_mark_createW     ITTNOTIFY_DATA(mark_createW)
+#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create      ITTNOTIFY_DATA(mark_create)
+#define __itt_mark_create_ptr  ITTNOTIFY_NAME(mark_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA(name) (__itt_mark_type)0
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW(name) (__itt_mark_type)0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create(name)  (__itt_mark_type)0
+#define __itt_mark_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string.
+ *
+ * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign.
+ * - The call is "synchronous" - function returns after mark is actually added to results.
+ * - This function is useful, for example, to mark different phases of an application
+ *   (the beginning of the next mark automatically means the end of the current region).
+ * - Can be used together with "continuous" marks (see below) at the same collection session
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @param[in] parameter - string parameter of mark
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_markA(__itt_mark_type mt, const char    *parameter);
+int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_mark     __itt_markW
+#  define __itt_mark_ptr __itt_markW_ptr
+#else /* UNICODE  */
+#  define __itt_mark     __itt_markA
+#  define __itt_mark_ptr __itt_markA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char    *parameter))
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark,  (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA     ITTNOTIFY_DATA(markA)
+#define __itt_markA_ptr ITTNOTIFY_NAME(markA)
+#define __itt_markW     ITTNOTIFY_DATA(markW)
+#define __itt_markW_ptr ITTNOTIFY_NAME(markW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark      ITTNOTIFY_DATA(mark)
+#define __itt_mark_ptr  ITTNOTIFY_NAME(mark)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA(mt, parameter) (int)0
+#define __itt_markA_ptr 0
+#define __itt_markW(mt, parameter) (int)0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark(mt, parameter)  (int)0
+#define __itt_mark_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA_ptr 0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create a "discrete" user event type (mark) for process
+ * rather than for one thread
+ * @see int __itt_mark(__itt_mark_type mt, const char* parameter);
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char    *parameter);
+int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_mark_global     __itt_mark_globalW
+#  define __itt_mark_global_ptr __itt_mark_globalW_ptr
+#else /* UNICODE  */
+#  define __itt_mark_global     __itt_mark_globalA
+#  define __itt_mark_global_ptr __itt_mark_globalA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char    *parameter))
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global,  (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA     ITTNOTIFY_DATA(mark_globalA)
+#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA)
+#define __itt_mark_globalW     ITTNOTIFY_DATA(mark_globalW)
+#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global      ITTNOTIFY_DATA(mark_global)
+#define __itt_mark_global_ptr  ITTNOTIFY_NAME(mark_global)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA(mt, parameter) (int)0
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW(mt, parameter) (int)0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global(mt, parameter)  (int)0
+#define __itt_mark_global_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates an "end" point for "continuous" mark with specified name.
+ *
+ * - Returns zero value in case of success, non-zero value otherwise.
+ *   Also returns non-zero value when the preceding "begin" point for the
+ *   mark with the same name failed to be created or was not created.
+ * - The mark of "continuous" type is placed to collection results in
+ *   case of success. It appears in overtime view(s) as a special tick
+ *   sign (different from "discrete" mark) together with line from
+ *   corresponding "begin" mark to "end" mark.
+ * @note Continuous marks can overlap and be nested inside each other.
+ * Discrete mark can be nested inside marked region
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+int ITTAPI __itt_mark_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt))
+#define __itt_mark_off     ITTNOTIFY_DATA(mark_off)
+#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_off(mt) (int)0
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create an "end" point for mark of process
+ * @see int __itt_mark_off(__itt_mark_type mt);
+ */
+int ITTAPI __itt_mark_global_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt))
+#define __itt_mark_global_off     ITTNOTIFY_DATA(mark_global_off)
+#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_global_off(mt) (int)0
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} marks group */
+
+/**
+ * @defgroup counters_internal Counters
+ * @ingroup internal
+ * Counters group
+ * @{
+ */
+
+
+/**
+ * @defgroup stitch Stack Stitching
+ * @ingroup internal
+ * Stack Stitching group
+ * @{
+ */
+/**
+ * @brief opaque structure for stitch point (caller) identification
+ */
+typedef struct ___itt_caller *__itt_caller;
+
+/**
+ * @brief Create the stitch point, i.e. a point in the call stack where other stacks should be stitched to.
+ * The function returns a unique identifier which is used to match the cut points with corresponding stitch points.
+ */
+__itt_caller ITTAPI __itt_stack_caller_create(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
+#define __itt_stack_caller_create     ITTNOTIFY_DATA(stack_caller_create)
+#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_create() (__itt_caller)0
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create()
+ */
+void ITTAPI __itt_stack_caller_destroy(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id))
+#define __itt_stack_caller_destroy     ITTNOTIFY_VOID(stack_caller_destroy)
+#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_destroy(id)
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Sets the cut point. Stack from each event which occurs after this call will be cut
+ * at the same stack level the function was called and stitched to the corresponding stitch point.
+ */
+void ITTAPI __itt_stack_callee_enter(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id))
+#define __itt_stack_callee_enter     ITTNOTIFY_VOID(stack_callee_enter)
+#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_enter(id)
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter().
+ */
+void ITTAPI __itt_stack_callee_leave(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id))
+#define __itt_stack_callee_leave     ITTNOTIFY_VOID(stack_callee_leave)
+#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_leave(id)
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} stitch group */
+
+/* ***************************************************************************************************************************** */
+
+#include <stdarg.h>
+
+/** @cond exclude_from_documentation */
+typedef enum __itt_error_code
+{
+    __itt_error_success       = 0, /*!< no error */
+    __itt_error_no_module     = 1, /*!< module can't be loaded */
+    /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */
+    __itt_error_no_symbol     = 2, /*!< symbol not found */
+    /* %1$s -- library name, %2$s -- symbol name. */
+    __itt_error_unknown_group = 3, /*!< unknown group specified */
+    /* %1$s -- env var name, %2$s -- group name. */
+    __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */
+    /* %1$s -- env var name, %2$d -- system error. */
+    __itt_error_env_too_long  = 5, /*!< variable value too long */
+    /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */
+    __itt_error_system        = 6  /*!< pthread_mutexattr_init or pthread_mutex_init failed */
+    /* %1$s -- function name, %2$d -- errno. */
+} __itt_error_code;
+
+typedef void (__itt_error_handler_t)(__itt_error_code code, va_list);
+__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*);
+
+const char* ITTAPI __itt_api_version(void);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler)
+void __itt_error_handler(__itt_error_code code, va_list args);
+extern const int ITTNOTIFY_NAME(err);
+#define __itt_err ITTNOTIFY_NAME(err)
+ITT_STUB(ITTAPI, const char*, api_version, (void))
+#define __itt_api_version     ITTNOTIFY_DATA(api_version)
+#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_api_version()   (const char*)0
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_PRIVATE_ */
+
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
diff --git a/tools/profiler/docs/buffer.rst b/tools/profiler/docs/buffer.rst
new file mode 100644
index 0000000000..dd7ef30dfd
--- /dev/null
+++ b/tools/profiler/docs/buffer.rst
@@ -0,0 +1,70 @@
+Buffers and Memory Management
+=============================
+
+In a post-Fission world, precise memory management across many threads and processes is
+especially important. In order for the profiler to achieve this, it uses a chunked buffer
+strategy.
+
+The `ProfileBuffer`_ is the overall buffer class that controls the memory and storage
+for the profile; it allows allocating objects into it. This can be used freely
+by things like markers and samples to store data as entries, without needing to know
+about the general strategy for how the memory is managed.
+
+The `ProfileBuffer`_ is then backed by the `ProfileChunkedBuffer`_. This specialized
+buffer grows incrementally, by allocating additional `ProfileBufferChunk`_ objects.
+More and more chunks will be allocated until a memory limit is reached, where they will
+be released. After releasing, the chunk will either be recycled or freed.
+
+The limiting of memory usage is coordinated by the `ProfilerParent`_ in the parent
+process. The `ProfilerParent`_ and `ProfilerChild`_ exchange IPC messages with information
+about how much memory is being used.  When the maximum byte threshold is passed,
+the ProfileChunkManager in the parent process removes the oldest chunk, and then the
+`ProfilerParent`_ sends a `DestroyReleasedChunksAtOrBefore`_ message to all of the child
+processes so that the oldest chunks in the profile are released. This helps long profiles
+to keep having data in a similar time frame.
+
+Profile Buffer Terminology
+##########################
+
+ProfilerParent
+  The main profiler machinery is installed in the parent process. It uses IPC to
+  communicate to the child processes. The PProfiler is the actor which is used
+  to communicate across processes to coordinate things. See `ProfilerParent.h`_. The
+  ProfilerParent uses the DestroyReleasedChunksAtOrBefore message to control the
+  overall chunk limit.
+
+ProfilerChild
+  ProfilerChild is installed in every child process; it receives
+  DestroyReleasedChunksAtOrBefore requests from the ProfilerParent.
+
+Entry
+  This is an individual entry in the `ProfileBuffer.h`_. These entry sizes are not
+  related to the chunk sizes. An individual entry can straddle two different chunks.
+  An entry can contain various pieces of data, like markers, samples, and stacks.
+
+Chunk
+  An arbitrary sized chunk of memory, managed by the `ProfileChunkedBuffer`_, and
+  IPC calls from the ProfilerParent.
+
+Unreleased Chunk
+  This chunk is currently being used to write entries into.
+
+Released chunk
+  This chunk is full of data. When memory limits happen, it can either be recycled
+  or freed.
+
+Recycled chunk
+  This is a chunk that was previously written into and is full. When memory limits occur,
+  rather than freeing the memory, it is re-used as the next chunk.
+
+.. _ProfileChunkedBuffer: https://searchfox.org/mozilla-central/search?q=ProfileChunkedBuffer&path=&case=true&regexp=false
+.. _ProfileChunkManager: https://searchfox.org/mozilla-central/search?q=ProfileBufferChunkManager.h&path=&case=true&regexp=false
+.. _ProfileBufferChunk: https://searchfox.org/mozilla-central/search?q=ProfileBufferChunk&path=&case=true&regexp=false
+.. _ProfileBufferChunkManagerWithLocalLimit: https://searchfox.org/mozilla-central/search?q=ProfileBufferChunkManagerWithLocalLimit&case=true&path=
+.. _ProfilerParent.h: https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerParent.h
+.. _ProfilerChild.h: https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerChild.h
+.. _ProfileBuffer.h: https://searchfox.org/mozilla-central/source/tools/profiler/core/ProfileBuffer.h
+.. _ProfileBuffer: https://searchfox.org/mozilla-central/search?q=ProfileBuffer&path=&case=true&regexp=false
+.. _ProfilerParent: https://searchfox.org/mozilla-central/search?q=ProfilerParent&path=&case=true&regexp=false
+.. _ProfilerChild: https://searchfox.org/mozilla-central/search?q=ProfilerChild&path=&case=true&regexp=false
+.. _DestroyReleasedChunksAtOrBefore: https://searchfox.org/mozilla-central/search?q=DestroyReleasedChunksAtOrBefore&path=&case=true&regexp=false
diff --git a/tools/profiler/docs/code-overview.rst b/tools/profiler/docs/code-overview.rst
new file mode 100644
index 0000000000..3ca662e141
--- /dev/null
+++ b/tools/profiler/docs/code-overview.rst
@@ -0,0 +1,1494 @@
+Profiler Code Overview
+######################
+
+This is an overview of the code that implements the Profiler inside Firefox
+with some details around tricky subjects, or pointers to more detailed
+documentation and/or source code.
+
+It assumes familiarity with Firefox development, including Mercurial (hg), mach,
+moz.build files, Try, Phabricator, etc.
+
+It also assumes knowledge of the user-visible part of the Firefox Profiler, that
+is: How to use the Firefox Profiler, and what profiles contain that is shown
+when capturing a profile. See the main website https://profiler.firefox.com, and
+its `documentation <https://profiler.firefox.com/docs/>`_.
+
+For just an "overview", it may look like a huge amount of information, but the
+Profiler code is indeed quite expansive, so it takes a lot of words to explain
+even just a high-level view of it! For on-the-spot needs, it should be possible
+to search for some terms here and follow the clues. But for long-term
+maintainers, it would be worth skimming this whole document to get a grasp of
+the domain, and return to get some more detailed information before diving into
+the code.
+
+WIP note: This document should be correct at the time it is written, but the
+profiler code constantly evolves to respond to bugs or to provide new exciting
+features, so this document could become obsolete in parts! It should still be
+useful as an overview, but its correctness should be verified by looking at the
+actual code. If you notice any significant discrepancy or broken links, please
+help by
+`filing a bug <https://bugzilla.mozilla.org/enter_bug.cgi?product=Core&component=Gecko+Profiler>`_.
+
+*****
+Terms
+*****
+
+This is the common usage for some frequently-used terms, as understood by the
+Dev Tools team. But incorrect usage can sometimes happen, context is key!
+
+* **profiler** (a): Generic name for software that enables the profiling of
+  code. (`"Profiling" on Wikipedia <https://en.wikipedia.org/wiki/Profiling_(computer_programming)>`_)
+* **Profiler** (the): All parts of the profiler code inside Firefox.
+* **Base Profiler** (the): Parts of the Profiler that live in
+  mozglue/baseprofiler, and can be used from anywhere, but have limited
+  functionality.
+* **Gecko Profiler** (the): Parts of the Profiler that live in tools/profiler,
+  and can only be used from other code in the XUL library.
+* **Profilers** (the): Both the Base Profiler and the Gecko Profiler.
+* **profiling session**: This is the time during which the profiler is running
+  and collecting data.
+* **profile** (a): The output from a profiling session, either as a file, or a
+  shared viewable profile on https://profiler.firefox.com
+* **Profiler back-end** (the): Other name for the Profiler code inside Firefox,
+  to distinguish it from...
+* **Profiler front-end** (the): The website https://profiler.firefox.com that
+  displays profiles captured by the back-end.
+* **Firefox Profiler** (the): The whole suite comprised of the back-end and front-end.
+
+******************
+Guiding Principles
+******************
+
+When working on the profiler, here are some guiding principles to keep in mind:
+
+* Low profiling overhead in cpu and memory. For the Profiler to provide the best
+  value, it should stay out of the way and consume as few resources (in time and
+  memory) as possible, so as not to skew the actual Firefox code too much.
+
+* Common data structures and code should be in the Base Profiler when possible.
+
+  WIP note: Deduplication is slowly happening, see
+  `meta bug 1557566 <https://bugzilla.mozilla.org/show_bug.cgi?id=1557566>`_.
+  This document focuses on the Profiler back-end, and mainly the Gecko Profiler
+  (because this is where most of the code lives, the Base Profiler is mostly a
+  subset, originally just a cut-down version of the Gecko Profiler); so unless
+  specified, descriptions below are about the Gecko Profiler, but know that
+  there may be some equivalent code in the Base Profiler as well.
+
+* Use appropriate programming-language features where possible to reduce coding
+  errors in both our code, and our users' usage of it. In C++, this can be done
+  by using specific class/struct types for a given usage, to avoid misuse
+  (e.g., a generic integer representing a **process** could be incorrectly
+  given to a function expecting a **thread**; we have specific types for these
+  instead, more below.)
+
+* Follow the
+  `Coding Style <https://firefox-source-docs.mozilla.org/code-quality/coding-style/index.html>`_.
+
+* Whenever possible, write tests (if not present already) for code you add or
+  modify -- but this may be too difficult in some cases; use good judgement and
+  at least test manually instead.
+
+******************
+Profiler Lifecycle
+******************
+
+Here is a high-level view of the Base **or** Gecko Profiler lifecycle, as part
+of a Firefox run. The following sections will go into much more detail.
+
+* Profiler initialization, preparing some common data.
+* Threads de/register themselves as they start and stop.
+* During each User/test-controlled profiling session:
+
+  * Profiler start, preparing data structures that will store the profiling data.
+  * Periodic sampling from a separate thread, happening at a user-selected
+    frequency (usually once every 1-2 ms), and recording snapshots of what
+    Firefox is doing:
+
+    * CPU sampling, measuring how much time each thread has spent actually
+      running on the CPU.
+    * Stack sampling, capturing a stack of function calls from whichever leaf
+      function the program is in at this point in time, up to the top-most
+      caller (i.e., at least the ``main()`` function, or its callers if any).
+      Note that unlike most external profilers, the Firefox Profiler back-end
+      is capable of getting more useful information than just native function
+      calls (compiled from C++ or Rust):
+
+      * Labels added by Firefox developers along the stack, usually to identify
+        regions of code that perform "interesting" operations (like layout, file
+        I/Os, etc.).
+      * JavaScript function calls, including the level of optimization applied.
+      * Java function calls.
+  * At any time, Markers may record more specific details of what is happening,
+    e.g.: User operations, page rendering steps, garbage collection, etc.
+  * Optional profiler pause, which stops most recording, usually near the end of
+    a session so that no data gets recorded past this point.
+  * Profile JSON output, generated from all the recorded profiling data.
+  * Profiler stop, tearing down profiling session objects.
+* Profiler shutdown.
+
+Note that the Base Profiler can start earlier, and then the data collected so
+far, as well as the responsibility for periodic sampling, is handed over to the
+Gecko Profiler:
+
+#. (Firefox starts)
+#. Base Profiler init
+#. Base Profiler start
+#. (Firefox loads the libxul library and initializes XPCOM)
+#. Gecko Profiler init
+#. Gecko Profiler start
+#. Handover from Base to Gecko
+#. Base Profiler stop
+#. (Bulk of the profiling session)
+#. JSON generation
+#. Gecko Profiler stop
+#. Gecko Profiler shutdown
+#. (Firefox ends XPCOM)
+#. Base Profiler shutdown
+#. (Firefox exits)
+
+Base Profiler functions that add data (mostly markers and labels) may be called
+from anywhere, and will be recorded by either Profiler. The corresponding
+functions in Gecko Profiler can only be called from other libxul code, and can
+only be recorded by the Gecko Profiler.
+
+Whenever possible, Gecko Profiler functions should be preferred if accessible,
+as they may provide extended functionality (e.g., better stacks with JS in
+markers). Otherwise fall back on Base Profiler functions.
+
+***********
+Directories
+***********
+
+* Non-Profiler supporting code
+
+  * `mfbt <https://searchfox.org/mozilla-central/source/mfbt>`_ - Mostly
+    replacements for C++ std library facilities.
+
+  * `mozglue/misc <https://searchfox.org/mozilla-central/source/mozglue/misc>`_
+
+    * `PlatformMutex.h <https://searchfox.org/mozilla-central/source/mozglue/misc/PlatformMutex.h>`_ -
+      Mutex base classes.
+    * `StackWalk.h <https://searchfox.org/mozilla-central/source/mozglue/misc/StackWalk.h>`_ -
+      Stack-walking functions.
+    * `TimeStamp.h <https://searchfox.org/mozilla-central/source/mozglue/misc/TimeStamp.h>`_ -
+      Timestamps and time durations.
+
+  * `xpcom <https://searchfox.org/mozilla-central/source/xpcom>`_
+
+    * `ds <https://searchfox.org/mozilla-central/source/xpcom/ds>`_ -
+      Data structures like arrays, strings.
+
+    * `threads <https://searchfox.org/mozilla-central/source/xpcom/threads>`_ -
+      Threading functions.
+
+* Profiler back-end
+
+  * `mozglue/baseprofiler <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler>`_ -
+    Base Profiler code, usable from anywhere in Firefox. Because it lives in
+    mozglue, it's loaded right at the beginning, so it's possible to start the
+    profiler very early, even before Firefox loads its big&heavy "xul" library.
+
+    * `baseprofiler's public <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public>`_ -
+      Public headers, may be #included from anywhere.
+    * `baseprofiler's core <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/core>`_ -
+      Main implementation code.
+    * `baseprofiler's lul <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/lul>`_ -
+      Special stack-walking code for Linux.
+    * `../tests/TestBaseProfiler.cpp <https://searchfox.org/mozilla-central/source/mozglue/tests/TestBaseProfiler.cpp>`_ -
+      Unit tests.
+
+  * `tools/profiler <https://searchfox.org/mozilla-central/source/tools/profiler>`_ -
+    Gecko Profiler code, only usable from the xul library. That library is
+    loaded a short time after Firefox starts, so the Gecko Profiler is not able
+    to profile the early phase of the application, Base Profiler handles that,
+    and can pass its collected data to the Gecko Profiler when the latter
+    starts.
+
+    * `public <https://searchfox.org/mozilla-central/source/tools/profiler/public>`_ -
+      Public headers, may be #included from most libxul code.
+    * `core <https://searchfox.org/mozilla-central/source/tools/profiler/core>`_ -
+      Main implementation code.
+    * `gecko <https://searchfox.org/mozilla-central/source/tools/profiler/gecko>`_ -
+      Control from JS, and multi-process/IPC code.
+    * `lul <https://searchfox.org/mozilla-central/source/tools/profiler/lul>`_ -
+      Special stack-walking code for Linux.
+    * `rust-api <https://searchfox.org/mozilla-central/source/tools/profiler/rust-api>`_,
+      `rust-helper <https://searchfox.org/mozilla-central/source/tools/profiler/rust-helper>`_
+    * `tests <https://searchfox.org/mozilla-central/source/tools/profiler/tests>`_
+
+  * `devtools/client/performance-new <https://searchfox.org/mozilla-central/source/devtools/client/performance-new>`_,
+    `devtools/shared/performance-new <https://searchfox.org/mozilla-central/source/devtools/shared/performance-new>`_ -
+    Middleware code for about:profiling and devtools panel functionality.
+
+  * js, starting with
+    `js/src/vm/GeckoProfiler.h <https://searchfox.org/mozilla-central/source/js/src/vm/GeckoProfiler.h>`_ -
+    JavaScript engine support, mostly to capture JS stacks.
+
+  * `toolkit/components/extensions/schemas/geckoProfiler.json <https://searchfox.org/mozilla-central/source/toolkit/components/extensions/schemas/geckoProfiler.json>`_ -
+    File that needs to be updated when Profiler features change.
+
+* Profiler front-end
+
+  * Out of scope for this document, but its code and bug repository can be found at:
+    https://github.com/firefox-devtools/profiler . Sometimes work needs to be
+    done on both the back-end and the front-end, especially when modifying the
+    back-end's JSON output format.
+
+*******
+Headers
+*******
+
+The most central public header is
+`GeckoProfiler.h <https://searchfox.org/mozilla-central/source/tools/profiler/public/GeckoProfiler.h>`_,
+from which almost everything else can be found, it can be a good starting point
+for exploration.
+It includes other headers, which together contain important top-level macros and
+functions.
+
+WIP note: GeckoProfiler.h used to be the header that contained everything!
+To better separate areas of functionality, and to hopefully reduce compilation
+times, parts of it have been split into smaller headers, and this work will
+continue, see `bug 1681416 <https://bugzilla.mozilla.org/show_bug.cgi?id=1681416>`_.
+
+MOZ_GECKO_PROFILER and Macros
+=============================
+
+Mozilla officially supports the Profiler on `tier-1 platforms
+<https://firefox-source-docs.mozilla.org/contributing/build/supported.html>`_:
+Windows, macOS, Linux and Android.
+There is also some code running on tier 2-3 platforms (e.g., for FreeBSD), but
+the team at Mozilla is not obligated to maintain it; we do try to keep it
+running, and some external contributors are keeping an eye on it and provide
+patches when things do break.
+
+To reduce the burden on unsupported platforms, a lot of the Profilers code is
+only compiled when ``MOZ_GECKO_PROFILER`` is #defined. This means that some
+public functions may not always be declared or implemented, and should be
+surrounded by guards like ``#ifdef MOZ_GECKO_PROFILER``.
+
+Some commonly-used functions offer an empty definition in the
+non-``MOZ_GECKO_PROFILER`` case, so these functions may be called from anywhere
+without guard.
+
+Other functions have associated macros that can always be used, and resolve to
+nothing on unsupported platforms. E.g.,
+``PROFILER_REGISTER_THREAD`` calls ``profiler_register_thread`` where supported,
+otherwise does nothing.
+
+WIP note: There is an effort to eventually get rid of ``MOZ_GECKO_PROFILER`` and
+its associated macros, see
+`bug 1635350 <https://bugzilla.mozilla.org/show_bug.cgi?id=1635350>`_.
+
+RAII "Auto" macros and classes
+==============================
+A number of functions are intended to be called in pairs, usually to start and
+then end some operation. To ease their use, and ensure that both functions are
+always called together, they usually have an associated class and/or macro that
+may be called only once. This pattern of using an object's destructor to ensure
+that some action always eventually happens, is called
+`RAII <https://en.cppreference.com/w/cpp/language/raii>`_ in C++, with the
+common prefix "auto".
+
+E.g.: In ``MOZ_GECKO_PROFILER`` builds,
+`AUTO_PROFILER_INIT <https://searchfox.org/mozilla-central/search?q=AUTO_PROFILER_INIT>`_
+instantiates an
+`AutoProfilerInit <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AAutoProfilerInit>`_
+object, which calls ``profiler_init`` when constructed, and
+``profiler_shutdown`` when destroyed.
+
+*********************
+Platform Abstractions
+*********************
+
+This section describes some platform abstractions that are used throughout the
+Profilers. (Other platform abstractions will be described where they are used.)
+
+Process and Thread IDs
+======================
+
+The Profiler back-end often uses process and thread IDs (aka "pid" and "tid"),
+which are commonly just a number.
+For better code correctness, and to hide specific platform details, they are
+encapsulated in opaque types
+`BaseProfilerProcessId <https://searchfox.org/mozilla-central/search?q=BaseProfilerProcessId>`_
+and
+`BaseProfilerThreadId <https://searchfox.org/mozilla-central/search?q=BaseProfilerThreadId>`_.
+These types should be used wherever possible.
+When interfacing with other code, they may be converted using the member
+functions ``FromNumber`` and ``ToNumber``.
+
+To find the current process or thread ID, use
+`profiler_current_process_id <https://searchfox.org/mozilla-central/search?q=profiler_current_process_id>`_
+or
+`profiler_current_thread_id <https://searchfox.org/mozilla-central/search?q=profiler_current_thread_id>`_.
+
+The main thread ID is available through
+`profiler_main_thread_id <https://searchfox.org/mozilla-central/search?q=profiler_main_thread_id>`_
+(assuming
+`profiler_init_main_thread_id <https://searchfox.org/mozilla-central/search?q=profiler_init_main_thread_id>`_
+was called when the application started -- especially important in stand-alone
+test programs.)
+And
+`profiler_is_main_thread <https://searchfox.org/mozilla-central/search?q=profiler_is_main_thread>`_
+is a quick way to find out if the current thread is the main thread.
+
+Locking
+=======
+The locking primitives in PlatformMutex.h are not supposed to be used as-is, but
+through a user-accessible implementation. For the Profilers, this is in
+`BaseProfilerDetail.h <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public/BaseProfilerDetail.h>`_.
+
+In addition to the usual ``Lock``, ``TryLock``, and ``Unlock`` functions,
+`BaseProfilerMutex <https://searchfox.org/mozilla-central/search?q=BaseProfilerMutex>`_
+objects have a name (which may be helpful when debugging),
+they record the thread on which they are locked (making it possible to know if
+the mutex is locked on the current thread), and in ``DEBUG`` builds there are
+assertions verifying that the mutex is not incorrectly used recursively, to
+verify the correct ordering of different Profiler mutexes, and that it is
+unlocked before destruction.
+
+Mutexes should preferably be locked within C++ block scopes, or as class
+members, by using
+`BaseProfilerAutoLock <https://searchfox.org/mozilla-central/search?q=BaseProfilerAutoLock>`_.
+
+Some classes give the option to use a mutex or not (so that single-threaded code
+can more efficiently bypass locking operations), for these we have
+`BaseProfilerMaybeMutex <https://searchfox.org/mozilla-central/search?q=BaseProfilerMaybeMutex>`_
+and
+`BaseProfilerMaybeAutoLock <https://searchfox.org/mozilla-central/search?q=BaseProfilerMaybeAutoLock>`_.
+
+There is also a special type of shared lock (aka RWLock, see
+`RWLock on wikipedia <https://en.wikipedia.org/wiki/Readers%E2%80%93writer_lock>`_),
+which may be locked in multiple threads (through ``LockShared`` or preferably
+`BaseProfilerAutoLockShared <https://searchfox.org/mozilla-central/search?q=BaseProfilerAutoLockShared>`_),
+or locked exclusively, preventing any other locking (through ``LockExclusive`` or preferably
+`BaseProfilerAutoLockExclusive <https://searchfox.org/mozilla-central/search?q=BaseProfilerAutoLockExclusive>`_).
+
+*********************
+Main Profiler Classes
+*********************
+
+Diagram showing the most important Profiler classes, see details in the
+following sections:
+
+(As noted, the "RegisteredThread" classes are now obsolete in the Gecko
+Profiler, see the "Thread Registration" section below for an updated diagram and
+description.)
+
+.. image:: profilerclasses-20220913.png
+
+***********************
+Profiler Initialization
+***********************
+
+`profiler_init <https://searchfox.org/mozilla-central/search?q=symbol:_Z13profiler_initPv>`_
+and
+`baseprofiler::profiler_init <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla12baseprofiler13profiler_initEPv>`_
+must be called from the main thread, and are used to prepare important aspects
+of the profiler, including:
+
+* Making sure the main thread ID is recorded.
+* Handling ``MOZ_PROFILER_HELP=1 ./mach run`` to display the command-line help.
+* Creating the ``CorePS`` instance -- more details below.
+* Registering the main thread.
+* Initializing some platform-specific code.
+* Handling other environment variables that are used to immediately start the
+  profiler, with optional settings provided in other env-vars.
+
+CorePS
+======
+
+The `CorePS class <https://searchfox.org/mozilla-central/search?q=symbol:T_CorePS>`_
+has a single instance that should live for the duration of the Firefox
+application, and contains important information that could be needed even when
+the Profiler is not running.
+
+It includes:
+
+* A static pointer to its single instance.
+* The process start time.
+* JavaScript-specific data structures.
+* A list of registered
+  `PageInformations <https://searchfox.org/mozilla-central/search?q=symbol:T_PageInformation>`_,
+  used to keep track of the tabs that this process handles.
+* A list of
+  `BaseProfilerCounts <https://searchfox.org/mozilla-central/search?q=symbol:T_BaseProfilerCount>`_,
+  used to record things like the process memory usage.
+* The process name, and optionally the "eTLD+1" (roughly sub-domain) that this
+  process handles.
+* In the Base Profiler only, a list of
+  `RegisteredThreads <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%253A%253Abaseprofiler%253A%253ARegisteredThread>`_.
+  WIP note: This storage has been reworked in the Gecko Profiler (more below),
+  and in practice the Base Profiler only registers the main thread. This should
+  eventually disappear as part of the de-duplication work
+  (`bug 1557566 <https://bugzilla.mozilla.org/show_bug.cgi?id=1557566>`_).
+
+*******************
+Thread Registration
+*******************
+
+Threads need to register themselves in order to get fully profiled.
+This section describes the main data structures that record the list of
+registered threads and their data.
+
+WIP note: There is some work happening to add limited profiling of unregistered
+threads, with the hope that more and more functionality could be added to
+eventually use the same registration data structures.
+
+Diagram showing the relevant classes, see details in the following sub-sections:
+
+.. image:: profilerthreadregistration-20220913.png
+
+ProfilerThreadRegistry
+======================
+
+The
+`static ProfilerThreadRegistry object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3Aprofiler%3A%3AThreadRegistry>`_
+contains a list of ``OffThreadRef`` objects.
+
+Each ``OffThreadRef`` points to a ``ProfilerThreadRegistration``, and restricts
+access to a safe subset of the thread data, and forces a mutex lock if necessary
+(more information under ProfilerThreadRegistrationData below).
+
+ProfilerThreadRegistration
+==========================
+
+A
+`ProfilerThreadRegistration object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3Aprofiler%3A%3AThreadRegistration>`_
+contains a lot of information relevant to its thread, to help with profiling it.
+
+This data is accessible from the thread itself through an ``OnThreadRef``
+object, which points to the ``ThreadRegistration``, and restricts access to a
+safe subset of thread data, and forces a mutex lock if necessary (more
+information under ProfilerThreadRegistrationData below).
+
+ThreadRegistrationData and accessors
+====================================
+
+`The ProfilerThreadRegistrationData.h header <https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerThreadRegistrationData.h>`_
+contains a hierarchy of classes that encapsulate all the thread-related data.
+
+``ThreadRegistrationData`` contains all the actual data members, including:
+
+* Some long-lived
+  `ThreadRegistrationInfo <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%253A%253Aprofiler%253A%253AThreadRegistrationInfo>`_,
+  containing the thread name, its registration time, the thread ID, and whether
+  it's the main thread.
+* A ``ProfilingStack`` that gathers developer-provided pseudo-frames, and JS
+  frames.
+* Some platform-specific ``PlatformData`` (usually required to actually record
+  profiling measurements for that thread).
+* A pointer to the top of the stack.
+* A shared pointer to the thread's ``nsIThread``.
+* A pointer to the ``JSContext``.
+* An optional pre-allocated ``JsFrame`` buffer used during stack-sampling.
+* Some JS flags.
+* Sleep-related data (to avoid costly sampling while the thread is known to not
+  be doing anything).
+* The current ``ThreadProfilingFeatures``, to know what kind of data to record.
+* When profiling, a pointer to a ``ProfiledThreadData``, which contains some
+  more data needed during and just after profiling.
+
+As described in their respective code comments, each data member is supposed to
+be accessed in certain ways, e.g., the ``JSContext`` should only be "written
+from thread, read from thread and suspended thread". To enforce these rules,
+data members can only be accessed through certain classes, which themselves can
+only be instantiated in the correct conditions.
+
+The accessor classes are, from base to most-derived:
+
+* ``ThreadRegistrationData``, not an accessor itself, but it's the base class
+  with all the ``protected`` data.
+* ``ThreadRegistrationUnlockedConstReader``, giving unlocked ``const`` access to
+  the ``ThreadRegistrationInfo``, ``PlatformData``, and stack top.
+* ``ThreadRegistrationUnlockedConstReaderAndAtomicRW``, giving unlocked
+  access to the atomic data members: ``ProfilingStack``, sleep-related data,
+  ``ThreadProfilingFeatures``.
+* ``ThreadRegistrationUnlockedRWForLockedProfiler``, giving access that's
+  protected by the Profiler's main lock, but doesn't require a
+  ``ThreadRegistration`` lock, to the ``ProfiledThreadData``.
+* ``ThreadRegistrationUnlockedReaderAndAtomicRWOnThread``, giving unlocked
+  mutable access, but only on the thread itself, to the ``JSContext``.
+* ``ThreadRegistrationLockedRWFromAnyThread``, giving locked access from any
+  thread to mutex-protected data: ``ThreadProfilingFeatures``, ``JsFrame``,
+  ``nsIThread``, and the JS flags.
+* ``ThreadRegistrationLockedRWOnThread``, giving locked access, but only from
+  the thread itself, to the ``JSContext`` and a JS flag-related operation.
+* ``ThreadRegistration::EmbeddedData``, containing all of the above, and stored
+  as a data member in each ``ThreadRegistration``.
+
+To recapitulate, if some code needs some data on the thread, it can use
+``ThreadRegistration`` functions to request access (with the required rights,
+like a mutex lock).
+To access data about another thread, use similar functions from
+``ThreadRegistry`` instead.
+You may find some examples in the implementations of the functions in
+ProfilerThreadState.h (see the following section).
+
+ProfilerThreadState.h functions
+===============================
+
+The
+`ProfilerThreadState.h <https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerThreadState.h>`_
+header provides a few helpful functions related to threads, including:
+
+* ``profiler_is_active_and_thread_is_registered``
+* ``profiler_thread_is_being_profiled`` (for the current thread or another
+  thread, and for a given set of features)
+* ``profiler_thread_is_sleeping``
+
+**************
+Profiler Start
+**************
+
+There are multiple ways to start the profiler, through command line env-vars,
+and programmatically in C++ and JS.
+
+The main public C++ function is
+`profiler_start <https://searchfox.org/mozilla-central/search?q=symbol:_Z14profiler_startN7mozilla10PowerOfTwoIjEEdjPPKcjyRKNS_5MaybeIdEE%2C_Z14profiler_startN7mozilla10PowerOfTwoIjEEdjPPKcjmRKNS_5MaybeIdEE>`_.
+It takes all the features specifications, and returns a promise that gets
+resolved when the Profiler has fully started in all processes (multi-process
+profiling is described later in this document, for now the focus will be on each
+process running its instance of the Profiler). It first calls ``profiler_init``
+if needed, and also ``profiler_stop`` if the profiler was already running.
+
+The main implementation, which can be called from multiple sources, is
+`locked_profiler_start <https://searchfox.org/mozilla-central/search?q=locked_profiler_start>`_.
+It performs a number of operations to start the profiling session, including:
+
+* Record the session start time.
+* Pre-allocate some work buffer to capture stacks for markers on the main thread.
+* In the Gecko Profiler only: If the Base Profiler was running, take ownership
+  of the data collected so far, and stop the Base Profiler (we don't want both
+  trying to collect the same data at the same time!)
+* Create the ActivePS, which keeps track of most of the profiling session
+  information, more about it below.
+* For each registered thread found in the ``ThreadRegistry``, check if it's one
+  of the threads to profile, and if yes set the appropriate data into the
+  corresponding ``ThreadRegistrationData`` (including informing the JS engine to
+  start recording profiling data).
+* On Android, start the Java sampler.
+* If native allocations are to be profiled, setup the appropriate hooks.
+* Start the audio callback tracing if requested.
+* Set the public shared "active" state, used by many functions to quickly assess
+  whether to actually record profiling data.
+
+ActivePS
+========
+
+The `ActivePS class <https://searchfox.org/mozilla-central/search?q=symbol:T_ActivePS>`_
+has a single instance at a time, that should live for the length of the
+profiling session.
+
+It includes:
+
+* The session start time.
+* A way to track "generations" (in case an old ActivePS still lives when the
+  next one starts, so that in-flight data goes to the correct place.)
+* Requested features: Buffer capacity, periodic sampling interval, feature set,
+  list of threads to profile, optional: specific tab to profile.
+* The profile data storage buffer and its chunk manager (see "Storage" section
+  below for details.)
+* More data about live and dead profiled threads.
+* Optional counters for per-process CPU usage, and power usage.
+* A pointer to the ``SamplerThread`` object (see "Periodic Sampling" section
+  below for details.)
+
+*******
+Storage
+*******
+
+During a session, the profiling data is serialized into a buffer, which is made
+of "chunks", each of which contains "blocks", which have a size and the "entry"
+data.
+
+During a profiling session, there is one main profile buffer, which may be
+started by the Base Profiler, and then handed over to the Gecko Profiler when
+the latter starts.
+
+The buffer is divided in chunks of equal size, which are allocated before they
+are needed. When the data reaches a user-set limit, the oldest chunk is
+recycled. This means that for long-enough profiling sessions, only the most
+recent data (that could fit under the limit) is kept.
+
+Each chunk stores a sequence of blocks of variable length. The chunk itself
+only knows where the first full block starts, and where the last block ends,
+which is where the next block will be reserved.
+
+To add an entry to the buffer, a block is reserved, the size is written first
+(so that readers can find the start of the next block), and then the entry bytes
+are written.
+
+The following sections give more technical details.
+
+leb128iterator.h
+================
+
+`This utility header <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public/leb128iterator.h>`_
+contains some functions to read and write unsigned "LEB128" numbers
+(`LEB128 on wikipedia <https://en.wikipedia.org/wiki/LEB128>`_).
+
+They are an efficient way to serialize numbers that are usually small, e.g.,
+numbers up to 127 only take one byte, two bytes up to 16,383, etc.
+
+ProfileBufferBlockIndex
+=======================
+
+`A ProfileBufferBlockIndex object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferBlockIndex>`_
+encapsulates a block index that is known to be the valid start of a block. It is
+created when a block is reserved, or when trusted code computes the start of a
+block in a chunk.
+
+The more generic
+`ProfileBufferIndex <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferIndex>`_
+type is used when working inside blocks.
+
+ProfileBufferChunk
+==================
+
+`A ProfileBufferChunk <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferChunk>`_
+is a variable-sized object. It contains:
+
+* A public copyable header, itself containing:
+
+  * The local offset to the first full block (a chunk may start with the end of
+    a block that was started at the end of the previous chunk). That offset in
+    the very first chunk is the natural start to read all the data in the
+    buffer.
+  * The local offset past the last reserved block. This is where the next block
+    should be reserved, unless it points past the end of this chunk.
+  * The timestamp when the chunk was first used.
+  * The timestamp when the chunk became full.
+  * The number of bytes that may be stored in this chunk.
+  * The number of reserved blocks.
+  * The global index where this chunk starts.
+  * The process ID writing into this chunk.
+
+* An owning unique pointer to the next chunk. It may be null for the last chunk
+  in a chain.
+
+* In ``DEBUG`` builds, a state variable, which is used to ensure that the chunk
+  goes through a known sequence of states (e.g., Created, then InUse, then
+  Done, etc.) See the sequence diagram
+  `where the member variable is defined <https://searchfox.org/mozilla-central/search?q=symbol:F_%3CT_mozilla%3A%3AProfileBufferChunk%3A%3AInternalHeader%3E_mState>`_.
+
+* The actual buffer data.
+
+Because a ProfileBufferChunk is variable-size, it must be created through its
+static ``Create`` function, which takes care of allocating the correct amount
+of bytes, at the correct alignment.
+
+Chunk Managers
+==============
+
+ProfileBufferChunkManager
+-------------------------
+
+`The ProfileBufferChunkManager abstract class <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferChunkManager>`_
+defines the interface of classes that manage chunks.
+
+Concrete implementations are responsible for:
+
+* Creating chunks for their user, with a mechanism to pre-allocate chunks before they are actually needed.
+* Taking back and owning chunks when they are "released" (usually when full).
+* Automatically destroying or recycling the oldest released chunks.
+* Giving temporary access to extant released chunks.
+
+ProfileBufferChunkManagerSingle
+-------------------------------
+
+`A ProfileBufferChunkManagerSingle object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferChunkManagerSingle>`_
+manages a single chunk.
+
+That chunk is always the same, it is never destroyed. The user may use it and
+optionally release it. The manager can then be reset, and that one chunk will
+be available again for use.
+
+A request for a second chunk would always fail.
+
+This manager is short-lived and not thread-safe. It is useful when there is some
+limited data that needs to be captured without blocking the global profiling
+buffer, usually one stack sample. This data may then be extracted and quickly
+added to the global buffer.
+
+ProfileBufferChunkManagerWithLocalLimit
+---------------------------------------
+
+`A ProfileBufferChunkManagerWithLocalLimit object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferChunkManagerWithLocalLimit>`_
+implements the ``ProfileBufferChunkManager`` interface fully, managing a number
+of chunks, and making sure their total combined size stays under a given limit.
+This is the main chunk manager used during a profiling session.
+
+Note: It also implements the ``ProfileBufferControlledChunkManager`` interface,
+this is explained in the later section "Multi-Process Profiling".
+
+It is thread-safe, and one instance is shared by both Profilers.
+
+ProfileChunkedBuffer
+====================
+
+`A ProfileChunkedBuffer object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileChunkedBuffer>`_
+uses a ``ProfileBufferChunkManager`` to store data, and handles the different
+C++ types of data that the Profilers want to read/write as entries in buffer
+chunks.
+
+Its main function is ``ReserveAndPut``:
+
+* It takes an invocable object (like a lambda) that should return the size of
+  the entry to store, this is to potentially avoid costly operations just to
+  compute a size, when the profiler may not be running.
+* It attempts to reserve the space in its chunks, requesting a new chunk if
+  necessary.
+* It then calls a provided invocable object with a
+  `ProfileBufferEntryWriter <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferEntryWriter>`_,
+  which offers a range of functions to help serialize C++ objects. The
+  de/serialization functions are found in specializations of
+  `ProfileBufferEntryWriter::Serializer <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferEntryWriter%3A%3ASerializer>`_
+  and
+  `ProfileBufferEntryReader::Deserializer <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferEntryReader%3A%3ADeserializer>`_.
+
+More "put" functions use ``ReserveAndPut`` to more easily serialize blocks of
+memory, or C++ objects.
+
+``ProfileChunkedBuffer`` is optionally thread-safe, using a
+``BaseProfilerMaybeMutex``.
+
+WIP note: Using a mutex makes this storage too noisy for profiling some
+real-time threads (like audio processing).
+`Bug 1697953 <https://bugzilla.mozilla.org/show_bug.cgi?id=1697953>`_ will look
+at switching to using atomic variables instead.
+An alternative would be to use totally separate non-thread-safe buffers for
+each real-time thread that requires it (see
+`bug 1754889 <https://bugzilla.mozilla.org/show_bug.cgi?id=1754889>`_).
+
+ProfileBuffer
+=============
+
+`A ProfileBuffer object <https://searchfox.org/mozilla-central/search?q=symbol:T_ProfileBuffer>`_
+uses a ``ProfileChunkedBuffer`` to store data, and handles the different kinds
+of entries that the Profilers want to read/write.
+
+Each entry starts with a tag identifying a kind. These kinds can be found in
+`ProfileBufferEntryKinds.h <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h>`_.
+
+There are "legacy" kinds, which are small fixed-length entries, such as:
+Categories, labels, frame information, counters, etc. These can be stored in
+`ProfileBufferEntry objects <https://searchfox.org/mozilla-central/search?q=symbol:T_ProfileBufferEntry>`_.
+
+And there are "modern" kinds, which have variable sizes, such as: Markers, CPU
+running times, full stacks, etc. These are more directly handled by code that
+can access the underlying ``ProfileChunkedBuffer``.
+
+The other major responsibility of a ``ProfileBuffer`` is to read back all
+this data, sometimes during profiling (e.g., to duplicate a stack), but mainly
+at the end of a session when generating the output JSON profile.
+
+*****************
+Periodic Sampling
+*****************
+
+Probably the most important job of the Profiler is to sample stacks of a number
+of running threads, to help developers know which functions get used a lot when
+performing some operation on Firefox.
+
+This is accomplished from a special thread, which regularly springs into action
+and captures all this data.
+
+SamplerThread
+=============
+
+`The SamplerThread object <https://searchfox.org/mozilla-central/search?q=symbol:T_SamplerThread>`_
+manages the information needed during sampling. It is created when the profiler
+starts, and is stored inside the ``ActivePS``, see above for details.
+
+It includes:
+
+* A ``Sampler`` object that contains platform-specific details, which are
+  implemented in separate files like platform-win32.cpp, etc.
+* The same generation index as its owning ``ActivePS``.
+* The requested interval between samples.
+* A handle to the thread where the sampling happens; its main function is
+  `Run() <https://searchfox.org/mozilla-central/search?q=symbol:_ZN13SamplerThread3RunEv>`_.
+* A list of callbacks to invoke after the next sampling. These may be used by
+  tests to wait for sampling to actually happen.
+* The unregistered-thread-spy data, and an optional handle on another thread
+  that takes care of "spying" on unregistered threads (on platforms where that
+  operation is too expensive to run directly on the sampling thread).
+
+The ``Run()`` function takes care of performing the periodic sampling work:
+(more details in the following sections)
+
+* Retrieve the sampling parameters.
+* Instantiate a ``ProfileBuffer`` on the stack, to capture samples from other threads.
+* Loop until a ``break``:
+
+  * Lock the main profiler mutex, and do:
+
+    * Check if sampling should stop, and break from the loop.
+    * Clean-up exit profiles (these are profiles sent from dying sub-processes,
+      and are kept for as long as they overlap with this process' own buffer range).
+    * Record the CPU utilization of the whole process.
+    * Record the power consumption.
+    * Sample each registered counter, including the memory counter.
+    * For each registered thread to be profiled:
+
+      * Record the CPU utilization.
+      * If the thread is marked as "still sleeping", record a "same as before"
+        sample, otherwise suspend the thread and take a full stack sample.
+      * On some threads, record the event delay to compute the
+        (un)responsiveness. WIP note: This implementation may change.
+
+    * Record profiling overhead durations.
+
+  * Unlock the main profiler mutex.
+  * Invoke registered post-sampling callbacks.
+  * Spy on unregistered threads.
+  * Based on the requested sampling interval, and how much time this loop took,
+    compute when the next sampling loop should start, and make the thread sleep
+    for the appropriate amount of time. The goal is to be as regular as
+    possible, but if some/all loops take too much time, don't try too hard to
+    catch up, because the system is probably under stress already.
+  * Go back to the top of the loop.
+
+* If we're here, we hit a loop ``break`` above.
+* Invoke registered post-sampling callbacks, to let them know that sampling
+  stopped.
+
+CPU Utilization
+===============
+
+CPU Utilization is stored as a number of milliseconds that a thread or process
+has spent running on the CPU since the previous sampling.
+
+Implementations are platform-dependent, and can be found in
+`the GetThreadRunningTimesDiff function <https://searchfox.org/mozilla-central/search?q=symbol:_ZL25GetThreadRunningTimesDiffRK10PSAutoLockRN7mozilla8profiler45ThreadRegistrationUnlockedRWForLockedProfilerE>`_
+and
+`the GetProcessRunningTimesDiff function <https://searchfox.org/mozilla-central/search?q=symbol:_ZL26GetProcessRunningTimesDiffRK10PSAutoLockR12RunningTimes>`_.
+
+Power Consumption
+=================
+
+Energy probes added in 2022.
+
+Stacks
+======
+
+Stacks are the sequence of calls going from the entry point in the program
+(generally ``main()`` and some OS-specific functions above), down to the
+function where code is currently being executed.
+
+Native Frames
+-------------
+
+Compiled code, from C++ and Rust source.
+
+Label Frames
+------------
+
+Pseudo-frames with arbitrary text, added from any language, mostly C++.
+
+JS, Wasm Frames
+---------------
+
+Frames corresponding to JavaScript functions.
+
+Java Frames
+-----------
+
+Recorded by the JavaSampler.
+
+Stack Merging
+-------------
+
+The above types of frames are all captured in different ways, and when finally
+taking an actual stack sample (apart from Java), they get merged into one stack.
+
+All frames have an associated address in the call stack, and can therefore be
+merged mostly by ordering them by this stack address. See
+`MergeStacks <https://searchfox.org/mozilla-central/search?q=symbol:_ZL11MergeStacksjbRKN7mozilla8profiler51ThreadRegistrationUnlockedReaderAndAtomicRWOnThreadERK9RegistersRK11NativeStackR22ProfilerStackCollectorPN2JS22ProfilingFrameIterator5FrameEj>`_
+for the implementation details.
+
+Counters
+========
+
+Counters are a special kind of probe, which can be continuously updated during
+profiling, and the ``SamplerThread`` will sample their value at every loop.
+
+Memory Counter
+--------------
+
+This is the main counter. During a profiling session, hooks into the memory
+manager keep track of each de/allocation, so at each sampling we know how many
+operations were performed, and what is the current memory usage compared to the
+previous sampling.
+
+Profiling Overhead
+==================
+
+The ``SamplerThread`` records timestamps between parts of its sampling loop, and
+records this as the sampling overhead. This may be useful to determine if the
+profiler itself may have used too much of the computer resources, which could
+skew the profile and give wrong impressions.
+
+Unregistered Thread Profiling
+=============================
+
+At some intervals (not necessarily every sampling loop, depending on the OS),
+the profiler may attempt to find unregistered threads, and record some
+information about them.
+
+WIP note: This feature is experimental, and data is captured in markers on the
+main thread. More work is needed to put this data in tracks like regular
+registered threads, and capture more data like stack samples and markers.
+
+*******
+Markers
+*******
+
+Markers are events with a precise timestamp or time range, they have a name, a
+category, options (out of a few choices), and optional marker-type-specific
+payload data.
+
+Before describing the implementation, it is useful to be familiar with how
+markers are natively added from C++, because this drives how the implementation
+takes all this information and eventually outputs it in the final JSON profile.
+
+Adding Markers from C++
+=======================
+
+See https://firefox-source-docs.mozilla.org/tools/profiler/markers-guide.html
+
+Implementation
+==============
+
+The main function that records markers is
+`profiler_add_marker <https://searchfox.org/mozilla-central/search?q=symbol:_Z19profiler_add_markerRKN7mozilla18ProfilerStringViewIcEERKNS_14MarkerCategoryEONS_13MarkerOptionsET_DpRKT0_>`_.
+It's a variadic templated function that takes the different expected
+arguments, first checks if the marker should actually be recorded (the profiler
+should be running, and the target thread should be profiled), and then calls
+into the deeper implementation function ``AddMarkerToBuffer`` with a reference
+to the main profiler buffer.
+
+`AddMarkerToBuffer <https://searchfox.org/mozilla-central/search?q=symbol:_Z17AddMarkerToBufferRN7mozilla20ProfileChunkedBufferERKNS_18ProfilerStringViewIcEERKNS_14MarkerCategoryEONS_13MarkerOptionsET_DpRKT0_>`_
+takes the marker type as an object, removes it from the function parameter list,
+and calls the next function with the marker type as an explicit template
+parameter, and also a pointer to the function that can capture the stack
+(because it is different between Base and Gecko Profilers, in particular the
+latter one knows about JS).
+
+From here, we enter the land of
+`BaseProfilerMarkersDetail.h <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h>`_,
+which employs some heavy template techniques, in order to most efficiently
+serialize the given marker payload arguments, in order to make them
+deserializable when outputting the final JSON. In previous implementations, for
+each new marker type, a new C++ class derived from a payload abstract class was
+required, that had to implement all the constructors and virtual functions to:
+
+* Create the payload object.
+* Serialize the payload into the profile buffer.
+* Deserialize from the profile buffer to a new payload object.
+* Convert the payload into the final output JSON.
+
+Now, the templated functions automatically take care of serializing all given
+function call arguments directly (instead of storing them somewhere first), and
+preparing a deserialization function that will recreate them on the stack and
+directly call the user-provided JSONification function with these arguments.
+
+Continuing from the public ``AddMarkerToBuffer``,
+`mozilla::base_profiler_markers_detail::AddMarkerToBuffer <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla28base_profiler_markers_detail17AddMarkerToBufferERNS_20ProfileChunkedBufferERKNS_18ProfilerStringViewIcEERKNS_14MarkerCategoryEONS_13MarkerOptionsEPFbS2_NS_19StackCaptureOptionsEEDpRKT0_>`_
+sets some defaults if not specified by the caller: Target the current thread,
+use the current time.
+
+Then if a stack capture was requested, attempt to do it in
+the most efficient way, using a pre-allocated buffer if possible.
+
+WIP note: This potential allocation should be avoided in time-critical threads.
+There is already a buffer for the main thread (because it's the busiest thread),
+but there could be more pre-allocated buffers, for specific real-time threads
+that need them, or picked from a pool of pre-allocated buffers. See
+`bug 1578792 <https://bugzilla.mozilla.org/show_bug.cgi?id=1578792>`_.
+
+From there, `AddMarkerWithOptionalStackToBuffer <https://searchfox.org/mozilla-central/search?q=AddMarkerWithOptionalStackToBuffer>`_
+handles ``NoPayload`` markers (usually added with ``PROFILER_MARKER_UNTYPED``)
+in a special way, mostly to avoid the extra work associated with handling
+payloads. Otherwise it continues with the following function.
+
+`MarkerTypeSerialization<MarkerType>::Serialize <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla28base_profiler_markers_detail23MarkerTypeSerialization9SerializeERNS_20ProfileChunkedBufferERKNS_18ProfilerStringViewIcEERKNS_14MarkerCategoryEONS_13MarkerOptionsEDpRKTL0__>`_
+retrieves the deserialization tag associated with the marker type. If it's the
+first time this marker type is used,
+`Streaming::TagForMarkerTypeFunctions <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla28base_profiler_markers_detail9Streaming25TagForMarkerTypeFunctionsEPFvRNS_24ProfileBufferEntryReaderERNS_12baseprofiler20SpliceableJSONWriterEEPFNS_4SpanIKcLy18446744073709551615EEEvEPFNS_12MarkerSchemaEvE,_ZN7mozilla28base_profiler_markers_detail9Streaming25TagForMarkerTypeFunctionsEPFvRNS_24ProfileBufferEntryReaderERNS_12baseprofiler20SpliceableJSONWriterEEPFNS_4SpanIKcLm18446744073709551615EEEvEPFNS_12MarkerSchemaEvE,_ZN7mozilla28base_profiler_markers_detail9Streaming25TagForMarkerTypeFunctionsEPFvRNS_24ProfileBufferEntryReaderERNS_12baseprofiler20SpliceableJSONWriterEEPFNS_4SpanIKcLj4294967295EEEvEPFNS_12MarkerSchemaEvE>`_
+adds it to the global list (which stores some function pointers used during
+deserialization).
+
+Then the main serialization happens in
+`StreamFunctionTypeHelper<decltype(MarkerType::StreamJSONMarkerData)>::Serialize <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla28base_profiler_markers_detail24StreamFunctionTypeHelperIFT_RNS_12baseprofiler20SpliceableJSONWriterEDpT0_EE9SerializeERNS_20ProfileChunkedBufferERKNS_18ProfilerStringViewIcEERKNS_14MarkerCategoryEONS_13MarkerOptionsEhDpRKS6_>`_.
+Deconstructing this mouthful of a template:
+
+* ``MarkerType::StreamJSONMarkerData`` is the user-provided function that will
+  eventually produce the final JSON, but here it's only used to know the
+  parameter types that it expects.
+* ``StreamFunctionTypeHelper`` takes that function prototype, and can extract
+  its arguments by specializing on ``R(SpliceableJSONWriter&, As...)``, now
+  ``As...`` is a parameter pack matching the function parameters.
+* Note that ``Serialize`` also takes a parameter pack, which contains all the
+  referenced arguments given to the top ``AddMarkerToBuffer`` call. These two
+  packs are supposed to match, at least the given arguments should be
+  convertible to the target pack parameter types.
+* That specialization's ``Serialize`` function calls the buffer's ``PutObjects``
+  variadic function to write all the marker data, that is:
+
+  * The entry kind that must be at the beginning of every buffer entry, in this
+    case `ProfileBufferEntryKind::Marker <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h#78>`_.
+  * The common marker data (options first, name, category, deserialization tag).
+  * Then all the marker-type-specific arguments. Note that the C++ types
+    are those extracted from the deserialization function, so we know that
+    whatever is serialized here can be later deserialized using those same
+    types.
+
+The deserialization side is described in the later section "JSON output of
+Markers".
+
+Adding Markers from Rust
+========================
+
+See https://firefox-source-docs.mozilla.org/tools/profiler/instrumenting-rust.html#adding-markers
+
+Adding Markers from JS
+======================
+
+See https://firefox-source-docs.mozilla.org/tools/profiler/instrumenting-javascript.html
+
+Adding Markers from Java
+========================
+
+See https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/geckoview/ProfilerController.java
+
+*************
+Profiling Log
+*************
+
+During a profiling session, some profiler-related events may be recorded using
+`ProfilingLog::Access <https://searchfox.org/mozilla-central/search?q=symbol:_ZN12ProfilingLog6AccessEOT_>`_.
+
+The resulting JSON object is added near the end of the process' JSON generation,
+in a top-level property named "profilingLog". This object is free-form, and is
+not intended to be displayed, or even read by most people. But it may include
+interesting information for advanced users, or could be an early temporary
+prototyping ground for new features.
+
+See "profileGatheringLog" for another log related to late events.
+
+WIP note: This was introduced shortly before this documentation, so at this time
+it doesn't do much at all.
+
+***************
+Profile Capture
+***************
+
+Usually at the end of a profiling session, a profile is "captured", and either
+saved to disk, or sent to the front-end https://profiler.firefox.com for
+analysis. This section describes how the captured data is converted to the
+Gecko Profiler JSON format.
+
+FailureLatch
+============
+
+`The FailureLatch interface <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AFailureLatch>`_
+is used during the JSON generation, in order to catch any unrecoverable error
+(such as running Out Of Memory), to exit the process early, and to forward the
+error to callers.
+
+There are two main implementations, suffixed "source" as they are the one source
+of failure-handling, which is passed as ``FailureLatch&`` throughout the code:
+
+* `FailureLatchInfallibleSource <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AFailureLatchInfallibleSource>`_
+  is an "infallible" latch, meaning that it doesn't expect any failure. So if
+  a failure actually happened, the program would immediately terminate! (This
+  was the default behavior prior to introducing these latches.)
+* `FailureLatchSource <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AFailureLatchSource>`_
+  is a "fallible" latch, it will record the first failure that happens, and
+  "latch" into the failure state. The code should regularly examine this state,
+  and return early when possible. Eventually this failure state may be exposed
+  to end users.
+
+ProgressLogger, ProportionValue
+===============================
+
+`A ProgressLogger object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProgressLogger>`_
+is used to track the progress of a long operation, in this case the JSON
+generation process.
+
+To match how the JSON generation code works (as a tree of C++ functions calls),
+each ``ProgressLogger`` in a function usually records progress from 0 to 100%
+locally inside that function. If that function calls a sub-function, it gives it
+a sub-logger, which in the caller function is set to represent a local sub-range
+(like 20% to 40%), but to the called function it will look like its own local
+``ProgressLogger`` that goes from 0 to 100%. The very top ``ProgressLogger``
+converts the deepest local progress value to the corresponding global progress.
+
+Progress values are recorded in
+`ProportionValue objects <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProportionValue>`_,
+which effectively record fractional values with no loss of precision.
+
+This progress is most useful when the parent process is waiting for child
+processes to do their work, to make sure progress does happen, otherwise to stop
+waiting for frozen processes. More about that in the "Multi-Process Profiling"
+section below.
+
+JSONWriter
+==========
+
+`A JSONWriter object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AJSONWriter>`_
+offers a simple way to create a JSON stream (start/end collections, add
+elements, etc.), and calls back into a provided
+`JSONWriteFunc interface <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AJSONWriteFunc>`_
+to output characters.
+
+While these classes live outside of the Profiler directories, it may sometimes be
+worth maintaining and/or modifying them to better serve the Profiler's needs.
+But there are other users, so be careful not to break other things!
+
+SpliceableJSONWriter and SpliceableChunkedJSONWriter
+====================================================
+
+Because the Profiler deals with large amounts of data (big profiles can take
+tens to hundreds of megabytes!), some specialized wrappers add better handling
+of these large JSON streams.
+
+`SpliceableJSONWriter <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3Abaseprofiler%3A%3ASpliceableJSONWriter>`_
+is a subclass of ``JSONWriter``, and allows the "splicing" of JSON strings,
+i.e., being able to take a whole well-formed JSON string, and directly inserting
+it as a JSON object in the target JSON being streamed.
+
+It also offers some functions that are often useful for the Profiler, such as:
+
+* Converting a timestamp into a JSON object in the stream, taking care of keeping a nanosecond precision, without unwanted zeroes or nines at the end.
+* Adding a number of null elements.
+* Adding a unique string index, and add that string to a provided unique-string list if necessary. (More about UniqueStrings below.)
+
+`SpliceableChunkedJSONWriter <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3Abaseprofiler%3A%3ASpliceableChunkedJSONWriter>`_
+is a subclass of ``SpliceableJSONWriter``. Its main attribute is that it provides its own writer
+(`ChunkedJSONWriteFunc <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3Abaseprofiler%3A%3AChunkedJSONWriteFunc>`_),
+which stores the stream as a sequence of "chunks" (heap-allocated buffers).
+It starts with a chunk of a default size, and writes incoming data into it,
+later allocating more chunks as needed. This avoids having massive buffers being
+resized all the time.
+
+It also offers the same splicing abilities as its parent class, but in case an
+incoming JSON string comes from another ``SpliceableChunkedJSONWriter``, it's
+able to just steal the chunks and add them to its list, thereby avoiding
+expensive allocations and copies and destructions.
+
+UniqueStrings
+=============
+
+Because a lot of strings would be repeated in profiles (e.g., frequent marker
+names), such strings are stored in a separate JSON array of strings, and an
+index into this list is used instead of that full string object.
+
+Note that these unique-string indices are currently only located in specific
+spots in the JSON tree, they cannot be used just anywhere strings are accepted.
+
+`The UniqueJSONStrings class <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3Abaseprofiler%3A%3AUniqueJSONStrings>`_
+stores this list of unique strings in a ``SpliceableChunkedJSONWriter``.
+Given a string, it takes care of storing it if encountered for the first time,
+and inserts the index into a target ``SpliceableJSONWriter``.
+
+JSON Generation
+===============
+
+The "Gecko Profile Format" can be found at
+https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md .
+
+The implementation in the back-end is
+`locked_profiler_stream_json_for_this_process <https://searchfox.org/mozilla-central/search?q=locked_profiler_stream_json_for_this_process>`_.
+It outputs each top-level JSON object, mostly in sequence. See the code for
+how each object is output. Note that there is special handling for samples and
+markers, as explained in the following section.
+
+ProcessStreamingContext and ThreadStreamingContext
+--------------------------------------------------
+
+In JSON profiles, samples and markers are separated by thread and by
+samples/markers. Because there are potentially tens to a hundred threads, it
+would be very costly to read the full profile buffer once for each of these
+groups. So instead the buffer is read once, and all samples and markers are
+handled as they are read, and their JSON output is sent to separate JSON
+writers.
+
+`A ProcessStreamingContext object <https://searchfox.org/mozilla-central/search?q=symbol:T_ProcessStreamingContext>`_
+contains all the information to facilitate this output, including a list of
+`ThreadStreamingContext's <https://searchfox.org/mozilla-central/search?q=symbol:T_ThreadStreamingContext>`_,
+which each contain one ``SpliceableChunkedJSONWriter`` for the samples, and one
+for the markers in this thread.
+
+When reading entries from the profile buffer, samples and markers are found by
+their ``ProfileBufferEntryKind``, and as part of deserializing either kind (more
+about each below), the thread ID is read, and determines which
+``ThreadStreamingContext`` will receive the JSON output.
+
+At the end of this process, all ``SpliceableChunkedJSONWriters`` are efficiently
+spliced (mainly a pointer move) into the final JSON output.
+
+JSON output of Samples
+----------------------
+
+This work is done in
+`ProfileBuffer::DoStreamSamplesAndMarkersToJSON <https://searchfox.org/mozilla-central/search?q=DoStreamSamplesAndMarkersToJSON>`_.
+
+From the main ``ProfileChunkedBuffer``, each entry is visited, its
+``ProfileBufferEntryKind`` is read first, and for samples all frames from
+captured stack are converted to the appropriate JSON.
+
+`A UniqueStacks object <https://searchfox.org/mozilla-central/search?q=symbol:T_UniqueStacks>`_
+is used to de-duplicate frames and even sub-stacks:
+
+* Each unique frame string is written into a JSON array inside a
+  ``SpliceableChunkedJSONWriter``, and its index is the frame identifier.
+* Each stack level is also de-duplicated, and identifies the associated frame
+  string, and points at the calling stack level (i.e., closer to the root).
+* Finally, the identifier for the top of the stack is stored, along with a
+  timestamp (and potentially some more information) as the sample.
+
+For example, if we have collected the following samples:
+
+#. A -> B -> C
+#. A -> B
+#. A -> B -> D
+
+The frame table would contain each frame name, something like:
+``["A", "B", "C", "D"]``. So the frame containing "A" has index 0, "B" is at 1,
+etc.
+
+The stack table would contain each stack level, something like:
+``[[0, null], [1, 0], [2, 1], [3, 1]]``. ``[0, null]`` means the frame is 0
+("A"), and it has no caller, it's the root frame. ``[1, 0]`` means the frame is
+1 ("B"), and its caller is stack 0, which is just the previous one in this
+example.
+
+And the three samples stored in the thread data would therefore be: 2, 1, 3
+(E.g.: "2" points in the stack table at the frame [2,1] with "C", and from there
+down to "B", then "A").
+
+All this contains all the information needed to reconstruct all full stack
+samples.
+
+JSON output of Markers
+----------------------
+
+This also happens
+`inside ProfileBuffer::DoStreamSamplesAndMarkersToJSON <https://searchfox.org/mozilla-central/search?q=DoStreamSamplesAndMarkersToJSON>`_.
+
+When a ``ProfileBufferEntryKind::Marker`` is encountered,
+`the DeserializeAfterKindAndStream function <https://searchfox.org/mozilla-central/search?q=DeserializeAfterKindAndStream>`_
+reads the ``MarkerOptions`` (stored as explained above), which include the
+thread ID, identifying which ``ThreadStreamingContext``'s
+``SpliceableChunkedJSONWriter`` to use.
+
+After that, the common marker data (timing, category, etc.) is output.
+
+Then the ``Streaming::DeserializerTag`` identifies which type of marker this is.
+The special case of ``0`` (no payload) means nothing more is output.
+
+Otherwise some more common data is output as part of the payload if present, in
+particular the "inner window id" (used to match markers with specific html
+frames), and stack.
+
+WIP note: Some of these may move around in the future, see
+`bug 1774326 <https://bugzilla.mozilla.org/show_bug.cgi?id=1774326>`_,
+`bug 1774328 <https://bugzilla.mozilla.org/show_bug.cgi?id=1774328>`_, and
+others.
+
+In case of a C++-written payload, the ``DeserializerTag`` identifies the
+``MarkerDataDeserializer`` function to use. This is part of the heavily
+templated code in BaseProfilerMarkersDetail.h; the function is defined as
+`MarkerTypeSerialization<MarkerType>::Deserialize <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla28base_profiler_markers_detail23MarkerTypeSerialization11DeserializeERNS_24ProfileBufferEntryReaderERNS_12baseprofiler20SpliceableJSONWriterE>`_,
+which outputs the marker type name, and then each marker payload argument. The
+latter is done by using the user-defined ``MarkerType::StreamJSONMarkerData``
+parameter list, and recursively deserializing each parameter from the profile
+buffer into an on-stack variable of a corresponding type, at the end of which
+``MarkerType::StreamJSONMarkerData`` can be called with all of these arguments
+as it expects, and that function does the actual JSON streaming as the user
+programmed.
+
+*************
+Profiler Stop
+*************
+
+See "Profiler Start" and do the reverse!
+
+There is some special handling of the ``SampleThread`` object, just to ensure
+that it gets deleted outside of the main profiler mutex being locked, otherwise
+this could result in a deadlock (because it needs to take the lock before being
+able to check the state variable indicating that the sampling loop and thread
+should end).
+
+*****************
+Profiler Shutdown
+*****************
+
+See "Profiler Initialization" and do the reverse!
+
+One additional action is handling the optional ``MOZ_PROFILER_SHUTDOWN``
+environment variable, to output a profile if the profiler was running.
+
+***********************
+Multi-Process Profiling
+***********************
+
+All of the above explanations focused on what the profiler is doing in each
+process: Starting, running and collecting samples, markers, and more data,
+outputting JSON profiles, and stopping.
+
+But Firefox is a multi-process program, since
+`Electrolysis aka e10s <https://wiki.mozilla.org/Electrolysis>`_ introduced child
+processes to handle web content and extensions, and especially since
+`Fission <https://wiki.mozilla.org/Project_Fission>`_ forced even parts of the
+same webpage to run in separate processes, mainly for added security. Since then
+Firefox can spawn many processes, sometimes 10 to 20 when visiting busy sites.
+
+The following sections explain how profiling Firefox as a whole works.
+
+IPC (Inter-Process Communication)
+=================================
+
+See https://firefox-source-docs.mozilla.org/ipc/.
+
+As a quick summary, some message-passing function-like declarations live in
+`PProfiler.ipdl <https://searchfox.org/mozilla-central/source/tools/profiler/gecko/PProfiler.ipdl>`_,
+and corresponding ``SendX`` and ``RecvX`` C++ functions are respectively
+generated in
+`PProfilerParent.h <https://searchfox.org/mozilla-central/source/__GENERATED__/ipc/ipdl/_ipdlheaders/mozilla/PProfilerParent.h>`_,
+and virtually declared (for user implementation) in
+`PProfilerChild.h <https://searchfox.org/mozilla-central/source/__GENERATED__/ipc/ipdl/_ipdlheaders/mozilla/PProfilerChild.h>`_.
+
+During Profiling
+================
+
+Exit profiles
+-------------
+
+One IPC message that is not in PProfiler.ipdl, is
+`ShutdownProfile <https://searchfox.org/mozilla-central/search?q=ShutdownProfile%28&path=&case=false&regexp=false>`_
+in
+`PContent.ipdl <https://searchfox.org/mozilla-central/source/dom/ipc/PContent.ipdl>`_.
+
+It's called from
+`ContentChild::ShutdownInternal <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla3dom12ContentChild16ShutdownInternalEv>`_,
+just before a child process ends, and if the profiler was running, to ensure
+that the profile data is collected and sent to the parent, for storage in its
+``ActivePS``.
+
+See
+`ActivePS::AddExitProfile <https://searchfox.org/mozilla-central/search?q=symbol:_ZN8ActivePS14AddExitProfileERK10PSAutoLockRK12nsTSubstringIcE>`_
+for details. Note that the current "buffer position at gathering time" (which is
+effectively the largest ``ProfileBufferBlockIndex`` that is present in the
+global profile buffer) is recorded. Later,
+`ClearExpiredExitProfiles <https://searchfox.org/mozilla-central/search?q=ClearExpiredExitProfiles>`_
+looks at the **smallest** ``ProfileBufferBlockIndex`` still present in the
+buffer (because early chunks may have been discarded to limit memory usage), and
+discards exit profiles that were recorded before, because their data is now
+older than anything stored in the parent.
+
+Profile Buffer Global Memory Control
+------------------------------------
+
+Each process runs its own profiler, each with its own profile chunked buffer. To
+keep the overall memory usage of all these buffers under the user-picked limit,
+processes work together, with the parent process overseeing things.
+
+Diagram showing the relevant classes, see details in the following sub-sections:
+
+.. image:: fissionprofiler-20200424.png
+
+ProfileBufferControlledChunkManager
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+`The ProfileBufferControlledChunkManager interface <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferControlledChunkManager>`_
+allows a controller to get notified about all chunk updates, and to force the
+destruction/recycling of old chunks.
+`The ProfileBufferChunkManagerWithLocalLimit class <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferChunkManagerWithLocalLimit>`_
+implements it.
+
+`An Update object <https://searchfox.org/mozilla-central/search?q=symbol:T_mozilla%3A%3AProfileBufferControlledChunkManager%3A%3AUpdate>`_
+contains all information related to chunk changes: How much memory is currently
+used by the local chunk manager, how much has been "released" (and therefore
+could be destroyed/recycled), and a list of all chunks that were released since
+the previous update; it also has a special state meaning that the child is
+shutting down so there won't be updates anymore. An ``Update`` may be "folded"
+into a previous one, to create a combined update equivalent to the two separate
+ones one after the other.
+
+Update Handling in the ProfilerChild
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When the profiler starts in a child process, the ``ProfilerChild``
+`starts to listen for updates <https://searchfox.org/mozilla-central/search?q=symbol:_ZN7mozilla13ProfilerChild17SetupChunkManagerEv>`_.
+
+These updates are stored and folded into previous ones (if any). At some point,
+`an AwaitNextChunkManagerUpdate message <https://searchfox.org/mozilla-central/search?q=RecvAwaitNextChunkManagerUpdate>`_
+will be received, and any update can be forwarded to the parent. The local
+update is cleared, ready to store future updates.
+
+Update Handling in the ProfilerParent
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When the profiler starts AND when there are child processes, the
+`ProfilerParent's ProfilerParentTracker <https://searchfox.org/mozilla-central/search?q=ProfilerParentTracker>`_
+creates
+`a ProfileBufferGlobalController <https://searchfox.org/mozilla-central/search?q=ProfileBufferGlobalController>`_,
+which starts to listen for updates from the local chunk manager.
+
+The ``ProfilerParentTracker`` is also responsible for keeping track of child
+processes, and to regularly
+`send them AwaitNextChunkManagerUpdate messages <https://searchfox.org/mozilla-central/search?q=SendAwaitNextChunkManagerUpdate>`_,
+that the child's ``ProfilerChild`` answers to with updates. The update may
+indicate that the child is shutting down, in which case the tracker will stop
+tracking it.
+
+All these updates (from the local chunk manager, and from child processes' own
+chunk managers) are processed in
+`ProfileBufferGlobalController::HandleChunkManagerNonFinalUpdate <https://searchfox.org/mozilla-central/search?q=HandleChunkManagerNonFinalUpdate>`_.
+Based on this stream of updates, it is possible to calculate the total memory
+used by all profile buffers in all processes, and to keep track of all chunks
+that have been "released" (i.e., are full, and can be destroyed). When the total
+memory usage reaches the user-selected limit, the controller can look up the
+oldest chunk, and get it destroyed (either a local call for parent chunks, or by
+sending
+`a DestroyReleasedChunksAtOrBefore message <https://searchfox.org/mozilla-central/search?q=DestroyReleasedChunksAtOrBefore>`_
+to the owning child).
+
+Historical note: Prior to Fission, the Profiler used to keep one fixed-size
+circular buffer in each process, but as Fission made the possible number of
+processes unlimited, the memory consumption grew too fast, and required the
+implementation of the above system. But there may still be mentions of
+"circular buffers" in the code or documents; these have effectively been
+replaced by chunked buffers, with centralized chunk control.
+
+Gathering Child Profiles
+========================
+
+When it's time to capture a full profile, the parent process performs its own
+JSON generation (as described above), and sends
+`a GatherProfile message <https://searchfox.org/mozilla-central/search?q=GatherProfile%28>`_
+to all child processes, which will make them generate their JSON profile and
+send it back to the parent.
+
+All child profiles, including the exit profiles collected during profiling, are
+stored as elements of a top-level array with property name "processes".
+
+During the gathering phase, while the parent is waiting for child responses, it
+regularly sends
+`GetGatherProfileProgress messages <https://searchfox.org/mozilla-central/search?q=GetGatherProfileProgress>`_
+to all child processes that have not sent their profile yet, and the parent
+expects responses within a short timeframe. The response carries a progress
+value. If at some point two messages went by with no progress made anywhere
+(either there was no response, or the progress value didn't change), the parent
+assumes that remaining child processes may be frozen indefinitely, stops the
+gathering and considers the JSON generation complete.
+
+During all of the above work, events are logged (especially issues with child
+processes), and are added at the end of the JSON profile, in a top-level object
+with property name "profileGatheringLog". This object is free-form, and is not
+intended to be displayed, or even read by most people. But it may include
+interesting information for advanced users regarding the profile-gathering
+phase.
diff --git a/tools/profiler/docs/fissionprofiler-20200424.png b/tools/profiler/docs/fissionprofiler-20200424.png
new file mode 100644
index 0000000000..1602877a5b
--- /dev/null
+++ b/tools/profiler/docs/fissionprofiler-20200424.png
diff --git a/tools/profiler/docs/fissionprofiler.umlet.uxf b/tools/profiler/docs/fissionprofiler.umlet.uxf
new file mode 100644
index 0000000000..3325294e25
--- /dev/null
+++ b/tools/profiler/docs/fissionprofiler.umlet.uxf
@@ -0,0 +1,546 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<diagram program="umlet" version="14.3.0">
+  <zoom_level>10</zoom_level>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>70</x>
+      <y>110</y>
+      <w>300</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>/PProfilerParent/
+bg=light_gray
+--
+*+SendAwaitNextChunkManagerUpdate()*
+*+SendDestroyReleasedChunksAtOrBefore()*</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>470</x>
+      <y>20</y>
+      <w>210</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>*ProfileBufferChunkMetadata*
+bg=light_gray
+--
++doneTimeStamp
++bufferBytes
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>780</x>
+      <y>110</y>
+      <w>330</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>/PProfilerChild/
+bg=light_gray
+--
+*/+RecvAwaitNextChunkManagerUpdate() = 0/*
+*/+RecvDestroyReleasedChunksAtOrBefore() = 0/*
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>110</x>
+      <y>260</y>
+      <w>220</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>ProfilerParent
+--
+*-processId*
+--
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>210</x>
+      <y>170</y>
+      <w>30</w>
+      <h>110</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;90.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>740</x>
+      <y>250</y>
+      <w>410</w>
+      <h>90</h>
+    </coordinates>
+    <panel_attributes>ProfilerChild
+--
+-UpdateStorage: unreleased bytes, released: {pid, rangeStart[ ]}
+--
+*+RecvAwaitNextChunkUpdate()*
+*+RecvDestroyReleasedChunksAtOrBefore()*
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>930</x>
+      <y>170</y>
+      <w>30</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;80.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>110</x>
+      <y>400</y>
+      <w>220</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>ProfilerParentTracker
+--
+_+Enumerate()_
+_*+ForChild()*_</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>210</x>
+      <y>320</y>
+      <w>190</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;-
+m1=0..n
+nsTArray&lt;ProfilerParent*&gt;</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;80.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>80</x>
+      <y>1070</y>
+      <w>150</w>
+      <h>30</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferChunk</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>380</x>
+      <y>1070</y>
+      <w>210</w>
+      <h>30</h>
+    </coordinates>
+    <panel_attributes>/ProfileBufferChunkManager/</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>180</x>
+      <y>900</y>
+      <w>700</w>
+      <h>50</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferChunkManagerWithLocalLimit
+--
+-mUpdateCallback</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>480</x>
+      <y>940</y>
+      <w>30</w>
+      <h>150</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;130.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>380</x>
+      <y>1200</y>
+      <w>210</w>
+      <h>30</h>
+    </coordinates>
+    <panel_attributes>ProfileChunkedBuffer</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>410</x>
+      <y>1090</y>
+      <w>140</w>
+      <h>130</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+mChunkManager</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;110.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>960</x>
+      <y>1200</y>
+      <w>100</w>
+      <h>30</h>
+    </coordinates>
+    <panel_attributes>CorePS</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>960</x>
+      <y>1040</y>
+      <w>100</w>
+      <h>30</h>
+    </coordinates>
+    <panel_attributes>ActivePS</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>580</x>
+      <y>1200</y>
+      <w>400</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mCoreBuffer</panel_attributes>
+    <additional_attributes>10.0;20.0;380.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>870</x>
+      <y>940</y>
+      <w>250</w>
+      <h>120</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mProfileBufferChunkManager</panel_attributes>
+    <additional_attributes>10.0;10.0;90.0;100.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>830</x>
+      <y>1140</y>
+      <w>100</w>
+      <h>30</h>
+    </coordinates>
+    <panel_attributes>ProfileBuffer</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>920</x>
+      <y>1060</y>
+      <w>130</w>
+      <h>110</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mProfileBuffer</panel_attributes>
+    <additional_attributes>10.0;90.0;40.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>580</x>
+      <y>1160</y>
+      <w>270</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+mEntries</panel_attributes>
+    <additional_attributes>10.0;50.0;250.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>90</x>
+      <y>1090</y>
+      <w>310</w>
+      <h>150</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+m1=0..1
+mCurrentChunk: UniquePtr&lt;&gt;</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;130.0;290.0;130.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>210</x>
+      <y>1080</y>
+      <w>200</w>
+      <h>150</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+m1=0..N
+mNextChunks: UniquePtr&lt;&gt;</panel_attributes>
+    <additional_attributes>20.0;10.0;170.0;130.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>200</x>
+      <y>940</y>
+      <w>230</w>
+      <h>150</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+m1=0..N
+mReleasedChunks: UniquePtr&lt;&gt;</panel_attributes>
+    <additional_attributes>10.0;130.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>530</x>
+      <y>1090</y>
+      <w>270</w>
+      <h>130</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mOwnedChunkManager: UniquePtr&lt;&gt;</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;110.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>480</x>
+      <y>390</y>
+      <w>550</w>
+      <h>150</h>
+    </coordinates>
+    <panel_attributes>*ProfileBufferGlobalController*
+--
+-mMaximumBytes
+-mCurrentUnreleasedBytesTotal
+-mCurrentUnreleasedBytes: {pid, unreleased bytes}[ ] sorted by pid
+-mCurrentReleasedBytes
+-mReleasedChunks: {doneTimeStamp, bytes, pid}[ ] sorted by timestamp
+-mDestructionCallback: function&lt;void(pid, rangeStart)&gt;
+--
++Update(pid, unreleased bytes, released: ProfileBufferChunkMetadata[ ])</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>320</x>
+      <y>420</y>
+      <w>180</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mController</panel_attributes>
+    <additional_attributes>160.0;20.0;10.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>20</x>
+      <y>400</y>
+      <w>110</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+_sInstance_</panel_attributes>
+    <additional_attributes>90.0;60.0;10.0;60.0;10.0;10.0;90.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLNote</id>
+    <coordinates>
+      <x>480</x>
+      <y>250</y>
+      <w>220</w>
+      <h>120</h>
+    </coordinates>
+    <panel_attributes>The controller is only needed
+if there *are* child processes,
+so we can create it with the first
+child (at which point the tracker
+can register itself with the local
+profiler), and destroy it with the
+last child.
+bg=blue</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>690</x>
+      <y>330</y>
+      <w>100</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes/>
+    <additional_attributes>10.0;10.0;80.0;60.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>130</x>
+      <y>460</y>
+      <w>200</w>
+      <h>380</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+mParentChunkManager</panel_attributes>
+    <additional_attributes>180.0;360.0;10.0;360.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>740</x>
+      <y>330</y>
+      <w>350</w>
+      <h>510</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+mLocalBufferChunkManager</panel_attributes>
+    <additional_attributes>10.0;490.0;330.0;490.0;330.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>470</x>
+      <y>650</y>
+      <w>400</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>*ProfileBufferControlledChunkManager::Update*
+--
+-mUnreleasedBytes
+-mReleasedBytes
+-mOldestDoneTimeStamp
+-mNewReleasedChunks: ChunkMetadata[ ]</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>470</x>
+      <y>560</y>
+      <w>400</w>
+      <h>60</h>
+    </coordinates>
+    <panel_attributes>*ProfileBufferControlledChunkManager::ChunkMetadata*
+--
+-mDoneTimeStamp
+-mBufferBytes</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>670</x>
+      <y>610</y>
+      <w>30</w>
+      <h>60</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;40.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>670</x>
+      <y>740</y>
+      <w>30</w>
+      <h>60</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;40.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>670</x>
+      <y>50</y>
+      <w>130</w>
+      <h>110</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.</panel_attributes>
+    <additional_attributes>10.0;10.0;110.0;90.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>360</x>
+      <y>50</y>
+      <w>130</w>
+      <h>110</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.</panel_attributes>
+    <additional_attributes>110.0;10.0;10.0;90.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>400</x>
+      <y>130</y>
+      <w>350</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>*ProfileBufferChunkManagerUpdate*
+bg=light_gray
+--
+-unreleasedBytes
+-releasedBytes
+-oldestDoneTimeStamp
+-newlyReleasedChunks: ProfileBufferChunkMetadata[ ]</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>310</x>
+      <y>780</y>
+      <w>440</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>*ProfileBufferControlledChunkManager*
+--
+*/+SetUpdateCallback(function&lt;void(update: Update&amp;&amp;)&gt;)/*
+*/+DestroyChunksAtOrBefore(timeStamp)/*</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>480</x>
+      <y>840</y>
+      <w>30</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;60.0</additional_attributes>
+  </element>
+</diagram>
diff --git a/tools/profiler/docs/index.rst b/tools/profiler/docs/index.rst
new file mode 100644
index 0000000000..53920e7d2f
--- /dev/null
+++ b/tools/profiler/docs/index.rst
@@ -0,0 +1,37 @@
+Gecko Profiler
+==============
+
+The Firefox Profiler is the collection of tools used to profile Firefox. This is backed
+by the Gecko Profiler, which is the primarily C++ component that instruments Gecko. It
+is configurable, and supports a variety of data sources and recording modes. Primarily,
+it is used as a statistical profiler, where the execution of threads that have been
+registered with the profiler is paused, and a sample is taken. Generally, this includes
+a stackwalk with combined native stack frames, JavaScript stack frames, and custom stack
+frame labels.
+
+In addition to the sampling, the profiler can collect markers, which are collected
+deterministically (as opposed to statistically, like samples). These include some
+kind of text description, and optionally a payload with more information.
+
+This documentation serves to document the Gecko Profiler and Base Profiler components,
+while the profiler.firefox.com interface is documented at `profiler.firefox.com/docs/ <https://profiler.firefox.com/docs/>`_.
+
+.. toctree::
+   :maxdepth: 1
+
+   code-overview
+   buffer
+   instrumenting-javascript
+   instrumenting-rust
+   markers-guide
+   memory
+
+The following areas still need documentation:
+
+ * LUL
+ * Instrumenting Java
+ * Registering Threads
+ * Samples and Stack Walking
+ * Triggering Gecko Profiles in Automation
+ * JS Tracer
+ * Serialization
diff --git a/tools/profiler/docs/instrumenting-javascript.rst b/tools/profiler/docs/instrumenting-javascript.rst
new file mode 100644
index 0000000000..928d94781e
--- /dev/null
+++ b/tools/profiler/docs/instrumenting-javascript.rst
@@ -0,0 +1,60 @@
+Instrumenting JavaScript
+========================
+
+There are multiple ways to use the profiler with JavaScript. There is the "JavaScript"
+profiler feature (via about:profiling), which enables stack walking for JavaScript code.
+This is most likely turned on already for every profiler preset.
+
+In addition, markers can be created to specifically mark an instant in time, or a
+duration. This can be helpful to make sense of a particular piece of the front-end,
+or record events that normally wouldn't show up in samples.
+
+.. note::
+    This guide explains JavaScript markers in depth. To learn more about how to add a
+    marker in C++ or Rust, please take a look at their documentation
+    in :doc:`markers-guide` or :doc:`instrumenting-rust` respectively.
+
+Markers in Browser Chrome
+*************************
+
+If you have access to ChromeUtils, then adding a marker is relatively easy.
+
+.. code-block:: javascript
+
+  // Add an instant marker, representing a single point in time
+  ChromeUtils.addProfilerMarker("MarkerName");
+
+  // Add a duration marker, representing a span of time.
+  const startTime = Cu.now();
+  doWork();
+  ChromeUtils.addProfilerMarker("MarkerName", startTime);
+
+  // Add a duration marker, representing a span of time, with some additional text
+  const startTime = Cu.now();
+  doWork();
+  ChromeUtils.addProfilerMarker("MarkerName", startTime, "Details about this event");
+
+  // Add an instant marker, with some additional text
+  const startTime = Cu.now();
+  doWork();
+  ChromeUtils.addProfilerMarker("MarkerName", undefined, "Details about this event");
+
+Markers in Content Code
+***********************
+
+If instrumenting content code, then the `UserTiming`_ API is the best bet.
+:code:`performance.mark` will create an instant marker, and a :code:`performance.measure`
+will create a duration marker. These markers will show up under UserTiming in
+the profiler UI.
+
+.. code-block:: javascript
+
+  // Create an instant marker.
+  performance.mark("InstantMarkerName");
+
+  doWork();
+
+  // Measuring with the performance API will also create duration markers.
+  performance.measure("DurationMarkerName", "InstantMarkerName");
+
+.. _UserTiming: https://developer.mozilla.org/en-US/docs/Web/API/User_Timing_API
diff --git a/tools/profiler/docs/instrumenting-rust.rst b/tools/profiler/docs/instrumenting-rust.rst
new file mode 100644
index 0000000000..0c5021eec1
--- /dev/null
+++ b/tools/profiler/docs/instrumenting-rust.rst
@@ -0,0 +1,433 @@
+Instrumenting Rust
+==================
+
+There are multiple ways to use the profiler with Rust. Native stack sampling already
+includes the Rust frames without special handling. There is the "Native Stacks"
+profiler feature (via about:profiling), which enables stack walking for native code.
+This is most likely turned on already for all profiler presets.
+
+In addition to that, there is a profiler Rust API to instrument the Rust code
+and add more information to the profile data. There are three main functionalities
+to use:
+
+1. Register Rust threads with the profiler, so the profiler can record these threads.
+2. Add stack frame labels to annotate and categorize a part of the stack.
+3. Add markers to specifically mark instants in time, or durations. This can be
+   helpful to make sense of a particular piece of the code, or record events that
+   normally wouldn't show up in samples.
+
+Crate to Include as a Dependency
+--------------------------------
+
+Profiler Rust API is located inside the ``gecko-profiler`` crate. This needs to
+be included in the project dependencies before the following functionalities can
+be used.
+
+To be able to include it, a new dependency entry needs to be added to the project's
+``Cargo.toml`` file like this:
+
+.. code-block:: toml
+
+    [dependencies]
+    gecko-profiler = { path = "../../tools/profiler/rust-api" }
+
+Note that the relative path needs to be updated depending on the project's location
+in mozilla-central.
+
+Registering Threads
+-------------------
+
+To be able to see the threads in the profile data, they need to be registered
+with the profiler. Also, they need to be unregistered when they are exiting.
+It's important to give a unique name to the thread, so they can be filtered easily.
+
+Registering and unregistering a thread is straightforward:
+
+.. code-block:: rust
+
+    // Register it with a given name.
+    gecko_profiler::register_thread("Thread Name");
+    // After doing some work, and right before exiting the thread, unregister it.
+    gecko_profiler::unregister_thread();
+
+For example, here's how to register and unregister a simple thread:
+
+.. code-block:: rust
+
+    let thread_name = "New Thread";
+    std::thread::Builder::new()
+        .name(thread_name.into())
+        .spawn(move || {
+            gecko_profiler::register_thread(thread_name);
+            // DO SOME WORK
+            gecko_profiler::unregister_thread();
+        })
+        .unwrap();
+
+Or with a thread pool:
+
+.. code-block:: rust
+
+    let worker = rayon::ThreadPoolBuilder::new()
+        .thread_name(move |idx| format!("Worker#{}", idx))
+        .start_handler(move |idx| {
+            gecko_profiler::register_thread(&format!("Worker#{}", idx));
+        })
+        .exit_handler(|_idx| {
+            gecko_profiler::unregister_thread();
+        })
+        .build();
+
+.. note::
+    Registering a thread alone will not make it appear in the profile data. In
+    addition, it needs to be added to the "Threads" filter in about:profiling.
+    This filter input is a comma-separated list. It matches partial names and
+    supports the wildcard ``*``.
+
+Adding Stack Frame Labels
+-------------------------
+
+Stack frame labels are useful for annotating a part of the call stack with a
+category. The category will appear in the various places on the Firefox Profiler
+analysis page like timeline, call tree tab, flame graph tab, etc.
+
+``gecko_profiler_label!`` macro is used to add a new label frame. The added label
+frame will exist between the call of this macro and the end of the current scope.
+
+Adding a stack frame label:
+
+.. code-block:: rust
+
+    // Marking the stack as "Layout" category, no subcategory provided.
+    gecko_profiler_label!(Layout);
+    // Marking the stack as "JavaScript" category and "Parsing" subcategory.
+    gecko_profiler_label!(JavaScript, Parsing);
+
+    // Or the entire function scope can be marked with a procedural macro. This is
+    // essentially syntactic sugar and it expands into a function with a
+    // gecko_profiler_label! call at the very start:
+    #[gecko_profiler_fn_label(DOM)]
+    fn foo(bar: u32) -> u32 {
+        bar
+    }
+
+See the list of all profiling categories in the `profiling_categories.yaml`_ file.
+
+Adding Markers
+--------------
+
+Markers are packets of arbitrary data that are added to a profile by the Firefox code,
+usually to indicate something important happening at a point in time, or during an interval of time.
+
+Each marker has a name, a category, some common optional information (timing, backtrace, etc.),
+and an optional payload of a specific type (containing arbitrary data relevant to that type).
+
+.. note::
+    This guide explains Rust markers in depth. To learn more about how to add a
+    marker in C++ or JavaScript, please take a look at their documentation
+    in :doc:`markers-guide` or :doc:`instrumenting-javascript` respectively.
+
+Examples
+^^^^^^^^
+
+Short examples, details are below.
+
+.. code-block:: rust
+
+    // Record a simple marker with the category of Graphics, DisplayListBuilding.
+    gecko_profiler::add_untyped_marker(
+        // Name of the marker as a string.
+        "Marker Name",
+        // Category with an optional sub-category.
+        gecko_profiler_category!(Graphics, DisplayListBuilding),
+        // MarkerOptions that keeps options like marker timing and marker stack.
+        // It will be a point in time by default.
+        Default::default(),
+    );
+
+.. code-block:: rust
+
+    // Create a marker with some additional text information.
+    let info = "info about this marker";
+    gecko_profiler::add_text_marker(
+        // Name of the marker as a string.
+        "Marker Name",
+        // Category with an optional sub-category.
+        gecko_profiler_category!(DOM),
+        // MarkerOptions that keeps options like marker timing and marker stack.
+        MarkerOptions {
+            timing: MarkerTiming::instant_now(),
+            ..Default::default()
+        },
+        // Additional information as a string.
+        info,
+    );
+
+.. code-block:: rust
+
+    // Record a custom marker of type `ExampleNumberMarker` (see definition below).
+    gecko_profiler::add_marker(
+        // Name of the marker as a string.
+        "Marker Name",
+        // Category with an optional sub-category.
+        gecko_profiler_category!(Graphics, DisplayListBuilding),
+        // MarkerOptions that keeps options like marker timing and marker stack.
+        Default::default(),
+        // Marker payload.
+        ExampleNumberMarker { number: 5 },
+    );
+
+    ....
+
+    // Marker type definition. It needs to derive Serialize, Deserialize.
+    #[derive(Serialize, Deserialize, Debug)]
+    pub struct ExampleNumberMarker {
+        number: i32,
+    }
+
+    // Marker payload needs to implement the ProfilerMarker trait.
+    impl gecko_profiler::ProfilerMarker for ExampleNumberMarker {
+        // Unique marker type name.
+        fn marker_type_name() -> &'static str {
+            "example number"
+        }
+        // Data specific to this marker type, serialized to JSON for profiler.firefox.com.
+        fn stream_json_marker_data(&self, json_writer: &mut gecko_profiler::JSONWriter) {
+            json_writer.int_property("number", self.number.into());
+        }
+        // Where and how to display the marker and its data.
+        fn marker_type_display() -> gecko_profiler::MarkerSchema {
+            use gecko_profiler::marker::schema::*;
+            let mut schema = MarkerSchema::new(&[Location::MarkerChart]);
+            schema.set_chart_label("Name: {marker.name}");
+            schema.add_key_label_format("number", "Number", Format::Integer);
+            schema
+        }
+    }
+
+Untyped Markers
+^^^^^^^^^^^^^^^
+
+Untyped markers don't carry any information apart from common marker data:
+Name, category, options.
+
+.. code-block:: rust
+
+    gecko_profiler::add_untyped_marker(
+        // Name of the marker as a string.
+        "Marker Name",
+        // Category with an optional sub-category.
+        gecko_profiler_category!(Graphics, DisplayListBuilding),
+        // MarkerOptions that keeps options like marker timing and marker stack.
+        MarkerOptions {
+            timing: MarkerTiming::instant_now(),
+            ..Default::default()
+        },
+    );
+
+1. Marker name
+    The first argument is the name of this marker. This will be displayed in most places
+    the marker is shown. It can be a literal string, or any dynamic string.
+2. `Profiling category pair`_
+    A category + subcategory pair from `the list of categories`_.
+    ``gecko_profiler_category!`` macro should be used to create a profiling category
+    pair since it's easier to use, e.g. ``gecko_profiler_category!(JavaScript, Parsing)``.
+    Second parameter can be omitted to use the default subcategory directly.
+    Using the ``gecko_profiler_category!`` macro is encouraged, but the ``ProfilingCategoryPair``
+    enum can also be used if needed.
+3. `MarkerOptions`_
+    See the options below. If there are no options to set, pass ``Default::default()``.
+    Some options can also be omitted, ``MarkerOptions {<options>, ..Default::default()}``,
+    with one or more of the following options types:
+
+    * `MarkerTiming`_
+        This specifies an instant or interval of time. It defaults to the current instant if
+        left unspecified. Otherwise use ``MarkerTiming::instant_at(ProfilerTime)`` or
+        ``MarkerTiming::interval(pt1, pt2)``; timestamps are usually captured with
+        ``ProfilerTime::now()``. It is also possible to record only the start or the end of an
+        interval, pairs of start/end markers will be matched by their name.
+    * `MarkerStack`_
+        By default, markers do not record a "stack" (or "backtrace"). To record a stack at
+        this point, in the most efficient manner, specify ``MarkerStack::Full``. To
+        capture a stack without native frames for reduced overhead, specify
+        ``MarkerStack::NonNative``.
+
+    *Note: Currently, not all C++ marker options are present on the Rust side. They will
+    be added in the future.*
+
+Text Markers
+^^^^^^^^^^^^
+
+Text markers are very common, they carry an extra text as a fourth argument, in addition to
+the marker name. Use the following function:
+
+.. code-block:: rust
+
+    let info = "info about this marker";
+    gecko_profiler::add_text_marker(
+        // Name of the marker as a string.
+        "Marker Name",
+        // Category with an optional sub-category.
+        gecko_profiler_category!(DOM),
+        // MarkerOptions that keeps options like marker timing and marker stack.
+        MarkerOptions {
+            stack: MarkerStack::Full,
+            ..Default::default()
+        },
+        // Additional information as a string.
+        info,
+    );
+
+As useful as it is, using an expensive ``format!`` operation to generate a complex text
+comes with a variety of issues. It can leak potentially sensitive information
+such as URLs during the profile sharing step. profiler.firefox.com cannot
+access the information programmatically. It won't get the formatting benefits of the
+built-in marker schema. Please consider using a custom marker type to separate and
+better present the data.
+
+Other Typed Markers
+^^^^^^^^^^^^^^^^^^^
+
+From Rust code, a marker of some type ``YourMarker`` (details about type definition follow) can be
+recorded like this:
+
+.. code-block:: rust
+
+    gecko_profiler::add_marker(
+        // Name of the marker as a string.
+        "Marker Name",
+        // Category with an optional sub-category.
+        gecko_profiler_category!(JavaScript),
+        // MarkerOptions that keeps options like marker timing and marker stack.
+        Default::default(),
+        // Marker payload.
+        YourMarker { number: 5, text: "some string".to_string() },
+    );
+
+After the first three common arguments (like in ``gecko_profiler::add_untyped_marker``),
+there is a marker payload struct and it needs to be defined. Let's take a look at
+how to define it.
+
+How to Define New Marker Types
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Each marker type must be defined once and only once.
+The definition is a Rust ``struct``, it's constructed when recording markers of
+that type in Rust. Each marker struct holds the data that is required for them
+to show in the profiler.firefox.com.
+By convention, the suffix "Marker" is recommended to better distinguish them
+from non-profiler entities in the source.
+
+Each marker payload must derive ``serde::Serialize`` and ``serde::Deserialize``.
+They are also exported from ``gecko-profiler`` crate if a project doesn't have it.
+Each marker payload should include its data as its fields like this:
+
+.. code-block:: rust
+
+    #[derive(Serialize, Deserialize, Debug)]
+    pub struct YourMarker {
+        number: i32,
+        text: String,
+    }
+
+Each marker struct must also implement the `ProfilerMarker`_ trait.
+
+``ProfilerMarker`` trait
+************************
+
+`ProfilerMarker`_ trait must be implemented for all marker types. Its methods are
+similar to C++ counterparts, please refer to :ref:`the C++ markers guide to learn
+more about them <how-to-define-new-marker-types>`. It includes three methods that
+need to be implemented:
+
+1. ``marker_type_name() -> &'static str``:
+    A marker type must have a unique name, it is used to keep track of the type of
+    markers in the profiler storage, and to identify them uniquely on profiler.firefox.com.
+    (It does not need to be the same as the struct's name.)
+
+    E.g.:
+
+    .. code-block:: rust
+
+        fn marker_type_name() -> &'static str {
+            "your marker type"
+        }
+
+2. ``stream_json_marker_data(&self, json_writer: &mut JSONWriter)``
+    All markers of any type have some common data: A name, a category, options like
+    timing, etc. as previously explained.
+
+    In addition, a certain marker type may carry zero or more arbitrary pieces of
+    information, and they are always the same for all markers of that type.
+
+    These are defined in a special member function ``stream_json_marker_data``.
+
+    It's a member method and takes a ``&mut JSONWriter`` as a parameter,
+    it will be used to stream the data as JSON, to later be read by
+    profiler.firefox.com. See `JSONWriter object and its methods`_.
+
+    E.g.:
+
+    .. code-block:: rust
+
+        fn stream_json_marker_data(&self, json_writer: &mut JSONWriter) {
+            json_writer.int_property("number", self.number.into());
+            json_writer.string_property("text", &self.text);
+        }
+
+3. ``marker_type_display() -> schema::MarkerSchema``
+    Now that how to stream type-specific data (from Firefox to
+    profiler.firefox.com) is defined, it needs to be described where and how this
+    data will be displayed on profiler.firefox.com.
+
+    The static member function ``marker_type_display`` returns an opaque ``MarkerSchema``
+    object, which will be forwarded to profiler.firefox.com.
+
+    See the `MarkerSchema::Location enumeration for the full list`_. Also see the
+    `MarkerSchema struct for its possible methods`_.
+
+    E.g.:
+
+    .. code-block:: rust
+
+        fn marker_type_display() -> schema::MarkerSchema {
+            // Import MarkerSchema related types for easier use.
+            use crate::marker::schema::*;
+            // Create a MarkerSchema struct with a list of locations provided.
+            // One or more constructor arguments determine where this marker will be displayed in
+            // the profiler.firefox.com UI.
+            let mut schema = MarkerSchema::new(&[Location::MarkerChart]);
+
+            // Some labels can optionally be specified, to display certain information in different
+            // locations: set_chart_label, set_tooltip_label, and set_table_label; or
+            // set_all_labels to define all of them the same way.
+            schema.set_all_labels("{marker.name} - {marker.data.number}");
+
+            // Next, define the main display of marker data, which will appear in the Marker Chart
+            // tooltips and the Marker Table sidebar.
+            schema.add_key_label_format("number", "Number", Format::Number);
+            schema.add_key_label_format("text", "Text", Format::String);
+            schema.add_static_label_value("Help", "This is my own marker type");
+
+            // Lastly, return the created schema.
+            schema
+        }
+
+    Note that the strings in ``set_all_labels`` may refer to marker data within braces:
+
+    * ``{marker.name}``: Marker name.
+    * ``{marker.data.X}``: Type-specific data, as streamed with property name "X"
+      from ``stream_json_marker_data``.
+
+    :ref:`See the C++ markers guide for more details about it <marker-type-display-schema>`.
+
+.. _profiling_categories.yaml: https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/build/profiling_categories.yaml
+.. _Profiling category pair: https://searchfox.org/mozilla-central/define?q=gecko_profiler::gecko_bindings::profiling_categories::ProfilingCategoryPair
+.. _the list of categories: https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/build/profiling_categories.yaml
+.. _MarkerOptions: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::options::MarkerOptions
+.. _MarkerTiming: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::options::MarkerTiming
+.. _MarkerStack: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::options::MarkerStack
+.. _ProfilerMarker: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::ProfilerMarker
+.. _MarkerSchema::Location enumeration for the full list: https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerSchema%3A%3ALocation
+.. _JSONWriter object and its methods: https://searchfox.org/mozilla-central/define?q=gecko_profiler::json_writer::JSONWriter
+.. _MarkerSchema struct for its possible methods: https://searchfox.org/mozilla-central/define?q=gecko_profiler::marker::schema::MarkerSchema
diff --git a/tools/profiler/docs/markers-guide.rst b/tools/profiler/docs/markers-guide.rst
new file mode 100644
index 0000000000..82fe6f3cda
--- /dev/null
+++ b/tools/profiler/docs/markers-guide.rst
@@ -0,0 +1,485 @@
+Markers
+=======
+
+Markers are packets of arbitrary data that are added to a profile by the Firefox code, usually to
+indicate something important happening at a point in time, or during an interval of time.
+
+Each marker has a name, a category, some common optional information (timing, backtrace, etc.),
+and an optional payload of a specific type (containing arbitrary data relevant to that type).
+
+.. note::
+    This guide explains C++ markers in depth. To learn more about how to add a
+    marker in JavaScript or Rust, please take a look at their documentation
+    in :doc:`instrumenting-javascript` or :doc:`instrumenting-rust` respectively.
+
+Example
+-------
+
+Short example, details below.
+
+Note: Most marker-related identifiers are in the ``mozilla`` namespace, to be added where necessary.
+
+.. code-block:: c++
+
+    // Record a simple marker with the category of DOM.
+    PROFILER_MARKER_UNTYPED("Marker Name", DOM);
+
+    // Create a marker with some additional text information. (Be wary of printf!)
+    PROFILER_MARKER_TEXT("Marker Name", JS, MarkerOptions{}, "Additional text information.");
+
+    // Record a custom marker of type `ExampleNumberMarker` (see definition below).
+    PROFILER_MARKER("Number", OTHER, MarkerOptions{}, ExampleNumberMarker, 42);
+
+.. code-block:: c++
+
+    // Marker type definition.
+    struct ExampleNumberMarker {
+      // Unique marker type name.
+      static constexpr Span<const char> MarkerTypeName() { return MakeStringSpan("number"); }
+      // Data specific to this marker type, serialized to JSON for profiler.firefox.com.
+      static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter, int aNumber) {
+        aWriter.IntProperty("number", aNumber);
+      }
+      // Where and how to display the marker and its data.
+      static MarkerSchema MarkerTypeDisplay() {
+        using MS = MarkerSchema;
+        MS schema(MS::Location::MarkerChart, MS::Location::MarkerTable);
+        schema.SetChartLabel("Number: {marker.data.number}");
+        schema.AddKeyLabelFormat("number", "Number", MS::Format::Number);
+        return schema;
+      }
+    };
+
+
+How to Record Markers
+---------------------
+
+Header to Include
+^^^^^^^^^^^^^^^^^
+
+If the compilation unit only defines and records untyped, text, and/or its own markers, include
+`the main profiler markers header <https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerMarkers.h>`_:
+
+.. code-block:: c++
+
+    #include "mozilla/ProfilerMarkers.h"
+
+If it also records one of the other common markers defined in
+`ProfilerMarkerTypes.h <https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerMarkerTypes.h>`_,
+include that one instead:
+
+.. code-block:: c++
+
+    #include "mozilla/ProfilerMarkerTypes.h"
+
+And if it uses any other profiler functions (e.g., labels), use
+`the main Gecko Profiler header <https://searchfox.org/mozilla-central/source/tools/profiler/public/GeckoProfiler.h>`_
+instead:
+
+.. code-block:: c++
+
+    #include "GeckoProfiler.h"
+
+The above works from source files that end up in libxul, which is true for the majority
+of Firefox source code. But some files live outside of libxul, such as mfbt, in which
+case the advice is the same but the equivalent headers are from the Base Profiler instead:
+
+.. code-block:: c++
+
+    #include "mozilla/BaseProfilerMarkers.h" // Only own/untyped/text markers
+    #include "mozilla/BaseProfilerMarkerTypes.h" // Only common markers
+    #include "BaseProfiler.h" // Markers and other profiler functions
+
+Untyped Markers
+^^^^^^^^^^^^^^^
+
+Untyped markers don't carry any information apart from common marker data:
+Name, category, options.
+
+.. code-block:: c++
+
+    PROFILER_MARKER_UNTYPED(
+        // Name, and category pair.
+        "Marker Name", OTHER,
+        // Marker options, may be omitted if all defaults are acceptable.
+        MarkerOptions(MarkerStack::Capture(), ...));
+
+``PROFILER_MARKER_UNTYPED`` is a macro that simplifies the use of the main
+``profiler_add_marker`` function, by adding the appropriate namespaces, and a surrounding
+``#ifdef MOZ_GECKO_PROFILER`` guard.
+
+1. Marker name
+    The first argument is the name of this marker. This will be displayed in most places
+    the marker is shown. It can be a literal C string, or any dynamic string object.
+2. `Category pair name <https://searchfox.org/mozilla-central/define?q=M_174bb0de187ee7d9>`_
+    Choose a category + subcategory from `the list of categories <https://searchfox.org/mozilla-central/define?q=M_174bb0de187ee7d9>`_.
+    This is the second parameter of each ``SUBCATEGORY`` line, for instance ``LAYOUT_Reflow``.
+    (Internally, this is really a `MarkerCategory <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerCategory>`_
+    object, in case you need to construct it elsewhere.)
+3. `MarkerOptions <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerOptions>`_
+    See the options below. It can be omitted if there are no other arguments, ``{}``, or
+    ``MarkerOptions()`` (no specified options); only one of the following option types
+    alone; or ``MarkerOptions(...)`` with one or more of the following options types:
+
+    * `MarkerThreadId <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerThreadId>`_
+        Rarely used, as it defaults to the current thread. Otherwise it specifies the target
+        "thread id" (aka "track") where the marker should appear; This may be useful when
+        referring to something that happened on another thread (use ``profiler_current_thread_id()``
+        from the original thread to get its id); or for some important markers, they may be
+        sent to the "main thread", which can be specified with ``MarkerThreadId::MainThread()``.
+    * `MarkerTiming <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerTiming>`_
+        This specifies an instant or interval of time. It defaults to the current instant if
+        left unspecified. Otherwise use ``MarkerTiming::InstantAt(timestamp)`` or
+        ``MarkerTiming::Interval(ts1, ts2)``; timestamps are usually captured with
+        ``TimeStamp::Now()``. It is also possible to record only the start or the end of an
+        interval, pairs of start/end markers will be matched by their name. *Note: The
+        upcoming "marker sets" feature will make this pairing more reliable, and also
+        allow more than two markers to be connected*.
+    * `MarkerStack <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerStack>`_
+        By default, markers do not record a "stack" (or "backtrace"). To record a stack at
+        this point, in the most efficient manner, specify ``MarkerStack::Capture()``. To
+        record a previously captured stack, first store a stack into a
+        ``UniquePtr<ProfileChunkedBuffer>`` with ``profiler_capture_backtrace()``, then pass
+        it to the marker with ``MarkerStack::TakeBacktrace(std::move(stack))``.
+    * `MarkerInnerWindowId <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerInnerWindowId>`_
+        If you have access to an "inner window id", consider specifying it as an option, to
+        help profiler.firefox.com to classify them by tab.
+
+Text Markers
+^^^^^^^^^^^^
+
+Text markers are very common, they carry an extra text as a fourth argument, in addition to
+the marker name. Use the following macro:
+
+.. code-block:: c++
+
+    PROFILER_MARKER_TEXT(
+        // Name, category pair, options.
+        "Marker Name", OTHER, {},
+        // Text string.
+        "Here are some more details."
+    );
+
+As useful as it is, using an expensive ``printf`` operation to generate a complex text
+comes with a variety of issues. It can leak potentially sensitive information
+such as URLs during the profile sharing step. profiler.firefox.com cannot
+access the information programmatically. It won't get the formatting benefits of the
+built-in marker schema. Please consider using a custom marker type to separate and
+better present the data.
+
+Other Typed Markers
+^^^^^^^^^^^^^^^^^^^
+
+From C++ code, a marker of some type ``YourMarker`` (details about type definition follow) can be
+recorded like this:
+
+.. code-block:: c++
+
+    PROFILER_MARKER(
+        "YourMarker name", OTHER,
+        MarkerOptions(MarkerTiming::IntervalUntilNowFrom(someStartTimestamp),
+                      MarkerInnerWindowId(innerWindowId)),
+        YourMarker, "some string", 12345, "http://example.com", someTimeStamp);
+
+After the first three common arguments (like in ``PROFILER_MARKER_UNTYPED``), there are:
+
+4. The marker type, which is the name of the C++ ``struct`` that defines that type.
+5. A variadic list of type-specific arguments. They must match the number of, and must
+   be convertible to, ``StreamJSONMarkerData`` parameters as specified in the marker type definition.
+
+"Auto" Scoped Interval Markers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To capture time intervals around some important operations, it is common to store a timestamp, do the work,
+and then record a marker, e.g.:
+
+.. code-block:: c++
+
+    void DoTimedWork() {
+      TimeStamp start = TimeStamp::Now();
+      DoWork();
+      PROFILER_MARKER_TEXT("Timed work", OTHER, MarkerTiming::IntervalUntilNowFrom(start), "Details");
+    }
+
+`RAII <https://en.cppreference.com/w/cpp/language/raii>`_ objects automate this, by recording the time
+when the object is constructed, and later recording the marker when the object is destroyed at the end
+of its C++ scope.
+This is especially useful if there are multiple scope exit points.
+
+``AUTO_PROFILER_MARKER_TEXT`` is `the only one implemented <https://searchfox.org/mozilla-central/define?q=M_ac7b392646edf5a5>`_ at this time.
+
+.. code-block:: c++
+
+    void MaybeDoTimedWork(bool aDoIt) {
+      AUTO_PROFILER_MARKER_TEXT("Timed work", OTHER, "Details");
+      if (!aDoIt) { /* Marker recorded here... */ return; }
+      DoWork();
+      /* ... or here. */
+    }
+
+Note that these RAII objects only record one marker. In some situations, a very long
+operation could be missed if it hasn't completed by the end of the profiling session.
+In this case, consider recording two distinct markers, using
+``MarkerTiming::IntervalStart()`` and ``MarkerTiming::IntervalEnd()``.
+
+Where to Define New Marker Types
+--------------------------------
+
+The first step is to determine the location of the marker type definition:
+
+* If this type is only used in one function, or a component, it can be defined in a
+  local common place relative to its use.
+* For a more common type that could be used from multiple locations:
+
+  * If there is no dependency on XUL, it can be defined in the Base Profiler, which can
+    be used in most locations in the codebase:
+    `mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h <https://searchfox.org/mozilla-central/source/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h>`__
+
+  * However, if there is a XUL dependency, then it needs to be defined in the Gecko Profiler:
+    `tools/profiler/public/ProfilerMarkerTypes.h <https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerMarkerTypes.h>`__
+
+.. _how-to-define-new-marker-types:
+
+How to Define New Marker Types
+------------------------------
+
+Each marker type must be defined once and only once.
+The definition is a C++ ``struct``, its identifier is used when recording
+markers of that type in C++.
+By convention, the suffix "Marker" is recommended to better distinguish them
+from non-profiler entities in the source.
+
+.. code-block:: c++
+
+    struct YourMarker {
+
+Marker Type Name
+^^^^^^^^^^^^^^^^
+
+A marker type must have a unique name, it is used to keep track of the type of
+markers in the profiler storage, and to identify them uniquely on profiler.firefox.com.
+(It does not need to be the same as the ``struct``'s name.)
+
+This name is defined in a special static member function ``MarkerTypeName``:
+
+.. code-block:: c++
+
+    // …
+      static constexpr Span<const char> MarkerTypeName() {
+        return MakeStringSpan("YourMarker");
+      }
+
+Marker Type Data
+^^^^^^^^^^^^^^^^
+
+All markers of any type have some common data: A name, a category, options like
+timing, etc. as previously explained.
+
+In addition, a certain marker type may carry zero or more arbitrary pieces of
+information, and they are always the same for all markers of that type.
+
+These are defined in a special static member function ``StreamJSONMarkerData``.
+
+The first function parameter is always ``SpliceableJSONWriter& aWriter``,
+it will be used to stream the data as JSON, to later be read by
+profiler.firefox.com.
+
+.. code-block:: c++
+
+    // …
+      static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+
+The following function parameters are how the data is received as C++ objects
+from the call sites.
+
+* Most C/C++ `POD (Plain Old Data) <https://en.cppreference.com/w/cpp/named_req/PODType>`_
+  and `trivially-copyable <https://en.cppreference.com/w/cpp/named_req/TriviallyCopyable>`_
+  types should work as-is, including ``TimeStamp``.
+* Character strings should be passed using ``const ProfilerString8View&`` (this handles
+  literal strings, and various ``std::string`` and ``nsCString`` types, and spans with or
+  without null terminator). Use ``const ProfilerString16View&`` for 16-bit strings such as
+  ``nsString``.
+* Other types can be used if they define specializations for ``ProfileBufferEntryWriter::Serializer``
+  and ``ProfileBufferEntryReader::Deserializer``. You should rarely need to define new
+  ones, but if needed see how existing specializations are written, or contact the
+  `perf-tools team for help <https://chat.mozilla.org/#/room/#profiler:mozilla.org>`_.
+
+Passing by value or by reference-to-const is recommended, because arguments are serialized
+in binary form (i.e., there are no optimizable ``move`` operations).
+
+For example, here's how to handle a string, a 64-bit number, another string, and
+a timestamp:
+
+.. code-block:: c++
+
+    // …
+                                       const ProfilerString8View& aString,
+                                       const int64_t aBytes,
+                                       const ProfilerString8View& aURL,
+                                       const TimeStamp& aTime) {
+
+Then the body of the function turns these parameters into a JSON stream.
+
+When this function is called, the writer has just started a JSON object, so
+everything that is written should be a named object property. Use
+``SpliceableJSONWriter`` functions, in most cases ``...Property`` functions
+from its parent class ``JSONWriter``: ``NullProperty``, ``BoolProperty``,
+``IntProperty``, ``DoubleProperty``, ``StringProperty``. (Other nested JSON
+types like arrays or objects are not supported by the profiler.)
+
+As a special case, ``TimeStamp`` values must be streamed using ``aWriter.TimeProperty("name", timestamp)``.
+
+The property names will be used to identify where each piece of data is stored and
+how it should be displayed on profiler.firefox.com (see next section).
+
+Here's how the above function parameters could be streamed:
+
+.. code-block:: c++
+
+    // …
+        aWriter.StringProperty("myString", aString);
+        aWriter.IntProperty("myBytes", aBytes);
+        aWriter.StringProperty("myURL", aURL);
+        aWriter.TimeProperty("myTime", aTime);
+      }
+
+.. _marker-type-display-schema:
+
+Marker Type Display Schema
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Now that we have defined how to stream type-specific data (from Firefox to
+profiler.firefox.com), we need to describe where and how this data will be
+displayed on profiler.firefox.com.
+
+The static member function ``MarkerTypeDisplay`` returns an opaque ``MarkerSchema``
+object, which will be forwarded to profiler.firefox.com.
+
+.. code-block:: c++
+
+    // …
+      static MarkerSchema MarkerTypeDisplay() {
+
+The ``MarkerSchema`` type will be used repeatedly, so for convenience we can define
+a local type alias:
+
+.. code-block:: c++
+
+    // …
+        using MS = MarkerSchema;
+
+First, we construct the ``MarkerSchema`` object to be returned at the end.
+
+One or more constructor arguments determine where this marker will be displayed in
+the profiler.firefox.com UI. See the `MarkerSchema::Location enumeration for the
+full list <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerSchema%3A%3ALocation>`_.
+
+Here is the most common set of locations, showing markers of that type in both the
+Marker Chart and the Marker Table panels:
+
+.. code-block:: c++
+
+    // …
+        MS schema(MS::Location::MarkerChart, MS::Location::MarkerTable);
+
+Some labels can optionally be specified, to display certain information in different
+locations: ``SetChartLabel``, ``SetTooltipLabel``, and ``SetTableLabel``; or
+``SetAllLabels`` to define all of them the same way.
+
+The argument is a string that may refer to marker data within braces:
+
+* ``{marker.name}``: Marker name.
+* ``{marker.data.X}``: Type-specific data, as streamed with property name "X" from ``StreamJSONMarkerData`` (e.g., ``aWriter.IntProperty("X", aNumber);``)
+
+For example, here's how to set the Marker Chart label to show the marker name and the
+``myBytes`` number of bytes:
+
+.. code-block:: c++
+
+    // …
+        schema.SetChartLabel("{marker.name} – {marker.data.myBytes}");
+
+profiler.firefox.com will apply the label with the data in a consistent manner. For
+example, with this label definition, it could display marker information like the
+following in the Firefox Profiler's Marker Chart:
+
+ * "Marker Name – 10B"
+ * "Marker Name – 25.204KB"
+ * "Marker Name – 512.54MB"
+
+For implementation details on this processing, see `src/profile-logic/marker-schema.js <https://github.com/firefox-devtools/profiler/blob/main/src/profile-logic/marker-schema.js>`_
+in the profiler's front-end.
+
+Next, define the main display of marker data, which will appear in the Marker
+Chart tooltips and the Marker Table sidebar.
+
+Each row may either be:
+
+* A dynamic key-value pair, using one of the ``MarkerSchema::AddKey...`` functions. Each function is given:
+
+  * Key: Element property name as streamed in ``StreamJSONMarkerData``.
+  * Label: Optional prefix. Defaults to the key name.
+  * Format: How to format the data element value, see `MarkerSchema::Format for details <https://searchfox.org/mozilla-central/define?q=T_mozilla%3A%3AMarkerSchema%3A%3AFormat>`_.
+  * Searchable: Optional boolean, indicates if the value is used in searches, defaults to false.
+
+* Or a fixed label and value strings, using ``MarkerSchema::AddStaticLabelValue``.
+
+.. code-block:: c++
+
+    // …
+        schema.AddKeyLabelFormatSearchable(
+            "myString", "My String", MS::Format::String, true);
+        schema.AddKeyLabelFormat(
+            "myBytes", "My Bytes", MS::Format::Bytes);
+        schema.AddKeyLabelFormat(
+            "myURL", "My URL", MS::Format::Url);
+        schema.AddKeyLabelFormat(
+            "myTime", "Event time", MS::Format::Time);
+
+Finally the ``schema`` object is returned from the function:
+
+.. code-block:: c++
+
+    // …
+        return schema;
+      }
+
+Any other ``struct`` member function is ignored. There could be utility functions used by the above
+compulsory functions, to make the code clearer.
+
+And that is the end of the marker definition ``struct``.
+
+.. code-block:: c++
+
+    // …
+    };
+
+Performance Considerations
+--------------------------
+
+During profiling, it is best to reduce the amount of work spent doing profiler
+operations, as they can influence the performance of the code that you want to profile.
+
+Whenever possible, consider passing simple types to marker functions, such that
+``StreamJSONMarkerData`` will do the minimum amount of work necessary to serialize
+the marker type-specific arguments to its internal buffer representation. POD types
+(numbers) and strings are the easiest and cheapest to serialize. Look at the
+corresponding ``ProfileBufferEntryWriter::Serializer`` specializations if you
+want to better understand the work done.
+
+Avoid doing expensive operations when recording markers. E.g.: ``printf`` of
+different things into a string, or complex computations; instead pass the
+``printf``/computation arguments straight through to the marker function, so that
+``StreamJSONMarkerData`` can do the expensive work at the end of the profiling session.
+
+Marker Architecture Description
+-------------------------------
+
+The above sections should give all the information needed for adding your own marker
+types. However, if you want to work on the marker architecture itself, this
+section will describe how the system works.
+
+TODO:
+ * Briefly describe the buffer and serialization.
+ * Describe the template strategy for generating marker types
+ * Describe the serialization and link to profiler front-end docs on marker processing (if they exist)
diff --git a/tools/profiler/docs/memory.rst b/tools/profiler/docs/memory.rst
new file mode 100644
index 0000000000..347a91f9e7
--- /dev/null
+++ b/tools/profiler/docs/memory.rst
@@ -0,0 +1,46 @@
+Profiling Memory
+================
+
+Sampling stacks from native allocations
+---------------------------------------
+
+The profiler can sample allocations and de-allocations from malloc using the
+"Native Allocations" feature. This can be enabled by going to `about:profiling` and
+enabling the "Native Allocations" checkbox. It is only available in Nightly, as it
+uses a technique of hooking into malloc that could be a little more risky to apply to
+the broader population of Firefox users.
+
+This implementation is located in: `tools/profiler/core/memory_hooks.cpp
+<https://searchfox.org/mozilla-central/source/tools/profiler/core/memory_hooks.cpp>`_
+
+It works by hooking into all of the malloc calls. When the profiler is running, it
+performs a `Bernoulli trial`_ that passes with a given probability per byte
+allocated. What this means is that larger allocations have a higher chance of being
+recorded compared to smaller allocations. Currently, there is no way to configure
+the per-byte probability. This means that sampled allocation sizes will be closer
+to the actual allocated bytes.
+
+This infrastructure is quite similar to DMD, but with the additional motivations of
+making it easy to turn on and use with the profiler. The overhead is quite high,
+especially on systems with more expensive stack walking, like Linux. Turning off
+the "Native Stacks" feature can help lower overhead, but will give less information.
+
+For more information on analyzing these profiles, see the `Firefox Profiler docs`_.
+
+Memory counters
+---------------
+
+Similar to the Native Allocations feature, memory counters use the malloc memory hook
+that is only available in Nightly. When it's available, the memory counters are always
+turned on. This is a lightweight way to count in a very granular fashion how much
+memory is being allocated and deallocated during the profiling session.
+
+This information is then visualized in the `Firefox Profiler memory track`_.
+
+This feature uses the `Profiler Counters`_, which can be used to create other types
+of cheap counting instrumentation.
+
+.. _Bernoulli trial: https://en.wikipedia.org/wiki/Bernoulli_trial
+.. _Firefox Profiler docs: https://profiler.firefox.com/docs/#/./memory-allocations
+.. _Firefox Profiler memory track: https://profiler.firefox.com/docs/#/./memory-allocations?id=memory-track
+.. _Profiler Counters: https://searchfox.org/mozilla-central/source/tools/profiler/public/ProfilerCounts.h
diff --git a/tools/profiler/docs/profilerclasses-20220913.png b/tools/profiler/docs/profilerclasses-20220913.png
new file mode 100644
index 0000000000..a5ba265407
--- /dev/null
+++ b/tools/profiler/docs/profilerclasses-20220913.png
diff --git a/tools/profiler/docs/profilerclasses.umlet.uxf b/tools/profiler/docs/profilerclasses.umlet.uxf
new file mode 100644
index 0000000000..c807853401
--- /dev/null
+++ b/tools/profiler/docs/profilerclasses.umlet.uxf
@@ -0,0 +1,811 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<diagram program="umlet" version="15.0.0">
+  <zoom_level>10</zoom_level>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>80</x>
+      <y>370</y>
+      <w>340</w>
+      <h>190</h>
+    </coordinates>
+    <panel_attributes>ThreadInfo
+--
+-mName: nsCString
+-mRegisterTime: TimeStamp
+-mThreadId: int
+-mIsMainThread: bool
+--
+NS_INLINE_DECL_THREADSAFE_REFCOUNTING
++Name()
++RegisterTime()
++ThreadId()
++IsMainThread()
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>470</x>
+      <y>300</y>
+      <w>600</w>
+      <h>260</h>
+    </coordinates>
+    <panel_attributes>RacyRegisteredThread
+--
+-mProfilingStackOwner: NotNull&lt;RefPtr&lt;ProfilingStackOwner&gt;&gt;
+-mThreadId
+-mSleep: Atomic&lt;int&gt; /* AWAKE, SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED */
+-mIsBeingProfiled: Atomic&lt;bool, Relaxed&gt;
+--
++SetIsBeingProfiled()
++IsBeingProfiled()
++ReinitializeOnResume()
++CanDuplicateLastSampleDueToSleep()
++SetSleeping()
++SetAwake()
++IsSleeping()
++ThreadId()
++ProfilingStack()
++ProfilingStackOwner()</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>470</x>
+      <y>650</y>
+      <w>350</w>
+      <h>360</h>
+    </coordinates>
+    <panel_attributes>RegisteredThread
+--
+-mPlatformData: UniquePlatformData
+-mStackTop: const void*
+-mThread: nsCOMPtr&lt;nsIThread&gt;
+-mContext: JSContext*
+-mJSSampling: enum {INACTIVE, ACTIVE_REQUESTED, ACTIVE, INACTIVE_REQUESTED}
+-mJSFlags: uint32_t
+--
++RacyRegisteredThread()
++GetPlatformData()
++StackTop()
++GetRunningEventDelay()
++SizeOfIncludingThis()
++SetJSContext()
++ClearJSContext()
++GetJSContext()
++Info(): RefPtr&lt;ThreadInfo&gt;
++GetEventTarget(): nsCOMPtr&lt;nsIEventTarget&gt;
++ResetMainThread(nsIThread*)
++StartJSSampling()
++StopJSSampling()
++PollJSSampling()
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>750</x>
+      <y>550</y>
+      <w>180</w>
+      <h>120</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;&lt;-
+mRacyRegisteredThread</panel_attributes>
+    <additional_attributes>10.0;100.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>290</x>
+      <y>550</y>
+      <w>230</w>
+      <h>120</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mThreadInfo: RefPtr&lt;&gt;</panel_attributes>
+    <additional_attributes>210.0;100.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>70</x>
+      <y>660</y>
+      <w>340</w>
+      <h>190</h>
+    </coordinates>
+    <panel_attributes>PageInformation
+--
+-mBrowsingContextID: uint64_t
+-mInnerWindowID: uint64_t
+-mUrl: nsCString
+-mEmbedderInnerWindowID: uint64_t
+--
+NS_INLINE_DECL_THREADSAFE_REFCOUNTING
++SizeOfIncludingThis(MallocSizeOf)
++Equals(PageInformation*)
++StreamJSON(SpliceableJSONWriter&amp;)
++InnerWindowID()
++BrowsingContextID()
++Url()
++EmbedderInnerWindowID()
++BufferPositionWhenUnregistered(): Maybe&lt;uint64_t&gt;
++NotifyUnregistered(aBufferPosition: uint64_t)</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>760</x>
+      <y>1890</y>
+      <w>570</w>
+      <h>120</h>
+    </coordinates>
+    <panel_attributes>ProfilerBacktrace
+--
+-mName: UniqueFreePtr&lt;char&gt;
+-mThreadId: int
+-mProfileChunkedBuffer: UniquePtr&lt;ProfileChunkedBuffer&gt;
+-mProfileBuffer: UniquePtr&lt;ProfileBuffer&gt;
+--
++StreamJSON(SpliceableJSONWriter&amp;, aProcessStartTime: TimeStamp, UniqueStacks&amp;)
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>20</x>
+      <y>2140</y>
+      <w>620</w>
+      <h>580</h>
+    </coordinates>
+    <panel_attributes>ProfileChunkedBuffer
+--
+-mMutex: BaseProfilerMaybeMutex
+-mChunkManager: ProfileBufferChunkManager*
+-mOwnedChunkManager: UniquePtr&lt;ProfileBufferChunkManager&gt;
+-mCurrentChunk: UniquePtr&lt;ProfileBufferChunk&gt;
+-mNextChunks: UniquePtr&lt;ProfileBufferChunk&gt;
+-mRequestedChunkHolder: RefPtr&lt;RequestedChunkRefCountedHolder&gt;
+-mNextChunkRangeStart: ProfileBufferIndex
+-mRangeStart: Atomic&lt;ProfileBufferIndex, ReleaseAcquire&gt;
+-mRangeEnd: ProfileBufferIndex
+-mPushedBlockCount: uint64_t
+-mClearedBlockCount: Atomic&lt;uint64_t, ReleaseAcquire&gt;
+--
++Byte = ProfileBufferChunk::Byte
++Length = ProfileBufferChunk::Length
++IsThreadSafe()
++IsInSession()
++ResetChunkManager()
++SetChunkManager()
++Clear()
++BufferLength(): Maybe&lt;size_t&gt;
++SizeOfExcludingThis(MallocSizeOf)
++SizeOfIncludingThis(MallocSizeOf)
++GetState()
++IsThreadSafeAndLockedOnCurrentThread(): bool
++LockAndRun(Callback&amp;&amp;)
++ReserveAndPut(CallbackEntryBytes&amp;&amp;, Callback&lt;auto(Maybe&lt;ProfileBufferEntryWriter&gt;&amp;)&gt;&amp;&amp;)
++Put(aEntryBytes: Length, Callback&lt;auto(Maybe&lt;ProfileBufferEntryWriter&gt;&amp;)&gt;&amp;&amp;)
++PutFrom(const void*, Length)
++PutObjects(const Ts&amp;...)
++PutObject(const T&amp;)
++GetAllChunks()
++Read(Callback&lt;void(Reader&amp;)&gt;&amp;&amp;): bool
++ReadEach(Callback&lt;void(ProfileBufferEntryReader&amp; [, ProfileBufferBlockIndex])&gt;&amp;&amp;)
++ReadAt(ProfileBufferBlockIndex, Callback&lt;void(Maybe&lt;ProfileBufferEntryReader&gt;&amp;&amp;)&gt;&amp;&amp;)
++AppendContents</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>810</x>
+      <y>2100</y>
+      <w>500</w>
+      <h>620</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferChunk
+--
++Header: {
+    mOffsetFirstBlock; mOffsetPastLastBlock; mDoneTimeStamp;
+    mBufferBytes; mBlockCount; mRangeStart; mProcessId;
+ }
+-InternalHeader: { mHeader: Header; mNext: UniquePtr&lt;ProfileBufferChunk&gt;; }
+--
+-mInternalHeader: InternalHeader
+-mBuffer: Byte /* First byte */
+--
++Byte = uint8_t
++Length = uint32_t
++SpanOfBytes = Span&lt;Byte&gt;
+/+Create(aMinBufferBytes: Length): UniquePtr&lt;ProfileBufferChunk&gt;/
++ReserveInitialBlockAsTail(Length): SpanOfBytes
++ReserveBlock(Length): { SpanOfBytes, ProfileBufferBlockIndex }
++MarkDone()
++MarkRecycled()
++ChunkHeader()
++BufferBytes()
++ChunkBytes()
++SizeOfExcludingThis(MallocSizeOf)
++SizeOfIncludingThis(MallocSizeOf)
++RemainingBytes(): Length
++OffsetFirstBlock(): Length
++OffsetPastLastBlock(): Length
++BlockCount(): Length
++ProcessId(): int
++SetProcessId(int)
++RangeStart(): ProfileBufferIndex
++SetRangeStart(ProfileBufferIndex)
++BufferSpan(): Span&lt;const Byte&gt;
++ByteAt(aOffset: Length)
++GetNext(): maybe-const ProfileBufferChunk*
++ReleaseNext(): UniquePtr&lt;ProfileBufferChunk&gt;
++InsertNext(UniquePtr&lt;ProfileBufferChunk&gt;&amp;&amp;)
++Last(): const ProfileBufferChunk*
++SetLast(UniquePtr&lt;ProfileBufferChunk&gt;&amp;&amp;)
+/+Join(UniquePtr&lt;ProfileBufferChunk&gt;&amp;&amp;, UniquePtr&lt;ProfileBufferChunk&gt;&amp;&amp;)/
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>120</x>
+      <y>2850</y>
+      <w>570</w>
+      <h>350</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferEntryReader
+--
+-mCurrentSpan: SpanOfConstBytes
+-mNextSpanOrEmpty: SpanOfConstBytes
+-mCurrentBlockIndex: ProfileBufferBlockIndex
+-mNextBlockIndex: ProfileBufferBlockIndex
+--
++RemainingBytes(): Length
++SetRemainingBytes(Length)
++CurrentBlockIndex(): ProfileBufferBlockIndex
++NextBlockIndex(): ProfileBufferBlockIndex
++EmptyIteratorAtOffset(Length): ProfileBufferEntryReader
++operator*(): const Byte&amp;
++operator++(): ProfileBufferEntryReader&amp;
++operator+=(Length): ProfileBufferEntryReader&amp;
++operator==(const ProfileBufferEntryReader&amp;)
++operator!=(const ProfileBufferEntryReader&amp;)
++ReadULEB128&lt;T&gt;(): T
++ReadBytes(void*, Length)
++ReadIntoObject(T&amp;)
++ReadIntoObjects(Ts&amp;...)
++ReadObject&lt;T&gt;(): T</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>740</x>
+      <y>2850</y>
+      <w>570</w>
+      <h>300</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferEntryWriter
+--
+-mCurrentSpan: SpanOfBytes
+-mNextSpanOrEmpty: SpanOfBytes
+-mCurrentBlockIndex: ProfileBufferBlockIndex
+-mNextBlockIndex: ProfileBufferBlockIndex
+--
++RemainingBytes(): Length
++CurrentBlockIndex(): ProfileBufferBlockIndex
++NextBlockIndex(): ProfileBufferBlockIndex
++operator*(): Byte&amp;
++operator++(): ProfileBufferEntryWriter&amp;
++operator+=(Length): ProfileBufferEntryWriter&amp;
+/+ULEB128Size(T): unsigned/
++WriteULEB128(T)
+/+SumBytes(const Ts&amp;...): Length/
++WriteFromReader(ProfileBufferEntryReader&amp;, Length)
++WriteObject(const T&amp;)
++WriteObjects(const T&amp;)</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>120</x>
+      <y>3270</y>
+      <w>570</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferEntryReader::Deserializer&lt;T&gt;
+/to be specialized for all types read from ProfileBufferEntryReader/
+--
+/+ReadInto(ProfileBufferEntryReader&amp;, T&amp;)/
+/+Read&lt;T&gt;(ProfileBufferEntryReader&amp;): T/</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>740</x>
+      <y>3270</y>
+      <w>570</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferEntryWriter::Serializer&lt;T&gt;
+/to be specialized for all types written into ProfileBufferEntryWriter/
+--
+/+Bytes(const T&amp;): Length/
+/+Write(ProfileBufferEntryWriter&amp;, const T&amp;)/</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>330</x>
+      <y>2710</y>
+      <w>110</w>
+      <h>160</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;creates&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;10.0;60.0;140.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>430</x>
+      <y>2710</y>
+      <w>360</w>
+      <h>160</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;creates&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;10.0;340.0;140.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>660</x>
+      <y>2710</y>
+      <w>260</w>
+      <h>160</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;points into&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;140.0;240.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>870</x>
+      <y>2710</y>
+      <w>140</w>
+      <h>160</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;points into&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;140.0;80.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>630</x>
+      <y>2170</y>
+      <w>200</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mCurrentChunk</panel_attributes>
+    <additional_attributes>10.0;20.0;180.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>630</x>
+      <y>2230</y>
+      <w>200</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mNextChunks</panel_attributes>
+    <additional_attributes>10.0;20.0;180.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>1100</x>
+      <y>2030</y>
+      <w>170</w>
+      <h>90</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mInternalHeader.mNext</panel_attributes>
+    <additional_attributes>10.0;70.0;10.0;20.0;150.0;20.0;150.0;70.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>490</x>
+      <y>3190</y>
+      <w>70</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;uses&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;80.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>580</x>
+      <y>3190</y>
+      <w>230</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;uses&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;10.0;210.0;80.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>50</x>
+      <y>1620</y>
+      <w>570</w>
+      <h>410</h>
+    </coordinates>
+    <panel_attributes>ProfileBuffer
+--
+-mFirstSamplingTimeNs: double
+-mLastSamplingTimeNs: double
+-mIntervalNs, etc.: ProfilerStats
+--
++IsThreadSafe(): bool
++AddEntry(const ProfileBufferEntry&amp;): uint64_t
++AddThreadIdEntry(int): uint64_t
++PutObjects(Kind, const Ts&amp;...): ProfileBufferBlockIndex
++CollectCodeLocation(...)
++AddJITInfoForRange(...)
++StreamSamplesToJSON(SpliceableJSONWriter&amp;, aThreadId: int, aSinceTime: double, UniqueStacks&amp;)
++StreamMarkersToJSON(SpliceableJSONWriter&amp;, ...)
++StreamPausedRangesToJSON(SpliceableJSONWriter&amp;, aSinceTime: double)
++StreamProfilerOverheadToJSON(SpliceableJSONWriter&amp;, ...)
++StreamCountersToJSON(SpliceableJSONWriter&amp;, ...)
++DuplicateLastSample
++DiscardSamplesBeforeTime(aTime: double)
++GetEntry(aPosition: uint64_t): ProfileBufferEntry
++SizeOfExcludingThis(MallocSizeOf)
++SizeOfIncludingThis(MallocSizeOf)
++CollectOverheadStats(...)
++GetProfilerBufferInfo(): ProfilerBufferInfo
++BufferRangeStart(): uint64_t
++BufferRangeEnd(): uint64_t</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>690</x>
+      <y>1620</y>
+      <w>230</w>
+      <h>60</h>
+    </coordinates>
+    <panel_attributes>ProfileBufferEntry
+--
++mKind: Kind
++mStorage: uint8_t[kNumChars=8]</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>930</x>
+      <y>1620</y>
+      <w>440</w>
+      <h>130</h>
+    </coordinates>
+    <panel_attributes>UniqueJSONStrings
+--
+-mStringTableWriter: SpliceableChunkedJSONWriter
+-mStringHashToIndexMap: HashMap&lt;HashNumber, uint32_t&gt;
+--
++SpliceStringTableElements(SpliceableJSONWriter&amp;)
++WriteProperty(JSONWriter&amp;, aName: const char*, aStr: const char*)
++WriteElement(JSONWriter&amp;, aStr: const char*)
++GetOrAddIndex(const char*): uint32_t</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>680</x>
+      <y>1760</y>
+      <w>470</w>
+      <h>110</h>
+    </coordinates>
+    <panel_attributes>UniqueStacks
+--
+-mFrameTableWriter: SpliceableChunkedJSONWriter
+-mFrameToIndexMap: HashMap&lt;FrameKey, uint32_t, FrameKeyHasher&gt;
+-mStackTableWriter: SpliceableChunkedJSONWriter
+-mStackToIndexMap: HashMap&lt;StackKey, uint32_t, StackKeyHasher&gt;
+-mJITInfoRanges: Vector&lt;JITFrameInfoForBufferRange&gt;</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>320</x>
+      <y>2020</y>
+      <w>230</w>
+      <h>140</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mEntries: ProfileChunkedBuffer&amp;</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;120.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1640</y>
+      <w>100</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;uses&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;20.0;80.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1710</y>
+      <w>340</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;uses&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;20.0;320.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1800</y>
+      <w>90</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=.&gt;
+&lt;&lt;uses&gt;&gt;</panel_attributes>
+    <additional_attributes>10.0;20.0;70.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1900</y>
+      <w>170</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mProfileBuffer</panel_attributes>
+    <additional_attributes>150.0;20.0;10.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>590</x>
+      <y>1940</y>
+      <w>250</w>
+      <h>220</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mProfileChunkedBuffer</panel_attributes>
+    <additional_attributes>170.0;10.0;10.0;200.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>20</x>
+      <y>1030</y>
+      <w>490</w>
+      <h>550</h>
+    </coordinates>
+    <panel_attributes>CorePS
+--
+/-sInstance: CorePS*/
+-mMainThreadId: int
+-mProcessStartTime: TimeStamp
+-mCoreBuffer: ProfileChunkedBuffer
+-mRegisteredThreads: Vector&lt;UniquePtr&lt;RegisteredThread&gt;&gt;
+-mRegisteredPages: Vector&lt;RefPtr&lt;PageInformation&gt;&gt;
+-mCounters: Vector&lt;BaseProfilerCount*&gt;
+-mLul: UniquePtr&lt;lul::LUL&gt; /* linux only */
+-mProcessName: nsAutoCString
+-mJsFrames: JsFrameBuffer
+--
++Create
++Destroy
++Exists(): bool
++AddSizeOf(...)
++MainThreadId()
++ProcessStartTime()
++CoreBuffer()
++RegisteredThreads(PSLockRef)
++JsFrames(PSLockRef)
+/+AppendRegisteredThread(PSLockRef, UniquePtr&lt;RegisteredThread&gt;)/
+/+RemoveRegisteredThread(PSLockRef, RegisteredThread*)/
++RegisteredPages(PSLockRef)
+/+AppendRegisteredPage(PSLockRef, RefPtr&lt;PageInformation&gt;)/
+/+RemoveRegisteredPage(PSLockRef, aRegisteredInnerWindowID: uint64_t)/
+/+ClearRegisteredPages(PSLockRef)/
++Counters(PSLockRef)
++AppendCounter
++RemoveCounter
++Lul(PSLockRef)
++SetLul(PSLockRef, UniquePtr&lt;lul::LUL&gt;)
++ProcessName(PSLockRef)
++SetProcessName(PSLockRef, const nsACString&amp;)
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>20</x>
+      <y>1570</y>
+      <w>110</w>
+      <h>590</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;&lt;-
+mCoreBuffer</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;570.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>160</x>
+      <y>840</y>
+      <w>150</w>
+      <h>210</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mRegisteredPages</panel_attributes>
+    <additional_attributes>10.0;190.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>250</x>
+      <y>840</y>
+      <w>240</w>
+      <h>210</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mRegisteredThreads</panel_attributes>
+    <additional_attributes>10.0;190.0;220.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>920</x>
+      <y>860</y>
+      <w>340</w>
+      <h>190</h>
+    </coordinates>
+    <panel_attributes>SamplerThread
+--
+-mSampler: Sampler
+-mActivityGeneration: uint32_t
+-mIntervalMicroseconds: int
+-mThread /* OS-specific */
+-mPostSamplingCallbackList: UniquePtr&lt;PostSamplingCallbackListItem&gt;
+--
++Run()
++Stop(PSLockRef)
++AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&amp;&amp;)</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>1060</x>
+      <y>600</y>
+      <w>340</w>
+      <h>190</h>
+    </coordinates>
+    <panel_attributes>Sampler
+--
+-mOldSigprofHandler: sigaction
+-mMyPid: int
+-mSamplerTid: int
++sSigHandlerCoordinator
+--
++Disable(PSLockRef)
++SuspendAndSampleAndResumeThread(PSLockRef, const RegisteredThread&amp;, aNow: TimeStamp, const Func&amp;)
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>1190</x>
+      <y>780</y>
+      <w>90</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;&lt;-
+mSampler</panel_attributes>
+    <additional_attributes>10.0;80.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>610</x>
+      <y>1130</y>
+      <w>470</w>
+      <h>400</h>
+    </coordinates>
+    <panel_attributes>ActivePS
+--
+/-sInstance: ActivePS*/
+-mGeneration: const uint32_t
+/-sNextGeneration: uint32_t/
+-mCapacity: const PowerOfTwo
+-mDuration: const Maybe&lt;double&gt;
+-mInterval: const double /* milliseconds */
+-mFeatures: const uint32_t
+-mFilters: Vector&lt;std::string&gt;
+-mActiveBrowsingContextID: uint64_t
+-mProfileBufferChunkManager: ProfileBufferChunkManagerWithLocalLimit
+-mProfileBuffer: ProfileBuffer
+-mLiveProfiledThreads: Vector&lt;LiveProfiledThreadData&gt;
+-mDeadProfiledThreads: Vector&lt;UniquePtr&lt;ProfiledThreadData&gt;&gt;
+-mDeadProfiledPages: Vector&lt;RefPtr&lt;PageInformation&gt;&gt;
+-mSamplerThread: SamplerThread* const
+-mInterposeObserver: RefPtr&lt;ProfilerIOInterposeObserver&gt;
+-mPaused: bool
+-mWasPaused: bool /* linux */
+-mBaseProfileThreads: UniquePtr&lt;char[]&gt;
+-mGeckoIndexWhenBaseProfileAdded: ProfileBufferBlockIndex
+-mExitProfiles: Vector&lt;ExitProfile&gt;
+--
++</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>970</x>
+      <y>1040</y>
+      <w>140</w>
+      <h>110</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;&lt;&lt;-
+mSamplerThread</panel_attributes>
+    <additional_attributes>10.0;90.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLNote</id>
+    <coordinates>
+      <x>500</x>
+      <y>160</y>
+      <w>510</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>bg=red
+This document pre-dates the generated image profilerclasses-20220913.png!
+Unfortunately, the changes to make the image were lost.
+
+This previous version may still be useful to start reconstructing the image,
+if there is a need to update it.</panel_attributes>
+    <additional_attributes/>
+  </element>
+</diagram>
diff --git a/tools/profiler/docs/profilerthreadregistration-20220913.png b/tools/profiler/docs/profilerthreadregistration-20220913.png
new file mode 100644
index 0000000000..8f7049d743
--- /dev/null
+++ b/tools/profiler/docs/profilerthreadregistration-20220913.png
diff --git a/tools/profiler/docs/profilerthreadregistration.umlet.uxf b/tools/profiler/docs/profilerthreadregistration.umlet.uxf
new file mode 100644
index 0000000000..3e07215db4
--- /dev/null
+++ b/tools/profiler/docs/profilerthreadregistration.umlet.uxf
@@ -0,0 +1,710 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<diagram program="umlet" version="15.0.0">
+  <zoom_level>10</zoom_level>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>200</x>
+      <y>330</y>
+      <w>370</w>
+      <h>250</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistry::OffThreadRef
+--
++UnlockedConstReaderCRef() const
++WithUnlockedConstReader(F&amp;&amp; aF) const
++UnlockedConstReaderAndAtomicRWCRef() const
++WithUnlockedConstReaderAndAtomicRW(F&amp;&amp; aF) const
++UnlockedConstReaderAndAtomicRWRef()
++WithUnlockedConstReaderAndAtomicRW(F&amp;&amp; aF)
++UnlockedRWForLockedProfilerCRef()
++WithUnlockedRWForLockedProfiler(F&amp;&amp; aF)
++UnlockedRWForLockedProfilerRef()
++WithUnlockedRWForLockedProfiler(F&amp;&amp; aF)
++ConstLockedRWFromAnyThread()
++WithConstLockedRWFromAnyThread(F&amp;&amp; aF)
++LockedRWFromAnyThread()
++WithLockedRWFromAnyThread(F&amp;&amp; aF)</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>310</x>
+      <y>80</y>
+      <w>560</w>
+      <h>160</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistry
+--
+-sRegistryMutex: RegistryMutex (aka BaseProfilerSharedMutex)
+/exclusive lock used during un/registration, shared lock for other accesses/
+--
+friend class ThreadRegistration
+-Register(ThreadRegistration::OnThreadRef)
+-Unregister(ThreadRegistration::OnThreadRef)
+--
++WithOffThreadRef(ProfilerThreadId, auto&amp;&amp; aF) static
++WithOffThreadRefOr(ProfilerThreadId, auto&amp;&amp; aF, auto&amp;&amp; aFallbackReturn) static: auto</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>310</x>
+      <y>630</y>
+      <w>530</w>
+      <h>260</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistration
+--
+-mDataMutex: DataMutex (aka BaseProfilerMutex)
+-mIsOnHeap: bool
+-mIsRegistryLockedSharedOnThisThread: bool
+-tlsThreadRegistration: MOZ_THREAD_LOCAL(ThreadRegistration*)
+-GetTLS() static: tlsThreadRegistration*
+-GetFromTLS() static: ThreadRegistration*
+--
++ThreadRegistration(const char* aName, const void* aStackTop)
++~ThreadRegistration()
++RegisterThread(const char* aName, const void* aStackTop) static: ProfilingStack*
++UnregisterThread() static
++IsRegistered() static: bool
++GetOnThreadPtr() static: OnThreadPtr
++WithOnThreadRefOr(auto&amp;&amp; aF, auto&amp;&amp; aFallbackReturn) static: auto
++IsDataMutexLockedOnCurrentThread() static: bool</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>880</x>
+      <y>620</y>
+      <w>450</w>
+      <h>290</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistration::OnThreadRef
+--
++UnlockedConstReaderCRef() const
++WithUnlockedConstReader(auto&amp;&amp; aF) const: auto
++UnlockedConstReaderAndAtomicRWCRef() const
++WithUnlockedConstReaderAndAtomicRW(auto&amp;&amp; aF) const: auto
++UnlockedConstReaderAndAtomicRWRef()
++WithUnlockedConstReaderAndAtomicRW(auto&amp;&amp; aF): auto
++UnlockedRWForLockedProfilerCRef() const
++WithUnlockedRWForLockedProfiler(auto&amp;&amp; aF) const: auto
++UnlockedRWForLockedProfilerRef()
++WithUnlockedRWForLockedProfiler(auto&amp;&amp; aF): auto
++UnlockedReaderAndAtomicRWOnThreadCRef() const
++WithUnlockedReaderAndAtomicRWOnThread(auto&amp;&amp; aF) const: auto
++UnlockedReaderAndAtomicRWOnThreadRef()
++WithUnlockedReaderAndAtomicRWOnThread(auto&amp;&amp; aF): auto
++RWOnThreadWithLock LockedRWOnThread()
++WithLockedRWOnThread(auto&amp;&amp; aF): auto</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>1040</x>
+      <y>440</y>
+      <w>230</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistration::OnThreadPtr
+--
++operator*(): OnThreadRef
++operator-&gt;(): OnThreadRef</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>450</x>
+      <y>940</y>
+      <w>350</w>
+      <h>240</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationData
+--
+-mProfilingStack: ProfilingStack
+-mStackTop: const void* const
+-mThread: nsCOMPtr&lt;nsIThread&gt;
+-mJSContext: JSContext*
+-mJsFrameBuffer: JsFrame*
+-mJSFlags: uint32_t
+-mSleep: Atomic&lt;int&gt;
+-mThreadCpuTimeInNsAtLastSleep: Atomic&lt;uint64_t&gt;
+-mWakeCount: Atomic&lt;uint64_t, Relaxed&gt;
+-mRecordWakeCountMutex: BaseProfilerMutex
+-mAlreadyRecordedWakeCount: uint64_t
+-mAlreadyRecordedCpuTimeInMs: uint64_t
+-mThreadProfilingFeatures: ThreadProfilingFeatures</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>460</x>
+      <y>1220</y>
+      <w>330</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationUnlockedConstReader
+--
++Info() const: const ThreadRegistrationInfo&amp;
++PlatformDataCRef() const: const PlatformData&amp;
++StackTop() const: const void*</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>440</x>
+      <y>1340</y>
+      <w>370</w>
+      <h>190</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationUnlockedConstReaderAndAtomicRW
+--
++ProfilingStackCRef() const: const ProfilingStack&amp;
++ProfilingStackRef(): ProfilingStack&amp;
++ProfilingFeatures() const: ThreadProfilingFeatures
++SetSleeping()
++SetAwake()
++GetNewCpuTimeInNs(): uint64_t
++RecordWakeCount() const
++ReinitializeOnResume()
++CanDuplicateLastSampleDueToSleep(): bool
++IsSleeping(): bool</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>460</x>
+      <y>1570</y>
+      <w>330</w>
+      <h>60</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationUnlockedRWForLockedProfiler
+--
++GetProfiledThreadData(): const ProfiledThreadData*
++GetProfiledThreadData(): ProfiledThreadData*
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>430</x>
+      <y>1670</y>
+      <w>390</w>
+      <h>50</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
+--
++GetJSContext(): JSContext*</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>380</x>
+      <y>1840</y>
+      <w>490</w>
+      <h>190</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationLockedRWFromAnyThread
+--
++SetProfilingFeaturesAndData(
+  ThreadProfilingFeatures, ProfiledThreadData*, const PSAutoLock&amp;)
++ClearProfilingFeaturesAndData(const PSAutoLock&amp;)
++GetJsFrameBuffer() const: JsFrame*
++GetEventTarget() const: const nsCOMPtr&lt;nsIEventTarget&gt;
++ResetMainThread()
++GetRunningEventDelay(const TimeStamp&amp;, TimeDuration&amp;, TimeDuration&amp;)
++StartJSSampling(uint32_t)
++StopJSSampling()</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>490</x>
+      <y>2070</y>
+      <w>260</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationLockedRWOnThread
+--
++SetJSContext(JSContext*)
++ClearJSContext()
++PollJSSampling()</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1170</y>
+      <w>30</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;50.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>500</x>
+      <y>2190</y>
+      <w>240</w>
+      <h>60</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistration::EmbeddedData
+--</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1290</y>
+      <w>30</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;50.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1520</y>
+      <w>30</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;50.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>1620</y>
+      <w>30</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;50.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>650</x>
+      <y>1710</y>
+      <w>30</w>
+      <h>150</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;130.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>2020</y>
+      <w>30</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;50.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>610</x>
+      <y>2140</y>
+      <w>30</w>
+      <h>70</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;&lt;-</panel_attributes>
+    <additional_attributes>10.0;10.0;10.0;50.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>340</x>
+      <y>880</y>
+      <w>180</w>
+      <h>1370</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mData</panel_attributes>
+    <additional_attributes>160.0;1350.0;10.0;1350.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>990</x>
+      <y>930</y>
+      <w>210</w>
+      <h>100</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistrationInfo
+--
++Name(): const char*
++RegisterTime(): const TimeStamp&amp;
++ThreadId(): ProfilerThreadId
++IsMainThread(): bool</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>790</x>
+      <y>980</y>
+      <w>220</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mInfo</panel_attributes>
+    <additional_attributes>200.0;20.0;10.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>990</x>
+      <y>1040</y>
+      <w>210</w>
+      <h>50</h>
+    </coordinates>
+    <panel_attributes>PlatformData
+--
+</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>790</x>
+      <y>1040</y>
+      <w>220</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;&gt;
+mPlatformData</panel_attributes>
+    <additional_attributes>200.0;20.0;10.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>990</x>
+      <y>1100</y>
+      <w>210</w>
+      <h>60</h>
+    </coordinates>
+    <panel_attributes>ProfiledThreadData
+--</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>790</x>
+      <y>1100</y>
+      <w>220</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+mProfiledThreadData: *</panel_attributes>
+    <additional_attributes>200.0;20.0;10.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>710</x>
+      <y>480</y>
+      <w>350</w>
+      <h>170</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+m1=0..1
+mThreadRegistration: *</panel_attributes>
+    <additional_attributes>10.0;150.0;330.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>830</x>
+      <y>580</y>
+      <w>260</w>
+      <h>130</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+m1=1
+mThreadRegistration: *</panel_attributes>
+    <additional_attributes>10.0;110.0;40.0;20.0;220.0;20.0;240.0;40.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>1140</x>
+      <y>500</y>
+      <w>90</w>
+      <h>140</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;creates&gt;</panel_attributes>
+    <additional_attributes>10.0;120.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>780</x>
+      <y>900</y>
+      <w>450</w>
+      <h>380</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;accesses&gt;</panel_attributes>
+    <additional_attributes>10.0;360.0;430.0;360.0;430.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>800</x>
+      <y>900</y>
+      <w>510</w>
+      <h>560</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;accesses&gt;</panel_attributes>
+    <additional_attributes>10.0;540.0;420.0;540.0;420.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>780</x>
+      <y>900</y>
+      <w>540</w>
+      <h>720</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;accesses&gt;</panel_attributes>
+    <additional_attributes>10.0;700.0;450.0;700.0;450.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>810</x>
+      <y>900</y>
+      <w>520</w>
+      <h>820</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;accesses&gt;</panel_attributes>
+    <additional_attributes>10.0;800.0;430.0;800.0;430.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>900</x>
+      <y>2070</y>
+      <w>410</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistration::OnThreadRef::ConstRWOnThreadWithLock
+--
+-mDataLock: BaseProfilerAutoLock
+--
++DataCRef() const: ThreadRegistrationLockedRWOnThread&amp;
++operator-&gt;() const: ThreadRegistrationLockedRWOnThread&amp;</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>740</x>
+      <y>2100</y>
+      <w>180</w>
+      <h>40</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+mLockedRWOnThread</panel_attributes>
+    <additional_attributes>10.0;20.0;160.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>1250</x>
+      <y>900</y>
+      <w>90</w>
+      <h>1190</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;creates&gt;</panel_attributes>
+    <additional_attributes>10.0;1170.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>660</x>
+      <y>440</y>
+      <w>400</w>
+      <h>210</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;creates&gt;</panel_attributes>
+    <additional_attributes>380.0;10.0;10.0;190.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>740</x>
+      <y>880</y>
+      <w>160</w>
+      <h>50</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;creates&gt;</panel_attributes>
+    <additional_attributes>140.0;30.0;50.0;30.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>460</x>
+      <y>230</y>
+      <w>150</w>
+      <h>120</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+m1=0..N
+sRegistryContainer:
+static Vector&lt;&gt;</panel_attributes>
+    <additional_attributes>10.0;100.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>800</x>
+      <y>250</y>
+      <w>470</w>
+      <h>150</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistry::LockedRegistry
+--
+-mRegistryLock: RegistryLockShared (aka BaseProfilerAutoLockShared)
+--
++LockedRegistry()
++~LockedRegistry()
++begin() const: const OffThreadRef*
++end() const: const OffThreadRef*
++begin(): OffThreadRef*
++end(): OffThreadRef*</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>560</x>
+      <y>350</y>
+      <w>260</w>
+      <h>50</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;accesses with
+shared lock&gt;</panel_attributes>
+    <additional_attributes>10.0;20.0;240.0;20.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>550</x>
+      <y>390</y>
+      <w>330</w>
+      <h>260</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;updates
+mIsRegistryLockedSharedOnThisThread&gt;</panel_attributes>
+    <additional_attributes>10.0;240.0;310.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>330</x>
+      <y>570</y>
+      <w>170</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+m1=1
+mThreadRegistration: *</panel_attributes>
+    <additional_attributes>120.0;60.0;40.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>280</x>
+      <y>570</y>
+      <w>200</w>
+      <h>710</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;accesses&gt;</panel_attributes>
+    <additional_attributes>180.0;690.0;10.0;690.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>270</x>
+      <y>570</y>
+      <w>190</w>
+      <h>890</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;accesses&gt;</panel_attributes>
+    <additional_attributes>170.0;870.0;10.0;870.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>UMLClass</id>
+    <coordinates>
+      <x>200</x>
+      <y>1740</y>
+      <w>440</w>
+      <h>80</h>
+    </coordinates>
+    <panel_attributes>ThreadRegistry::OffThreadRef::{,Const}RWFromAnyThreadWithLock
+--
+-mDataLock: BaseProfilerAutoLock
+--
++DataCRef() {,const}: ThreadRegistrationLockedRWFromAnyThread&amp;
++operator-&gt;() {,const}: ThreadRegistrationLockedRWFromAnyThread&amp;</panel_attributes>
+    <additional_attributes/>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>250</x>
+      <y>570</y>
+      <w>90</w>
+      <h>1190</h>
+    </coordinates>
+    <panel_attributes>lt=&lt;.
+&lt;creates&gt;</panel_attributes>
+    <additional_attributes>10.0;1170.0;10.0;10.0</additional_attributes>
+  </element>
+  <element>
+    <id>Relation</id>
+    <coordinates>
+      <x>180</x>
+      <y>1810</y>
+      <w>220</w>
+      <h>120</h>
+    </coordinates>
+    <panel_attributes>lt=-&gt;&gt;&gt;&gt;
+mLockedRWFromAnyThread</panel_attributes>
+    <additional_attributes>200.0;100.0;80.0;100.0;80.0;10.0</additional_attributes>
+  </element>
+</diagram>
diff --git a/tools/profiler/gecko/ChildProfilerController.cpp b/tools/profiler/gecko/ChildProfilerController.cpp
new file mode 100644
index 0000000000..f51cb9437d
--- /dev/null
+++ b/tools/profiler/gecko/ChildProfilerController.cpp
@@ -0,0 +1,170 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ChildProfilerController.h"
+
+#include "ProfilerChild.h"
+
+#include "mozilla/ProfilerState.h"
+#include "mozilla/ipc/Endpoint.h"
+#include "nsExceptionHandler.h"
+#include "nsIThread.h"
+#include "nsThreadUtils.h"
+
+using namespace mozilla::ipc;
+
+namespace mozilla {
+
+/* static */  // Factory: constructs a controller, calls Init() with aEndpoint, returns it.
+already_AddRefed<ChildProfilerController> ChildProfilerController::Create(
+    mozilla::ipc::Endpoint<PProfilerChild>&& aEndpoint) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());  // Must be called on the main thread.
+  RefPtr<ChildProfilerController> cpc = new ChildProfilerController();
+  cpc->Init(std::move(aEndpoint));  // Init() returns void; the controller is returned regardless.
+  return cpc.forget();
+}
+
+ChildProfilerController::ChildProfilerController()
+    : mThread(nullptr, "ChildProfilerController::mThread") {  // Starts with no thread; Init() assigns one.
+  MOZ_COUNT_CTOR(ChildProfilerController);
+}
+
+void ChildProfilerController::Init(Endpoint<PProfilerChild>&& aEndpoint) {  // Creates the "ProfilerChild" thread and dispatches SetupProfilerChild to it.
+  RefPtr<nsIThread> newProfilerChildThread;
+  if (NS_SUCCEEDED(NS_NewNamedThread("ProfilerChild",
+                                     getter_AddRefs(newProfilerChildThread)))) {
+    {
+      auto lock = mThread.Lock();
+      RefPtr<nsIThread>& lockedmThread = lock.ref();
+      MOZ_ASSERT(!lockedmThread, "There is already a ProfilerChild thread");
+      // Copy ref'd ptr into mThread. Don't move/swap, so that
+      // newProfilerChildThread can be used below.
+      lockedmThread = newProfilerChildThread;
+    }  // `lock` goes out of scope here, before the dispatch below.
+    // Now that mThread has been set, run SetupProfilerChild on the thread.
+    newProfilerChildThread->Dispatch(
+        NewRunnableMethod<Endpoint<PProfilerChild>&&>(
+            "ChildProfilerController::SetupProfilerChild", this,
+            &ChildProfilerController::SetupProfilerChild, std::move(aEndpoint)),
+        NS_DISPATCH_NORMAL);
+  }  // If thread creation failed, nothing is dispatched and mThread is left untouched.
+}
+
+ProfileAndAdditionalInformation  // Shuts down and returns whatever the shared shutdown helper stored.
+ChildProfilerController::GrabShutdownProfileAndShutdown() {
+  ProfileAndAdditionalInformation profileAndAdditionalInformation;  // Stays default-constructed if the helper writes nothing.
+  ShutdownAndMaybeGrabShutdownProfileFirst(&profileAndAdditionalInformation);
+  return profileAndAdditionalInformation;
+}
+
+void ChildProfilerController::Shutdown() {  // Shutdown variant that does not request a profile.
+  ShutdownAndMaybeGrabShutdownProfileFirst(nullptr);  // nullptr: ShutdownProfilerChild skips GrabShutdownProfile.
+}
+
+void ChildProfilerController::ShutdownAndMaybeGrabShutdownProfileFirst(  // Shared implementation behind Shutdown() and GrabShutdownProfileAndShutdown().
+    ProfileAndAdditionalInformation* aOutShutdownProfileInformation) {  // May be null (no profile requested).
+  // First, get the owning reference out of mThread, so it cannot be used in
+  // ChildProfilerController after this (including re-entrantly during the
+  // profilerChildThread->Shutdown() inner event loop below).
+  RefPtr<nsIThread> profilerChildThread;
+  {
+    auto lock = mThread.Lock();
+    RefPtr<nsIThread>& lockedmThread = lock.ref();
+    lockedmThread.swap(profilerChildThread);  // mThread is now null; the thread (if any) is in profilerChildThread.
+  }
+  if (profilerChildThread) {  // Null means Init() never set a thread or shutdown already ran: nothing to do.
+    if (profiler_is_active()) {
+      CrashReporter::AnnotateCrashReport(
+          CrashReporter::Annotation::ProfilerChildShutdownPhase,
+          "Profiling - Dispatching ShutdownProfilerChild"_ns);
+      profilerChildThread->Dispatch(
+          NewRunnableMethod<ProfileAndAdditionalInformation*>(
+              "ChildProfilerController::ShutdownProfilerChild", this,
+              &ChildProfilerController::ShutdownProfilerChild,
+              aOutShutdownProfileInformation),
+          NS_DISPATCH_NORMAL);
+      // Shut down the thread. This call will spin until all runnables
+      // (including the ShutdownProfilerChild runnable) have been processed.
+      profilerChildThread->Shutdown();
+    } else {
+      CrashReporter::AnnotateCrashReport(
+          CrashReporter::Annotation::ProfilerChildShutdownPhase,
+          "Not profiling - Running ShutdownProfilerChild"_ns);
+      // If we're not profiling, this operation will be very quick, so it can be
+      // done synchronously. This avoids having to manually shutdown the thread,
+      // which runs a risky inner event loop, see bug 1613798.
+      NS_DispatchAndSpinEventLoopUntilComplete(
+          "ChildProfilerController::ShutdownProfilerChild SYNC"_ns,
+          profilerChildThread,
+          NewRunnableMethod<ProfileAndAdditionalInformation*>(
+              "ChildProfilerController::ShutdownProfilerChild SYNC", this,
+              &ChildProfilerController::ShutdownProfilerChild, nullptr));  // nullptr: aOutShutdownProfileInformation is not written on this path.
+    }
+    // At this point, `profilerChildThread` should be the last reference to the
+    // thread, so it will now get destroyed.
+  }
+}
+
+ChildProfilerController::~ChildProfilerController() {  // Only verifies (via assertions) that shutdown already happened.
+  MOZ_COUNT_DTOR(ChildProfilerController);
+
+#ifdef DEBUG
+  {
+    auto lock = mThread.Lock();
+    RefPtr<nsIThread>& lockedmThread = lock.ref();
+    MOZ_ASSERT(
+        !lockedmThread,
+        "Please call Shutdown before destroying ChildProfilerController");
+  }
+#endif
+  MOZ_ASSERT(!mProfilerChild);  // ShutdownProfilerChild resets mProfilerChild to nullptr.
+}
+
+void ChildProfilerController::SetupProfilerChild(  // Dispatched by Init(); creates mProfilerChild and binds it to aEndpoint.
+    Endpoint<PProfilerChild>&& aEndpoint) {
+  {
+    auto lock = mThread.Lock();
+    RefPtr<nsIThread>& lockedmThread = lock.ref();
+    // We should be on the ProfilerChild thread. In rare cases, we could already
+    // be in shutdown, in which case mThread is null; we still need to continue,
+    // so that ShutdownProfilerChild can work on a valid mProfilerChild.
+    MOZ_RELEASE_ASSERT(!lockedmThread ||
+                       lockedmThread == NS_GetCurrentThread());
+  }
+  MOZ_ASSERT(aEndpoint.IsValid());
+
+  mProfilerChild = new ProfilerChild();
+  Endpoint<PProfilerChild> endpoint = std::move(aEndpoint);  // Move into a local before binding.
+
+  if (!endpoint.Bind(mProfilerChild)) {  // A failed bind is treated as fatal.
+    MOZ_CRASH("Failed to bind ProfilerChild!");
+  }
+}
+
+void ChildProfilerController::ShutdownProfilerChild(  // Optionally grabs the shutdown profile, then destroys and releases mProfilerChild.
+    ProfileAndAdditionalInformation* aOutShutdownProfileInformation) {  // Null means "do not grab a profile".
+  const bool isProfiling = profiler_is_active();  // Only used to choose the crash-annotation text below.
+  if (aOutShutdownProfileInformation) {
+    CrashReporter::AnnotateCrashReport(
+        CrashReporter::Annotation::ProfilerChildShutdownPhase,
+        isProfiling ? "Profiling - GrabShutdownProfile"_ns
+                    : "Not profiling - GrabShutdownProfile"_ns);
+    *aOutShutdownProfileInformation = mProfilerChild->GrabShutdownProfile();
+  }
+  CrashReporter::AnnotateCrashReport(
+      CrashReporter::Annotation::ProfilerChildShutdownPhase,
+      isProfiling ? "Profiling - Destroying ProfilerChild"_ns
+                  : "Not profiling - Destroying ProfilerChild"_ns);
+  mProfilerChild->Destroy();
+  mProfilerChild = nullptr;  // Drop our reference; the destructor asserts that this is null.
+  CrashReporter::AnnotateCrashReport(
+      CrashReporter::Annotation::ProfilerChildShutdownPhase,
+      isProfiling  // NOTE(review): "Not Profiling" below is capitalized differently from the other "Not profiling" annotations.
+          ? "Profiling - ShutdownProfilerChild complete, waiting for thread shutdown"_ns
+          : "Not Profiling - ShutdownProfilerChild complete, waiting for thread shutdown"_ns);
+}
+
+}  // namespace mozilla
diff --git a/tools/profiler/gecko/PProfiler.ipdl b/tools/profiler/gecko/PProfiler.ipdl
new file mode 100644
index 0000000000..65778b892c
--- /dev/null
+++ b/tools/profiler/gecko/PProfiler.ipdl
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+include ProfilerTypes;
+
+namespace mozilla {
+
+// PProfiler is a top-level protocol. It is used to let the main process
+// control the Gecko Profiler in other processes, and request profiles from
+// those processes.
+// It is a top-level protocol so that its child endpoint can be on a
+// background thread, so that profiles can be gathered even if the main thread
+// is unresponsive.
+[ChildImpl=virtual, ParentImpl=virtual]
+async protocol PProfiler
+{
+child:  // Every message in this protocol is declared under `child:`.
+  // The unused returned value is to have a promise we can await.
+  async Start(ProfilerInitParams params) returns (bool unused);
+  async EnsureStarted(ProfilerInitParams params) returns (bool unused);
+  async Stop() returns (bool unused);
+  async Pause() returns (bool unused);
+  async Resume() returns (bool unused);
+  async PauseSampling() returns (bool unused);
+  async ResumeSampling() returns (bool unused);
+
+  async WaitOnePeriodicSampling() returns (bool sampled);
+
+  async AwaitNextChunkManagerUpdate() returns (ProfileBufferChunkManagerUpdate update);
+  async DestroyReleasedChunksAtOrBefore(TimeStamp timeStamp);  // No `returns` clause: nothing to await.
+
+  // The returned shmem may contain an empty string (unavailable), an error
+  // message starting with '*', or a profile as a stringified JSON object.
+  async GatherProfile() returns (IPCProfileAndAdditionalInformation profileAndAdditionalInformation);
+  async GetGatherProfileProgress() returns (GatherProfileProgress progress);
+
+  async ClearAllPages();  // No `returns` clause: nothing to await.
+};
+
+} // namespace mozilla
+
diff --git a/tools/profiler/gecko/ProfilerChild.cpp b/tools/profiler/gecko/ProfilerChild.cpp
new file mode 100644
index 0000000000..db7ef99423
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerChild.cpp
@@ -0,0 +1,565 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerChild.h"
+
+#include "GeckoProfiler.h"
+#include "platform.h"
+#include "ProfilerCodeAddressService.h"
+#include "ProfilerControl.h"
+#include "ProfilerParent.h"
+
+#include "chrome/common/ipc_channel.h"
+#include "nsPrintfCString.h"
+#include "nsThreadUtils.h"
+
+#include <memory>
+
+namespace mozilla {
+
+/* static */ DataMutexBase<ProfilerChild::ProfilerChildAndUpdate,
+                           baseprofiler::detail::BaseProfilerMutex>
+    ProfilerChild::sPendingChunkManagerUpdate{
+        "ProfilerChild::sPendingChunkManagerUpdate"};
+
+ProfilerChild::ProfilerChild()  // mThread is later used as dispatch target.
+    : mThread(NS_GetCurrentThread()), mDestroyed(false) {
+  MOZ_COUNT_CTOR(ProfilerChild);  // Paired with MOZ_COUNT_DTOR below.
+}
+
+ProfilerChild::~ProfilerChild() { MOZ_COUNT_DTOR(ProfilerChild); }
+
+void ProfilerChild::ResolveChunkUpdate(
+    PProfilerChild::AwaitNextChunkManagerUpdateResolver& aResolve) {
+  MOZ_ASSERT(!!aResolve,
+             "ResolveChunkUpdate should only be called when there's a pending "
+             "resolver");
+  MOZ_ASSERT(
+      !mChunkManagerUpdate.IsNotUpdate(),
+      "ResolveChunkUpdate should only be called with a real or final update");
+  MOZ_ASSERT(
+      !mDestroyed,
+      "ResolveChunkUpdate should not be called if the actor was destroyed");
+  if (mChunkManagerUpdate.IsFinal()) {
+    // Final update: send the dedicated "final" value, but keep the local copy
+    // untouched so that we remember the final update was received.
+    std::move(aResolve)(ProfilerParent::MakeFinalUpdate());
+  } else {
+    // Optimization note: The ProfileBufferChunkManagerUpdate constructor takes
+    // the newly-released chunks nsTArray by reference-to-const, therefore
+    // constructing and then moving the array here would make a copy. So instead
+    // we first give it an empty array, and then we can write the data directly
+    // into the update's array.
+    ProfileBufferChunkManagerUpdate update{
+        mChunkManagerUpdate.UnreleasedBytes(),
+        mChunkManagerUpdate.ReleasedBytes(),
+        mChunkManagerUpdate.OldestDoneTimeStamp(),
+        {}};
+    update.newlyReleasedChunks().SetCapacity(
+        mChunkManagerUpdate.NewlyReleasedChunksRef().size());
+    for (const ProfileBufferControlledChunkManager::ChunkMetadata& chunk :
+         mChunkManagerUpdate.NewlyReleasedChunksRef()) {
+      update.newlyReleasedChunks().EmplaceBack(chunk.mDoneTimeStamp,
+                                               chunk.mBufferBytes);
+    }
+
+    std::move(aResolve)(update);
+
+    // Clear the update we just sent, so that later updates get folded into a
+    // fresh one instead of being sent again.
+    mChunkManagerUpdate.Clear();
+  }
+
+  // Empty the caller's resolver; callers test it to know if one is pending.
+  aResolve = nullptr;
+}
+
+void ProfilerChild::ProcessChunkManagerUpdate(
+    ProfileBufferControlledChunkManager::Update&& aUpdate) {
+  if (mDestroyed) {
+    return;  // Actor was destroyed: drop the update.
+  }
+  // Always store the data (even with no pending resolver): it may be final.
+  mChunkManagerUpdate.Fold(std::move(aUpdate));
+  if (mAwaitNextChunkManagerUpdateResolver) {
+    // A request is already waiting for data, answer it right away.
+    ResolveChunkUpdate(mAwaitNextChunkManagerUpdateResolver);
+  }
+}
+
+/* static */ void ProfilerChild::ProcessPendingUpdate() {
+  auto pending = sPendingChunkManagerUpdate.Lock();
+  if (!pending->mProfilerChild || pending->mUpdate.IsNotUpdate()) {
+    // No registered actor, or nothing was stored: no task is needed.
+    return;
+  }
+  pending->mProfilerChild->mThread->Dispatch(NS_NewRunnableFunction(
+      "ProfilerChild::ProcessPendingUpdate", []() mutable {
+        // Lock and check again when the task actually runs.
+        auto relocked = sPendingChunkManagerUpdate.Lock();
+        if (relocked->mProfilerChild && !relocked->mUpdate.IsNotUpdate()) {
+          relocked->mProfilerChild->ProcessChunkManagerUpdate(
+              std::move(relocked->mUpdate));
+          relocked->mUpdate.Clear();
+        }
+      }));
+}
+
+/* static */ bool ProfilerChild::IsLockedOnCurrentThread() {
+  return sPendingChunkManagerUpdate.Mutex().IsLockedOnCurrentThread();  // Mutex guarding the pending update.
+}
+
+void ProfilerChild::SetupChunkManager() {
+  mChunkManager = profiler_get_controlled_chunk_manager();
+  if (NS_WARN_IF(!mChunkManager)) {
+    return;  // No chunk manager to observe; nothing else is set up.
+  }
+
+  // Drop leftover updates (from a previous run), both local and pending.
+  mChunkManagerUpdate.Clear();
+  {
+    auto lockedUpdate = sPendingChunkManagerUpdate.Lock();
+    lockedUpdate->mProfilerChild = this;  // Register as the updates' receiver.
+    lockedUpdate->mUpdate.Clear();
+  }
+
+  mChunkManager->SetUpdateCallback(
+      [](ProfileBufferControlledChunkManager::Update&& aUpdate) {
+        // Updates from the chunk manager are stored for later processing.
+        // We avoid dispatching a task, as this could deadlock (if the queueing
+        // mutex is held elsewhere).
+        auto lockedUpdate = sPendingChunkManagerUpdate.Lock();
+        if (!lockedUpdate->mProfilerChild) {
+          return;  // No actor is registered anymore: drop the update.
+        }
+        lockedUpdate->mUpdate.Fold(std::move(aUpdate));
+      });
+}
+
+void ProfilerChild::ResetChunkManager() {
+  if (!mChunkManager) {
+    return;  // Nothing was set up, or it was already reset.
+  }
+
+  // We have a chunk manager, reset the callback, which will add a final
+  // pending update.
+  mChunkManager->SetUpdateCallback({});
+
+  // Clear the pending update; this lock stays held until the function returns.
+  auto lockedUpdate = sPendingChunkManagerUpdate.Lock();
+  lockedUpdate->mProfilerChild = nullptr;
+  lockedUpdate->mUpdate.Clear();
+  // And process a final update right now (this answers a waiting request).
+  ProcessChunkManagerUpdate(
+      ProfileBufferControlledChunkManager::Update(nullptr));
+
+  mChunkManager = nullptr;
+  mAwaitNextChunkManagerUpdateResolver = nullptr;  // Drop any stored resolver.
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvStart(
+    const ProfilerInitParams& params, StartResolver&& aResolve) {
+  // Borrow the C strings owned by `params`; it outlives the call below.
+  nsTArray<const char*> rawFilters;
+  for (const auto& filter : params.filters()) {
+    rawFilters.AppendElement(filter.get());
+  }
+  profiler_start(PowerOfTwo32(params.entries()), params.interval(),
+                 params.features(), rawFilters.Elements(),
+                 rawFilters.Length(), params.activeTabID(), params.duration());
+  // Start was requested: attach this actor to the profiler's chunk manager.
+  SetupChunkManager();
+
+  aResolve(/* unused */ true);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvEnsureStarted(
+    const ProfilerInitParams& params, EnsureStartedResolver&& aResolve) {
+  // Borrow the C strings owned by `params`; it outlives the call below.
+  nsTArray<const char*> rawFilters;
+  for (const auto& filter : params.filters()) {
+    rawFilters.AppendElement(filter.get());
+  }
+  profiler_ensure_started(PowerOfTwo32(params.entries()), params.interval(),
+                          params.features(), rawFilters.Elements(),
+                          rawFilters.Length(), params.activeTabID(),
+                          params.duration());
+  // Attach this actor to the profiler's chunk manager.
+  SetupChunkManager();
+
+  aResolve(/* unused */ true);
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvStop(StopResolver&& aResolve) {
+  ResetChunkManager();  // Detach from the chunk manager before stopping.
+  profiler_stop();
+  aResolve(/* unused */ true);  // Value is ignored; this completes the promise.
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvPause(PauseResolver&& aResolve) {
+  profiler_pause();
+  aResolve(/* unused */ true);  // Value is ignored; this completes the promise.
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvResume(ResumeResolver&& aResolve) {
+  profiler_resume();
+  aResolve(/* unused */ true);  // Value is ignored; this completes the promise.
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvPauseSampling(
+    PauseSamplingResolver&& aResolve) {
+  profiler_pause_sampling();
+  aResolve(/* unused */ true);  // Value is ignored; this completes the promise.
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvResumeSampling(
+    ResumeSamplingResolver&& aResolve) {
+  profiler_resume_sampling();
+  aResolve(/* unused */ true);  // Value is ignored; this completes the promise.
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvWaitOnePeriodicSampling(
+    WaitOnePeriodicSamplingResolver&& aResolve) {
+  std::shared_ptr<WaitOnePeriodicSamplingResolver> resolve =
+      std::make_shared<WaitOnePeriodicSamplingResolver>(std::move(aResolve));
+  if (!profiler_callback_after_sampling(
+          [self = RefPtr(this), resolve](SamplingState aSamplingState) mutable {
+            if (self->mDestroyed) {
+              return;  // Actor destroyed: the request is left unresolved.
+            }
+            MOZ_RELEASE_ASSERT(self->mThread);
+            self->mThread->Dispatch(NS_NewRunnableFunction(
+                "nsProfiler::WaitOnePeriodicSampling result on main thread",
+                [resolve = std::move(resolve), aSamplingState]() {
+                  (*resolve)(aSamplingState ==
+                                 SamplingState::SamplingCompleted ||
+                             aSamplingState ==
+                                 SamplingState::NoStackSamplingCompleted);
+                }));
+          })) {
+    // The callback was not registered (e.g., profiler is not running), so it
+    // will never run: resolve the promise here, reporting "not sampled".
+    (*resolve)(false);
+  }
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvClearAllPages() {
+  profiler_clear_all_pages();  // No resolver: this message returns nothing.
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvAwaitNextChunkManagerUpdate(
+    AwaitNextChunkManagerUpdateResolver&& aResolve) {
+  MOZ_ASSERT(!mDestroyed,
+             "Recv... should not be called if the actor was destroyed");
+  // Pick up updates stored by the chunk manager callback, if any.
+  {
+    auto lockedUpdate = sPendingChunkManagerUpdate.Lock();
+    if (lockedUpdate->mProfilerChild && !lockedUpdate->mUpdate.IsNotUpdate()) {
+      mChunkManagerUpdate.Fold(std::move(lockedUpdate->mUpdate));
+      lockedUpdate->mUpdate.Clear();
+    }
+  }
+  if (mChunkManagerUpdate.IsNotUpdate()) {
+    // No data yet: keep the resolver, ProcessChunkManagerUpdate() will use it.
+    mAwaitNextChunkManagerUpdateResolver = std::move(aResolve);
+  } else {
+    // We have data, send it now.
+    ResolveChunkUpdate(aResolve);
+  }
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvDestroyReleasedChunksAtOrBefore(
+    const TimeStamp& aTimeStamp) {
+  if (mChunkManager) {  // Silently ignored when no chunk manager is attached.
+    mChunkManager->DestroyChunksAtOrBefore(aTimeStamp);
+  }
+  return IPC_OK();
+}
+
+struct GatherProfileThreadParameters
+    : public external::AtomicRefCounted<GatherProfileThreadParameters> {
+  MOZ_DECLARE_REFCOUNTED_TYPENAME(GatherProfileThreadParameters)
+
+  GatherProfileThreadParameters(
+      RefPtr<ProfilerChild> aProfilerChild,
+      RefPtr<ProgressLogger::SharedProgress> aProgress,
+      ProfilerChild::GatherProfileResolver&& aResolver)
+      : profilerChild(std::move(aProfilerChild)),
+        progress(std::move(aProgress)),
+        resolver(std::move(aResolver)) {}
+
+  RefPtr<ProfilerChild> profilerChild;  // Actor that received the request.
+
+  FailureLatchSource failureLatchSource;  // Given to the JSON writer.
+
+  // Separate RefPtr used when working on separate thread. This way, if the
+  // "ProfilerChild" thread decides to overwrite its mGatherProfileProgress with
+  // a new one, the work done here will still only use the old one.
+  RefPtr<ProgressLogger::SharedProgress> progress;
+
+  // Resolver for the GatherProfile promise. Must only be called on the
+  // "ProfilerChild" thread.
+  ProfilerChild::GatherProfileResolver resolver;
+};
+
+/* static */  // Entry point of the thread created by RecvGatherProfile().
+void ProfilerChild::GatherProfileThreadFunction(
+    void* already_AddRefedParameters) {
+  PR_SetCurrentThreadName("GatherProfileThread");
+
+  RefPtr<GatherProfileThreadParameters> parameters =
+      already_AddRefed<GatherProfileThreadParameters>{
+          static_cast<GatherProfileThreadParameters*>(
+              already_AddRefedParameters)};
+
+  ProgressLogger progressLogger(
+      parameters->progress, "Gather-profile thread started", "Profile sent");
+  using namespace mozilla::literals::ProportionValue_literals;  // For `1_pc`.
+
+  auto writer =
+      MakeUnique<SpliceableChunkedJSONWriter>(parameters->failureLatchSource);
+  if (!profiler_get_profile_json(
+          *writer,
+          /* aSinceTime */ 0,
+          /* aIsShuttingDown */ false,
+          progressLogger.CreateSubLoggerFromTo(
+              1_pc, "profiler_get_profile_json started", 99_pc,
+              "profiler_get_profile_json done"))) {
+    // Failed to get a profile, reset the writer pointer, so that we'll send a
+    // failure message.
+    writer.reset();
+  }
+
+  if (NS_WARN_IF(NS_FAILED(
+          parameters->profilerChild->mThread->Dispatch(NS_NewRunnableFunction(
+              "ProfilerChild::GatherProfileThreadFunction",
+              [parameters,
+               // Forward progress logger to on-ProfilerChild-thread task, so
+               // that it doesn't get marked as 100% done when this off-thread
+               // function ends.
+               progressLogger = std::move(progressLogger),
+               writer = std::move(writer)]() mutable {
+                // We are now on the ProfilerChild thread, about to send the
+                // completed profile. Any incoming progress request will now be
+                // handled after this task ends, so updating the progress is now
+                // useless and we can just get rid of the progress storage.
+                if (parameters->profilerChild->mGatherProfileProgress ==
+                    parameters->progress) {
+                  // The ProfilerChild progress is still the one we know.
+                  parameters->profilerChild->mGatherProfileProgress = nullptr;
+                }
+
+                // Shmem allocation and promise resolution must be made on the
+                // ProfilerChild thread, that's why this task was needed here.
+                mozilla::ipc::Shmem shmem;
+                if (writer) {
+                  if (const size_t len = writer->ChunkedWriteFunc().Length();
+                      len < UINT32_MAX) {
+                    bool shmemSuccess = true;
+                    const bool copySuccess =
+                        writer->ChunkedWriteFunc()
+                            .CopyDataIntoLazilyAllocatedBuffer(
+                                [&](size_t allocationSize) -> char* {
+                                  MOZ_ASSERT(allocationSize == len + 1);
+                                  if (parameters->profilerChild->AllocShmem(
+                                          allocationSize, &shmem)) {
+                                    return shmem.get<char>();
+                                  }
+                                  shmemSuccess = false;
+                                  return nullptr;
+                                });
+                    if (!shmemSuccess || !copySuccess) {
+                      const nsPrintfCString message(
+                          (!shmemSuccess)
+                              ? "*Could not create shmem for profile from pid "
+                                "%u (%zu B)"
+                              : "*Could not write profile from pid %u (%zu B)",
+                          unsigned(profiler_current_process_id().ToNumber()),
+                          len);
+                      if (parameters->profilerChild->AllocShmem(
+                              message.Length() + 1, &shmem)) {
+                        strcpy(shmem.get<char>(), message.Data());
+                      }
+                    }
+                  } else {
+                    const nsPrintfCString message(
+                        "*Profile from pid %u bigger (%zu) than shmem max "
+                        "(%zu)",
+                        unsigned(profiler_current_process_id().ToNumber()), len,
+                        size_t(UINT32_MAX));
+                    if (parameters->profilerChild->AllocShmem(
+                            message.Length() + 1, &shmem)) {
+                      strcpy(shmem.get<char>(), message.Data());
+                    }
+                  }
+                  writer = nullptr;
+                } else {
+                  // No profile: send an error message ('*' prefix) instead.
+                  const char* failure =
+                      parameters->failureLatchSource.GetFailure();
+                  const nsPrintfCString message(
+                      "*Could not generate profile from pid %u%s%s",
+                      unsigned(profiler_current_process_id().ToNumber()),
+                      failure ? ", failure: " : "", failure ? failure : "");
+                  if (parameters->profilerChild->AllocShmem(
+                          message.Length() + 1, &shmem)) {
+                    strcpy(shmem.get<char>(), message.Data());
+                  }
+                }
+
+                SharedLibraryInfo sharedLibraryInfo =
+                    SharedLibraryInfo::GetInfoForSelf();
+                parameters->resolver(IPCProfileAndAdditionalInformation{
+                    shmem, Some(ProfileGenerationAdditionalInformation{
+                               std::move(sharedLibraryInfo)})});
+              }))))) {
+    // Failed to dispatch the task to the ProfilerChild thread. The IPC cannot
+    // be resolved on this thread, so it will never be resolved!
+    // And it would be unsafe to modify mGatherProfileProgress; But the parent
+    // should notice that it's not advancing anymore.
+  }
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvGatherProfile(
+    GatherProfileResolver&& aResolve) {
+  mGatherProfileProgress = MakeRefPtr<ProgressLogger::SharedProgress>();
+  mGatherProfileProgress->SetProgress(ProportionValue{0.0},
+                                      "Received gather-profile request");
+
+  auto parameters = MakeRefPtr<GatherProfileThreadParameters>(
+      this, mGatherProfileProgress, std::move(aResolve));
+
+  // The GatherProfileThreadFunction thread function will adopt its void*
+  // argument as an already_AddRefed<>, so add the reference it takes over.
+  parameters.get()->AddRef();
+  PRThread* gatherProfileThread = PR_CreateThread(
+      PR_SYSTEM_THREAD, GatherProfileThreadFunction, parameters.get(),
+      PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, 0);
+
+  if (!gatherProfileThread) {
+    // Failed to create and start worker thread, resolve with an empty profile.
+    mozilla::ipc::Shmem shmem;
+    if (AllocShmem(1, &shmem)) {
+      shmem.get<char>()[0] = '\0';
+    }
+    parameters->resolver(IPCProfileAndAdditionalInformation{shmem, Nothing()});
+    // And clean up: drop the reference added for the thread that never ran.
+    parameters.get()->Release();
+    mGatherProfileProgress = nullptr;
+  }
+
+  return IPC_OK();
+}
+
+mozilla::ipc::IPCResult ProfilerChild::RecvGetGatherProfileProgress(
+    GetGatherProfileProgressResolver&& aResolve) {
+  if (mGatherProfileProgress) {  // Non-null while a gather request is tracked.
+    aResolve(GatherProfileProgress{
+        mGatherProfileProgress->Progress().ToUnderlyingType(),
+        nsCString(mGatherProfileProgress->LastLocation())});
+  } else {  // Report an invalid progress value with an explanatory location.
+    aResolve(
+        GatherProfileProgress{ProportionValue::MakeInvalid().ToUnderlyingType(),
+                              nsCString("No gather-profile in progress")});
+  }
+  return IPC_OK();
+}
+
+void ProfilerChild::ActorDestroy(ActorDestroyReason aActorDestroyReason) {
+  mDestroyed = true;  // Checked by other methods before they use the actor.
+}
+
+void ProfilerChild::Destroy() {
+  ResetChunkManager();  // Processes the final chunk update before closing.
+  if (!mDestroyed) {  // Only close if ActorDestroy() has not run yet.
+    Close();
+  }
+}
+
+ProfileAndAdditionalInformation ProfilerChild::GrabShutdownProfile() {
+  LOG("GrabShutdownProfile");
+
+  UniquePtr<ProfilerCodeAddressService> service =
+      profiler_code_address_service_for_presymbolication();
+  FailureLatchSource failureLatch;
+  SpliceableChunkedJSONWriter writer{failureLatch};
+  writer.Start();
+  auto rv = profiler_stream_json_for_this_process(
+      writer, /* aSinceTime */ 0,
+      /* aIsShuttingDown */ true, service.get(), ProgressLogger{});
+  if (rv.isErr()) {
+    const char* failure = writer.GetFailure();
+    return ProfileAndAdditionalInformation(
+        nsPrintfCString("*Profile unavailable for pid %u%s%s",
+                        unsigned(profiler_current_process_id().ToNumber()),
+                        failure ? ", failure: " : "", failure ? failure : ""));
+  }
+
+  auto additionalInfo = rv.unwrap();
+
+  writer.StartArrayProperty("processes");  // Written as an empty array.
+  writer.EndArray();
+  writer.End();
+
+  const size_t len = writer.ChunkedWriteFunc().Length();
+  // This string and information are destined to be sent as a shutdown profile,
+  // which is limited by the maximum IPC message size.
+  // TODO: IPC to change to shmem (bug 1780330), raising this limit to
+  // JS::MaxStringLength.
+  if (len + additionalInfo.SizeOf() >=
+      size_t(IPC::Channel::kMaximumMessageSize)) {  // NOTE(review): the message below reports only `len`.
+    return ProfileAndAdditionalInformation(
+        nsPrintfCString("*Profile from pid %u bigger (%zu) than IPC max (%zu)",
+                        unsigned(profiler_current_process_id().ToNumber()), len,
+                        size_t(IPC::Channel::kMaximumMessageSize)));
+  }
+
+  nsCString profileCString;
+  if (!profileCString.SetLength(len, fallible)) {
+    return ProfileAndAdditionalInformation(nsPrintfCString(
+        "*Could not allocate %zu bytes for profile from pid %u", len,
+        unsigned(profiler_current_process_id().ToNumber())));
+  }
+  MOZ_ASSERT(*(profileCString.Data() + len) == '\0',
+             "We expected a null at the end of the string buffer, to be "
+             "rewritten by CopyDataIntoLazilyAllocatedBuffer");
+
+  char* const profileBeginWriting = profileCString.BeginWriting();
+  if (!profileBeginWriting) {
+    return ProfileAndAdditionalInformation(
+        nsPrintfCString("*Could not write profile from pid %u",
+                        unsigned(profiler_current_process_id().ToNumber())));
+  }
+
+  // At this point `profileCString` has `len` characters (plus a terminating
+  // null) reserved at `profileBeginWriting`; copy the JSON profile there.
+  if (!writer.ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer(
+          [&](size_t aBufferLen) -> char* {
+            MOZ_RELEASE_ASSERT(aBufferLen == len + 1);
+            return profileBeginWriting;
+          })) {
+    return ProfileAndAdditionalInformation(
+        nsPrintfCString("*Could not copy profile from pid %u",
+                        unsigned(profiler_current_process_id().ToNumber())));
+  }
+  MOZ_ASSERT(*(profileCString.Data() + len) == '\0',
+             "We still expected a null at the end of the string buffer");
+
+  return ProfileAndAdditionalInformation{std::move(profileCString),
+                                         std::move(additionalInfo)};
+}
+
+}  // namespace mozilla
diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp
new file mode 100644
index 0000000000..cf33789f69
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp
@@ -0,0 +1,216 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerIOInterposeObserver.h"
+#include "GeckoProfiler.h"
+
+using namespace mozilla;
+
+/* static */
+ProfilerIOInterposeObserver& ProfilerIOInterposeObserver::GetInstance() {
+  static ProfilerIOInterposeObserver sProfilerIOInterposeObserver;  // Single instance, created on first call.
+  return sProfilerIOInterposeObserver;
+}
+
+namespace geckoprofiler::markers {
+struct FileIOMarker {  // Marker type used for all file I/O markers below.
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("FileIO");
+  }
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aOperation,
+                                   const ProfilerString8View& aSource,
+                                   const ProfilerString8View& aFilename,
+                                   MarkerThreadId aOperationThreadId) {
+    aWriter.StringProperty("operation", aOperation);
+    aWriter.StringProperty("source", aSource);
+    if (aFilename.Length() != 0) {  // Omit the property when there is no name.
+      aWriter.StringProperty("filename", aFilename);
+    }
+    if (!aOperationThreadId.IsUnspecified()) {
+      // Tech note: If `ToNumber()` returns a uint64_t, the conversion to
+      // int64_t is "implementation-defined" before C++20. This is acceptable
+      // here, because this is a one-way conversion to a unique identifier
+      // that's used to visually separate data by thread on the front-end.
+      aWriter.IntProperty(
+          "threadId",
+          static_cast<int64_t>(aOperationThreadId.ThreadId().ToNumber()));
+    }
+  }
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable,
+              MS::Location::TimelineFileIO};
+    schema.AddKeyLabelFormatSearchable("operation", "Operation",
+                                       MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("source", "Source", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("filename", "Filename",
+                                       MS::Format::FilePath,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("threadId", "Thread ID",
+                                       MS::Format::String,  // NOTE(review): streamed with IntProperty above; confirm String is intended.
+                                       MS::Searchable::Searchable);
+    return schema;
+  }
+};
+}  // namespace geckoprofiler::markers
+
+static auto GetFilename(IOInterposeObserver::Observation& aObservation) {
+  AUTO_PROFILER_STATS(IO_filename);
+  constexpr size_t scExpectedMaxFilename = 512;  // Size of both buffers below.
+  nsAutoStringN<scExpectedMaxFilename> filename16;
+  aObservation.Filename(filename16);
+  nsAutoCStringN<scExpectedMaxFilename> filename8;
+  if (!filename16.IsEmpty()) {  // Skip the conversion for an empty filename.
+    CopyUTF16toUTF8(filename16, filename8);
+  }
+  return filename8;  // UTF-8 copy of the filename, possibly empty.
+}
+
+void ProfilerIOInterposeObserver::Observe(Observation& aObservation) {
+  if (profiler_is_locked_on_current_thread()) {
+    // Don't observe I/Os originating from the profiler itself (when internally
+    // locked) to avoid deadlocks when calling profiler functions.
+    AUTO_PROFILER_STATS(IO_profiler_locked);
+    return;
+  }
+
+  Maybe<uint32_t> maybeFeatures = profiler_features_if_active_and_unpaused();
+  if (maybeFeatures.isNothing()) {
+    return;
+  }
+  uint32_t features = *maybeFeatures;
+
+  if (!profiler_thread_is_being_profiled_for_markers(
+          profiler_main_thread_id()) &&
+      !profiler_thread_is_being_profiled_for_markers()) {
+    return;  // Neither the main thread nor this thread records markers.
+  }
+
+  AUTO_PROFILER_LABEL("ProfilerIOInterposeObserver", PROFILER);
+  if (IsMainThread()) {
+    // This is the main thread.
+    // Capture a marker only if the "MainThreadIO" feature is on.
+    // If it's not being profiled, we have nowhere to store FileIO markers.
+    if (!profiler_thread_is_being_profiled_for_markers() ||
+        !(features & ProfilerFeature::MainThreadIO)) {
+      return;
+    }
+    AUTO_PROFILER_STATS(IO_MT);
+    nsAutoCString type{aObservation.FileType()};
+    type.AppendLiteral("IO");
+
+    // Store the marker in the current thread.
+    PROFILER_MARKER(
+        type, OTHER,
+        MarkerOptions(
+            MarkerTiming::Interval(aObservation.Start(), aObservation.End()),
+            MarkerStack::Capture()),
+        FileIOMarker,
+        // aOperation
+        ProfilerString8View::WrapNullTerminatedString(
+            aObservation.ObservedOperationString()),
+        // aSource
+        ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()),
+        // aFilename
+        GetFilename(aObservation),
+        // aOperationThreadId - Do not include a thread ID, as it's the same as
+        // the markers. Only include this field when the marker is being sent
+        // from another thread.
+        MarkerThreadId{});
+
+  } else if (profiler_thread_is_being_profiled_for_markers()) {
+    // This is a non-main thread that is being profiled.
+    if (!(features & ProfilerFeature::FileIO)) {
+      return;
+    }
+    AUTO_PROFILER_STATS(IO_off_MT);
+
+    nsAutoCString type{aObservation.FileType()};
+    type.AppendLiteral("IO");
+
+    // Share a backtrace between the marker on this thread, and the marker on
+    // the main thread.
+    UniquePtr<ProfileChunkedBuffer> backtrace = profiler_capture_backtrace();
+
+    // Store the marker in the current thread.
+    PROFILER_MARKER(
+        type, OTHER,
+        MarkerOptions(
+            MarkerTiming::Interval(aObservation.Start(), aObservation.End()),
+            backtrace ? MarkerStack::UseBacktrace(*backtrace)
+                      : MarkerStack::NoStack()),
+        FileIOMarker,
+        // aOperation
+        ProfilerString8View::WrapNullTerminatedString(
+            aObservation.ObservedOperationString()),
+        // aSource
+        ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()),
+        // aFilename
+        GetFilename(aObservation),
+        // aOperationThreadId - Do not include a thread ID, as it's the same as
+        // the markers. Only include this field when the marker is being sent
+        // from another thread.
+        MarkerThreadId{});
+
+    // Store the marker in the main thread as well, with a distinct marker name
+    // and thread id.
+    type.AppendLiteral(" (non-main thread)");
+    PROFILER_MARKER(
+        type, OTHER,
+        MarkerOptions(
+            MarkerTiming::Interval(aObservation.Start(), aObservation.End()),
+            backtrace ? MarkerStack::UseBacktrace(*backtrace)
+                      : MarkerStack::NoStack(),
+            // This is what differs from the marker above:
+            // It will send a marker to the main thread.
+            MarkerThreadId::MainThread()),
+        FileIOMarker,
+        // aOperation
+        ProfilerString8View::WrapNullTerminatedString(
+            aObservation.ObservedOperationString()),
+        // aSource
+        ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()),
+        // aFilename
+        GetFilename(aObservation),
+        // aOperationThreadId - Include the thread ID in the payload.
+        MarkerThreadId::CurrentThread());
+
+  } else {
+    // This is a thread that is not being profiled. We still want to capture
+    // file I/Os (to the main thread) if the "FileIOAll" feature is on.
+    if (!(features & ProfilerFeature::FileIOAll)) {
+      return;
+    }
+    AUTO_PROFILER_STATS(IO_other);
+    nsAutoCString type{aObservation.FileType()};
+    if (profiler_is_active_and_thread_is_registered()) {
+      type.AppendLiteral("IO (non-profiled thread)");
+    } else {
+      type.AppendLiteral("IO (unregistered thread)");
+    }
+
+    // Only store this marker on the main thread, as this thread was not being
+    // profiled.
+    PROFILER_MARKER(
+        type, OTHER,
+        MarkerOptions(
+            MarkerTiming::Interval(aObservation.Start(), aObservation.End()),
+            MarkerStack::Capture(),
+            // Store this marker on the main thread.
+            MarkerThreadId::MainThread()),
+        FileIOMarker,
+        // aOperation
+        ProfilerString8View::WrapNullTerminatedString(
+            aObservation.ObservedOperationString()),
+        // aSource
+        ProfilerString8View::WrapNullTerminatedString(aObservation.Reference()),
+        // aFilename
+        GetFilename(aObservation),
+        // aOperationThreadId - Note which thread this marker is coming from.
+        MarkerThreadId::CurrentThread());
+  }
+}
diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.h b/tools/profiler/gecko/ProfilerIOInterposeObserver.h
new file mode 100644
index 0000000000..9e22a34f15
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.h
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILERIOINTERPOSEOBSERVER_H
+#define PROFILERIOINTERPOSEOBSERVER_H
+
+#include "mozilla/IOInterposer.h"
+#include "nsISupportsImpl.h"
+
+namespace mozilla {
+
+/**
+ * IOInterposer observer that calls into the profiler whenever an I/O
+ * operation is observed; Observe() decides which marker gets recorded.
+ */
+class ProfilerIOInterposeObserver final : public IOInterposeObserver {
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ProfilerIOInterposeObserver)
+
+ public:
+  static ProfilerIOInterposeObserver& GetInstance();
+
+  void Observe(Observation& aObservation) override;
+
+ private:
+  ProfilerIOInterposeObserver() = default;
+  virtual ~ProfilerIOInterposeObserver() = default;
+};
+
+}  // namespace mozilla
+
+#endif  // PROFILERIOINTERPOSEOBSERVER_H
diff --git a/tools/profiler/gecko/ProfilerParent.cpp b/tools/profiler/gecko/ProfilerParent.cpp
new file mode 100644
index 0000000000..83bce6d982
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerParent.cpp
@@ -0,0 +1,1002 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerParent.h"
+
+#ifdef MOZ_GECKO_PROFILER
+#  include "nsProfiler.h"
+#  include "platform.h"
+#endif
+
+#include "GeckoProfiler.h"
+#include "ProfilerControl.h"
+#include "mozilla/BaseAndGeckoProfilerDetail.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ClearOnShutdown.h"
+#include "mozilla/DataMutex.h"
+#include "mozilla/IOInterposer.h"
+#include "mozilla/ipc/Endpoint.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferControlledChunkManager.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/Unused.h"
+#include "nsTArray.h"
+#include "nsThreadUtils.h"
+
+#include <utility>
+
+namespace mozilla {
+
+using namespace ipc;
+
+/* static */
+Endpoint<PProfilerChild> ProfilerParent::CreateForProcess(
+    base::ProcessId aOtherPid) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  Endpoint<PProfilerChild> childEnd;
+#ifdef MOZ_GECKO_PROFILER
+  // Create both ends of the PProfiler channel: the parent end gets bound to a
+  // new ProfilerParent actor, the child end is handed back to the caller.
+  Endpoint<PProfilerParent> parentEnd;
+  if (NS_FAILED(PProfiler::CreateEndpoints(&parentEnd, &childEnd))) {
+    MOZ_CRASH("Failed to create top level actor for PProfiler!");
+  }
+
+  RefPtr<ProfilerParent> profilerParent = new ProfilerParent(aOtherPid);
+  if (!parentEnd.Bind(profilerParent)) {
+    MOZ_CRASH("Failed to bind parent actor for PProfiler!");
+  }
+
+  profilerParent->Init();
+#endif
+
+  return childEnd;
+}
+
+#ifdef MOZ_GECKO_PROFILER
+
+class ProfilerParentTracker;
+
+// This class is responsible for gathering updates from chunk managers in
+// different processes, and for requesting that the oldest chunks be destroyed
+// whenever the given memory limit is reached.
+class ProfileBufferGlobalController final {
+ public:
+  explicit ProfileBufferGlobalController(size_t aMaximumBytes);
+
+  ~ProfileBufferGlobalController();
+
+  void HandleChildChunkManagerUpdate(
+      base::ProcessId aProcessId,
+      ProfileBufferControlledChunkManager::Update&& aUpdate);
+
+  static bool IsLockedOnCurrentThread();
+
+ private:
+  // Calls aF(Json::Value&) with this controller's object in the ProfilingLog.
+  template <typename F>
+  void Log(F&& aF);
+
+  static void LogUpdateChunks(Json::Value& updates, base::ProcessId aProcessId,
+                              const TimeStamp& aTimeStamp, int aChunkDiff);
+  void LogUpdate(base::ProcessId aProcessId,
+                 const ProfileBufferControlledChunkManager::Update& aUpdate);
+  void LogDeletion(base::ProcessId aProcessId, const TimeStamp& aTimeStamp);
+
+  void HandleChunkManagerNonFinalUpdate(
+      base::ProcessId aProcessId,
+      ProfileBufferControlledChunkManager::Update&& aUpdate,
+      ProfileBufferControlledChunkManager& aParentChunkManager);
+
+  const size_t mMaximumBytes;  // Limit for unreleased + released bytes.
+
+  const base::ProcessId mParentProcessId = base::GetCurrentProcId();
+
+  struct ParentChunkManagerAndPendingUpdate {
+    ProfileBufferControlledChunkManager* mChunkManager = nullptr;
+    ProfileBufferControlledChunkManager::Update mPendingUpdate;
+  };
+
+  static DataMutexBase<ParentChunkManagerAndPendingUpdate,
+                       baseprofiler::detail::BaseProfilerMutex>
+      sParentChunkManagerAndPendingUpdate;
+
+  size_t mUnreleasedTotalBytes = 0;  // Sum of mBytes in mUnreleasedBytesByPid.
+
+  struct PidAndBytes {
+    base::ProcessId mProcessId;
+    size_t mBytes;
+
+    // For searching and sorting; only the pid is compared, never mBytes.
+    bool operator==(base::ProcessId aSearchedProcessId) const {
+      return mProcessId == aSearchedProcessId;
+    }
+    bool operator==(const PidAndBytes& aOther) const {
+      return mProcessId == aOther.mProcessId;
+    }
+    bool operator<(base::ProcessId aSearchedProcessId) const {
+      return mProcessId < aSearchedProcessId;
+    }
+    bool operator<(const PidAndBytes& aOther) const {
+      return mProcessId < aOther.mProcessId;
+    }
+  };
+  using PidAndBytesArray = nsTArray<PidAndBytes>;
+  PidAndBytesArray mUnreleasedBytesByPid;  // Kept sorted by pid.
+
+  size_t mReleasedTotalBytes = 0;  // Sum of mBytes in mReleasedChunksByTime.
+
+  struct TimeStampAndBytesAndPid {
+    TimeStamp mTimeStamp;
+    size_t mBytes;
+    base::ProcessId mProcessId;
+
+    // For searching and sorting.
+    bool operator==(const TimeStampAndBytesAndPid& aOther) const {
+      // Two items are equal when both their timestamps and their pids match;
+      // mBytes is not compared.
+      return mTimeStamp == aOther.mTimeStamp && mProcessId == aOther.mProcessId;
+    }
+    bool operator<(const TimeStampAndBytesAndPid& aOther) const {
+      // Sort first by timestamps, and then by pid in rare cases with the same
+      // timestamps.
+      return mTimeStamp < aOther.mTimeStamp ||
+             (MOZ_UNLIKELY(mTimeStamp == aOther.mTimeStamp) &&
+              mProcessId < aOther.mProcessId);
+    }
+  };
+  using TimeStampAndBytesAndPidArray = nsTArray<TimeStampAndBytesAndPid>;
+  TimeStampAndBytesAndPidArray mReleasedChunksByTime;  // Sorted, oldest first.
+};
+
+/* static */  // Definition of the static data member declared in the class.
+DataMutexBase<ProfileBufferGlobalController::ParentChunkManagerAndPendingUpdate,
+              baseprofiler::detail::BaseProfilerMutex>
+    ProfileBufferGlobalController::sParentChunkManagerAndPendingUpdate{
+        "ProfileBufferGlobalController::sParentChunkManagerAndPendingUpdate"};
+
+// This singleton class tracks live ProfilerParent's (meaning there's a current
+// connection with a child process).
+// It also knows when the local profiler is running.
+// And when both the profiler is running and at least one child is present, it
+// creates a ProfileBufferGlobalController and forwards chunk updates to it.
+class ProfilerParentTracker final {
+ public:
+  static void StartTracking(ProfilerParent* aParent);
+  static void StopTracking(ProfilerParent* aParent);
+  // Record that the parent profiler runs (with aEntries entries) or stops.
+  static void ProfilerStarted(uint32_t aEntries);
+  static void ProfilerWillStopIfStarted();
+
+  // Number of non-destroyed tracked ProfilerParents.
+  static size_t ProfilerParentCount();
+  // Calls aIterFunc(ProfilerParent*) for each non-destroyed tracked parent.
+  template <typename FuncType>
+  static void Enumerate(FuncType&& aIterFunc);
+  // Calls aIterFunc on the tracked parent for aChildPid, unless destroyed.
+  template <typename FuncType>
+  static void ForChild(base::ProcessId aChildPid, FuncType&& aIterFunc);
+
+  static void ForwardChildChunkManagerUpdate(
+      base::ProcessId aProcessId,
+      ProfileBufferControlledChunkManager::Update&& aUpdate);
+
+  ProfilerParentTracker();
+  ~ProfilerParentTracker();
+
+ private:
+  // Get the singleton instance; Create one on the first request, unless we are
+  // past XPCOMShutdownThreads, which is when it should get destroyed.
+  static ProfilerParentTracker* GetInstance();
+
+  // List of parents for currently-connected child processes.
+  nsTArray<ProfilerParent*> mProfilerParents;
+
+  // If non-0, the parent profiler is running, with this limit (in number of
+  // entries.) This is needed here, because the parent profiler may start
+  // running before child processes are known (e.g., startup profiling).
+  uint32_t mEntries = 0;
+
+  // When the profiler is running and there is at least one parent-child
+  // connection, this is the controller that should receive chunk updates.
+  Maybe<ProfileBufferGlobalController> mMaybeController;
+};
+
+static const Json::StaticString logRoot{"bufferGlobalController"};
+// Runs aF on the logRoot object of the ProfilingLog, creating it if needed.
+template <typename F>
+void ProfileBufferGlobalController::Log(F&& aF) {
+  ProfilingLog::Access([&](Json::Value& aLog) {
+    Json::Value& root = aLog[logRoot];
+    if (!root.isObject()) {
+      root = Json::Value(Json::objectValue);
+      root[Json::StaticString{"logBegin" TIMESTAMP_JSON_SUFFIX}] =
+          ProfilingLog::Timestamp();
+    }
+    std::forward<F>(aF)(root);
+  });
+}
+
+/* static */
+void ProfileBufferGlobalController::LogUpdateChunks(
+    Json::Value& updates, base::ProcessId aProcessId,
+    const TimeStamp& aTimeStamp, int aChunkDiff) {
+  // Each log entry is a 3-element array: [pid, timestamp, chunk count diff].
+  MOZ_ASSERT(updates.isArray());
+  Json::Value entry{Json::arrayValue};
+  entry.append(Json::Value{Json::UInt64(aProcessId)});
+  entry.append(ProfilingLog::Timestamp(aTimeStamp));
+  entry.append(Json::Value{Json::Int(aChunkDiff)});
+  updates.append(std::move(entry));
+}
+
+void ProfileBufferGlobalController::LogUpdate(
+    base::ProcessId aProcessId,
+    const ProfileBufferControlledChunkManager::Update& aUpdate) {
+  Log([&](Json::Value& aRoot) {
+    Json::Value& updates = aRoot[Json::StaticString{"updates"}];
+    if (!updates.isArray()) {  // New array: also describe the row layout.
+      aRoot[Json::StaticString{"updatesSchema"}] =
+          Json::StaticString{"0: pid, 1: chunkRelease_TSms, 2: chunkDiff"};
+      updates = Json::Value{Json::arrayValue};
+    }
+    if (aUpdate.IsFinal()) {  // Final update: one row, null time, no diff.
+      LogUpdateChunks(updates, aProcessId, TimeStamp{}, 0);
+    } else if (!aUpdate.IsNotUpdate()) {  // One +1 row per released chunk.
+      for (const auto& chunk : aUpdate.NewlyReleasedChunksRef()) {
+        LogUpdateChunks(updates, aProcessId, chunk.mDoneTimeStamp, 1);
+      }
+    }
+  });
+}
+
+void ProfileBufferGlobalController::LogDeletion(base::ProcessId aProcessId,
+                                                const TimeStamp& aTimeStamp) {
+  Log([&](Json::Value& aControllerLog) {
+    Json::Value& rows = aControllerLog[Json::StaticString{"updates"}];
+    if (!rows.isArray()) {
+      rows = Json::Value{Json::arrayValue};
+    }
+    LogUpdateChunks(rows, aProcessId, aTimeStamp, -1);  // -1: one chunk less.
+  });
+}
+
+ProfileBufferGlobalController::ProfileBufferGlobalController(
+    size_t aMaximumBytes)
+    : mMaximumBytes(aMaximumBytes) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  // Record in the profiling log when this controller was created.
+  Log([](Json::Value& aRoot) {
+    aRoot[Json::StaticString{"controllerCreationTime" TIMESTAMP_JSON_SUFFIX}] =
+        ProfilingLog::Timestamp();
+  });
+
+  // This is the local chunk manager for this parent process, so updates can be
+  // handled here.
+  ProfileBufferControlledChunkManager* parentChunkManager =
+      profiler_get_controlled_chunk_manager();
+
+  if (NS_WARN_IF(!parentChunkManager)) {
+    Log([](Json::Value& aRoot) {
+      aRoot[Json::StaticString{"controllerCreationFailureReason"}] =
+          "No parent chunk manager";
+    });
+    return;
+  }
+  // Publish the chunk manager under the lock, before installing the callback.
+  {
+    auto lockedParentChunkManagerAndPendingUpdate =
+        sParentChunkManagerAndPendingUpdate.Lock();
+    lockedParentChunkManagerAndPendingUpdate->mChunkManager =
+        parentChunkManager;
+  }
+  // The callback captures `this`; see the destructor for how it gets reset.
+  parentChunkManager->SetUpdateCallback(
+      [this](ProfileBufferControlledChunkManager::Update&& aUpdate) {
+        MOZ_ASSERT(!aUpdate.IsNotUpdate(),
+                   "Update callback should never be given a non-update");
+        auto lockedParentChunkManagerAndPendingUpdate =
+            sParentChunkManagerAndPendingUpdate.Lock();
+        if (aUpdate.IsFinal()) {
+          // Final update of the parent.
+          // We cannot keep the chunk manager, and there's no point handling
+          // updates anymore. Do some cleanup now, to free resources before
+          // we're destroyed.
+          lockedParentChunkManagerAndPendingUpdate->mChunkManager = nullptr;
+          lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.Clear();
+          mUnreleasedTotalBytes = 0;
+          mUnreleasedBytesByPid.Clear();
+          mReleasedTotalBytes = 0;
+          mReleasedChunksByTime.Clear();
+          return;
+        }
+        if (!lockedParentChunkManagerAndPendingUpdate->mChunkManager) {
+          // No chunk manager, ignore updates.
+          return;
+        }
+        // Special handling of parent non-final updates:
+        // These updates are coming from *this* process, and may originate from
+        // scopes in any thread where any lock is held, so using other locks (to
+        // e.g., dispatch tasks or send IPCs) could trigger a deadlock. Instead,
+        // parent updates are stored locally and handled when the next
+        // non-parent update needs handling, see HandleChildChunkManagerUpdate.
+        lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.Fold(
+            std::move(aUpdate));
+      });
+}
+
+ProfileBufferGlobalController::~ProfileBufferGlobalController() {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  // Under the lock: drop any pending parent update and take the parent chunk
+  // manager out of the shared state (if still set), so that any update
+  // arriving after this gets ignored. The lock is released at the end of the
+  // lambda, i.e., before the callback reset below.
+  ProfileBufferControlledChunkManager* const chunkManager = []() {
+    auto locked = sParentChunkManagerAndPendingUpdate.Lock();
+    locked->mPendingUpdate.Clear();
+    return std::exchange(locked->mChunkManager, nullptr);
+  }();
+  if (chunkManager) {
+    // No final update was received yet, so the chunk manager is still valid.
+    // Resetting its callback immediately invokes the current callback with
+    // the final empty update; see its handling in the constructor.
+    chunkManager->SetUpdateCallback({});
+  }
+}
+
+void ProfileBufferGlobalController::HandleChildChunkManagerUpdate(
+    base::ProcessId aProcessId,
+    ProfileBufferControlledChunkManager::Update&& aUpdate) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  // The parent's own updates go through its chunk manager callback instead.
+  MOZ_ASSERT(aProcessId != mParentProcessId);
+
+  MOZ_ASSERT(!aUpdate.IsNotUpdate(),
+             "HandleChildChunkManagerUpdate should not be given a non-update");
+  // This lock is held until the end of this function.
+  auto lockedParentChunkManagerAndPendingUpdate =
+      sParentChunkManagerAndPendingUpdate.Lock();
+  if (!lockedParentChunkManagerAndPendingUpdate->mChunkManager) {
+    // No chunk manager, ignore updates.
+    return;
+  }
+
+  if (aUpdate.IsFinal()) {
+    // Final update in a child process, remove all traces of that process.
+    LogUpdate(aProcessId, aUpdate);
+    size_t index = mUnreleasedBytesByPid.BinaryIndexOf(aProcessId);
+    if (index != PidAndBytesArray::NoIndex) {
+      // We already have a value for this pid.
+      PidAndBytes& pidAndBytes = mUnreleasedBytesByPid[index];
+      mUnreleasedTotalBytes -= pidAndBytes.mBytes;
+      mUnreleasedBytesByPid.RemoveElementAt(index);
+    }
+
+    size_t released = 0;
+    mReleasedChunksByTime.RemoveElementsBy(
+        [&released, aProcessId](const auto& chunk) {
+          const bool match = chunk.mProcessId == aProcessId;
+          if (match) {
+            released += chunk.mBytes;
+          }
+          return match;
+        });
+    if (released != 0) {
+      mReleasedTotalBytes -= released;
+    }
+
+    // Total can only have gone down, so there's no need to check the limit.
+    return;
+  }
+
+  // Non-final update in child process.
+
+  // Before handling the child update, we may have pending updates from the
+  // parent, which can be processed now since we're in an IPC callback outside
+  // of any profiler-related scope.
+  if (!lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.IsNotUpdate()) {
+    MOZ_ASSERT(
+        !lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.IsFinal());
+    HandleChunkManagerNonFinalUpdate(
+        mParentProcessId,
+        std::move(lockedParentChunkManagerAndPendingUpdate->mPendingUpdate),
+        *lockedParentChunkManagerAndPendingUpdate->mChunkManager);
+    lockedParentChunkManagerAndPendingUpdate->mPendingUpdate.Clear();
+  }
+
+  HandleChunkManagerNonFinalUpdate(
+      aProcessId, std::move(aUpdate),
+      *lockedParentChunkManagerAndPendingUpdate->mChunkManager);
+}
+
+/* static */  // Reports whether this thread holds the controller's mutex.
+bool ProfileBufferGlobalController::IsLockedOnCurrentThread() {
+  return sParentChunkManagerAndPendingUpdate.Mutex().IsLockedOnCurrentThread();
+}
+
+void ProfileBufferGlobalController::HandleChunkManagerNonFinalUpdate(
+    base::ProcessId aProcessId,
+    ProfileBufferControlledChunkManager::Update&& aUpdate,
+    ProfileBufferControlledChunkManager& aParentChunkManager) {
+  MOZ_ASSERT(!aUpdate.IsFinal());
+  LogUpdate(aProcessId, aUpdate);
+  // Step 1: Update the unreleased bytes of aProcessId, and the running total.
+  size_t index = mUnreleasedBytesByPid.BinaryIndexOf(aProcessId);
+  if (index != PidAndBytesArray::NoIndex) {
+    // We already have a value for this pid.
+    PidAndBytes& pidAndBytes = mUnreleasedBytesByPid[index];
+    mUnreleasedTotalBytes =
+        mUnreleasedTotalBytes - pidAndBytes.mBytes + aUpdate.UnreleasedBytes();
+    pidAndBytes.mBytes = aUpdate.UnreleasedBytes();
+  } else {
+    // New pid.
+    mUnreleasedBytesByPid.InsertElementSorted(
+        PidAndBytes{aProcessId, aUpdate.UnreleasedBytes()});
+    mUnreleasedTotalBytes += aUpdate.UnreleasedBytes();
+  }
+  // Step 2: Forget the released chunks that aProcessId does not have anymore.
+  size_t destroyedReleased = 0;
+  if (!aUpdate.OldestDoneTimeStamp().IsNull()) {
+    size_t i = 0;
+    for (; i < mReleasedChunksByTime.Length(); ++i) {
+      if (mReleasedChunksByTime[i].mTimeStamp >=
+          aUpdate.OldestDoneTimeStamp()) {
+        break;
+      }
+    }
+    // Here, i is the index of the first item that's at or after
+    // aUpdate.mOldestDoneTimeStamp, so chunks from aProcessId before that have
+    // been destroyed.
+    while (i != 0) {
+      --i;
+      const TimeStampAndBytesAndPid& item = mReleasedChunksByTime[i];
+      if (item.mProcessId == aProcessId) {
+        destroyedReleased += item.mBytes;
+        mReleasedChunksByTime.RemoveElementAt(i);
+      }
+    }
+  }
+  // Step 3: Record the chunks that aProcessId has newly released.
+  size_t newlyReleased = 0;
+  for (const ProfileBufferControlledChunkManager::ChunkMetadata& chunk :
+       aUpdate.NewlyReleasedChunksRef()) {
+    newlyReleased += chunk.mBufferBytes;
+    mReleasedChunksByTime.InsertElementSorted(TimeStampAndBytesAndPid{
+        chunk.mDoneTimeStamp, chunk.mBufferBytes, aProcessId});
+  }
+
+  mReleasedTotalBytes = mReleasedTotalBytes - destroyedReleased + newlyReleased;
+
+#  ifdef DEBUG
+  size_t totalReleased = 0;
+  for (const TimeStampAndBytesAndPid& item : mReleasedChunksByTime) {
+    totalReleased += item.mBytes;
+  }
+  MOZ_ASSERT(mReleasedTotalBytes == totalReleased);
+#  endif  // DEBUG
+
+  // Step 4: Request destruction of the oldest chunks while over the limit.
+  while (mUnreleasedTotalBytes + mReleasedTotalBytes > mMaximumBytes &&
+         !mReleasedChunksByTime.IsEmpty()) {
+    // We have reached the global memory limit, and there *are* released chunks
+    // that can be destroyed. Start with the first one, which is the oldest.
+    const TimeStampAndBytesAndPid& oldest = mReleasedChunksByTime[0];
+    LogDeletion(oldest.mProcessId, oldest.mTimeStamp);
+    mReleasedTotalBytes -= oldest.mBytes;
+    if (oldest.mProcessId == mParentProcessId) {
+      aParentChunkManager.DestroyChunksAtOrBefore(oldest.mTimeStamp);
+    } else {
+      ProfilerParentTracker::ForChild(
+          oldest.mProcessId,
+          [timestamp = oldest.mTimeStamp](ProfilerParent* profilerParent) {
+            Unused << profilerParent->SendDestroyReleasedChunksAtOrBefore(
+                timestamp);
+          });
+    }
+    mReleasedChunksByTime.RemoveElementAt(0);
+  }
+}
+
+/* static */
+ProfilerParentTracker* ProfilerParentTracker::GetInstance() {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  // The main instance pointer: it is created lazily on the first request, at
+  // most once, and only before XPCOMShutdownThreads.
+  // When there is no instance and that shutdown phase has been reached,
+  // nothing gets created, and the (null) content of the pointer is what gets
+  // returned.
+  static UniquePtr<ProfilerParentTracker> instance = nullptr;
+  if (MOZ_UNLIKELY(!instance) &&
+      !PastShutdownPhase(ShutdownPhase::XPCOMShutdownThreads)) {
+    instance = MakeUnique<ProfilerParentTracker>();
+
+    // The tracker should get destroyed before threads are shutdown, because its
+    // destruction closes extant channels, which could trigger promise
+    // rejections that need to be dispatched to other threads.
+    ClearOnShutdown(&instance, ShutdownPhase::XPCOMShutdownThreads);
+  }
+
+  return instance.get();
+}
+
+/* static */
+void ProfilerParentTracker::StartTracking(ProfilerParent* aProfilerParent) {
+  ProfilerParentTracker* const instance = GetInstance();
+  if (instance == nullptr) {
+    return;
+  }
+
+  const bool profilerIsRunning = instance->mEntries != 0;
+  if (profilerIsRunning && instance->mMaybeController.isNothing()) {
+    // The profiler has started, but there is no controller yet: adding a
+    // ProfilerParent is a good time to start controlling the global memory
+    // usage of the profiler. (Creating it this late also helps, because the
+    // parent profiler can start *very* early in the process, when some
+    // resources like threads are not ready yet.)
+    instance->mMaybeController.emplace(size_t(instance->mEntries) * 8u);
+  }
+
+  instance->mProfilerParents.AppendElement(aProfilerParent);
+}
+
+/* static */
+void ProfilerParentTracker::StopTracking(ProfilerParent* aParent) {
+  // GetInstance() returns null when there is no tracker (it is not created
+  // past XPCOMShutdownThreads), in which case there is no list to remove
+  // aParent from.
+  if (ProfilerParentTracker* instance = GetInstance()) {
+    instance->mProfilerParents.RemoveElement(aParent);
+  }
+}
+
+/* static */
+void ProfilerParentTracker::ProfilerStarted(uint32_t aEntries) {
+  ProfilerParentTracker* const instance = GetInstance();
+  if (instance == nullptr) {
+    return;
+  }
+
+  // Remember the entry count: StartTracking() needs it when the controller
+  // cannot be created yet because no child process is tracked so far.
+  instance->mEntries = aEntries;
+  const bool hasChildren = !instance->mProfilerParents.IsEmpty();
+  if (hasChildren && instance->mMaybeController.isNothing()) {
+    // Children are already tracked: start controlling global memory usage.
+    instance->mMaybeController.emplace(size_t(instance->mEntries) * 8u);
+  }
+}
+
+/* static */
+void ProfilerParentTracker::ProfilerWillStopIfStarted() {
+  if (ProfilerParentTracker* instance = GetInstance()) {
+    // Zero entries means that the parent profiler is not running; without a
+    // running profiler there is nothing left for a controller to control, so
+    // the controller (if any) gets destroyed as well.
+    instance->mEntries = 0;
+    instance->mMaybeController = Nothing{};
+  }
+}
+
+/* static */
+size_t ProfilerParentTracker::ProfilerParentCount() {
+  ProfilerParentTracker* instance = GetInstance();
+  if (!instance) {
+    return 0;
+  }
+  // Only count the parents that have not been marked as destroyed.
+  size_t liveCount = 0;
+  for (ProfilerParent* parent : instance->mProfilerParents) {
+    liveCount += parent->mDestroyed ? 0 : 1;
+  }
+  return liveCount;
+}
+
+template <typename FuncType>
+/* static */
+void ProfilerParentTracker::Enumerate(FuncType&& aIterFunc) {
+  if (ProfilerParentTracker* instance = GetInstance()) {
+    // aIterFunc may run several times, which is why it is invoked as an
+    // lvalue here instead of being forwarded.
+    for (ProfilerParent* parent : instance->mProfilerParents) {
+      if (parent->mDestroyed) {
+        continue;  // Skip parents that are marked as destroyed.
+      }
+      aIterFunc(parent);
+    }
+  }
+}
+
+template <typename FuncType>
+/* static */
+void ProfilerParentTracker::ForChild(base::ProcessId aChildPid,
+                                     FuncType&& aIterFunc) {
+  if (ProfilerParentTracker* instance = GetInstance()) {
+    for (ProfilerParent* parent : instance->mProfilerParents) {
+      if (parent->mChildPid != aChildPid) {
+        continue;
+      }
+      // Only the first parent with a matching pid is considered, and the
+      // function only runs if that parent has not been destroyed.
+      if (!parent->mDestroyed) {
+        std::forward<FuncType>(aIterFunc)(parent);
+      }
+      return;
+    }
+  }
+}
+
+/* static */
+void ProfilerParentTracker::ForwardChildChunkManagerUpdate(
+    base::ProcessId aProcessId,
+    ProfileBufferControlledChunkManager::Update&& aUpdate) {
+  // Updates are dropped when there is no tracker or no controller.
+  ProfilerParentTracker* const instance = GetInstance();
+  if (instance == nullptr || instance->mMaybeController.isNothing()) {
+    return;
+  }
+  MOZ_ASSERT(!aUpdate.IsNotUpdate(),
+             "No process should ever send a non-update");
+  auto& controller = *instance->mMaybeController;
+  controller.HandleChildChunkManagerUpdate(aProcessId, std::move(aUpdate));
+}
+
+ProfilerParentTracker::ProfilerParentTracker() {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());  // Only created on the main thread.
+  MOZ_COUNT_CTOR(ProfilerParentTracker);
+}
+
+ProfilerParentTracker::~ProfilerParentTracker() {
+  // This destructor should only be called on the main thread.
+  MOZ_RELEASE_ASSERT(NS_IsMainThread() ||
+                     // OR we're not on the main thread (including if we are
+                     // past the end of `main()`), which is fine *if* there are
+                     // no ProfilerParent's still registered, in which case
+                     // nothing else will happen in this destructor anyway.
+                     // See bug 1713971 for more information.
+                     mProfilerParents.IsEmpty());
+  MOZ_COUNT_DTOR(ProfilerParentTracker);
+  // Close the channels of any profiler parents that haven't been destroyed.
+  // (On a clone, presumably as closing can end up modifying mProfilerParents.)
+  for (ProfilerParent* profilerParent : mProfilerParents.Clone()) {
+    if (!profilerParent->mDestroyed) {
+      // Keep the object alive until the call to Close() has completed.
+      // Close() will trigger a call to DeallocPProfilerParent.
+      RefPtr<ProfilerParent> actor = profilerParent;
+      actor->Close();
+    }
+  }
+}
+
+ProfilerParent::ProfilerParent(base::ProcessId aChildPid)
+    : mChildPid(aChildPid), mDestroyed(false) {
+  MOZ_COUNT_CTOR(ProfilerParent);
+  // Registration with the ProfilerParentTracker happens later, in Init().
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+}
+
+void ProfilerParent::Init() {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  ProfilerParentTracker::StartTracking(this);
+
+  // We propagated the profiler state from the parent process to the child
+  // process through MOZ_PROFILER_STARTUP* environment variables.
+  // However, the profiler state might have changed in this process since then,
+  // and now that an active communication channel has been established with the
+  // child process, it's a good time to sync up the two profilers again.
+
+  int entries = 0;
+  Maybe<double> duration = Nothing();
+  double interval = 0;
+  mozilla::Vector<const char*> filters;
+  uint32_t features;
+  uint64_t activeTabID;
+  profiler_get_start_params(&entries, &duration, &interval, &features, &filters,
+                            &activeTabID);
+
+  if (entries != 0) {
+    ProfilerInitParams ipcParams;
+    ipcParams.enabled() = true;
+    ipcParams.entries() = entries;
+    ipcParams.duration() = duration;
+    ipcParams.interval() = interval;
+    ipcParams.features() = features;
+    ipcParams.activeTabID() = activeTabID;
+
+    // If the filters exclude the child's pid, make sure the child profiler is
+    // stopped, otherwise continue with starting it.
+    if (!profiler::detail::FiltersExcludePid(
+            filters, ProfilerProcessId::FromNumber(mChildPid))) {
+      ipcParams.filters().SetCapacity(filters.length());
+      for (const char* filter : filters) {
+        ipcParams.filters().AppendElement(filter);
+      }
+
+      Unused << SendEnsureStarted(ipcParams);
+      RequestChunkManagerUpdate();
+      return;
+    }
+  }
+  // Reached when entries is 0, or when the filters exclude the child's pid.
+  Unused << SendStop();
+}
+#endif  // MOZ_GECKO_PROFILER
+
+ProfilerParent::~ProfilerParent() {
+  MOZ_COUNT_DTOR(ProfilerParent);
+
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+#ifdef MOZ_GECKO_PROFILER
+  ProfilerParentTracker::StopTracking(this);  // The tracker has a raw pointer.
+#endif
+}
+
+#ifdef MOZ_GECKO_PROFILER
+/* static */
+nsTArray<ProfilerParent::SingleProcessProfilePromiseAndChildPid>
+ProfilerParent::GatherProfiles() {
+  nsTArray<SingleProcessProfilePromiseAndChildPid> promisesAndPids;
+  // Off the main thread, the array is returned empty.
+  if (NS_IsMainThread()) {
+    // One element per non-destroyed ProfilerParent, with a pre-set capacity.
+    promisesAndPids.SetCapacity(ProfilerParentTracker::ProfilerParentCount());
+    ProfilerParentTracker::Enumerate([&](ProfilerParent* aParent) {
+      promisesAndPids.AppendElement(SingleProcessProfilePromiseAndChildPid{
+          aParent->SendGatherProfile(), aParent->mChildPid});
+    });
+  }
+  return promisesAndPids;
+}
+
+/* static */
+RefPtr<ProfilerParent::SingleProcessProgressPromise>
+ProfilerParent::RequestGatherProfileProgress(base::ProcessId aChildPid) {
+  RefPtr<SingleProcessProgressPromise> progressPromise;  // Null if no match.
+  auto sendRequest = [&progressPromise](ProfilerParent* aParent) {
+    progressPromise = aParent->SendGetGatherProfileProgress();
+  };
+  ProfilerParentTracker::ForChild(aChildPid, sendRequest);
+  return progressPromise;
+}
+
+// Magic value for ProfileBufferChunkManagerUpdate::unreleasedBytes meaning
+// that this is a final update from a child (see RequestChunkManagerUpdate).
+constexpr static uint64_t scUpdateUnreleasedBytesFINAL = uint64_t(-1);
+
+/* static */
+ProfileBufferChunkManagerUpdate ProfilerParent::MakeFinalUpdate() {
+  return ProfileBufferChunkManagerUpdate{  // Magic value, the rest is empty.
+      uint64_t(scUpdateUnreleasedBytesFINAL), 0, TimeStamp{},
+      nsTArray<ProfileBufferChunkMetadata>{}};
+}
+
+/* static */  // Forwards to the ProfileBufferGlobalController lock check.
+bool ProfilerParent::IsLockedOnCurrentThread() {
+  return ProfileBufferGlobalController::IsLockedOnCurrentThread();
+}
+
+void ProfilerParent::RequestChunkManagerUpdate() {
+  if (mDestroyed) {
+    return;
+  }
+  // Ask the child for its next update; a non-final answer starts a new request.
+  RefPtr<AwaitNextChunkManagerUpdatePromise> updatePromise =
+      SendAwaitNextChunkManagerUpdate();
+  updatePromise->Then(
+      GetMainThreadSerialEventTarget(), __func__,
+      [self = RefPtr<ProfilerParent>(this)](
+          const ProfileBufferChunkManagerUpdate& aUpdate) {
+        if (aUpdate.unreleasedBytes() == scUpdateUnreleasedBytesFINAL) {
+          // Special value meaning it's the final update from that child.
+          ProfilerParentTracker::ForwardChildChunkManagerUpdate(
+              self->mChildPid,
+              ProfileBufferControlledChunkManager::Update(nullptr));
+        } else {
+          // Not the final update, translate it.
+          std::vector<ProfileBufferControlledChunkManager::ChunkMetadata>
+              chunks;
+          if (!aUpdate.newlyReleasedChunks().IsEmpty()) {
+            chunks.reserve(aUpdate.newlyReleasedChunks().Length());
+            for (const ProfileBufferChunkMetadata& chunk :
+                 aUpdate.newlyReleasedChunks()) {
+              chunks.emplace_back(chunk.doneTimeStamp(), chunk.bufferBytes());
+            }
+          }
+          // Let the tracker handle it.
+          ProfilerParentTracker::ForwardChildChunkManagerUpdate(
+              self->mChildPid,
+              ProfileBufferControlledChunkManager::Update(
+                  aUpdate.unreleasedBytes(), aUpdate.releasedBytes(),
+                  aUpdate.oldestDoneTimeStamp(), std::move(chunks)));
+          // This was not a final update, so start a new request.
+          self->RequestChunkManagerUpdate();
+        }
+      },
+      [self = RefPtr<ProfilerParent>(this)](
+          mozilla::ipc::ResponseRejectReason aReason) {
+        // Rejection could be for a number of reasons, assume the child will
+        // not respond anymore, so we pretend we received a final update.
+        ProfilerParentTracker::ForwardChildChunkManagerUpdate(
+            self->mChildPid,
+            ProfileBufferControlledChunkManager::Update(nullptr));
+      });
+}
+
+// Ref-counted class that resolves a promise on destruction.
+// Usage:
+// RefPtr<GenericPromise> f() {
+//   return PromiseResolverOnDestruction::RunTask(
+//     [](RefPtr<PromiseResolverOnDestruction> aPromiseResolver){
+//       // Give *copies* of aPromiseResolver to asynchronous sub-tasks, the
+//       // last remaining RefPtr destruction will resolve the promise.
+//     });
+// }
+class PromiseResolverOnDestruction {
+ public:
+  NS_INLINE_DECL_REFCOUNTING(PromiseResolverOnDestruction)
+  // Creates a resolver, gives it to aTaskFunction, and returns its promise.
+  template <typename TaskFunction>
+  static RefPtr<GenericPromise> RunTask(TaskFunction&& aTaskFunction) {
+    RefPtr<PromiseResolverOnDestruction> promiseResolver =
+        new PromiseResolverOnDestruction();
+    RefPtr<GenericPromise> promise =
+        promiseResolver->mPromiseHolder.Ensure(__func__);
+    std::forward<TaskFunction>(aTaskFunction)(std::move(promiseResolver));
+    return promise;
+  }
+
+ private:
+  PromiseResolverOnDestruction() = default;
+  // Resolves the promise (if it exists) when this object gets destroyed.
+  ~PromiseResolverOnDestruction() {
+    mPromiseHolder.ResolveIfExists(/* unused */ true, __func__);
+  }
+
+  MozPromiseHolder<GenericPromise> mPromiseHolder;
+};
+
+// Given a ProfilerParentSendFunction: (ProfilerParent*) -> some MozPromise,
+// run it on all live ProfilerParents and return a GenericPromise that resolves
+// once all their promises have settled (immediately when off the main thread).
+template <typename ProfilerParentSendFunction>
+static RefPtr<GenericPromise> SendAndConvertPromise(
+    ProfilerParentSendFunction&& aProfilerParentSendFunction) {
+  if (!NS_IsMainThread()) {
+    return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
+  }
+  // Every Then() callback below captures its own copy of the resolver RefPtr.
+  return PromiseResolverOnDestruction::RunTask(
+      [&](RefPtr<PromiseResolverOnDestruction> aPromiseResolver) {
+        ProfilerParentTracker::Enumerate([&](ProfilerParent* profilerParent) {
+          std::forward<ProfilerParentSendFunction>(aProfilerParentSendFunction)(
+              profilerParent)
+              ->Then(GetMainThreadSerialEventTarget(), __func__,
+                     [aPromiseResolver](
+                         typename std::remove_reference_t<
+                             decltype(*std::forward<ProfilerParentSendFunction>(
+                                 aProfilerParentSendFunction)(
+                                 profilerParent))>::ResolveOrRejectValue&&) {
+                       // Resolution and rejection are treated alike: nothing
+                       // to do, this lambda only keeps aPromiseResolver alive.
+                     });
+        });
+      });
+}
+
+/* static */  // Off the main thread: returns an already-resolved promise.
+RefPtr<GenericPromise> ProfilerParent::ProfilerStarted(
+    nsIProfilerStartParams* aParams) {
+  if (!NS_IsMainThread()) {
+    return GenericPromise::CreateAndResolve(/* unused */ true, __func__);
+  }
+
+  ProfilerInitParams ipcParams;
+  double duration = 0.0;  // Stays "no duration" if the getter writes nothing.
+  ipcParams.enabled() = true;
+  aParams->GetEntries(&ipcParams.entries());
+  aParams->GetDuration(&duration);
+  if (duration > 0.0) {
+    ipcParams.duration() = Some(duration);
+  } else {
+    ipcParams.duration() = Nothing();
+  }
+  aParams->GetInterval(&ipcParams.interval());
+  aParams->GetFeatures(&ipcParams.features());
+  ipcParams.filters() = aParams->GetFilters().Clone();
+  // We need filters as a Span<const char*> to test pids in the lambda below.
+  auto filtersCStrings = nsTArray<const char*>{aParams->GetFilters().Length()};
+  for (const auto& filter : aParams->GetFilters()) {
+    filtersCStrings.AppendElement(filter.Data());  // Points into aParams.
+  }
+  aParams->GetActiveTabID(&ipcParams.activeTabID());
+
+  ProfilerParentTracker::ProfilerStarted(ipcParams.entries());
+  // The [&] lambda is only run by SendAndConvertPromise before it returns.
+  return SendAndConvertPromise([&](ProfilerParent* profilerParent) {
+    if (profiler::detail::FiltersExcludePid(
+            filtersCStrings,
+            ProfilerProcessId::FromNumber(profilerParent->mChildPid))) {
+      // This pid is excluded, don't start the profiler at all.
+      return PProfilerParent::StartPromise::CreateAndResolve(/* unused */ true,
+                                                             __func__);
+    }
+    auto promise = profilerParent->SendStart(ipcParams);
+    profilerParent->RequestChunkManagerUpdate();
+    return promise;
+  });
+}
+
+/* static */  // Forwards to ProfilerParentTracker; no-op off the main thread.
+void ProfilerParent::ProfilerWillStopIfStarted() {
+  if (!NS_IsMainThread()) {
+    return;
+  }
+
+  ProfilerParentTracker::ProfilerWillStopIfStarted();
+}
+
+/* static */  // Calls SendStop() on each enumerated ProfilerParent.
+RefPtr<GenericPromise> ProfilerParent::ProfilerStopped() {
+  return SendAndConvertPromise([](ProfilerParent* profilerParent) {
+    return profilerParent->SendStop();
+  });
+}
+
+/* static */  // Calls SendPause() on each enumerated ProfilerParent.
+RefPtr<GenericPromise> ProfilerParent::ProfilerPaused() {
+  return SendAndConvertPromise([](ProfilerParent* profilerParent) {
+    return profilerParent->SendPause();
+  });
+}
+
+/* static */  // Calls SendResume() on each enumerated ProfilerParent.
+RefPtr<GenericPromise> ProfilerParent::ProfilerResumed() {
+  return SendAndConvertPromise([](ProfilerParent* profilerParent) {
+    return profilerParent->SendResume();
+  });
+}
+
+/* static */  // Calls SendPauseSampling() on each enumerated ProfilerParent.
+RefPtr<GenericPromise> ProfilerParent::ProfilerPausedSampling() {
+  return SendAndConvertPromise([](ProfilerParent* profilerParent) {
+    return profilerParent->SendPauseSampling();
+  });
+}
+
+/* static */  // Calls SendResumeSampling() on each enumerated ProfilerParent.
+RefPtr<GenericPromise> ProfilerParent::ProfilerResumedSampling() {
+  return SendAndConvertPromise([](ProfilerParent* profilerParent) {
+    return profilerParent->SendResumeSampling();
+  });
+}
+
+/* static */  // Fire-and-forget: no promise is returned to the caller.
+void ProfilerParent::ClearAllPages() {
+  if (!NS_IsMainThread()) {
+    return;
+  }
+
+  ProfilerParentTracker::Enumerate([](ProfilerParent* profilerParent) {
+    Unused << profilerParent->SendClearAllPages();  // Result is ignored.
+  });
+}
+
+/* static */  // Calls SendWaitOnePeriodicSampling() on each ProfilerParent.
+RefPtr<GenericPromise> ProfilerParent::WaitOnePeriodicSampling() {
+  return SendAndConvertPromise([](ProfilerParent* profilerParent) {
+    return profilerParent->SendWaitOnePeriodicSampling();
+  });
+}
+
+void ProfilerParent::ActorDestroy(ActorDestroyReason aActorDestroyReason) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());  // Enforced in release builds too.
+  mDestroyed = true;  // aActorDestroyReason itself is not inspected.
+}
+
+#endif
+
+}  // namespace mozilla
diff --git a/tools/profiler/gecko/ProfilerTypes.ipdlh b/tools/profiler/gecko/ProfilerTypes.ipdlh
new file mode 100644
index 0000000000..6255d47db0
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerTypes.ipdlh
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 8 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+using class mozilla::TimeStamp from "mozilla/TimeStamp.h";
+using struct mozilla::ProfileGenerationAdditionalInformation from "ProfileAdditionalInformation.h";
+
+namespace mozilla {
+
+struct ProfilerInitParams {
+  bool enabled;  // ProfilerParent::ProfilerStarted sets this to true.
+  uint32_t entries;
+  double? duration;  // Nothing when the start params' duration is <= 0.
+  double interval;
+  uint32_t features;
+  uint64_t activeTabID;
+  nsCString[] filters;  // Clone of nsIProfilerStartParams::getFilters().
+};
+
+struct ProfileBufferChunkMetadata {
+  TimeStamp doneTimeStamp;
+  uint32_t bufferBytes;
+};
+
+struct ProfileBufferChunkManagerUpdate {
+  uint64_t unreleasedBytes;
+  uint64_t releasedBytes;
+  TimeStamp oldestDoneTimeStamp;
+  ProfileBufferChunkMetadata[] newlyReleasedChunks;
+};
+
+struct GatherProfileProgress {
+  uint32_t progressProportionValueUnderlyingType;
+  nsCString progressLocation;
+};
+
+struct IPCProfileAndAdditionalInformation {
+  Shmem profileShmem;
+  ProfileGenerationAdditionalInformation? additionalInformation;
+};
+
+} // namespace mozilla
diff --git a/tools/profiler/gecko/components.conf b/tools/profiler/gecko/components.conf
new file mode 100644
index 0000000000..b1775c37ab
--- /dev/null
+++ b/tools/profiler/gecko/components.conf
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Classes = [
+    {
+        'js_name': 'profiler',
+        'cid': '{25db9b8e-8123-4de1-b66d-8bbbedf2cdf4}',
+        'contract_ids': ['@mozilla.org/tools/profiler;1'],
+        'interfaces': ['nsIProfiler'],
+        'type': 'nsProfiler',
+        'headers': ['/tools/profiler/gecko/nsProfiler.h'],
+        'init_method': 'Init',
+    },
+]
diff --git a/tools/profiler/gecko/nsIProfiler.idl b/tools/profiler/gecko/nsIProfiler.idl
new file mode 100644
index 0000000000..8b501d4b9f
--- /dev/null
+++ b/tools/profiler/gecko/nsIProfiler.idl
@@ -0,0 +1,208 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+%{C++
+#include "mozilla/Maybe.h"
+#include "nsTArrayForwardDeclare.h"
+#include "nsStringFwd.h"
+#include "mozilla/MozPromise.h"
+%}
+
+[ref] native nsCString(const nsCString);
+[ref] native StringArrayRef(const nsTArray<nsCString>);
+native ProfileDataBufferMozPromise(RefPtr<mozilla::MozPromise<FallibleTArray<uint8_t>, nsresult, true>>);
+
+/**
+ * Start-up parameters for subprocesses are passed through nsIObserverService,
+ * which, unfortunately, means we need to implement nsISupports in order to
+ * go through it.
+ */
+[scriptable, builtinclass, uuid(0a175ba7-8fcf-4ce9-9c4b-ccc6272f4425)]
+interface nsIProfilerStartParams : nsISupports
+{
+  readonly attribute uint32_t entries;
+  readonly attribute double duration;  // <= 0 is forwarded as "no duration".
+  readonly attribute double interval;
+  readonly attribute uint32_t features;
+  readonly attribute uint64_t activeTabID;
+
+  [noscript, notxpcom, nostdcall] StringArrayRef getFilters();
+};
+
+[scriptable, builtinclass, uuid(ead3f75c-0e0e-4fbb-901c-1e5392ef5b2a)]
+interface nsIProfiler : nsISupports
+{
+  /*
+   * Control functions return as soon as this process' profiler has done its
+   * work. The returned promise gets resolved when sub-processes have completed
+   * their operation, or immediately if there are no sub-processes.
+   */
+  [implicit_jscontext]
+  Promise StartProfiler(in uint32_t aEntries, in double aInterval,
+                        in Array<AUTF8String> aFeatures,
+                        [optional] in Array<AUTF8String> aFilters,
+                        [optional] in uint64_t aActiveTabID,
+                        [optional] in double aDuration);
+  [implicit_jscontext]
+  Promise StopProfiler();
+  boolean IsPaused();
+  [implicit_jscontext]
+  Promise Pause();
+  [implicit_jscontext]
+  Promise Resume();
+  boolean IsSamplingPaused();
+  [implicit_jscontext]
+  Promise PauseSampling();
+  [implicit_jscontext]
+  Promise ResumeSampling();
+
+  /*
+   * Resolves the returned promise after at least one full periodic sampling in
+   * each process.
+   * Rejects the promise if sampler is not running (yet, or anymore, or paused)
+   * in the parent process.
+   * This is mainly useful in tests, to wait just long enough to guarantee that
+   * at least one sample was taken in each process.
+   */
+  [implicit_jscontext]
+  Promise waitOnePeriodicSampling();
+
+  /*
+   * Returns the JSON string of the profile. If aSinceTime is passed, only
+   * report samples taken at >= aSinceTime.
+   */
+  string GetProfile([optional] in double aSinceTime);
+
+  /*
+   * Returns a JS object of the profile. If aSinceTime is passed, only report
+   * samples taken at >= aSinceTime.
+   */
+  [implicit_jscontext]
+  jsval getProfileData([optional] in double aSinceTime);
+
+  [implicit_jscontext]
+  Promise getProfileDataAsync([optional] in double aSinceTime);
+
+  [implicit_jscontext]
+  Promise getProfileDataAsArrayBuffer([optional] in double aSinceTime);
+
+  [implicit_jscontext]
+  Promise getProfileDataAsGzippedArrayBuffer([optional] in double aSinceTime);
+
+  /**
+   * Asynchronously dump the profile collected so far to a file.
+   * Returns a promise that resolves once the file has been written, with data
+   * from all responsive Firefox processes. Note: This blocks the parent process
+   * while collecting its own data, then unblocks while child processes data is
+   * being collected.
+   * `aFilename` may be a full path, or a path relative to where Firefox was
+   * launched. The target directory must already exist.
+   */
+  [implicit_jscontext]
+  Promise dumpProfileToFileAsync(in ACString aFilename,
+                                 [optional] in double aSinceTime);
+
+  /**
+   * Synchronously dump the profile collected so far in this process to a file.
+   * This profile will only contain data from the parent process, and from child
+   * processes that have ended during the session; other currently-live
+   * processes are ignored.
+   * `aFilename` may be a full path, or a path relative to where Firefox was
+   * launched. The target directory must already exist.
+   */
+  void dumpProfileToFile(in string aFilename);
+
+  boolean IsActive();
+
+  /**
+   * Clear all registered and unregistered page information in the profiler.
+   */
+  void ClearAllPages();
+
+  /**
+   * Returns an array of the features that are supported in this build.
+   * Features may vary depending on platform and build flags.
+   */
+  Array<AUTF8String> GetFeatures();
+
+  /**
+   * Returns a JavaScript object that contains a description of the currently configured
+   * state of the profiler when the profiler is active. This can be useful to assert
+   * the UI of the profiler's recording panel in tests. It returns null when the profiler
+   * is not active.
+   */
+  [implicit_jscontext]
+  readonly attribute jsval activeConfiguration;
+
+  /**
+   * Returns an array of all features that are supported by the profiler.
+   * The array may contain features that are not supported in this build.
+   */
+  Array<AUTF8String> GetAllFeatures();
+
+  void GetBufferInfo(out uint32_t aCurrentPosition, out uint32_t aTotalSize,
+                     out uint32_t aGeneration);
+
+  /**
+   * Returns the elapsed time, in milliseconds, since the profiler's epoch.
+   * The epoch is guaranteed to be constant for the duration of the
+   * process, but is otherwise arbitrary.
+   */
+  double getElapsedTime();
+
+  /**
+   * Contains an array of shared library objects.
+   * Every object has the properties:
+   *  - start:      The start address of the memory region occupied by this library.
+   *  - end:        The end address of the memory region occupied by this library.
+   *  - offset:     Usually zero, except on Linux / Android if the first mapped
+   *                section of the library has been mapped to an address that's
+   *                different from the library's base address.
+   *                Then offset = start - baseAddress.
+   *  - name:       The name (file basename) of the binary.
+   *  - path:       The full absolute path to the binary.
+   *  - debugName:  On Windows, the name of the pdb file for the binary. On other
+   *                platforms, the same as |name|.
+   *  - debugPath:  On Windows, the full absolute path of the pdb file for the
+   *                binary. On other platforms, the same as |path|.
+   *  - arch:       On Mac, the name of the architecture that identifies the right
+   *                binary image of a fat binary. Example values are "i386", "x86_64",
+   *                and "x86_64h". (x86_64h is used for binaries that contain
+   *                instructions that are specific to the Intel Haswell microarchitecture.)
+   *                On non-Mac platforms, arch is "".
+   *  - breakpadId: A unique identifier string for this library, as used by breakpad.
+   */
+  [implicit_jscontext]
+  readonly attribute jsval sharedLibraries;
+
+  /**
+   * Returns a promise that resolves to a SymbolTableAsTuple for the binary at
+   * the given path.
+   *
+   * SymbolTable as tuple: [addrs, index, buffer]
+   * Contains a symbol table, which can be used to map addresses to strings.
+   *
+   * The first element of this tuple, commonly named "addrs", is a sorted array of
+   * symbol addresses, as library-relative offsets in bytes, in ascending order.
+   * The third element of this tuple, commonly named "buffer", is a buffer of
+   * bytes that contains all strings from this symbol table, in the order of the
+   * addresses they correspond to, in utf-8 encoded form, all concatenated
+   * together.
+   * The second element of this tuple, commonly named "index", contains positions
+   * into "buffer". For every address, that position is where the string for that
+   * address starts in the buffer.
+   * index.length == addrs.length + 1.
+   * index[addrs.length] is the end position of the last string in the buffer.
+   *
+   * The string for the address addrs[i] is
+   * (new TextDecoder()).decode(buffer.subarray(index[i], index[i + 1]))
+   */
+  [implicit_jscontext]
+  Promise getSymbolTable(in ACString aDebugPath, in ACString aBreakpadID);
+
+  [notxpcom, nostdcall] ProfileDataBufferMozPromise getProfileDataAsGzippedArrayBufferAndroid(in double aSinceTime);
+};
diff --git a/tools/profiler/gecko/nsProfiler.cpp b/tools/profiler/gecko/nsProfiler.cpp
new file mode 100644
index 0000000000..66e32ce2bc
--- /dev/null
+++ b/tools/profiler/gecko/nsProfiler.cpp
@@ -0,0 +1,1487 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsProfiler.h"
+
+#include <fstream>
+#include <limits>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "GeckoProfiler.h"
+#include "ProfilerControl.h"
+#include "ProfilerParent.h"
+#include "js/Array.h"  // JS::NewArrayObject
+#include "js/JSON.h"
+#include "js/PropertyAndElement.h"  // JS_SetElement
+#include "js/Value.h"
+#include "json/json.h"
+#include "mozilla/ErrorResult.h"
+#include "mozilla/JSONStringWriteFuncs.h"
+#include "mozilla/SchedulerGroup.h"
+#include "mozilla/Services.h"
+#include "mozilla/dom/Promise.h"
+#include "mozilla/dom/TypedArray.h"
+#include "mozilla/Preferences.h"
+#include "nsComponentManagerUtils.h"
+#include "nsIInterfaceRequestor.h"
+#include "nsIInterfaceRequestorUtils.h"
+#include "nsILoadContext.h"
+#include "nsIWebNavigation.h"
+#include "nsProfilerStartParams.h"
+#include "nsProxyRelease.h"
+#include "nsString.h"
+#include "nsThreadUtils.h"
+#include "platform.h"
+#include "shared-libraries.h"
+#include "zlib.h"
+
+#ifndef ANDROID
+#  include <cstdio>
+#else
+#  include <android/log.h>
+#endif
+
+using namespace mozilla;
+
+using dom::AutoJSAPI;
+using dom::Promise;
+using std::string;
+
+static constexpr size_t scLengthMax = size_t(JS::MaxStringLength);
+// Used when trying to add more JSON data, to account for the extra space needed
+// for the log and to close the profile.
+static constexpr size_t scLengthAccumulationThreshold = scLengthMax - 16 * 1024;
+
+NS_IMPL_ISUPPORTS(nsProfiler, nsIProfiler)
+
+nsProfiler::nsProfiler() : mGathering(false) {}
+
+nsProfiler::~nsProfiler() {
+  if (mSymbolTableThread) {
+    mSymbolTableThread->Shutdown();
+  }
+  // NOTE(review): presumably aborts any gathering still in flight — confirm.
+  ResetGathering(NS_ERROR_ILLEGAL_DURING_SHUTDOWN);
+}
+// Nothing to initialise; components.conf names this as the init_method.
+nsresult nsProfiler::Init() { return NS_OK; }
+
+template <typename JsonLogObjectUpdater>
+void nsProfiler::Log(JsonLogObjectUpdater&& aJsonLogObjectUpdater) {
+  if (mGatheringLog) {  // No-op when there is no gathering log.
+    MOZ_ASSERT(mGatheringLog->isObject());
+    std::forward<JsonLogObjectUpdater>(aJsonLogObjectUpdater)(*mGatheringLog);
+    MOZ_ASSERT(mGatheringLog->isObject());  // Updater must keep it an object.
+  }
+}
+
+template <typename JsonArrayAppender>
+void nsProfiler::LogEvent(JsonArrayAppender&& aJsonArrayAppender) {
+  Log([&](Json::Value& aRoot) {
+    Json::Value& events = aRoot[Json::StaticString{"events"}];
+    if (!events.isArray()) {  // First event: create the "events" array.
+      events = Json::Value{Json::arrayValue};
+    }
+    Json::Value newEvent{Json::arrayValue};
+    newEvent.append(ProfilingLog::Timestamp());  // Element 0 is the timestamp.
+    std::forward<JsonArrayAppender>(aJsonArrayAppender)(newEvent);
+    MOZ_ASSERT(newEvent.isArray());  // Appender must keep it an array.
+    events.append(std::move(newEvent));
+  });
+}
+
+void nsProfiler::LogEventLiteralString(const char* aEventString) {
+  LogEvent([&](Json::Value& aEvent) {
+    aEvent.append(Json::StaticString{aEventString});  // NOTE(review): likely not copied, so pass literals only — confirm.
+  });
+}
+
+static nsresult FillVectorFromStringArray(Vector<const char*>& aVector,
+                                          const nsTArray<nsCString>& aArray) {
+  if (NS_WARN_IF(!aVector.reserve(aArray.Length()))) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  for (auto& entry : aArray) {
+    aVector.infallibleAppend(entry.get());  // Borrowed pointer into aArray.
+  }
+  return NS_OK;
+}
+
+// Given a PromiseReturningFunction: () -> GenericPromise,
+// run the function, and return a JS Promise (through aPromise) that will be
+// resolved with undefined once that GenericPromise is resolved or rejected.
+template <typename PromiseReturningFunction>
+static nsresult RunFunctionAndConvertPromise(
+    JSContext* aCx, Promise** aPromise,
+    PromiseReturningFunction&& aPromiseReturningFunction) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+  // The function only runs once all of the checks above have passed.
+  std::forward<PromiseReturningFunction>(aPromiseReturningFunction)()->Then(
+      GetMainThreadSerialEventTarget(), __func__,
+      [promise](GenericPromise::ResolveOrRejectValue&&) {
+        promise->MaybeResolveWithUndefined();
+      });
+
+  promise.forget(aPromise);  // Hands the reference over to the caller.
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::StartProfiler(uint32_t aEntries, double aInterval,
+                          const nsTArray<nsCString>& aFeatures,
+                          const nsTArray<nsCString>& aFilters,
+                          uint64_t aActiveTabID, double aDuration,
+                          JSContext* aCx, Promise** aPromise) {
+  ResetGathering(NS_ERROR_DOM_ABORT_ERR);
+
+  Vector<const char*> featureStringVector;  // Borrows from aFeatures.
+  nsresult rv = FillVectorFromStringArray(featureStringVector, aFeatures);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+  uint32_t features = ParseFeaturesFromStringArray(
+      featureStringVector.begin(), featureStringVector.length());
+  Maybe<double> duration = aDuration > 0.0 ? Some(aDuration) : Nothing();
+
+  Vector<const char*> filterStringVector;  // Borrows from aFilters.
+  rv = FillVectorFromStringArray(filterStringVector, aFilters);
+  if (NS_FAILED(rv)) {
+    return rv;
+  }
+  // [&] is safe: the lambda is only invoked before the call below returns.
+  return RunFunctionAndConvertPromise(aCx, aPromise, [&]() {
+    return profiler_start(PowerOfTwo32(aEntries), aInterval, features,
+                          filterStringVector.begin(),
+                          filterStringVector.length(), aActiveTabID, duration);
+  });
+}
+
+NS_IMETHODIMP  // JS promise resolves once profiler_stop()'s promise settles.
+nsProfiler::StopProfiler(JSContext* aCx, Promise** aPromise) {
+  ResetGathering(NS_ERROR_DOM_ABORT_ERR);
+  return RunFunctionAndConvertPromise(aCx, aPromise,
+                                      []() { return profiler_stop(); });
+}
+
+NS_IMETHODIMP  // Reports profiler_is_paused(); always returns NS_OK.
+nsProfiler::IsPaused(bool* aIsPaused) {
+  *aIsPaused = profiler_is_paused();
+  return NS_OK;
+}
+
+NS_IMETHODIMP  // JS promise resolves once profiler_pause()'s promise settles.
+nsProfiler::Pause(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(aCx, aPromise,
+                                      []() { return profiler_pause(); });
+}
+
+NS_IMETHODIMP  // JS promise resolves once profiler_resume()'s promise settles.
+nsProfiler::Resume(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(aCx, aPromise,
+                                      []() { return profiler_resume(); });
+}
+
+NS_IMETHODIMP  // Reports profiler_is_sampling_paused(); always returns NS_OK.
+nsProfiler::IsSamplingPaused(bool* aIsSamplingPaused) {
+  *aIsSamplingPaused = profiler_is_sampling_paused();
+  return NS_OK;
+}
+
+NS_IMETHODIMP  // JS promise resolves once profiler_pause_sampling() settles.
+nsProfiler::PauseSampling(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(
+      aCx, aPromise, []() { return profiler_pause_sampling(); });
+}
+
+NS_IMETHODIMP  // JS promise resolves once profiler_resume_sampling() settles.
+nsProfiler::ResumeSampling(JSContext* aCx, Promise** aPromise) {
+  return RunFunctionAndConvertPromise(
+      aCx, aPromise, []() { return profiler_resume_sampling(); });
+}
+
+NS_IMETHODIMP  // Calls profiler_clear_all_pages(); always returns NS_OK.
+nsProfiler::ClearAllPages() {
+  profiler_clear_all_pages();
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::WaitOnePeriodicSampling(JSContext* aCx, Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  // The callback cannot officially own the promise RefPtr directly, because
+  // `Promise` doesn't support multi-threading, and the callback could destroy
+  // the promise in the sampler thread.
+  // `nsMainThreadPtrHandle` ensures that the promise can only be destroyed on
+  // the main thread. And the invocation from the Sampler thread immediately
+  // dispatches a task back to the main thread, to resolve/reject the promise.
+  // The lambda needs to be `mutable`, to allow moving-from
+  // `promiseHandleInSampler`.
+  if (!profiler_callback_after_sampling(
+          [promiseHandleInSampler = nsMainThreadPtrHandle<Promise>(
+               new nsMainThreadPtrHolder<Promise>(
+                   "WaitOnePeriodicSampling promise for Sampler", promise))](
+              SamplingState aSamplingState) mutable {
+            SchedulerGroup::Dispatch(
+                TaskCategory::Other,
+                NS_NewRunnableFunction(
+                    "nsProfiler::WaitOnePeriodicSampling result on main thread",
+                    [promiseHandleInMT = std::move(promiseHandleInSampler),
+                     aSamplingState]() mutable {
+                      switch (aSamplingState) {
+                        case SamplingState::JustStopped:
+                        case SamplingState::SamplingPaused:
+                          promiseHandleInMT->MaybeReject(NS_ERROR_FAILURE);
+                          break;
+
+                        case SamplingState::NoStackSamplingCompleted:
+                        case SamplingState::SamplingCompleted:
+                          // The parent process has successfully done a
+                          // sampling, check the child processes (if any).
+                          ProfilerParent::WaitOnePeriodicSampling()->Then(
+                              GetMainThreadSerialEventTarget(), __func__,
+                              [promiseHandleInMT =
+                                   std::move(promiseHandleInMT)](
+                                  GenericPromise::ResolveOrRejectValue&&) {
+                                promiseHandleInMT->MaybeResolveWithUndefined();
+                              });
+                          break;
+
+                        default:
+                          MOZ_ASSERT(false, "Unexpected SamplingState value");
+                          promiseHandleInMT->MaybeReject(
+                              NS_ERROR_DOM_UNKNOWN_ERR);
+                          break;
+                      }
+                    }));
+          })) {
+    // Callback was not added (e.g., profiler is not running) and will never be
+    // invoked, so we need to reject the promise here.
+    promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+  }
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfile(double aSinceTime, char** aProfile) {
+  mozilla::UniquePtr<char[]> profile = profiler_get_profile(aSinceTime);
+  *aProfile = profile.release();  // Ownership moves to the caller; no null check.
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetSharedLibraries(JSContext* aCx,
+                               JS::MutableHandle<JS::Value> aResult) {
+  JS::Rooted<JS::Value> val(aCx);
+  {
+    JSONStringWriteFunc<nsCString> buffer;
+    JSONWriter w(buffer, JSONWriter::SingleLineStyle);
+    w.StartArrayElement();
+    SharedLibraryInfo sharedLibraryInfo = SharedLibraryInfo::GetInfoForSelf();
+    sharedLibraryInfo.SortByAddress();
+    AppendSharedLibraries(w, sharedLibraryInfo);
+    w.EndArray();
+    NS_ConvertUTF8toUTF16 buffer16(buffer.StringCRef());
+    MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx,
+                                 static_cast<const char16_t*>(buffer16.get()),
+                                 buffer16.Length(), &val));
+  }
+  // Test the value's type: &val.toObject() could never be null.
+  if (!val.isObject()) {
+    return NS_ERROR_FAILURE;
+  }
+  aResult.set(val);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetActiveConfiguration(JSContext* aCx,
+                                   JS::MutableHandle<JS::Value> aResult) {
+  JS::Rooted<JS::Value> jsValue(aCx);
+  {
+    JSONStringWriteFunc<nsCString> buffer;
+    JSONWriter writer(buffer, JSONWriter::SingleLineStyle);
+    profiler_write_active_configuration(writer);
+    NS_ConvertUTF8toUTF16 buffer16(buffer.StringCRef());
+    MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx,
+                                 static_cast<const char16_t*>(buffer16.get()),
+                                 buffer16.Length(), &jsValue));
+  }
+  if (jsValue.isNull()) {
+    aResult.setNull();
+  } else {
+    // Test the value's type: &jsValue.toObject() could never be null.
+    if (!jsValue.isObject()) {
+      return NS_ERROR_FAILURE;
+    }
+    aResult.set(jsValue);
+  }
+  return NS_OK;
+}
+
+NS_IMETHODIMP  // Always returns NS_OK; no failure is reported from here.
+nsProfiler::DumpProfileToFile(const char* aFilename) {
+  profiler_save_profile_to_file(aFilename);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfileData(double aSinceTime, JSContext* aCx,
+                           JS::MutableHandle<JS::Value> aResult) {
+  mozilla::UniquePtr<char[]> profile = profiler_get_profile(aSinceTime);
+  if (!profile) {
+    return NS_ERROR_FAILURE;
+  }
+  // The profile is a UTF-8 JSON string; JS_ParseJSON needs UTF-16.
+  NS_ConvertUTF8toUTF16 js_string(nsDependentCString(profile.get()));
+  auto profile16 = static_cast<const char16_t*>(js_string.get());
+
+  JS::Rooted<JS::Value> val(aCx);
+  MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx, profile16, js_string.Length(), &val));
+  // NOTE(review): the parse result is only checked by MOZ_ALWAYS_TRUE.
+  aResult.set(val);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfileDataAsync(double aSinceTime, JSContext* aCx,
+                                Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {  // Fails synchronously; no promise is created.
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+  // Both callbacks below run on the main thread and settle `promise`.
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+
+            // Now parse the JSON so that we resolve with a JS Object.
+            JS::Rooted<JS::Value> val(cx);
+            {
+              NS_ConvertUTF8toUTF16 js_string(aResult.mProfile);
+              if (!JS_ParseJSON(cx,
+                                static_cast<const char16_t*>(js_string.get()),
+                                js_string.Length(), &val)) {
+                if (!jsapi.HasException()) {
+                  promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+                } else {
+                  JS::Rooted<JS::Value> exn(cx);  // Reject with the JS error.
+                  DebugOnly<bool> gotException = jsapi.StealException(&exn);
+                  MOZ_ASSERT(gotException);
+
+                  jsapi.ClearException();
+                  promise->MaybeReject(exn);
+                }
+              } else {
+                promise->MaybeResolve(val);
+              }
+            }
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);  // Hands the reference over to the caller.
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfileDataAsArrayBuffer(double aSinceTime, JSContext* aCx,
+                                        Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {  // Fails synchronously; no promise is created.
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+  // Both callbacks below run on the main thread and settle `promise`.
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+            JSObject* typedArray = dom::ArrayBuffer::Create(
+                cx, aResult.mProfile.Length(),
+                reinterpret_cast<const uint8_t*>(aResult.mProfile.Data()));
+            if (typedArray) {
+              JS::Rooted<JS::Value> val(cx, JS::ObjectValue(*typedArray));
+              promise->MaybeResolve(val);
+            } else {  // Any Create() failure is reported as out-of-memory.
+              promise->MaybeReject(NS_ERROR_OUT_OF_MEMORY);
+            }
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);  // Hands the reference over to the caller.
+  return NS_OK;
+}
+
+// Compresses aString into aOutBuff as a gzip stream.
+//
+// On success aOutBuff is truncated to exactly the compressed size and NS_OK
+// is returned. Returns NS_ERROR_OUT_OF_MEMORY if the output buffer cannot be
+// allocated, and NS_ERROR_FAILURE if zlib fails to initialize or to finish
+// the stream.
+nsresult CompressString(const nsCString& aString,
+                        FallibleTArray<uint8_t>& aOutBuff) {
+  // Compress a buffer via zlib (as with `compress()`), but emit a
+  // gzip header as well. Like `compress()`, this is limited to 4GB in
+  // size, but that shouldn't be an issue for our purposes.
+  z_stream stream;
+  stream.zalloc = nullptr;
+  stream.zfree = nullptr;
+  stream.opaque = nullptr;
+
+  // A windowBits of 31 is the default (15) plus 16 for emitting a
+  // gzip header; a memLevel of 8 is the default.
+  int zerr =
+      deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                   /* windowBits */ 31, /* memLevel */ 8, Z_DEFAULT_STRATEGY);
+  if (zerr != Z_OK) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // Size the output with deflateBound() on the initialized stream rather than
+  // compressBound(): the latter assumes the zlib wrapper, which is smaller
+  // than the gzip header and trailer emitted here, so empty, tiny or
+  // incompressible inputs could otherwise run out of output space.
+  const uLong maxOutSize = deflateBound(&stream, aString.Length());
+  if (!aOutBuff.SetLength(maxOutSize, fallible)) {
+    // The stream was successfully initialized, so it must be released.
+    deflateEnd(&stream);
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  stream.next_out = (Bytef*)aOutBuff.Elements();
+  stream.avail_out = aOutBuff.Length();
+  stream.next_in = (z_const Bytef*)aString.Data();
+  stream.avail_in = aString.Length();
+
+  // The whole input is available and the output is large enough, so a single
+  // Z_FINISH call is expected to complete the stream.
+  zerr = deflate(&stream, Z_FINISH);
+  const uLong outSize = stream.total_out;
+  deflateEnd(&stream);
+
+  if (zerr != Z_STREAM_END) {
+    return NS_ERROR_FAILURE;
+  }
+
+  aOutBuff.TruncateLength(outSize);
+  return NS_OK;
+}
+
+// Starts gathering the profile and hands back (through aPromise) a DOM promise
+// that is resolved with a plain JS object of the form
+//   { profile: <ArrayBuffer of gzip-compressed profile>,
+//     additionalInformation: <object, or undefined if none was gathered> }.
+//
+// Returns NS_ERROR_FAILURE without creating a promise when the profiler is
+// inactive, aCx is null, or no native global is available. Failures that
+// happen after gathering (compression or JS allocation errors) reject the
+// promise instead.
+NS_IMETHODIMP
+nsProfiler::GetProfileDataAsGzippedArrayBuffer(double aSinceTime,
+                                               JSContext* aCx,
+                                               Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            FallibleTArray<uint8_t> outBuff;
+            nsresult result = CompressString(aResult.mProfile, outBuff);
+
+            if (result != NS_OK) {
+              promise->MaybeReject(result);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+            // Get the profile typedArray.
+            JSObject* typedArray = dom::ArrayBuffer::Create(
+                cx, outBuff.Length(), outBuff.Elements());
+            if (!typedArray) {
+              promise->MaybeReject(NS_ERROR_OUT_OF_MEMORY);
+              return;
+            }
+            JS::Rooted<JS::Value> typedArrayValue(cx,
+                                                  JS::ObjectValue(*typedArray));
+            // Get the additional information object.
+            JS::Rooted<JS::Value> additionalInfoVal(cx);
+            if (aResult.mAdditionalInformation.isSome()) {
+              aResult.mAdditionalInformation->ToJSValue(cx, &additionalInfoVal);
+            } else {
+              additionalInfoVal.setUndefined();
+            }
+
+            // Create the return object. Both the object allocation and the
+            // property definitions can fail; never resolve with (or set
+            // properties on) a null or half-built object.
+            JS::Rooted<JSObject*> resultObj(cx, JS_NewPlainObject(cx));
+            if (!resultObj ||
+                !JS_SetProperty(cx, resultObj, "profile", typedArrayValue) ||
+                !JS_SetProperty(cx, resultObj, "additionalInformation",
+                                additionalInfoVal)) {
+              // The failure is reported through the promise, so drop any
+              // exception left pending on the context.
+              jsapi.ClearException();
+              promise->MaybeReject(NS_ERROR_OUT_OF_MEMORY);
+              return;
+            }
+            promise->MaybeResolve(resultObj);
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Starts gathering the profile and, once gathered, writes the profile string
+// to the file named aFilename. The DOM promise handed back through aPromise
+// is resolved with undefined after a successful write.
+//
+// The promise is rejected with:
+//  - NS_ERROR_FILE_TOO_BIG if the profile length does not fit in a
+//    std::streamsize,
+//  - NS_ERROR_FILE_UNRECOGNIZED_PATH if the file cannot be opened,
+//  - NS_ERROR_FAILURE if writing or closing the file fails,
+//  - the gathering error code if gathering itself fails.
+//
+// Returns NS_ERROR_FAILURE without creating a promise when the profiler is
+// inactive, aCx is null, or no native global is available.
+NS_IMETHODIMP
+nsProfiler::DumpProfileToFileAsync(const nsACString& aFilename,
+                                   double aSinceTime, JSContext* aCx,
+                                   Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return NS_ERROR_FAILURE;
+  }
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject = xpc::CurrentNativeGlobal(aCx);
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  // Copy the name: the callback below runs after this call has returned, so
+  // it must not refer to aFilename.
+  nsCString filename(aFilename);
+
+  StartGathering(aSinceTime)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [filename,
+           promise](const mozilla::ProfileAndAdditionalInformation& aResult) {
+            if (aResult.mProfile.Length() >=
+                size_t(std::numeric_limits<std::streamsize>::max())) {
+              promise->MaybeReject(NS_ERROR_FILE_TOO_BIG);
+              return;
+            }
+
+            std::ofstream stream;
+            stream.open(filename.get());
+            if (!stream.is_open()) {
+              promise->MaybeReject(NS_ERROR_FILE_UNRECOGNIZED_PATH);
+              return;
+            }
+
+            stream.write(aResult.mProfile.get(),
+                         std::streamsize(aResult.mProfile.Length()));
+            stream.close();
+
+            // A failed write() or close() (e.g. disk full) is recorded in the
+            // stream state; don't claim success for a truncated or missing
+            // file.
+            if (stream.fail()) {
+              promise->MaybeReject(NS_ERROR_FAILURE);
+              return;
+            }
+
+            promise->MaybeResolveWithUndefined();
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Requests the symbol table for the library identified by aDebugPath and
+// aBreakpadID. The DOM promise handed back through aPromise is resolved with
+// a 3-element JS array [addrs (Uint32Array), index (Uint32Array),
+// buffer (Uint8Array)] built from the SymbolTable, or rejected if the lookup
+// fails or the JS values cannot be created.
+//
+// Returns NS_ERROR_FAILURE without creating a promise when aCx is null or no
+// native global is available.
+NS_IMETHODIMP
+nsProfiler::GetSymbolTable(const nsACString& aDebugPath,
+                           const nsACString& aBreakpadID, JSContext* aCx,
+                           Promise** aPromise) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsIGlobalObject* globalObject =
+      xpc::NativeGlobal(JS::CurrentGlobalOrNull(aCx));
+
+  if (NS_WARN_IF(!globalObject)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(globalObject, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+
+  GetSymbolTableMozPromise(aDebugPath, aBreakpadID)
+      ->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [promise](const SymbolTable& aSymbolTable) {
+            AutoJSAPI jsapi;
+            if (NS_WARN_IF(!jsapi.Init(promise->GetGlobalObject()))) {
+              // We're really hosed if we can't get a JS context for some
+              // reason.
+              promise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+              return;
+            }
+
+            JSContext* cx = jsapi.cx();
+
+            JS::Rooted<JSObject*> addrsArray(
+                cx, dom::Uint32Array::Create(cx, aSymbolTable.mAddrs.Length(),
+                                             aSymbolTable.mAddrs.Elements()));
+            JS::Rooted<JSObject*> indexArray(
+                cx, dom::Uint32Array::Create(cx, aSymbolTable.mIndex.Length(),
+                                             aSymbolTable.mIndex.Elements()));
+            JS::Rooted<JSObject*> bufferArray(
+                cx, dom::Uint8Array::Create(cx, aSymbolTable.mBuffer.Length(),
+                                            aSymbolTable.mBuffer.Elements()));
+
+            if (!addrsArray || !indexArray || !bufferArray) {
+              promise->MaybeReject(NS_ERROR_FAILURE);
+              return;
+            }
+
+            // The tuple allocation and the element stores can fail as well;
+            // never touch or resolve with a null or partially filled array.
+            JS::Rooted<JSObject*> tuple(cx, JS::NewArrayObject(cx, 3));
+            if (!tuple || !JS_SetElement(cx, tuple, 0, addrsArray) ||
+                !JS_SetElement(cx, tuple, 1, indexArray) ||
+                !JS_SetElement(cx, tuple, 2, bufferArray)) {
+              // The failure is reported through the promise, so drop any
+              // exception left pending on the context.
+              jsapi.ClearException();
+              promise->MaybeReject(NS_ERROR_FAILURE);
+              return;
+            }
+            promise->MaybeResolve(tuple);
+          },
+          [promise](nsresult aRv) { promise->MaybeReject(aRv); });
+
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+// Stores the value of profiler_time() in *aElapsedTime. Always returns NS_OK.
+NS_IMETHODIMP
+nsProfiler::GetElapsedTime(double* aElapsedTime) {
+  const double elapsed = profiler_time();
+  *aElapsedTime = elapsed;
+  return NS_OK;
+}
+
+// Stores the value of profiler_is_active() in *aIsActive. Always returns
+// NS_OK.
+NS_IMETHODIMP
+nsProfiler::IsActive(bool* aIsActive) {
+  const bool active = profiler_is_active();
+  *aIsActive = active;
+  return NS_OK;
+}
+
+// Appends to aFeatureList the string of every feature listed by
+// PROFILER_FOR_EACH_FEATURE for which ProfilerFeature::Has<Name>(aFeatures)
+// is true.
+static void GetArrayOfStringsForFeatures(uint32_t aFeatures,
+                                         nsTArray<nsCString>& aFeatureList) {
+  // First pass: tally the selected features so that the array capacity can be
+  // set once, up front.
+  uint32_t selectedCount = 0;
+
+#define TALLY_IF_SELECTED(n_, str_, Name_, desc_) \
+  if (ProfilerFeature::Has##Name_(aFeatures)) {   \
+    selectedCount++;                              \
+  }
+
+  PROFILER_FOR_EACH_FEATURE(TALLY_IF_SELECTED)
+
+#undef TALLY_IF_SELECTED
+
+  aFeatureList.SetCapacity(selectedCount);
+
+  // Second pass: append the string of each selected feature.
+#define APPEND_IF_SELECTED(n_, str_, Name_, desc_) \
+  if (ProfilerFeature::Has##Name_(aFeatures)) {    \
+    aFeatureList.AppendElement(str_);              \
+  }
+
+  PROFILER_FOR_EACH_FEATURE(APPEND_IF_SELECTED)
+
+#undef APPEND_IF_SELECTED
+}
+
+// Fills aFeatureList with the strings of the features reported by
+// profiler_get_available_features(). Always returns NS_OK.
+NS_IMETHODIMP
+nsProfiler::GetFeatures(nsTArray<nsCString>& aFeatureList) {
+  const uint32_t availableFeatures = profiler_get_available_features();
+  GetArrayOfStringsForFeatures(availableFeatures, aFeatureList);
+  return NS_OK;
+}
+
+// Fills aFeatureList with the strings of all features, by passing a bitfield
+// with every bit set. Always returns NS_OK.
+NS_IMETHODIMP
+nsProfiler::GetAllFeatures(nsTArray<nsCString>& aFeatureList) {
+  const uint32_t everyFeatureBit = ~uint32_t(0);
+  GetArrayOfStringsForFeatures(everyFeatureBit, aFeatureList);
+  return NS_OK;
+}
+
+// Reports the state of the profiler buffer through the three out-parameters,
+// derived from profiler_get_buffer_info():
+//   *aCurrentPosition = mRangeEnd modulo mEntryCount,
+//   *aTotalSize       = mEntryCount,
+//   *aGeneration      = mRangeEnd divided by mEntryCount.
+// All three are set to 0 when no buffer info is available. Always returns
+// NS_OK.
+NS_IMETHODIMP
+nsProfiler::GetBufferInfo(uint32_t* aCurrentPosition, uint32_t* aTotalSize,
+                          uint32_t* aGeneration) {
+  MOZ_ASSERT(aCurrentPosition);
+  MOZ_ASSERT(aTotalSize);
+  MOZ_ASSERT(aGeneration);
+
+  Maybe<ProfilerBufferInfo> bufferInfo = profiler_get_buffer_info();
+  if (!bufferInfo) {
+    // Nothing to report.
+    *aCurrentPosition = 0;
+    *aTotalSize = 0;
+    *aGeneration = 0;
+    return NS_OK;
+  }
+
+  *aCurrentPosition = bufferInfo->mRangeEnd % bufferInfo->mEntryCount;
+  *aTotalSize = bufferInfo->mEntryCount;
+  *aGeneration = bufferInfo->mRangeEnd / bufferInfo->mEntryCount;
+  return NS_OK;
+}
+
+// Asks the child process of aPendingProfile for a progress report on its
+// profile gathering.
+//
+// Returns false if the request could not be sent (no promise was returned by
+// ProfilerParent::RequestGatherProfileProgress); the caller decides what to do
+// with the pending profile in that case. Otherwise records the request time in
+// aPendingProfile.lastProgressRequest, attaches main-thread handlers for the
+// asynchronous answer, and returns true.
+//
+// The handlers capture the child pid rather than a reference to
+// aPendingProfile, and look the entry up again when they run.
+bool nsProfiler::SendProgressRequest(PendingProfile& aPendingProfile) {
+  RefPtr<ProfilerParent::SingleProcessProgressPromise> progressPromise =
+      ProfilerParent::RequestGatherProfileProgress(aPendingProfile.childPid);
+  if (!progressPromise) {
+    LOG("RequestGatherProfileProgress(%u) -> null!",
+        unsigned(aPendingProfile.childPid));
+    LogEvent([&](Json::Value& aEvent) {
+      aEvent.append(
+          Json::StaticString{"Failed to send progress request to pid:"});
+      aEvent.append(Json::Value::UInt64(aPendingProfile.childPid));
+    });
+    // Failed to send request.
+    return false;
+  }
+
+  DEBUG_LOG("RequestGatherProfileProgress(%u) sent...",
+            unsigned(aPendingProfile.childPid));
+  LogEvent([&](Json::Value& aEvent) {
+    aEvent.append(Json::StaticString{"Requested progress from pid:"});
+    aEvent.append(Json::Value::UInt64(aPendingProfile.childPid));
+  });
+  aPendingProfile.lastProgressRequest = TimeStamp::Now();
+  progressPromise->Then(
+      GetMainThreadSerialEventTarget(), __func__,
+      // Resolve handler: the child answered with its current progress.
+      [self = RefPtr<nsProfiler>(this),
+       childPid = aPendingProfile.childPid](GatherProfileProgress&& aResult) {
+        if (!self->mGathering) {
+          // Gathering is no longer in progress, the answer is of no use.
+          return;
+        }
+        // May be null if this child is no longer in the pending list.
+        PendingProfile* pendingProfile = self->GetPendingProfile(childPid);
+        DEBUG_LOG(
+            "RequestGatherProfileProgress(%u) response: %.2f '%s' "
+            "(%u were pending, %s %u)",
+            unsigned(childPid),
+            ProportionValue::FromUnderlyingType(
+                aResult.progressProportionValueUnderlyingType())
+                    .ToDouble() *
+                100.0,
+            aResult.progressLocation().Data(),
+            unsigned(self->mPendingProfiles.length()),
+            pendingProfile ? "including" : "excluding", unsigned(childPid));
+        self->LogEvent([&](Json::Value& aEvent) {
+          aEvent.append(
+              Json::StaticString{"Got response from pid, with progress:"});
+          aEvent.append(Json::Value::UInt64(childPid));
+          aEvent.append(
+              Json::Value{ProportionValue::FromUnderlyingType(
+                              aResult.progressProportionValueUnderlyingType())
+                              .ToDouble() *
+                          100.0});
+        });
+        if (pendingProfile) {
+          // We have a progress report for a still-pending profile.
+          pendingProfile->lastProgressResponse = TimeStamp::Now();
+          // Has it actually made progress?
+          if (aResult.progressProportionValueUnderlyingType() !=
+              pendingProfile->progressProportion.ToUnderlyingType()) {
+            // Yes: remember when and where, and restart the gathering timer.
+            pendingProfile->lastProgressChange =
+                pendingProfile->lastProgressResponse;
+            pendingProfile->progressProportion =
+                ProportionValue::FromUnderlyingType(
+                    aResult.progressProportionValueUnderlyingType());
+            pendingProfile->progressLocation = aResult.progressLocation();
+            self->RestartGatheringTimer();
+          }
+        }
+      },
+      // Reject handler: the progress request itself failed.
+      [self = RefPtr<nsProfiler>(this), childPid = aPendingProfile.childPid](
+          ipc::ResponseRejectReason&& aReason) {
+        if (!self->mGathering) {
+          // Gathering is no longer in progress, nothing to clean up.
+          return;
+        }
+        // May be null if this child is no longer in the pending list.
+        PendingProfile* pendingProfile = self->GetPendingProfile(childPid);
+        LOG("RequestGatherProfileProgress(%u) rejection: %d "
+            "(%u were pending, %s %u)",
+            unsigned(childPid), (int)aReason,
+            unsigned(self->mPendingProfiles.length()),
+            pendingProfile ? "including" : "excluding", unsigned(childPid));
+        self->LogEvent([&](Json::Value& aEvent) {
+          aEvent.append(Json::StaticString{
+              "Got progress request rejection from pid, with reason:"});
+          aEvent.append(Json::Value::UInt64(childPid));
+          aEvent.append(Json::Value::UInt{static_cast<unsigned>(aReason)});
+        });
+        if (pendingProfile) {
+          // Failure response, assume the child process is gone.
+          MOZ_ASSERT(self->mPendingProfiles.begin() <= pendingProfile &&
+                     pendingProfile < self->mPendingProfiles.end());
+          self->mPendingProfiles.erase(pendingProfile);
+          if (self->mPendingProfiles.empty()) {
+            // We've got all of the async profiles now. Let's finish off the
+            // profile and resolve the Promise.
+            self->FinishGathering();
+          }
+        }
+      });
+  return true;
+}
+
+// Callback of the gathering timer; aClosure is expected to be the nsProfiler
+// that armed the timer. Must run on the main thread.
+//
+// For each pending child profile, decides from its request/response/change
+// timestamps whether to send a new progress request and whether the child
+// counts as having made progress. Children to which a request cannot be sent
+// are dropped from the pending list. Afterwards:
+//  - if no profile is pending anymore, gathering is finished;
+//  - else if any child made progress, the timer is restarted;
+//  - else this is a timeout, and gathering is finished with what has been
+//    received so far.
+/* static */ void nsProfiler::GatheringTimerCallback(nsITimer* aTimer,
+                                                     void* aClosure) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  // Go through the service rather than trusting aClosure blindly: the closure
+  // is only used after checking that it matches the current service object.
+  nsCOMPtr<nsIProfiler> profiler(
+      do_GetService("@mozilla.org/tools/profiler;1"));
+  if (!profiler) {
+    // No (more) profiler service.
+    return;
+  }
+  nsProfiler* self = static_cast<nsProfiler*>(profiler.get());
+  if (self != aClosure) {
+    // Different service object!?
+    return;
+  }
+  if (aTimer != self->mGatheringTimer) {
+    // This timer was cancelled after this callback was queued.
+    return;
+  }
+
+  bool progressWasMade = false;
+
+  // Going backwards, it's easier and cheaper to erase elements if needed.
+  for (auto iPlus1 = self->mPendingProfiles.length(); iPlus1 != 0; --iPlus1) {
+    PendingProfile& pendingProfile = self->mPendingProfiles[iPlus1 - 1];
+
+    bool needToSendProgressRequest = false;
+    if (pendingProfile.lastProgressRequest.IsNull()) {
+      DEBUG_LOG("GatheringTimerCallback() - child %u: No data yet",
+                unsigned(pendingProfile.childPid));
+      // First time going through the list, send an initial progress request.
+      needToSendProgressRequest = true;
+      // We pretend that progress was made, so we don't give up yet.
+      progressWasMade = true;
+    } else if (pendingProfile.lastProgressResponse.IsNull()) {
+      LOG("GatheringTimerCallback() - child %u: Waiting for first response",
+          unsigned(pendingProfile.childPid));
+      // Still waiting for the first response, no progress made here, don't send
+      // another request.
+    } else if (pendingProfile.lastProgressResponse <=
+               pendingProfile.lastProgressRequest) {
+      LOG("GatheringTimerCallback() - child %u: Waiting for response",
+          unsigned(pendingProfile.childPid));
+      // Still waiting for a response to the last request, no progress made
+      // here, don't send another request.
+    } else if (pendingProfile.lastProgressChange.IsNull()) {
+      LOG("GatheringTimerCallback() - child %u: Still waiting for first change",
+          unsigned(pendingProfile.childPid));
+      // Still waiting for the first change, no progress made here, but send a
+      // new request.
+      needToSendProgressRequest = true;
+    } else if (pendingProfile.lastProgressRequest <
+               pendingProfile.lastProgressChange) {
+      DEBUG_LOG("GatheringTimerCallback() - child %u: Recent change",
+                unsigned(pendingProfile.childPid));
+      // We have a recent change, progress was made.
+      needToSendProgressRequest = true;
+      progressWasMade = true;
+    } else {
+      LOG("GatheringTimerCallback() - child %u: No recent change",
+          unsigned(pendingProfile.childPid));
+      // The last recorded change is not more recent than the last request:
+      // no progress made here, but send a new request.
+      needToSendProgressRequest = true;
+    }
+
+    // And send a new progress request.
+    if (needToSendProgressRequest) {
+      if (!self->SendProgressRequest(pendingProfile)) {
+        // Failed to even send the request, consider this process gone.
+        self->mPendingProfiles.erase(&pendingProfile);
+        LOG("... Failed to send progress request");
+      } else {
+        DEBUG_LOG("... Sent progress request");
+      }
+    } else {
+      DEBUG_LOG("... No progress request");
+    }
+  }
+
+  if (self->mPendingProfiles.empty()) {
+    // We've got all of the async profiles now. Let's finish off the profile
+    // and resolve the Promise.
+    self->FinishGathering();
+    return;
+  }
+
+  // Not finished yet.
+
+  if (progressWasMade) {
+    // We made some progress, just restart the timer.
+    DEBUG_LOG("GatheringTimerCallback() - Progress made, restart timer");
+    self->RestartGatheringTimer();
+    return;
+  }
+
+  DEBUG_LOG("GatheringTimerCallback() - Timeout!");
+  // Drop the timer reference; it is not restarted on this path.
+  self->mGatheringTimer = nullptr;
+  if (!profiler_is_active() || !self->mGathering) {
+    // Not gathering anymore.
+    return;
+  }
+  self->LogEvent([&](Json::Value& aEvent) {
+    aEvent.append(Json::StaticString{
+        "No progress made recently, giving up; pending pids:"});
+    for (const PendingProfile& pendingProfile : self->mPendingProfiles) {
+      aEvent.append(Json::Value::UInt64(pendingProfile.childPid));
+    }
+  });
+  NS_WARNING("Profiler failed to gather profiles from all sub-processes");
+  // We have really reached a timeout while gathering, finish now.
+  // TODO: Add information about missing processes.
+  self->FinishGathering();
+}
+
+// Re-arms the gathering timer, if there is one, with the delay it already
+// had. If that delay cannot be read, is zero, or the timer cannot be
+// re-initialized, gathering is finished immediately instead.
+void nsProfiler::RestartGatheringTimer() {
+  if (!mGatheringTimer) {
+    // No timer to restart.
+    return;
+  }
+
+  // Read the delay before cancelling, so the same value can be reused.
+  uint32_t previousDelayMs = 0;
+  const nsresult delayRv = mGatheringTimer->GetDelay(&previousDelayMs);
+  mGatheringTimer->Cancel();
+
+  bool rearmed = false;
+  if (NS_SUCCEEDED(delayRv) && previousDelayMs != 0) {
+    rearmed = NS_SUCCEEDED(mGatheringTimer->InitWithNamedFuncCallback(
+        GatheringTimerCallback, this, previousDelayMs,
+        nsITimer::TYPE_ONE_SHOT_LOW_PRIORITY, "nsProfilerGatheringTimer"));
+  }
+
+  if (!rearmed) {
+    // Can't restart the timer, so we can't wait any longer.
+    FinishGathering();
+  }
+}
+
+// Returns a pointer to the first entry of mPendingProfiles whose childPid
+// equals aChildPid, or nullptr if there is none. The pointer refers to an
+// element stored inside mPendingProfiles.
+nsProfiler::PendingProfile* nsProfiler::GetPendingProfile(
+    base::ProcessId aChildPid) {
+  PendingProfile* const last = mPendingProfiles.end();
+  for (PendingProfile* candidate = mPendingProfiles.begin(); candidate != last;
+       ++candidate) {
+    if (candidate->childPid == aChildPid) {
+      return candidate;
+    }
+  }
+  return nullptr;
+}
+
+// Handles the arrival of the profile of child process aChildPid. Must run on
+// the main thread.
+//
+// aProfile is the child's profile (may be empty, e.g. on failure), and
+// aAdditionalInformation, if present, is merged into
+// mProfileGenerationAdditionalInformation. A non-empty profile is spliced into
+// mWriter unless it would push the accumulated length to
+// scLengthAccumulationThreshold or beyond, in which case it is discarded and
+// the event is logged.
+//
+// The child is then removed from the pending list. If it was the last pending
+// one, gathering is finished; otherwise the gathering timer is restarted to
+// give the remaining children more time.
+//
+// Does nothing if the profiler is inactive or no gathering is in progress.
+void nsProfiler::GatheredOOPProfile(
+    base::ProcessId aChildPid, const nsACString& aProfile,
+    mozilla::Maybe<ProfileGenerationAdditionalInformation>&&
+        aAdditionalInformation) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return;
+  }
+
+  if (!mGathering) {
+    // If we're not actively gathering, then we don't actually care that we
+    // gathered a profile here. This can happen for processes that exit while
+    // profiling.
+    return;
+  }
+
+  MOZ_RELEASE_ASSERT(mWriter.isSome(),
+                     "Should always have a writer if mGathering is true");
+
+  // Combine all the additional information into a single struct.
+  if (aAdditionalInformation.isSome()) {
+    mProfileGenerationAdditionalInformation->Append(
+        std::move(*aAdditionalInformation));
+  }
+
+  if (!aProfile.IsEmpty()) {
+    if (mWriter->ChunkedWriteFunc().Length() + aProfile.Length() <
+        scLengthAccumulationThreshold) {
+      // TODO: Remove PromiseFlatCString, see bug 1657033.
+      mWriter->Splice(PromiseFlatCString(aProfile));
+    } else {
+      LogEvent([&](Json::Value& aEvent) {
+        aEvent.append(
+            Json::StaticString{"Discarded child profile that would make the "
+                               "full profile too big, pid and size:"});
+        aEvent.append(Json::Value::UInt64(aChildPid));
+        aEvent.append(Json::Value::UInt64{aProfile.Length()});
+      });
+    }
+  }
+
+  if (PendingProfile* pendingProfile = GetPendingProfile(aChildPid);
+      pendingProfile) {
+    mPendingProfiles.erase(pendingProfile);
+
+    if (mPendingProfiles.empty()) {
+      // We've got all of the async profiles now. Let's finish off the profile
+      // and resolve the Promise.
+      FinishGathering();
+      // Gathering is over: there is nothing left to wait for, so don't fall
+      // through to restarting the timer.
+      return;
+    }
+  }
+
+  // Not finished yet, restart the timer to let any remaining child enough time
+  // to do their profile-streaming.
+  RestartGatheringTimer();
+}
+
+// Starts gathering the profile and returns a promise that is resolved with
+// the gzip-compressed profile bytes, or rejected with the gathering or
+// compression error code. If the profiler is inactive, an already-rejected
+// promise (NS_ERROR_FAILURE) is returned.
+RefPtr<nsProfiler::GatheringPromiseAndroid>
+nsProfiler::GetProfileDataAsGzippedArrayBufferAndroid(double aSinceTime) {
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!profiler_is_active()) {
+    return GatheringPromiseAndroid::CreateAndReject(NS_ERROR_FAILURE, __func__);
+  }
+
+  RefPtr<GatheringPromise> gathering = StartGathering(aSinceTime);
+  return gathering->Then(
+      GetMainThreadSerialEventTarget(), __func__,
+      [](const mozilla::ProfileAndAdditionalInformation& aResult) {
+        FallibleTArray<uint8_t> gzipped;
+        nsresult compressRv = CompressString(aResult.mProfile, gzipped);
+        if (compressRv == NS_OK) {
+          return GatheringPromiseAndroid::CreateAndResolve(std::move(gzipped),
+                                                           __func__);
+        }
+        return GatheringPromiseAndroid::CreateAndReject(compressRv, __func__);
+      },
+      // Gathering failed: forward the error code unchanged.
+      [](nsresult aRv) {
+        return GatheringPromiseAndroid::CreateAndReject(aRv, __func__);
+      });
+}
+
+RefPtr<nsProfiler::GatheringPromise> nsProfiler::StartGathering(
+    double aSinceTime) {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  if (mGathering) {
+    // If we're already gathering, return a rejected promise - this isn't
+    // going to end well.
+    return GatheringPromise::CreateAndReject(NS_ERROR_NOT_AVAILABLE, __func__);
+  }
+
+  mGathering = true;
+  mGatheringLog = mozilla::MakeUnique<Json::Value>(Json::objectValue);
+  (*mGatheringLog)[Json::StaticString{
+      "profileGatheringLogBegin" TIMESTAMP_JSON_SUFFIX}] =
+      ProfilingLog::Timestamp();
+
+  if (mGatheringTimer) {
+    mGatheringTimer->Cancel();
+    mGatheringTimer = nullptr;
+  }
+
+  // Start building shared library info starting from the current process.
+  mProfileGenerationAdditionalInformation.emplace(
+      SharedLibraryInfo::GetInfoForSelf());
+
+  // Request profiles from the other processes. This will trigger asynchronous
+  // calls to ProfileGatherer::GatheredOOPProfile as the profiles arrive.
+  //
+  // Do this before the call to profiler_stream_json_for_this_process() because
+  // that call is slow and we want to let the other processes grab their
+  // profiles as soon as possible.
+  nsTArray<ProfilerParent::SingleProcessProfilePromiseAndChildPid> profiles =
+      ProfilerParent::GatherProfiles();
+
+  MOZ_ASSERT(mPendingProfiles.empty());
+  if (!mPendingProfiles.reserve(profiles.Length())) {
+    ResetGathering(NS_ERROR_OUT_OF_MEMORY);
+    return GatheringPromise::CreateAndReject(NS_ERROR_OUT_OF_MEMORY, __func__);
+  }
+
+  mFailureLatchSource.emplace();
+  mWriter.emplace(*mFailureLatchSource);
+
+  UniquePtr<ProfilerCodeAddressService> service =
+      profiler_code_address_service_for_presymbolication();
+
+  // Start building up the JSON result and grab the profile from this process.
+  mWriter->Start();
+  auto rv = profiler_stream_json_for_this_process(*mWriter, aSinceTime,
+                                                  /* aIsShuttingDown */ false,
+                                                  service.get());
+  if (rv.isErr()) {
+    // The profiler is inactive. This either means that it was inactive even
+    // at the time that ProfileGatherer::Start() was called, or that it was
+    // stopped on a different thread since that call. Either way, we need to
+    // reject the promise and stop gathering.
+    ResetGathering(NS_ERROR_NOT_AVAILABLE);
+    return GatheringPromise::CreateAndReject(NS_ERROR_NOT_AVAILABLE, __func__);
+  }
+
+  LogEvent([&](Json::Value& aEvent) {
+    aEvent.append(
+        Json::StaticString{"Generated parent process profile, size:"});
+    aEvent.append(Json::Value::UInt64{mWriter->ChunkedWriteFunc().Length()});
+  });
+
+  mWriter->StartArrayProperty("processes");
+
+  // If we have any process exit profiles, add them immediately.
+  if (Vector<nsCString> exitProfiles = profiler_move_exit_profiles();
+      !exitProfiles.empty()) {
+    for (auto& exitProfile : exitProfiles) {
+      if (!exitProfile.IsEmpty()) {
+        if (exitProfile[0] == '*') {
+          LogEvent([&](Json::Value& aEvent) {
+            aEvent.append(
+                Json::StaticString{"Exit non-profile with error message:"});
+            aEvent.append(exitProfile.Data() + 1);
+          });
+        } else if (mWriter->ChunkedWriteFunc().Length() + exitProfile.Length() <
+                   scLengthAccumulationThreshold) {
+          mWriter->Splice(exitProfile);
+          LogEvent([&](Json::Value& aEvent) {
+            aEvent.append(Json::StaticString{"Added exit profile with size:"});
+            aEvent.append(Json::Value::UInt64{exitProfile.Length()});
+          });
+        } else {
+          LogEvent([&](Json::Value& aEvent) {
+            aEvent.append(
+                Json::StaticString{"Discarded an exit profile that would make "
+                                   "the full profile too big, size:"});
+            aEvent.append(Json::Value::UInt64{exitProfile.Length()});
+          });
+        }
+      }
+    }
+
+    LogEvent([&](Json::Value& aEvent) {
+      aEvent.append(Json::StaticString{
+          "Processed all exit profiles, total size so far:"});
+      aEvent.append(Json::Value::UInt64{mWriter->ChunkedWriteFunc().Length()});
+    });
+  } else {
+    // There are no pending profiles, we're already done.
+    LogEventLiteralString("No exit profiles.");
+  }
+
+  mPromiseHolder.emplace();
+  RefPtr<GatheringPromise> promise = mPromiseHolder->Ensure(__func__);
+
+  // Keep the array property "processes" and the root object in mWriter open
+  // until FinishGathering() is called. As profiles from the other processes
+  // come in, they will be inserted and end up in the right spot.
+  // FinishGathering() will close the array and the root object.
+
+  if (!profiles.IsEmpty()) {
+    // There *are* pending profiles, let's add handlers for their promises.
+
+    // This timeout value is used to monitor progress while gathering child
+    // profiles. The timer will be restarted after we receive a response with
+    // any progress.
+    constexpr uint32_t cMinChildTimeoutS = 1u;  // 1 second minimum and default.
+    constexpr uint32_t cMaxChildTimeoutS = 60u;  // 1 minute max.
+    uint32_t childTimeoutS = Preferences::GetUint(
+        "devtools.performance.recording.child.timeout_s", cMinChildTimeoutS);
+    if (childTimeoutS < cMinChildTimeoutS) {
+      childTimeoutS = cMinChildTimeoutS;
+    } else if (childTimeoutS > cMaxChildTimeoutS) {
+      childTimeoutS = cMaxChildTimeoutS;
+    }
+    const uint32_t childTimeoutMs = childTimeoutS * PR_MSEC_PER_SEC;
+    Unused << NS_NewTimerWithFuncCallback(
+        getter_AddRefs(mGatheringTimer), GatheringTimerCallback, this,
+        childTimeoutMs, nsITimer::TYPE_ONE_SHOT_LOW_PRIORITY,
+        "nsProfilerGatheringTimer", GetMainThreadSerialEventTarget());
+
+    MOZ_ASSERT(mPendingProfiles.capacity() >= profiles.Length());
+    for (const auto& profile : profiles) {
+      mPendingProfiles.infallibleAppend(PendingProfile{profile.childPid});
+      LogEvent([&](Json::Value& aEvent) {
+        aEvent.append(Json::StaticString{"Waiting for pending profile, pid:"});
+        aEvent.append(Json::Value::UInt64(profile.childPid));
+      });
+      profile.profilePromise->Then(
+          GetMainThreadSerialEventTarget(), __func__,
+          [self = RefPtr<nsProfiler>(this), childPid = profile.childPid](
+              IPCProfileAndAdditionalInformation&& aResult) {
+            PendingProfile* pendingProfile = self->GetPendingProfile(childPid);
+            mozilla::ipc::Shmem profileShmem = aResult.profileShmem();
+            LOG("GatherProfile(%u) response: %u bytes (%u were pending, %s %u)",
+                unsigned(childPid), unsigned(profileShmem.Size<char>()),
+                unsigned(self->mPendingProfiles.length()),
+                pendingProfile ? "including" : "excluding", unsigned(childPid));
+            if (profileShmem.IsReadable()) {
+              self->LogEvent([&](Json::Value& aEvent) {
+                aEvent.append(
+                    Json::StaticString{"Got profile from pid, with size:"});
+                aEvent.append(Json::Value::UInt64(childPid));
+                aEvent.append(Json::Value::UInt64{profileShmem.Size<char>()});
+              });
+              const nsDependentCSubstring profileString(
+                  profileShmem.get<char>(), profileShmem.Size<char>() - 1);
+              if (profileString.IsEmpty() || profileString[0] != '*') {
+                self->GatheredOOPProfile(
+                    childPid, profileString,
+                    std::move(aResult.additionalInformation()));
+              } else {
+                self->LogEvent([&](Json::Value& aEvent) {
+                  aEvent.append(Json::StaticString{
+                      "Child non-profile from pid, with error message:"});
+                  aEvent.append(Json::Value::UInt64(childPid));
+                  aEvent.append(profileString.Data() + 1);
+                });
+                self->GatheredOOPProfile(childPid, ""_ns, Nothing());
+              }
+            } else {
+              // This can happen if the child failed to allocate
+              // the Shmem (or maliciously sent an invalid Shmem).
+              self->LogEvent([&](Json::Value& aEvent) {
+                aEvent.append(Json::StaticString{"Got failure from pid:"});
+                aEvent.append(Json::Value::UInt64(childPid));
+              });
+              self->GatheredOOPProfile(childPid, ""_ns, Nothing());
+            }
+          },
+          [self = RefPtr<nsProfiler>(this),
+           childPid = profile.childPid](ipc::ResponseRejectReason&& aReason) {
+            PendingProfile* pendingProfile = self->GetPendingProfile(childPid);
+            LOG("GatherProfile(%u) rejection: %d (%u were pending, %s %u)",
+                unsigned(childPid), (int)aReason,
+                unsigned(self->mPendingProfiles.length()),
+                pendingProfile ? "including" : "excluding", unsigned(childPid));
+            self->LogEvent([&](Json::Value& aEvent) {
+              aEvent.append(
+                  Json::StaticString{"Got rejection from pid, with reason:"});
+              aEvent.append(Json::Value::UInt64(childPid));
+              aEvent.append(Json::Value::UInt{static_cast<unsigned>(aReason)});
+            });
+            self->GatheredOOPProfile(childPid, ""_ns, Nothing());
+          });
+    }
+  } else {
+    // There are no pending profiles, we're already done.
+    LogEventLiteralString("No pending child profiles.");
+    FinishGathering();
+  }
+
+  return promise;
+}
+
+// Asynchronously computes the symbol table for the library identified by
+// aDebugPath / aBreakpadID on the lazily-created "ProfSymbolTable" thread.
+//
+// Returns a promise that is resolved with the SymbolTable when
+// profiler_get_symbol_table() succeeds, and rejected with NS_ERROR_FAILURE
+// when it fails, when the thread cannot be created, or when the task cannot be
+// dispatched.
+RefPtr<nsProfiler::SymbolTablePromise> nsProfiler::GetSymbolTableMozPromise(
+    const nsACString& aDebugPath, const nsACString& aBreakpadID) {
+  // The promise is shared (by reference count) between this function and the
+  // runnable. A MozPromiseHolder cannot be used here: it would have to be
+  // moved into the runnable's lambda *before* Dispatch() is attempted, leaving
+  // nothing valid to reject through if the dispatch then fails.
+  RefPtr<SymbolTablePromise::Private> promise =
+      new SymbolTablePromise::Private(__func__);
+
+  if (!mSymbolTableThread) {
+    nsresult rv = NS_NewNamedThread("ProfSymbolTable",
+                                    getter_AddRefs(mSymbolTableThread));
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      promise->Reject(NS_ERROR_FAILURE, __func__);
+      return promise;
+    }
+  }
+
+  nsresult rv = mSymbolTableThread->Dispatch(NS_NewRunnableFunction(
+      "nsProfiler::GetSymbolTableMozPromise runnable on ProfSymbolTable thread",
+      [promise, debugPath = nsCString(aDebugPath),
+       breakpadID = nsCString(aBreakpadID)]() mutable {
+        AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING("profiler_get_symbol_table",
+                                              OTHER, debugPath);
+        SymbolTable symbolTable;
+        bool succeeded = profiler_get_symbol_table(
+            debugPath.get(), breakpadID.get(), &symbolTable);
+        if (succeeded) {
+          promise->Resolve(std::move(symbolTable), __func__);
+        } else {
+          promise->Reject(NS_ERROR_FAILURE, __func__);
+        }
+      }));
+
+  if (NS_WARN_IF(NS_FAILED(rv))) {
+    // Get-symbol task was not dispatched and therefore won't fulfill the
+    // promise, we must reject the promise now.
+    promise->Reject(NS_ERROR_FAILURE, __func__);
+  }
+
+  return promise;
+}
+
+// Finalizes the JSON profile being gathered and settles the gathering promise.
+//
+// Preconditions (release-asserted): called on the main thread, with mWriter,
+// mPromiseHolder and mProfileGenerationAdditionalInformation all populated.
+// On success the promise is resolved with the JSON text plus the additional
+// information; on any failure ResetGathering() is called with an error code to
+// reject it. Every path ends by calling ResetGathering(), which clears the
+// gathering state.
+void nsProfiler::FinishGathering() {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+  MOZ_RELEASE_ASSERT(mWriter.isSome());
+  MOZ_RELEASE_ASSERT(mPromiseHolder.isSome());
+  MOZ_RELEASE_ASSERT(mProfileGenerationAdditionalInformation.isSome());
+
+  // Close the "processes" array property.
+  mWriter->EndArray();
+
+  // If a gathering log exists, record a final event and an end timestamp in
+  // it, then splice it into the profile as "profileGatheringLog": {<pid>: log}.
+  if (mGatheringLog) {
+    LogEvent([&](Json::Value& aEvent) {
+      aEvent.append(Json::StaticString{"Finished gathering, total size:"});
+      aEvent.append(Json::Value::UInt64{mWriter->ChunkedWriteFunc().Length()});
+    });
+    (*mGatheringLog)[Json::StaticString{
+        "profileGatheringLogEnd" TIMESTAMP_JSON_SUFFIX}] =
+        ProfilingLog::Timestamp();
+    mWriter->StartObjectProperty("profileGatheringLog");
+    {
+      nsAutoCString pid;
+      pid.AppendInt(int64_t(profiler_current_process_id().ToNumber()));
+      Json::String logString = ToCompactString(*mGatheringLog);
+      // The log has been serialized; drop it before splicing the string.
+      mGatheringLog = nullptr;
+      mWriter->SplicedJSONProperty(pid, logString);
+    }
+    mWriter->EndObject();
+  }
+
+  // Close the root object of the generated JSON.
+  mWriter->End();
+
+  // Bail out if the writer recorded a failure while generating the JSON.
+  if (const char* failure = mWriter->GetFailure(); failure) {
+#ifndef ANDROID
+    // NOTE(review): this message has no trailing newline — confirm intended.
+    fprintf(stderr, "JSON generation failure: %s", failure);
+#else
+    __android_log_print(ANDROID_LOG_INFO, "GeckoProfiler",
+                        "JSON generation failure: %s", failure);
+#endif
+    NS_WARNING("Error during JSON generation, probably OOM.");
+    ResetGathering(NS_ERROR_OUT_OF_MEMORY);
+    return;
+  }
+
+  // And try to resolve the promise with the profile JSON.
+  const size_t len = mWriter->ChunkedWriteFunc().Length();
+  if (len >= scLengthMax) {
+    NS_WARNING("Profile JSON is too big to fit in a string.");
+    ResetGathering(NS_ERROR_FILE_TOO_BIG);
+    return;
+  }
+
+  // Fallibly size the destination string so that allocation failure is
+  // reported through the promise rather than crashing.
+  nsCString result;
+  if (!result.SetLength(len, fallible)) {
+    NS_WARNING("Cannot allocate a string for the Profile JSON.");
+    ResetGathering(NS_ERROR_OUT_OF_MEMORY);
+    return;
+  }
+  MOZ_ASSERT(*(result.Data() + len) == '\0',
+             "We expected a null at the end of the string buffer, to be "
+             "rewritten by CopyDataIntoLazilyAllocatedBuffer");
+
+  char* const resultBeginWriting = result.BeginWriting();
+  if (!resultBeginWriting) {
+    NS_WARNING("Cannot access the string to write the Profile JSON.");
+    ResetGathering(NS_ERROR_CACHE_WRITE_ACCESS_DENIED);
+    return;
+  }
+
+  // Here, we have enough space reserved in `result`, starting at
+  // `resultBeginWriting`, copy the JSON profile there.
+  // The callback is asked for a buffer of len + 1 bytes (release-asserted),
+  // i.e. the JSON text plus its terminating null.
+  if (!mWriter->ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer(
+          [&](size_t aBufferLen) -> char* {
+            MOZ_RELEASE_ASSERT(aBufferLen == len + 1);
+            return resultBeginWriting;
+          })) {
+    // NOTE(review): the warning mentions OOM but the rejection code below is
+    // NS_ERROR_FILE_TOO_BIG — confirm which one is intended.
+    NS_WARNING("Could not copy profile JSON, probably OOM.");
+    ResetGathering(NS_ERROR_FILE_TOO_BIG);
+    return;
+  }
+  MOZ_ASSERT(*(result.Data() + len) == '\0',
+             "We still expected a null at the end of the string buffer");
+
+  mProfileGenerationAdditionalInformation->FinishGathering();
+  mPromiseHolder->Resolve(
+      ProfileAndAdditionalInformation{
+          std::move(result),
+          std::move(*mProfileGenerationAdditionalInformation)},
+      __func__);
+
+  // The promise was resolved just above, so the error code passed here is
+  // presumably never delivered (ResetGathering uses RejectIfExists); this call
+  // is made to clear the gathering state.
+  ResetGathering(NS_ERROR_UNEXPECTED);
+}
+
+// Returns this object to its "not gathering" state: settles and drops the
+// promise holder, forgets pending child profiles and the gathering log,
+// cancels the gathering timer, and destroys the JSON writer together with its
+// failure latch and the additional-information accumulator.
+void nsProfiler::ResetGathering(nsresult aPromiseRejectionIfPending) {
+  // A promise that is still unfulfilled must be rejected before its holder is
+  // destroyed.
+  if (mPromiseHolder) {
+    mPromiseHolder->RejectIfExists(aPromiseRejectionIfPending, __func__);
+    mPromiseHolder.reset();
+  }
+
+  mPendingProfiles.clearAndFree();
+  mGathering = false;
+  mGatheringLog.reset();
+
+  if (mGatheringTimer) {
+    mGatheringTimer->Cancel();
+    mGatheringTimer = nullptr;
+  }
+
+  mWriter.reset();
+  mFailureLatchSource.reset();
+  mProfileGenerationAdditionalInformation.reset();
+}
diff --git a/tools/profiler/gecko/nsProfiler.h b/tools/profiler/gecko/nsProfiler.h
new file mode 100644
index 0000000000..3757df3079
--- /dev/null
+++ b/tools/profiler/gecko/nsProfiler.h
@@ -0,0 +1,117 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsProfiler_h
+#define nsProfiler_h
+
+#include "base/process.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MozPromise.h"
+#include "mozilla/ProfileJSONWriter.h"
+#include "mozilla/ProportionValue.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "nsIProfiler.h"
+#include "nsITimer.h"
+#include "nsServiceManagerUtils.h"
+#include "ProfilerCodeAddressService.h"
+#include "ProfileAdditionalInformation.h"
+
+// Forward declaration only: this header refers to Json::Value through a
+// UniquePtr member and in template comments, so the full JSON header is not
+// included here.
+namespace Json {
+class Value;
+}  // namespace Json
+
+// Implementation of the nsIProfiler XPCOM interface. Besides the interface
+// methods (declared by NS_DECL_NSIPROFILER), it holds the state needed to
+// gather a JSON profile from this process and from child processes, and to
+// compute symbol tables on a dedicated thread.
+class nsProfiler final : public nsIProfiler {
+ public:
+  nsProfiler();
+
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSIPROFILER
+
+  nsresult Init();
+
+  // Fetches the profiler service by contract ID and downcasts it to the
+  // concrete class. The local nsCOMPtr is released on return, so the returned
+  // raw pointer is non-owning (presumably the service manager keeps the
+  // service alive — verify before storing the pointer). Returns nullptr if
+  // the service could not be obtained.
+  static nsProfiler* GetOrCreate() {
+    nsCOMPtr<nsIProfiler> iprofiler =
+        do_GetService("@mozilla.org/tools/profiler;1");
+    return static_cast<nsProfiler*>(iprofiler.get());
+  }
+
+ private:
+  ~nsProfiler();
+
+  // Promise types used by the gathering and symbolication code paths; all of
+  // them reject with an nsresult.
+  using GatheringPromiseAndroid =
+      mozilla::MozPromise<FallibleTArray<uint8_t>, nsresult, true>;
+  using GatheringPromise =
+      mozilla::MozPromise<mozilla::ProfileAndAdditionalInformation, nsresult,
+                          false>;
+  using SymbolTablePromise =
+      mozilla::MozPromise<mozilla::SymbolTable, nsresult, true>;
+
+  // Gathering life cycle: StartGathering() begins a gathering and returns its
+  // promise, GatheredOOPProfile() is called once per child-process response
+  // (or failure), FinishGathering() completes the JSON and settles the
+  // promise, and ResetGathering() clears all gathering state, rejecting the
+  // promise with aPromiseRejectionIfPending if it is still pending.
+  RefPtr<GatheringPromise> StartGathering(double aSinceTime);
+  void GatheredOOPProfile(
+      base::ProcessId aChildPid, const nsACString& aProfile,
+      mozilla::Maybe<mozilla::ProfileGenerationAdditionalInformation>&&
+          aAdditionalInformation);
+  void FinishGathering();
+  void ResetGathering(nsresult aPromiseRejectionIfPending);
+  // nsITimer callback; aClosure is the nsProfiler that armed the timer.
+  static void GatheringTimerCallback(nsITimer* aTimer, void* aClosure);
+  void RestartGatheringTimer();
+
+  // Computes a symbol table on the "ProfSymbolTable" thread (created on first
+  // use) and reports the result through the returned promise.
+  RefPtr<SymbolTablePromise> GetSymbolTableMozPromise(
+      const nsACString& aDebugPath, const nsACString& aBreakpadID);
+
+  // A stored profile JSON together with a buffer position recorded at gather
+  // time.
+  struct ExitProfile {
+    nsCString mJSON;
+    uint64_t mBufferPositionAtGatherTime;
+  };
+
+  // Bookkeeping for one child process whose profile has been requested but
+  // not yet received: its pid, the last reported progress, and timestamps of
+  // the progress request/response exchange.
+  struct PendingProfile {
+    base::ProcessId childPid;
+
+    mozilla::ProportionValue progressProportion;
+    nsCString progressLocation;
+
+    mozilla::TimeStamp lastProgressRequest;
+    mozilla::TimeStamp lastProgressResponse;
+    mozilla::TimeStamp lastProgressChange;
+
+    // Only the pid is set here; the other members are default-constructed.
+    explicit PendingProfile(base::ProcessId aChildPid) : childPid(aChildPid) {}
+  };
+
+  // Looks up the pending entry for aChildPid; callers test the result against
+  // null to learn whether that process is still pending.
+  PendingProfile* GetPendingProfile(base::ProcessId aChildPid);
+  // Returns false if the request could not be sent.
+  bool SendProgressRequest(PendingProfile& aPendingProfile);
+
+  // If the log is active, call aJsonLogObjectUpdater(Json::Value&) on the log's
+  // root object.
+  template <typename JsonLogObjectUpdater>
+  void Log(JsonLogObjectUpdater&& aJsonLogObjectUpdater);
+  // If the log is active, call aJsonArrayAppender(Json::Value&) on a Json
+  // array that already contains a timestamp, and to which event-related
+  // elements may be appended.
+  template <typename JsonArrayAppender>
+  void LogEvent(JsonArrayAppender&& aJsonArrayAppender);
+  void LogEventLiteralString(const char* aEventString);
+
+  // These fields are all related to profile gathering.
+  mozilla::Vector<ExitProfile> mExitProfiles;
+  // Holder of the in-flight gathering promise, if any.
+  mozilla::Maybe<mozilla::MozPromiseHolder<GatheringPromise>> mPromiseHolder;
+  // Thread used by GetSymbolTableMozPromise(); null until first needed.
+  nsCOMPtr<nsIThread> mSymbolTableThread;
+  mozilla::Maybe<mozilla::FailureLatchSource> mFailureLatchSource;
+  // JSON writer for the profile currently being gathered.
+  mozilla::Maybe<SpliceableChunkedJSONWriter> mWriter;
+  mozilla::Maybe<mozilla::ProfileGenerationAdditionalInformation>
+      mProfileGenerationAdditionalInformation;
+  // One entry per child process that has not answered yet.
+  mozilla::Vector<PendingProfile> mPendingProfiles;
+  bool mGathering;
+  // One-shot timer armed while waiting for child profiles.
+  nsCOMPtr<nsITimer> mGatheringTimer;
+  // Supplemental log to the profiler's "profilingLog" (which has already been
+  // completed in JSON profiles that are gathered).
+  mozilla::UniquePtr<Json::Value> mGatheringLog;
+};
+
+#endif  // nsProfiler_h
diff --git a/tools/profiler/gecko/nsProfilerCIID.h b/tools/profiler/gecko/nsProfilerCIID.h
new file mode 100644
index 0000000000..3df44596b1
--- /dev/null
+++ b/tools/profiler/gecko/nsProfilerCIID.h
@@ -0,0 +1,16 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsProfilerCIID_h__
+#define nsProfilerCIID_h__
+
+// Class ID of the profiler component, written as a brace initializer:
+// {25db9b8e-8123-4de1-b66d-8bbbedf2cdf4}.
+#define NS_PROFILER_CID                              \
+  {                                                  \
+    0x25db9b8e, 0x8123, 0x4de1, {                    \
+      0xb6, 0x6d, 0x8b, 0xbb, 0xed, 0xf2, 0xcd, 0xf4 \
+    }                                                \
+  }
+
+#endif
diff --git a/tools/profiler/gecko/nsProfilerStartParams.cpp b/tools/profiler/gecko/nsProfilerStartParams.cpp
new file mode 100644
index 0000000000..dd7c3f4ab7
--- /dev/null
+++ b/tools/profiler/gecko/nsProfilerStartParams.cpp
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsProfilerStartParams.h"
+#include "ipc/IPCMessageUtils.h"
+
+// XPCOM boilerplate: presumably expands to the nsISupports method definitions
+// for nsProfilerStartParams, advertising nsIProfilerStartParams — see the
+// macro's definition for the exact expansion.
+NS_IMPL_ISUPPORTS(nsProfilerStartParams, nsIProfilerStartParams)
+
+// Stores a snapshot of the given profiler start parameters. All arguments are
+// copied into the corresponding members, except aFilters, whose contents are
+// moved out of the caller's array.
+nsProfilerStartParams::nsProfilerStartParams(
+    uint32_t aEntries, const mozilla::Maybe<double>& aDuration,
+    double aInterval, uint32_t aFeatures, nsTArray<nsCString>&& aFilters,
+    uint64_t aActiveTabID)
+    : mEntries(aEntries),
+      mDuration(aDuration),
+      mInterval(aInterval),
+      mFeatures(aFeatures),
+      mFilters(std::move(aFilters)),
+      mActiveTabID(aActiveTabID) {}
+
+// Nothing to release explicitly; the members clean up after themselves.
+nsProfilerStartParams::~nsProfilerStartParams() = default;
+
+// Getter for the stored entry count: writes mEntries to *aEntries and returns
+// NS_OK. The out-pointer is validated by NS_ENSURE_ARG_POINTER first.
+NS_IMETHODIMP
+nsProfilerStartParams::GetEntries(uint32_t* aEntries) {
+  NS_ENSURE_ARG_POINTER(aEntries);
+  *aEntries = mEntries;
+  return NS_OK;
+}
+
+// Getter for the stored duration. Because the duration is optional, an absent
+// value is reported to the caller as 0.
+NS_IMETHODIMP
+nsProfilerStartParams::GetDuration(double* aDuration) {
+  NS_ENSURE_ARG_POINTER(aDuration);
+  *aDuration = mDuration ? *mDuration : 0;
+  return NS_OK;
+}
+
+// Getter for the stored interval: writes mInterval to *aInterval and returns
+// NS_OK. The out-pointer is validated by NS_ENSURE_ARG_POINTER first.
+NS_IMETHODIMP
+nsProfilerStartParams::GetInterval(double* aInterval) {
+  NS_ENSURE_ARG_POINTER(aInterval);
+  *aInterval = mInterval;
+  return NS_OK;
+}
+
+// Getter for the stored feature value: writes mFeatures to *aFeatures and
+// returns NS_OK. The out-pointer is validated by NS_ENSURE_ARG_POINTER first.
+NS_IMETHODIMP
+nsProfilerStartParams::GetFeatures(uint32_t* aFeatures) {
+  NS_ENSURE_ARG_POINTER(aFeatures);
+  *aFeatures = mFeatures;
+  return NS_OK;
+}
+
+// Returns a reference to the stored filter strings (no copy is made); the
+// reference is only valid while this object is alive.
+const nsTArray<nsCString>& nsProfilerStartParams::GetFilters() {
+  return mFilters;
+}
+
+// Getter for the stored active tab ID: writes mActiveTabID to *aActiveTabID
+// and returns NS_OK. The out-pointer is validated by NS_ENSURE_ARG_POINTER
+// first.
+NS_IMETHODIMP
+nsProfilerStartParams::GetActiveTabID(uint64_t* aActiveTabID) {
+  NS_ENSURE_ARG_POINTER(aActiveTabID);
+  *aActiveTabID = mActiveTabID;
+  return NS_OK;
+}
diff --git a/tools/profiler/gecko/nsProfilerStartParams.h b/tools/profiler/gecko/nsProfilerStartParams.h
new file mode 100644
index 0000000000..25c2b5082f
--- /dev/null
+++ b/tools/profiler/gecko/nsProfilerStartParams.h
@@ -0,0 +1,36 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _NSPROFILERSTARTPARAMS_H_
+#define _NSPROFILERSTARTPARAMS_H_
+
+#include "nsIProfiler.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+// Value object holding the parameters of a profiler start, exposed through
+// the nsIProfilerStartParams interface. The members are only assigned in the
+// constructor.
+class nsProfilerStartParams : public nsIProfilerStartParams {
+ public:
+  // This class can be used on multiple threads. For example, it's used for the
+  // observer notification from profiler_start, which can run on any thread but
+  // posts the notification to the main thread.
+  NS_DECL_THREADSAFE_ISUPPORTS
+  NS_DECL_NSIPROFILERSTARTPARAMS
+
+  // aFilters is consumed (moved from); the other arguments are copied.
+  nsProfilerStartParams(uint32_t aEntries,
+                        const mozilla::Maybe<double>& aDuration,
+                        double aInterval, uint32_t aFeatures,
+                        nsTArray<nsCString>&& aFilters, uint64_t aActiveTabID);
+
+ private:
+  // Private destructor: instances are destroyed through reference counting
+  // rather than by direct deletion.
+  virtual ~nsProfilerStartParams();
+  uint32_t mEntries;
+  // Optional; GetDuration() reports an absent value as 0.
+  mozilla::Maybe<double> mDuration;
+  double mInterval;
+  uint32_t mFeatures;
+  nsTArray<nsCString> mFilters;
+  uint64_t mActiveTabID;
+};
+
+#endif
diff --git a/tools/profiler/lul/AutoObjectMapper.cpp b/tools/profiler/lul/AutoObjectMapper.cpp
new file mode 100644
index 0000000000..f7489fbfee
--- /dev/null
+++ b/tools/profiler/lul/AutoObjectMapper.cpp
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "PlatformMacros.h"
+#include "AutoObjectMapper.h"
+
+// A helper function for creating failure error messages in
+// AutoObjectMapper*::Map.
+//
+// Formats "AutoObjectMapper::Map: Failed to <aHowFailed> '<aFileName>'" into
+// a fixed 300-byte stack buffer and passes it to |aLog|.  The file name is
+// taken by const reference so that reporting a failure does not copy it.
+static void failedToMessage(void (*aLog)(const char*), const char* aHowFailed,
+                            const std::string& aFileName) {
+  char buf[300];
+  SprintfLiteral(buf, "AutoObjectMapper::Map: Failed to %s \'%s\'", aHowFailed,
+                 aFileName.c_str());
+  // Belt and braces: guarantee termination whatever the formatter did.
+  buf[sizeof(buf) - 1] = 0;
+  aLog(buf);
+}
+
+// Creates a mapper that holds no mapping yet; |aLog| is remembered as the sink
+// for failure messages produced by Map().
+AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void (*aLog)(const char*))
+    : mImage(nullptr), mSize(0), mLog(aLog), mIsMapped(false) {}
+
+// Releases the mapping established by Map(), if there is one.
+AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() {
+  if (mIsMapped) {
+    MOZ_ASSERT(mSize > 0);
+    // Not strictly guaranteed, but it is reasonable to assume that no mmap
+    // implementation would ever place a mapping at page zero.
+    MOZ_ASSERT(mImage);
+    munmap(mImage, mSize);
+  } else {
+    // Never mapped: the members must still hold their initial values.
+    MOZ_ASSERT(!mImage);
+    MOZ_ASSERT(mSize == 0);
+  }
+}
+
+// Maps the whole of |fileName| read-only (PROT_READ, MAP_SHARED) into the
+// address space.
+//
+// On success returns true, stores the mapping's address and size in *start
+// and *length, and records them so the destructor can unmap.  On failure
+// (cannot open, cannot stat, empty file, mmap failure, or a mapping is already
+// held) a message is sent to the logging sink, *start and *length are left
+// untouched, and false is returned.
+bool AutoObjectMapperPOSIX::Map(/*OUT*/ void** start, /*OUT*/ size_t* length,
+                                std::string fileName) {
+  MOZ_ASSERT(!mIsMapped);
+  if (mIsMapped) {
+    // The assertion above is compiled out of release builds.  Refuse to map a
+    // second time there too, as otherwise the first mapping would be
+    // overwritten and never unmapped.
+    failedToMessage(mLog, "re-map", fileName);
+    return false;
+  }
+
+  int fd = open(fileName.c_str(), O_RDONLY);
+  if (fd == -1) {
+    failedToMessage(mLog, "open", fileName);
+    return false;
+  }
+
+  struct stat st;
+  int err = fstat(fd, &st);
+  // st_size is a signed off_t; only a strictly positive size is usable.
+  size_t sz = (err == 0 && st.st_size > 0) ? static_cast<size_t>(st.st_size) : 0;
+  if (err != 0 || sz == 0) {
+    failedToMessage(mLog, "fstat", fileName);
+    close(fd);
+    return false;
+  }
+
+  void* image = mmap(nullptr, sz, PROT_READ, MAP_SHARED, fd, 0);
+  // The descriptor is no longer needed whether or not the mapping succeeded;
+  // an established mapping remains valid after its descriptor is closed.
+  close(fd);
+  if (image == MAP_FAILED) {
+    failedToMessage(mLog, "mmap", fileName);
+    return false;
+  }
+
+  mIsMapped = true;
+  mImage = *start = image;
+  mSize = *length = sz;
+  return true;
+}
diff --git a/tools/profiler/lul/AutoObjectMapper.h b/tools/profiler/lul/AutoObjectMapper.h
new file mode 100644
index 0000000000..f63aa43e0e
--- /dev/null
+++ b/tools/profiler/lul/AutoObjectMapper.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AutoObjectMapper_h
+#define AutoObjectMapper_h
+
+#include <string>
+
+#include "mozilla/Attributes.h"
+#include "PlatformMacros.h"
+
+// A (nearly-) RAII class that maps an object in and then unmaps it on
+// destruction.  This base class version uses the "normal" POSIX
+// functions: open, fstat, close, mmap, munmap.
+
+class MOZ_STACK_CLASS AutoObjectMapperPOSIX {
+ public:
+  // The constructor does not attempt to map the file, because that
+  // might fail.  Instead, once the object has been constructed,
+  // call Map() to attempt the mapping.  There is no corresponding
+  // Unmap() since the unmapping is done in the destructor.  Failure
+  // messages are sent to |aLog|.
+  explicit AutoObjectMapperPOSIX(void (*aLog)(const char*));
+
+  // Unmap the file on destruction of this object.
+  ~AutoObjectMapperPOSIX();
+
+  // Map |fileName| into the address space and return the mapping
+  // extents.  If the file is zero sized this will fail.  The file is
+  // mapped read-only (the implementation passes PROT_READ and
+  // MAP_SHARED to mmap).  Returns true iff the mapping
+  // succeeded, in which case *start and *length hold its extent.
+  // Map must not be called again once a call to it has succeeded;
+  // the implementation asserts that no mapping is already held.
+  bool Map(/*OUT*/ void** start, /*OUT*/ size_t* length, std::string fileName);
+
+ protected:
+  // If we are currently holding a mapped object, these record the
+  // mapped address range.
+  void* mImage;
+  size_t mSize;
+
+  // A logging sink, for complaining about mapping failures.
+  void (*mLog)(const char*);
+
+ private:
+  // Are we currently holding a mapped object?  This is private to
+  // the base class.  Derived classes need to have their own way to
+  // track whether they are holding a mapped object.
+  bool mIsMapped;
+
+  // Disable copying and assignment.
+  // (Declared private and not defined in this header.)
+  AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&);
+  AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&);
+  // Disable heap allocation of this class.
+  void* operator new(size_t);
+  void* operator new[](size_t);
+  void operator delete(void*);
+  void operator delete[](void*);
+};
+
+#endif  // AutoObjectMapper_h
diff --git a/tools/profiler/lul/LulCommon.cpp b/tools/profiler/lul/LulCommon.cpp
new file mode 100644
index 0000000000..428f102c42
--- /dev/null
+++ b/tools/profiler/lul/LulCommon.cpp
@@ -0,0 +1,100 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2011, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/module.cc
+//   src/common/unique_string.cc
+
+// There's no internal-only interface for LulCommon.  Hence include
+// the external interface directly.
+#include "LulCommonExt.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <map>
+
+namespace lul {
+
+using std::string;
+
+////////////////////////////////////////////////////////////////
+// Module
+//
+// Records the module's identifying strings (name, OS, architecture and id) by
+// copying each argument into the corresponding member.
+Module::Module(const string& name, const string& os, const string& architecture,
+               const string& id)
+    : name_(name), os_(os), architecture_(architecture), id_(id) {}
+
+// No explicit cleanup is performed here.
+Module::~Module() = default;
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+// Owns a heap-allocated (strdup'd) copy of a string.  Instances are created
+// and destroyed only by UniqueStringUniverse and handed out by pointer.
+class UniqueString {
+ public:
+  explicit UniqueString(const string& str) : str_(strdup(str.c_str())) {}
+  ~UniqueString() { free(const_cast<char*>(str_)); }
+
+  // The buffer is uniquely owned: copying would make two objects free the
+  // same pointer, so copying is disabled.
+  UniqueString(const UniqueString&) = delete;
+  UniqueString& operator=(const UniqueString&) = delete;
+
+  // The owned C string; released with free() in the destructor.
+  const char* str_;
+};
+
+// Returns the C string owned by |ustr|; the pointer stays valid only as long
+// as |ustr| itself does.  |ustr| must not be null.
+const char* FromUniqueString(const UniqueString* ustr) { return ustr->str_; }
+
+// True iff the string held by |ustr| is "", i.e. its first character is the
+// terminator.  |ustr| must not be null.
+bool IsEmptyUniqueString(const UniqueString* ustr) {
+  const char* const text = ustr->str_;
+  return *text == '\0';
+}
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+// Destroys every UniqueString that this universe handed out; pointers
+// previously returned by ToUniqueString() dangle afterwards.
+UniqueStringUniverse::~UniqueStringUniverse() {
+  for (auto& entry : map_) {
+    delete entry.second;
+  }
+}
+
+// Interns |str|: returns the UniqueString already registered for that text,
+// or creates, registers and returns a new one.  The result is owned by this
+// universe and is deleted by its destructor.
+const UniqueString* UniqueStringUniverse::ToUniqueString(string str) {
+  // lower_bound yields either the existing entry or the position at which a
+  // new one belongs, so the tree is searched only once per call.
+  std::map<string, UniqueString*>::iterator it = map_.lower_bound(str);
+  if (it != map_.end() && it->first == str) {
+    return it->second;
+  }
+  UniqueString* ustr = new UniqueString(str);
+  map_.insert(it, std::make_pair(str, ustr));
+  return ustr;
+}
+
+}  // namespace lul
diff --git a/tools/profiler/lul/LulCommonExt.h b/tools/profiler/lul/LulCommonExt.h
new file mode 100644
index 0000000000..b20a7321ff
--- /dev/null
+++ b/tools/profiler/lul/LulCommonExt.h
@@ -0,0 +1,509 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2010, 2012, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// module.h: Define google_breakpad::Module. A Module holds debugging
+// information, and can write that information out as a Breakpad
+// symbol file.
+
+//  (C) Copyright Greg Colvin and Beman Dawes 1998, 1999.
+//  Copyright (c) 2001, 2002 Peter Dimov
+//
+//  Permission to copy, use, modify, sell and distribute this software
+//  is granted provided this copyright notice appears in all copies.
+//  This software is provided "as is" without express or implied
+//  warranty, and with no claim as to its suitability for any purpose.
+//
+//  See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation.
+//
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/unique_string.h
+//   src/common/scoped_ptr.h
+//   src/common/module.h
+
+// External interface for the "Common" component of LUL.
+
+#ifndef LulCommonExt_h
+#define LulCommonExt_h
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <string>
+#include <map>
+#include <vector>
+#include <cstddef>  // for std::ptrdiff_t
+
+#include "mozilla/Assertions.h"
+
+namespace lul {
+
+using std::map;
+using std::string;
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+
+// Abstract type
+// (only forward-declared in this header; clients handle it by pointer).
+class UniqueString;
+
+// Get the contained C string (debugging only)
+const char* FromUniqueString(const UniqueString*);
+
+// Is the given string empty (that is, "") ?
+bool IsEmptyUniqueString(const UniqueString*);
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+
+// All UniqueStrings live in some specific UniqueStringUniverse.
+// NOTE(review): the class holds raw pointers in |map_| and declares a
+// destructor, yet it is implicitly copyable — confirm that it is never
+// copied.
+class UniqueStringUniverse {
+ public:
+  UniqueStringUniverse() {}
+  ~UniqueStringUniverse();
+  // Convert a |string| to a UniqueString, that lives in this universe.
+  const UniqueString* ToUniqueString(string str);
+
+ private:
+  // Maps each string's text to the UniqueString created for it.
+  map<string, UniqueString*> map_;
+};
+
+////////////////////////////////////////////////////////////////
+// GUID
+//
+
+// A 16-byte identifier split into a 32-bit field, two 16-bit fields and eight
+// single bytes (presumably the conventional GUID layout — confirm against the
+// breakpad original).
+typedef struct {
+  uint32_t data1;
+  uint16_t data2;
+  uint16_t data3;
+  uint8_t data4[8];
+} MDGUID;  // GUID
+
+// Shorter alias for the same type.
+typedef MDGUID GUID;
+
+////////////////////////////////////////////////////////////////
+// scoped_ptr
+//
+
+//  scoped_ptr mimics a built-in pointer except that it guarantees deletion
+//  of the object pointed to, either on destruction of the scoped_ptr or via
+//  an explicit reset(). scoped_ptr is a simple solution for simple needs;
+//  use shared_ptr or std::auto_ptr if your needs are more complex.
+
+//  *** NOTE ***
+//  If your scoped_ptr is a class member of class FOO pointing to a
+//  forward declared type BAR (as shown below), then you MUST use a non-inlined
+//  version of the destructor.  The destructor of a scoped_ptr (called from
+//  FOO's destructor) must have a complete definition of BAR in order to
+//  destroy it.  Example:
+//
+//  -- foo.h --
+//  class BAR;
+//
+//  class FOO {
+//   public:
+//    FOO();
+//    ~FOO();  // Required for sources that instantiate class FOO to compile!
+//
+//   private:
+//    scoped_ptr<BAR> bar_;
+//  };
+//
+//  -- foo.cc --
+//  #include "foo.h"
+//  FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition.
+
+//  scoped_ptr_malloc added by Google
+//  When one of these goes out of scope, instead of doing a delete or
+//  delete[], it calls free().  scoped_ptr_malloc<char> is likely to see
+//  much more use than any other specializations.
+
+//  release() added by Google
+//  Use this to conditionally transfer ownership of a heap-allocated object
+//  to the caller, usually on method success.
+
+// Sole owner of a single heap object of type T, which is deleted when the
+// scoped_ptr is destroyed or reset().  Not copyable.
+template <typename T>
+class scoped_ptr {
+ private:
+  T* ptr;
+
+  // Copying is disabled: declared but never defined.
+  scoped_ptr(scoped_ptr const&);
+  scoped_ptr& operator=(scoped_ptr const&);
+
+ public:
+  typedef T element_type;
+
+  // Takes ownership of |p|, which may be null.
+  explicit scoped_ptr(T* p = nullptr) : ptr(p) {}
+
+  ~scoped_ptr() { delete ptr; }
+
+  // Deletes the current object and takes ownership of |p| instead.  Resetting
+  // to the pointer already held is a no-op.
+  void reset(T* p = nullptr) {
+    if (ptr == p) {
+      return;
+    }
+    delete ptr;
+    ptr = p;
+  }
+
+  // Dereferencing asserts that an object is held.
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return ptr != p; }
+
+  // Non-owning access to the held pointer (may be null).
+  T* get() const { return ptr; }
+
+  // Exchanges the held pointers of the two objects.
+  void swap(scoped_ptr& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+
+  // Gives up ownership: returns the held pointer and leaves this object
+  // empty.  The caller becomes responsible for deleting the result.
+  T* release() {
+    T* const released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+  // no reason to use these: each scoped_ptr should have its own object
+  template <typename U>
+  bool operator==(scoped_ptr<U> const& p) const;
+  template <typename U>
+  bool operator!=(scoped_ptr<U> const& p) const;
+};
+
+// Free-function form of scoped_ptr::swap: exchanges the pointers owned by
+// |a| and |b|.
+template <typename T>
+inline void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) {
+  b.swap(a);
+}
+
+// Lets a raw pointer appear on the left of `==` against a scoped_ptr.
+template <typename T>
+inline bool operator==(T* p, const scoped_ptr<T>& b) {
+  return b.get() == p;
+}
+
+// Lets a raw pointer appear on the left of `!=` against a scoped_ptr.
+template <typename T>
+inline bool operator!=(T* p, const scoped_ptr<T>& b) {
+  return !(p == b.get());
+}
+
+//  scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to
+//  is guaranteed, either on destruction of the scoped_array or via an explicit
+//  reset(). Use shared_array or std::vector if your needs are more complex.
+
+// Owns an array allocated with `new[]` and releases it with `delete[]` when
+// the scoped_array is destroyed or reset.  Not copyable.
+template <typename T>
+class scoped_array {
+ public:
+  typedef T element_type;
+
+  // Take ownership of the array |p|, which may be null.
+  explicit scoped_array(T* p = 0) : ptr(p) {}
+
+  // Free whatever array is currently owned.
+  ~scoped_array() { delete[] ptr; }
+
+  // Replace the owned array with |p|, freeing the old one.  Resetting to the
+  // pointer that is already held does nothing.
+  void reset(T* p = 0) {
+    if (ptr == p) {
+      return;
+    }
+    delete[] ptr;
+    ptr = p;
+  }
+
+  // Element access.  The array must be non-null and |i| non-negative; the
+  // upper bound is not checked.
+  T& operator[](std::ptrdiff_t i) const {
+    MOZ_ASSERT(ptr != 0);
+    MOZ_ASSERT(i >= 0);
+    return ptr[i];
+  }
+
+  // Identity comparisons against a raw pointer.
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return !(ptr == p); }
+
+  // Borrow the raw pointer without giving up ownership.
+  T* get() const { return ptr; }
+
+  // Exchange the owned arrays of |*this| and |b|.
+  void swap(scoped_array& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+
+  // Give up ownership: return the raw pointer and become null.
+  T* release() {
+    T* const released = ptr;
+    ptr = 0;
+    return released;
+  }
+
+ private:
+  T* ptr;
+
+  // Copying is forbidden: declared but never defined.
+  scoped_array(scoped_array const&);
+  scoped_array& operator=(scoped_array const&);
+
+  // no reason to use these: each scoped_array should have its own object
+  template <typename U>
+  bool operator==(scoped_array<U> const& p) const;
+  template <typename U>
+  bool operator!=(scoped_array<U> const& p) const;
+};
+
+// Free-function form of scoped_array::swap: exchanges the arrays owned by
+// |a| and |b|.
+template <class T>
+inline void swap(scoped_array<T>& a, scoped_array<T>& b) {
+  b.swap(a);
+}
+
+// Lets a raw pointer appear on the left of `==` against a scoped_array.
+template <typename T>
+inline bool operator==(T* p, const scoped_array<T>& b) {
+  return b.get() == p;
+}
+
+// Lets a raw pointer appear on the left of `!=` against a scoped_array.
+template <typename T>
+inline bool operator!=(T* p, const scoped_array<T>& b) {
+  return !(p == b.get());
+}
+
+// This class wraps the c library function free() in a class that can be
+// passed as a template argument to scoped_ptr_malloc below.
+// Stateless functor that hands its argument to the C library's free().
+class ScopedPtrMallocFree {
+ public:
+  // Release |x| with free().
+  void operator()(void* x) const {
+    free(x);
+  }
+};
+
+// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a
+// second template argument, the functor used to free the object.
+
+// Owns a pointer and releases it by invoking a FreeProc functor on it.  With
+// the default FreeProc this calls free().  Not copyable.
+template <typename T, typename FreeProc = ScopedPtrMallocFree>
+class scoped_ptr_malloc {
+ public:
+  typedef T element_type;
+
+  // Take ownership of |p|, which may be null.
+  explicit scoped_ptr_malloc(T* p = 0) : ptr(p) {}
+
+  // Hand the owned pointer (possibly null) to the free functor.
+  ~scoped_ptr_malloc() { free_((void*)ptr); }
+
+  // Replace the owned pointer with |p|, releasing the old one through the
+  // free functor.  Resetting to the pointer already held does nothing.
+  void reset(T* p = 0) {
+    if (ptr == p) {
+      return;
+    }
+    free_((void*)ptr);
+    ptr = p;
+  }
+
+  // Dereference; the pointer must be non-null.
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  // Member access; the pointer must be non-null.
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+
+  // Identity comparisons against a raw pointer.
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return !(ptr == p); }
+
+  // Borrow the raw pointer without giving up ownership.
+  T* get() const { return ptr; }
+
+  // Exchange the owned pointers of |*this| and |b|.
+  void swap(scoped_ptr_malloc& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+
+  // Give up ownership: return the raw pointer and become null.
+  T* release() {
+    T* const released = ptr;
+    ptr = 0;
+    return released;
+  }
+
+ private:
+  T* ptr;
+
+  // Copying is forbidden: declared but never defined.
+  scoped_ptr_malloc(scoped_ptr_malloc const&);
+  scoped_ptr_malloc& operator=(scoped_ptr_malloc const&);
+
+  // no reason to use these: each scoped_ptr_malloc should have its own object
+  template <typename U, typename GP>
+  bool operator==(scoped_ptr_malloc<U, GP> const& p) const;
+  template <typename U, typename GP>
+  bool operator!=(scoped_ptr_malloc<U, GP> const& p) const;
+
+  // The functor instance shared by every scoped_ptr_malloc<T, FreeProc>.
+  static FreeProc const free_;
+};
+
+// Definition of the shared free functor, copy-initialised from a
+// value-initialised FP.
+template <typename T, typename FP>
+FP const scoped_ptr_malloc<T, FP>::free_ = FP();
+
+// Free-function form of scoped_ptr_malloc::swap: exchanges the pointers
+// owned by |a| and |b|.
+template <typename T, typename FP>
+inline void swap(scoped_ptr_malloc<T, FP>& a, scoped_ptr_malloc<T, FP>& b) {
+  b.swap(a);
+}
+
+// Lets a raw pointer appear on the left of `==` against a scoped_ptr_malloc.
+template <typename T, typename FP>
+inline bool operator==(T* p, const scoped_ptr_malloc<T, FP>& b) {
+  return b.get() == p;
+}
+
+// Lets a raw pointer appear on the left of `!=` against a scoped_ptr_malloc.
+template <typename T, typename FP>
+inline bool operator!=(T* p, const scoped_ptr_malloc<T, FP>& b) {
+  return !(p == b.get());
+}
+
+////////////////////////////////////////////////////////////////
+// Module
+//
+
+// A Module represents the contents of a module, and supports methods
+// for adding information produced by parsing STABS or DWARF data
+// --- possibly both from the same file --- and then writing out the
+// unified contents as a Breakpad-format symbol file.
+class Module {
+ public:
+  // The type of addresses and sizes in a symbol table.
+  typedef uint64_t Address;
+
+  // Representation of an expression.  This can either be a postfix
+  // expression, in which case it is stored as a string, or a simple
+  // expression of the form (identifier + imm) or *(identifier + imm).
+  // It can also be invalid (denoting "no value").
+  enum ExprHow { kExprInvalid = 1, kExprPostfix, kExprSimple, kExprSimpleMem };
+
+  struct Expr {
+    // Construct a simple-form expression: (ident + offset), or
+    // *(ident + offset) when |deref| is true.  An empty |ident| produces an
+    // invalid expression, identical to what the default constructor builds.
+    //
+    // This delegates to Expr() so that every field is initialised first.  A
+    // bare `Expr();` statement inside the body would only create and discard
+    // a temporary, leaving ident_, offset_ and how_ indeterminate.
+    Expr(const UniqueString* ident, long offset, bool deref) : Expr() {
+      if (!IsEmptyUniqueString(ident)) {
+        ident_ = ident;
+        offset_ = offset;
+        how_ = deref ? kExprSimpleMem : kExprSimple;
+      }
+    }
+
+    // Construct an invalid expression
+    Expr() {
+      postfix_ = "";
+      ident_ = nullptr;
+      offset_ = 0;
+      how_ = kExprInvalid;
+    }
+
+    // Return the postfix expression string, either directly,
+    // if this is a postfix expression, or by synthesising it
+    // for a simple expression.  Must not be called on an invalid
+    // expression: that asserts in debug builds and returns a diagnostic
+    // string otherwise.
+    std::string getExprPostfix() const {
+      switch (how_) {
+        case kExprPostfix:
+          return postfix_;
+        case kExprSimple:
+        case kExprSimpleMem: {
+          // Synthesise "<ident> <|offset|> <+ or ->" and, for the
+          // dereferencing form, a trailing " ^".  The write is bounded by
+          // the buffer size.
+          char buf[40];
+          snprintf(buf, sizeof(buf), " %ld %c%s", labs(offset_),
+                   offset_ < 0 ? '-' : '+', how_ == kExprSimple ? "" : " ^");
+          return std::string(FromUniqueString(ident_)) + std::string(buf);
+        }
+        case kExprInvalid:
+        default:
+          MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type");
+          return "Expr::genExprPostfix: kExprInvalid";
+      }
+    }
+
+    // The identifier that gives the starting value for simple expressions.
+    const UniqueString* ident_;
+    // The offset to add for simple expressions.
+    long offset_;
+    // The Postfix expression string to evaluate for non-simple expressions.
+    std::string postfix_;
+    // The operation expressed by this expression.
+    ExprHow how_;
+  };
+
+  // A map from register names to expressions that recover
+  // their values. This can represent a complete set of rules to
+  // follow at some address, or a set of changes to be applied to an
+  // extant set of rules.
+  // NOTE! there are two completely different types called RuleMap.  This
+  // is one of them.
+  typedef std::map<const UniqueString*, Expr> RuleMap;
+
+  // A map from addresses to RuleMaps, representing changes that take
+  // effect at given addresses.
+  typedef std::map<Address, RuleMap> RuleChangeMap;
+
+  // A range of 'STACK CFI' stack walking information. An instance of
+  // this structure corresponds to a 'STACK CFI INIT' record and the
+  // subsequent 'STACK CFI' records that fall within its range.
+  struct StackFrameEntry {
+    // The starting address and number of bytes of machine code this
+    // entry covers.
+    Address address, size;
+
+    // The initial register recovery rules, in force at the starting
+    // address.
+    RuleMap initial_rules;
+
+    // A map from addresses to rule changes. To find the rules in
+    // force at a given address, start with initial_rules, and then
+    // apply the changes given in this map for all addresses up to and
+    // including the address you're interested in.
+    RuleChangeMap rule_changes;
+  };
+
+  // Create a new module with the given name, operating system,
+  // architecture, and ID string.
+  Module(const std::string& name, const std::string& os,
+         const std::string& architecture, const std::string& id);
+  ~Module();
+
+ private:
+  // Module header entries.
+  std::string name_, os_, architecture_, id_;
+};
+
+}  // namespace lul
+
+#endif  // LulCommonExt_h
diff --git a/tools/profiler/lul/LulDwarf.cpp b/tools/profiler/lul/LulDwarf.cpp
new file mode 100644
index 0000000000..ea38ce50ea
--- /dev/null
+++ b/tools/profiler/lul/LulDwarf.cpp
@@ -0,0 +1,2538 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
+// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/bytereader.cc
+//   src/common/dwarf/dwarf2reader.cc
+//   src/common/dwarf_cfi_to_module.cc
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <stack>
+#include <string>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/Vector.h"
+
+#include "LulCommonExt.h"
+#include "LulDwarfInt.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_DWARF 0
+
+namespace lul {
+
+using std::pair;
+using std::string;
+
+// Build a reader for data of the given byte order.  The offset and address
+// readers start out unset and both sizes are zero until SetOffsetSize() and
+// SetAddressSize() are called; the four have_*_base_ flags are
+// value-initialised.
+ByteReader::ByteReader(enum Endianness endian)
+    : offset_reader_(nullptr),
+      address_reader_(nullptr),
+      endian_(endian),
+      address_size_(0),
+      offset_size_(0),
+      have_section_base_(),
+      have_text_base_(),
+      have_data_base_(),
+      have_function_base_() {}
+
+// Nothing to release explicitly.
+ByteReader::~ByteReader() = default;
+
+// Record the size in bytes of a DWARF offset and select the matching reader.
+// |size| is expected to be 4 or 8; any value other than 4 selects the
+// eight-byte reader.
+void ByteReader::SetOffsetSize(uint8 size) {
+  offset_size_ = size;
+  MOZ_ASSERT(size == 4 || size == 8);
+  if (size != 4) {
+    offset_reader_ = &ByteReader::ReadEightBytes;
+    return;
+  }
+  offset_reader_ = &ByteReader::ReadFourBytes;
+}
+
+// Record the size in bytes of a target address and select the matching
+// reader.  |size| is expected to be 4 or 8; any value other than 4 selects
+// the eight-byte reader.
+void ByteReader::SetAddressSize(uint8 size) {
+  address_size_ = size;
+  MOZ_ASSERT(size == 4 || size == 8);
+  if (size != 4) {
+    address_reader_ = &ByteReader::ReadEightBytes;
+    return;
+  }
+  address_reader_ = &ByteReader::ReadFourBytes;
+}
+
+// Read a DWARF "initial length" field starting at |start|.  Sets the offset
+// size as a side effect (4 or 8), stores the number of bytes the field
+// occupies in |*len| (4 or 12), and returns the length value.
+uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) {
+  const uint64 first_word = ReadFourBytes(start);
+
+  // The common case: a plain 32-bit length with 4-byte offsets.
+  if (first_word != 0xffffffff) {
+    SetOffsetSize(4);
+    *len = 4;
+    return first_word;
+  }
+
+  // In DWARF2/3, an initial length of all 1 bits means the offset size is 8
+  // and the real length is held in the 8 bytes that follow.
+  SetOffsetSize(8);
+  *len = 12;
+  return ReadOffset(start + 4);
+}
+
+// Return true if |encoding| is a recognised pointer encoding: either one of
+// the special values DW_EH_PE_omit / DW_EH_PE_aligned, or a value whose
+// format bits (mask 0x7) and base bits (mask 0x70) are both in range.
+bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
+  if (encoding == DW_EH_PE_omit || encoding == DW_EH_PE_aligned) {
+    return true;
+  }
+  const bool format_in_range = (encoding & 0x7) <= DW_EH_PE_udata8;
+  const bool base_in_range = (encoding & 0x70) <= DW_EH_PE_funcrel;
+  return format_in_range && base_in_range;
+}
+
+// Return true if the base address selected by |encoding| (bits 0x70) is one
+// this reader can currently resolve: absolute pointers always are, relative
+// ones only when the corresponding have_*_base_ flag is set.  Any other base
+// selector is unusable.
+bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
+  const auto base_kind = encoding & 0x70;
+  if (base_kind == DW_EH_PE_absptr) {
+    return true;
+  }
+  if (base_kind == DW_EH_PE_pcrel) {
+    return have_section_base_;
+  }
+  if (base_kind == DW_EH_PE_textrel) {
+    return have_text_base_;
+  }
+  if (base_kind == DW_EH_PE_datarel) {
+    return have_data_base_;
+  }
+  if (base_kind == DW_EH_PE_funcrel) {
+    return have_function_base_;
+  }
+  return false;
+}
+
+// Decode the pointer stored at BUFFER according to ENCODING, store the
+// number of bytes consumed in *LEN, and return the decoded value.
+//
+// The low nybble of ENCODING selects how the value is stored (machine
+// address, LEB128, or fixed-width signed/unsigned data); bits 0x70 select
+// the base address it is relative to.  DW_EH_PE_aligned is handled
+// separately: the value is a machine-sized address at the next position that
+// would be aligned once the section is loaded, and *LEN includes the padding
+// skipped.  The result is truncated to 32 bits when AddressSize() is 4.
+//
+// ENCODING must not be DW_EH_PE_omit.  An unrecognised storage format or
+// base selector calls abort().
+uint64 ByteReader::ReadEncodedPointer(const char* buffer,
+                                      DwarfPointerEncoding encoding,
+                                      size_t* len) const {
+  // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
+  // see it here.
+  MOZ_ASSERT(encoding != DW_EH_PE_omit);
+
+  // The Linux Standards Base 4.0 does not make this clear, but the
+  // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
+  // agree that aligned pointers are always absolute, machine-sized,
+  // machine-signed pointers.
+  if (encoding == DW_EH_PE_aligned) {
+    MOZ_ASSERT(have_section_base_);
+
+    // We don't need to align BUFFER in *our* address space. Rather, we
+    // need to find the next position in our buffer that would be aligned
+    // when the .eh_frame section the buffer contains is loaded into the
+    // program's memory. So align assuming that buffer_base_ gets loaded at
+    // address section_base_, where section_base_ itself may or may not be
+    // aligned.
+
+    // First, find the offset to START from the closest prior aligned
+    // address.
+    uint64 skew = section_base_ & (AddressSize() - 1);
+    // Now find the offset from that aligned address to buffer.
+    uint64 offset = skew + (buffer - buffer_base_);
+    // Round up to the next boundary.
+    uint64 aligned = (offset + AddressSize() - 1) & -AddressSize();
+    // Convert back to a pointer.
+    const char* aligned_buffer = buffer_base_ + (aligned - skew);
+    // Finally, store the length and actually fetch the pointer.
+    *len = aligned_buffer - buffer + AddressSize();
+    return ReadAddress(aligned_buffer);
+  }
+
+  // Extract the value first, ignoring whether it's a pointer or an
+  // offset relative to some base.
+  uint64 offset;
+  switch (encoding & 0x0f) {
+    case DW_EH_PE_absptr:
+      // DW_EH_PE_absptr is weird, as it is used as a meaningful value for
+      // both the high and low nybble of encoding bytes. When it appears in
+      // the high nybble, it means that the pointer is absolute, not an
+      // offset from some base address. When it appears in the low nybble,
+      // as here, it means that the pointer is stored as a normal
+      // machine-sized and machine-signed address. A low nybble of
+      // DW_EH_PE_absptr does not imply that the pointer is absolute; it is
+      // correct for us to treat the value as an offset from a base address
+      // if the upper nybble is not DW_EH_PE_absptr.
+      offset = ReadAddress(buffer);
+      *len = AddressSize();
+      break;
+
+    case DW_EH_PE_uleb128:
+      offset = ReadUnsignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_udata2:
+      offset = ReadTwoBytes(buffer);
+      *len = 2;
+      break;
+
+    case DW_EH_PE_udata4:
+      offset = ReadFourBytes(buffer);
+      *len = 4;
+      break;
+
+    case DW_EH_PE_udata8:
+      offset = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    case DW_EH_PE_sleb128:
+      offset = ReadSignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_sdata2:
+      offset = ReadTwoBytes(buffer);
+      // Sign-extend from 16 bits.
+      offset = (offset ^ 0x8000) - 0x8000;
+      *len = 2;
+      break;
+
+    case DW_EH_PE_sdata4:
+      offset = ReadFourBytes(buffer);
+      // Sign-extend from 32 bits.
+      offset = (offset ^ 0x80000000ULL) - 0x80000000ULL;
+      *len = 4;
+      break;
+
+    case DW_EH_PE_sdata8:
+      // No need to sign-extend; this is the full width of our type.
+      offset = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    default:
+      abort();
+  }
+
+  // Find the appropriate base address.
+  uint64 base;
+  switch (encoding & 0x70) {
+    case DW_EH_PE_absptr:
+      base = 0;
+      break;
+
+    case DW_EH_PE_pcrel:
+      MOZ_ASSERT(have_section_base_);
+      base = section_base_ + (buffer - buffer_base_);
+      break;
+
+    case DW_EH_PE_textrel:
+      MOZ_ASSERT(have_text_base_);
+      base = text_base_;
+      break;
+
+    case DW_EH_PE_datarel:
+      MOZ_ASSERT(have_data_base_);
+      base = data_base_;
+      break;
+
+    case DW_EH_PE_funcrel:
+      MOZ_ASSERT(have_function_base_);
+      base = function_base_;
+      break;
+
+    default:
+      abort();
+  }
+
+  uint64 pointer = base + offset;
+
+  // Remove inappropriate upper bits.
+  if (AddressSize() == 4)
+    pointer = pointer & 0xffffffff;
+  else
+    MOZ_ASSERT(AddressSize() == sizeof(uint64));
+
+  return pointer;
+}
+
+// A DWARF rule for recovering the address or value of a register, or
+// computing the canonical frame address.  This is an 8-way sum-of-products
+// type.  Excluding the INVALID variant, there is one subclass of this for
+// each '*Rule' member function in CallFrameInfo::Handler.
+//
+// This could logically be nested within State, but then the qualified names
+// get horrendous.
+
+class CallFrameInfo::Rule final {
+ public:
+  enum Tag {
+    INVALID,
+    Undefined,
+    SameValue,
+    Offset,
+    ValOffset,
+    Register,
+    Expression,
+    ValExpression
+  };
+
+ private:
+  // tag_ (below) indicates the form of the expression.  There are 7 forms
+  // plus INVALID.  All non-INVALID expressions denote a machine-word-sized
+  // value at unwind time.  The description below assumes the presence of, at
+  // unwind time:
+  //
+  // * a function R, which takes a Dwarf register number and returns its value
+  //   in the callee frame (the one we are unwinding out of).
+  //
+  // * a function EvalDwarfExpr, which evaluates a Dwarf expression.
+  //
+  // Register numbers are encoded using the target ABI's Dwarf
+  // register-numbering conventions.  Except where otherwise noted, a register
+  // value may also be the special value CallFrameInfo::Handler::kCFARegister
+  // ("the CFA").
+  //
+  // The expression forms are represented using tag_, word1_ and word2_.  The
+  // forms and denoted values are as follows:
+  //
+  // * INVALID: not a valid expression.
+  //   valid fields:  (none)
+  //   denotes:       no value
+  //
+  // * Undefined: denotes no value.  This is used for a register whose value
+  //   cannot be recovered.
+  //   valid fields:  (none)
+  //   denotes:       no value
+  //
+  // * SameValue: the register's value is the same as in the callee.
+  //   valid fields:  (none)
+  //   denotes:       R(the register that this Rule is associated with,
+  //                    not stored here)
+  //
+  // * Offset: the register's value is in memory at word2_ bytes away from
+  //   Dwarf register number word1_.  word2_ is interpreted as a *signed*
+  //   offset.
+  //   valid fields: word1_=DwarfReg, word2_=Offset
+  //   denotes:      *(R(word1_) + word2_)
+  //
+  // * ValOffset: same as Offset, without the dereference.
+  //   valid fields: word1_=DwarfReg, word2_=Offset
+  //   denotes:      R(word1_) + word2_
+  //
+  // * Register:  the register's value is in some other register,
+  //              which may not be the CFA.
+  //   valid fields: word1_=DwarfReg
+  //   denotes:      R(word1_)
+  //
+  // * Expression: the register's value is in memory at a location that can be
+  //   computed from the Dwarf expression contained in the word2_ bytes
+  //   starting at word1_.  Note these locations are into the area of the .so
+  //   temporarily mmaped info for debuginfo reading and have no validity once
+  //   debuginfo reading has finished.
+  //   valid fields: ExprStart=word1_, ExprLen=word2_
+  //   denotes:      *(EvalDwarfExpr(word1_, word2_))
+  //
+  // * ValExpression: same as Expression, without the dereference.
+  //   valid fields: ExprStart=word1_, ExprLen=word2_
+  //   denotes:      EvalDwarfExpr(word1_, word2_)
+  //
+
+  // 3 words (or less) for representation.  Unused word1_/word2_ fields must
+  // be set to zero.
+  Tag tag_;
+  uintptr_t word1_;
+  uintptr_t word2_;
+
+  // To ensure that word1_ can hold a pointer to an expression string.
+  static_assert(sizeof(const char*) <= sizeof(word1_));
+  // To ensure that word2_ can hold any string length or memory offset.
+  static_assert(sizeof(size_t) <= sizeof(word2_));
+
+  // This class denotes an 8-way sum-of-product type, and accessing invalid
+  // fields is meaningless.  The accessors and constructors below enforce
+  // that.
+  //
+  // Returns true if the fields that are unused for the current tag are zero.
+  bool isCanonical() const {
+    switch (tag_) {
+      case Tag::INVALID:
+      case Tag::Undefined:
+      case Tag::SameValue:
+        return word1_ == 0 && word2_ == 0;
+      case Tag::Offset:
+      case Tag::ValOffset:
+        return true;
+      case Tag::Register:
+        return word2_ == 0;
+      case Tag::Expression:
+      case Tag::ValExpression:
+        return true;
+      default:
+        MOZ_CRASH();
+    }
+  }
+
+ public:
+  // Accessors.  Each one crashes if the field it reads is not valid for the
+  // current tag.
+  Tag tag() const { return tag_; }
+  int dwreg() const {
+    switch (tag_) {
+      case Tag::Offset:
+      case Tag::ValOffset:
+      case Tag::Register:
+        return (int)word1_;
+      default:
+        MOZ_CRASH();
+    }
+  }
+  intptr_t offset() const {
+    switch (tag_) {
+      case Tag::Offset:
+      case Tag::ValOffset:
+        return (intptr_t)word2_;
+      default:
+        MOZ_CRASH();
+    }
+  }
+  ImageSlice expr() const {
+    switch (tag_) {
+      case Tag::Expression:
+      case Tag::ValExpression:
+        return ImageSlice((const char*)word1_, (size_t)word2_);
+      default:
+        MOZ_CRASH();
+    }
+  }
+
+  // Constructor-y stuff
+  Rule() {
+    tag_ = Tag::INVALID;
+    word1_ = 0;
+    word2_ = 0;
+  }
+
+  static Rule mkINVALID() {
+    Rule r;  // is initialised by Rule()
+    return r;
+  }
+  static Rule mkUndefinedRule() {
+    Rule r;
+    r.tag_ = Tag::Undefined;
+    r.word1_ = 0;
+    r.word2_ = 0;
+    return r;
+  }
+  static Rule mkSameValueRule() {
+    Rule r;
+    r.tag_ = Tag::SameValue;
+    r.word1_ = 0;
+    r.word2_ = 0;
+    return r;
+  }
+  static Rule mkOffsetRule(int dwreg, intptr_t offset) {
+    Rule r;
+    r.tag_ = Tag::Offset;
+    r.word1_ = (uintptr_t)dwreg;
+    r.word2_ = (uintptr_t)offset;
+    return r;
+  }
+  static Rule mkValOffsetRule(int dwreg, intptr_t offset) {
+    Rule r;
+    r.tag_ = Tag::ValOffset;
+    r.word1_ = (uintptr_t)dwreg;
+    r.word2_ = (uintptr_t)offset;
+    return r;
+  }
+  static Rule mkRegisterRule(int dwreg) {
+    Rule r;
+    r.tag_ = Tag::Register;
+    r.word1_ = (uintptr_t)dwreg;
+    r.word2_ = 0;
+    return r;
+  }
+  static Rule mkExpressionRule(ImageSlice expr) {
+    Rule r;
+    r.tag_ = Tag::Expression;
+    r.word1_ = (uintptr_t)expr.start_;
+    r.word2_ = (uintptr_t)expr.length_;
+    return r;
+  }
+  static Rule mkValExpressionRule(ImageSlice expr) {
+    Rule r;
+    r.tag_ = Tag::ValExpression;
+    r.word1_ = (uintptr_t)expr.start_;
+    r.word2_ = (uintptr_t)expr.length_;
+    return r;
+  }
+
+  // Misc
+  inline bool isVALID() const { return tag_ != Tag::INVALID; }
+
+  // Structural equality.  Both operands must be valid; rules with different
+  // tags are never equal, and only the fields meaningful for the shared tag
+  // are compared.
+  bool operator==(const Rule& rhs) const {
+    MOZ_ASSERT(isVALID() && rhs.isVALID());
+    MOZ_ASSERT(isCanonical());
+    MOZ_ASSERT(rhs.isCanonical());
+    if (tag_ != rhs.tag_) {
+      return false;
+    }
+    switch (tag_) {
+      case Tag::INVALID:
+        MOZ_CRASH();
+      case Tag::Undefined:
+      case Tag::SameValue:
+        return true;
+      case Tag::Offset:
+      case Tag::ValOffset:
+        return word1_ == rhs.word1_ && word2_ == rhs.word2_;
+      case Tag::Register:
+        return word1_ == rhs.word1_;
+      case Tag::Expression:
+      case Tag::ValExpression:
+        return expr() == rhs.expr();
+      default:
+        MOZ_CRASH();
+    }
+  }
+
+  bool operator!=(const Rule& rhs) const { return !(*this == rhs); }
+
+  // Tell HANDLER that, at ADDRESS in the program, REG can be
+  // recovered using this rule. If REG is kCFARegister, then this rule
+  // describes how to compute the canonical frame address. Return what the
+  // HANDLER member function returned.
+  bool Handle(Handler* handler, uint64 address, int reg) const {
+    MOZ_ASSERT(isVALID());
+    MOZ_ASSERT(isCanonical());
+    switch (tag_) {
+      case Tag::Undefined:
+        return handler->UndefinedRule(address, reg);
+      case Tag::SameValue:
+        return handler->SameValueRule(address, reg);
+      case Tag::Offset:
+        return handler->OffsetRule(address, reg, word1_, word2_);
+      case Tag::ValOffset:
+        return handler->ValOffsetRule(address, reg, word1_, word2_);
+      case Tag::Register:
+        return handler->RegisterRule(address, reg, word1_);
+      case Tag::Expression:
+        return handler->ExpressionRule(
+            address, reg, ImageSlice((const char*)word1_, (size_t)word2_));
+      case Tag::ValExpression:
+        return handler->ValExpressionRule(
+            address, reg, ImageSlice((const char*)word1_, (size_t)word2_));
+      default:
+        MOZ_CRASH();
+    }
+  }
+
+  // Change the base register of a ValOffset rule to REG.  Every other valid
+  // form is left untouched.
+  void SetBaseRegister(unsigned reg) {
+    MOZ_ASSERT(isVALID());
+    MOZ_ASSERT(isCanonical());
+    switch (tag_) {
+      case Tag::ValOffset:
+        word1_ = reg;
+        break;
+      case Tag::Offset:
+        // We don't actually need SetBaseRegister or SetOffset here, since they
+        // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
+        // doesn't make sense to use OffsetRule for computing the CFA: it
+        // computes the address at which a register is saved, not a value.
+        // (fallthrough)
+      case Tag::Undefined:
+      case Tag::SameValue:
+      case Tag::Register:
+      case Tag::Expression:
+      case Tag::ValExpression:
+        // Do nothing
+        break;
+      default:
+        MOZ_CRASH();
+    }
+  }
+
+  // Change the offset of a ValOffset rule to OFFSET.  Every other valid form
+  // is left untouched.
+  void SetOffset(long long offset) {
+    MOZ_ASSERT(isVALID());
+    MOZ_ASSERT(isCanonical());
+    switch (tag_) {
+      case Tag::ValOffset:
+        word2_ = offset;
+        break;
+      case Tag::Offset:
+        // Same comment as in SetBaseRegister applies
+        // (fallthrough)
+      case Tag::Undefined:
+      case Tag::SameValue:
+      case Tag::Register:
+      case Tag::Expression:
+      case Tag::ValExpression:
+        // Do nothing
+        break;
+      default:
+        MOZ_CRASH();
+    }
+  }
+
+  // For debugging only.  Returns a short description of the rule; only the
+  // ValOffset form has its fields printed.
+  string show() const {
+    char buf[100];
+    string s = "";
+    switch (tag_) {
+      case Tag::INVALID:
+        s = "INVALID";
+        break;
+      case Tag::Undefined:
+        s = "Undefined";
+        break;
+      case Tag::SameValue:
+        s = "SameValue";
+        break;
+      case Tag::Offset:
+        s = "Offset{..}";
+        break;
+      case Tag::ValOffset:
+        // word2_ holds a signed offset in an unsigned word.  Convert it via
+        // intptr_t so that negative offsets are sign-extended, rather than
+        // zero-extended, when intptr_t is narrower than long long.  The write
+        // is bounded by the buffer size.
+        snprintf(buf, sizeof(buf), "ValOffset{reg=%d offs=%lld}", (int)word1_,
+                 (long long int)(intptr_t)word2_);
+        s = string(buf);
+        break;
+      case Tag::Register:
+        s = "Register{..}";
+        break;
+      case Tag::Expression:
+        s = "Expression{..}";
+        break;
+      case Tag::ValExpression:
+        s = "ValExpression{..}";
+        break;
+      default:
+        MOZ_CRASH();
+    }
+    return s;
+  }
+};
+
+// `RuleMapLowLevel` is a simple class that maps from `int` (register numbers)
+// to `Rule`.  This is implemented as a vector of `<int, Rule>` pairs, with a
+// 12-element inline capacity.  From a big-O perspective this is obviously a
+// terrible way to implement an associative map.  This workload is however
+// quite special in that the maximum number of elements is normally 7 (on
+// x86_64-linux), and so this implementation is much faster than one based on
+// std::map with its attendant R-B-tree node allocation and balancing
+// overheads.
+//
+// An iterator that enumerates the mapping in increasing order of the `int`
+// keys is provided.  This ordered iteration facility is required by
+// CallFrameInfo::RuleMap::HandleTransitionTo, which needs to iterate through
+// two such maps simultaneously and in-order so as to compare them.
+
+// All `Rule`s in the map must satisfy `isVALID()`.  That conveniently means
+// that `Rule::mkINVALID()` can be used to indicate "not found" in `get()`.
+
+class CallFrameInfo::RuleMapLowLevel {
+  using Entry = pair<int, Rule>;
+
+  // The inline capacity of 12 is carefully chosen.  It would be wise to make
+  // careful measurements of time, instruction count, allocation count and
+  // allocated bytes before changing it.  For x86_64-linux, a value of 8 is
+  // marginally better; using 12 increases the total heap bytes allocated by
+  // around 20%.  For arm64-linux, a value of 24 is better; using 12 increases
+  // the total blocks allocated by around 20%.  But it's a not bad tradeoff
+  // for both targets, and in any case is vastly superior to the previous
+  // scheme of using `std::map`.
+  mozilla::Vector<Entry, 12> entries_;
+
+ public:
+  void clear() { entries_.clear(); }
+
+  RuleMapLowLevel() { clear(); }
+
+  RuleMapLowLevel& operator=(const RuleMapLowLevel& rhs) {
+    entries_.clear();
+    for (size_t i = 0; i < rhs.entries_.length(); i++) {
+      bool ok = entries_.append(rhs.entries_[i]);
+      MOZ_RELEASE_ASSERT(ok);
+    }
+    return *this;
+  }
+
+  void set(int reg, Rule rule) {
+    MOZ_ASSERT(rule.isVALID());
+    // Find the place where it should go, if any
+    size_t i = 0;
+    size_t nEnt = entries_.length();
+    while (i < nEnt && entries_[i].first < reg) {
+      i++;
+    }
+    if (i == nEnt) {
+      // No entry exists, and all the existing ones are for lower register
+      // numbers.  So just add it at the end.
+      bool ok = entries_.append(Entry(reg, rule));
+      MOZ_RELEASE_ASSERT(ok);
+    } else {
+      // It needs to live at location `i`, and ..
+      MOZ_ASSERT(i < nEnt);
+      if (entries_[i].first == reg) {
+        // .. there's already an old entry, so just update it.
+        entries_[i].second = rule;
+      } else {
+        // .. there's no previous entry, so shift `i` and all those following
+        // it one place to the right, and put the new entry at `i`.  Doing it
+        // manually is measurably cheaper than using `Vector::insert`.
+        MOZ_ASSERT(entries_[i].first > reg);
+        bool ok = entries_.append(Entry(999999, Rule::mkINVALID()));
+        MOZ_RELEASE_ASSERT(ok);
+        for (size_t j = nEnt; j >= i + 1; j--) {
+          entries_[j] = entries_[j - 1];
+        }
+        entries_[i] = Entry(reg, rule);
+      }
+    }
+    // Check in-order-ness and validity.
+    for (size_t i = 0; i < entries_.length(); i++) {
+      MOZ_ASSERT(entries_[i].second.isVALID());
+      MOZ_ASSERT_IF(i > 0, entries_[i - 1].first < entries_[i].first);
+    }
+    MOZ_ASSERT(get(reg).isVALID());
+  }
+
+  // Look up the rule recorded for `reg`.  Yields `Rule::mkINVALID()` when the
+  // map holds no entry for that register.
+  Rule get(int reg) const {
+    // The scan stops only on a match; there is deliberately no early exit
+    // once `entries_[i].first > reg`.  That variant was tested on x86_64 and
+    // found to be slightly slower, presumably because the extra test per
+    // iteration cost more than the shortened search saved.
+    const size_t count = entries_.length();
+    size_t ix = 0;
+    while (ix < count) {
+      if (entries_[ix].first == reg) {
+        CallFrameInfo::Rule found = entries_[ix].second;
+        MOZ_ASSERT(found.isVALID());
+        return found;
+      }
+      ix++;
+    }
+    return CallFrameInfo::Rule::mkINVALID();
+  }
+
+  // A minimal forward cursor over the entries of a `RuleMapLowLevel`,
+  // visiting them in the order in which they are stored.
+  class Iter {
+    const RuleMapLowLevel* rmll_;
+    size_t nextIx_;
+
+   public:
+    // Start at the first entry of `rmll`.
+    explicit Iter(const RuleMapLowLevel* rmll) : rmll_(rmll), nextIx_(0) {}
+
+    // True while the cursor still points at an entry.
+    bool avail() const {
+      const size_t total = rmll_->entries_.length();
+      return nextIx_ < total;
+    }
+
+    // True once the cursor has moved past the last entry.
+    bool finished() const {
+      return !avail();
+    }
+
+    // Advance to the next entry.  Must not be called once finished.
+    void step() {
+      MOZ_RELEASE_ASSERT(nextIx_ < rmll_->entries_.length());
+      nextIx_++;
+    }
+
+    // Return a copy of the entry under the cursor without advancing.  Must
+    // not be called once finished.
+    pair<int, Rule> peek() {
+      MOZ_RELEASE_ASSERT(nextIx_ < rmll_->entries_.length());
+      return rmll_->entries_[nextIx_];
+    }
+  };
+};
+
+// A map from register numbers to rules.  This is a wrapper around
+// `RuleMapLowLevel`, with added logic for dealing with the "special" CFA
+// rule, and with `HandleTransitionTo`, which effectively computes the
+// difference between two `RuleMaps`.
+
+class CallFrameInfo::RuleMap {
+ public:
+  // A new map starts out with an INVALID CFA rule.
+  RuleMap() : cfa_rule_(Rule::mkINVALID()) {}
+
+  // Copy construction: begin with an INVALID CFA rule, then let the
+  // assignment operator take over RHS's contents.
+  RuleMap(const RuleMap& rhs) : cfa_rule_(Rule::mkINVALID()) {
+    *this = rhs;
+  }
+
+  ~RuleMap() {
+    Clear();
+  }
+
+  RuleMap& operator=(const RuleMap& rhs);
+
+  // Install RULE as the rule for computing the CFA.
+  void SetCFARule(Rule rule) {
+    cfa_rule_ = rule;
+  }
+
+  // CFARule() hands back the current CFA rule *by value*, so changes made to
+  // the returned object do not affect this map; go through CFARuleRef() when
+  // the stored rule itself must be modified.  The pair is used for
+  // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_register, and for detecting
+  // references to the CFA before a rule for it has been established.
+  Rule CFARule() const {
+    return cfa_rule_;
+  }
+  Rule* CFARuleRef() {
+    return &cfa_rule_;
+  }
+
+  // Look up the rule for REG; the INVALID rule is returned if REG has none.
+  Rule RegisterRule(int reg) const;
+
+  // Record RULE as the way to compute REG.
+  void SetRegisterRule(int reg, Rule rule);
+
+  // Issue to HANDLER the calls that correspond to switching from this
+  // RuleMap to NEW_RULES at ADDRESS.  This implements DW_CFA_restore_state,
+  // where many rules can change at once.  The result is true if every
+  // handler call returned true, and false otherwise.
+  bool HandleTransitionTo(Handler* handler, uint64 address,
+                          const RuleMap& new_rules) const;
+
+ private:
+  // Drop every register rule and reset cfa_rule_.
+  void Clear();
+
+  // How to compute the canonical frame address.
+  Rule cfa_rule_;
+
+  // For each register number that has one, the rule used to recover that
+  // register's value.
+  RuleMapLowLevel registers_;
+};
+
+// Make this RuleMap a copy of RHS: all of RHS's register rules, plus its CFA
+// rule when that rule is valid (otherwise this map's CFA rule ends up
+// INVALID).  Returns *this.
+//
+// Self-assignment is a no-op.  Without the guard, the Clear() below would
+// wipe RHS's rules before they could be copied.
+CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) {
+  if (this == &rhs) {
+    return *this;
+  }
+  Clear();
+  if (rhs.cfa_rule_.isVALID()) cfa_rule_ = rhs.cfa_rule_;
+  registers_ = rhs.registers_;
+  return *this;
+}
+
+// Fetch the rule recorded for register REG, or the INVALID rule when none has
+// been set.  REG must not be Handler::kCFARegister; the CFA rule is stored
+// separately in cfa_rule_.
+CallFrameInfo::Rule CallFrameInfo::RuleMap::RegisterRule(int reg) const {
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+  Rule found = registers_.get(reg);
+  return found;
+}
+
+// Record RULE as the rule for register REG, replacing any earlier rule for
+// that register.
+void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule rule) {
+  // Preconditions: REG is an ordinary register rather than the CFA, and RULE
+  // is a usable rule.
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+  MOZ_ASSERT(rule.isVALID());
+
+  registers_.set(reg, rule);
+}
+
+// Report to HANDLER the rule changes implied by switching from this RuleMap
+// to NEW_RULES at ADDRESS.  The CFA rule is compared first, then the two
+// register maps are walked together in register-number order.  Returns false
+// as soon as any handler call returns false, and true otherwise.
+bool CallFrameInfo::RuleMap::HandleTransitionTo(
+    Handler* handler, uint64 address, const RuleMap& new_rules) const {
+  // Transition from cfa_rule_ to new_rules.cfa_rule_.
+  if (cfa_rule_.isVALID() && new_rules.cfa_rule_.isVALID()) {
+    if (cfa_rule_ != new_rules.cfa_rule_ &&
+        !new_rules.cfa_rule_.Handle(handler, address, Handler::kCFARegister)) {
+      return false;
+    }
+  } else if (cfa_rule_.isVALID()) {
+    // this RuleMap has a CFA rule but new_rules doesn't.
+    // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
+    // it's garbage input. The instruction interpreter should have
+    // detected this and warned, so take no action here.
+  } else if (new_rules.cfa_rule_.isVALID()) {
+    // This shouldn't be possible: NEW_RULES is some prior state, and
+    // there's no way to remove entries.
+    MOZ_ASSERT(0);
+  } else {
+    // Both CFA rules are empty.  No action needed.
+  }
+
+  // Traverse the two maps in order by register number, and report
+  // whatever differences we find.
+  RuleMapLowLevel::Iter old_it(&registers_);
+  RuleMapLowLevel::Iter new_it(&new_rules.registers_);
+  while (!old_it.finished() && !new_it.finished()) {
+    pair<int, Rule> old_pair = old_it.peek();
+    pair<int, Rule> new_pair = new_it.peek();
+    if (old_pair.first < new_pair.first) {
+      // This RuleMap has an entry for old.first, but NEW_RULES doesn't.
+      //
+      // This isn't really the right thing to do, but since CFI generally
+      // only mentions callee-saves registers, and GCC's convention for
+      // callee-saves registers is that they are unchanged, it's a good
+      // approximation.
+      if (!handler->SameValueRule(address, old_pair.first)) {
+        return false;
+      }
+      old_it.step();
+    } else if (old_pair.first > new_pair.first) {
+      // NEW_RULES has an entry for new_pair.first, but this RuleMap
+      // doesn't. This shouldn't be possible: NEW_RULES is some prior
+      // state, and there's no way to remove entries.
+      MOZ_ASSERT(0);
+      // If the assertion above is compiled out, nothing else in this branch
+      // would advance either iterator.  Skip the unexpected entry so that
+      // the loop is guaranteed to make progress and terminate.
+      new_it.step();
+    } else {
+      // Both maps have an entry for this register. Report the new
+      // rule if it is different.
+      if (old_pair.second != new_pair.second &&
+          !new_pair.second.Handle(handler, address, new_pair.first)) {
+        return false;
+      }
+      new_it.step();
+      old_it.step();
+    }
+  }
+  // Finish off entries from this RuleMap with no counterparts in new_rules.
+  while (!old_it.finished()) {
+    pair<int, Rule> old_pair = old_it.peek();
+    if (!handler->SameValueRule(address, old_pair.first)) return false;
+    old_it.step();
+  }
+  // Since we only make transitions from a rule set to some previously
+  // saved rule set, and we can only add rules to the map, NEW_RULES
+  // must have fewer rules than *this.
+  MOZ_ASSERT(new_it.finished());
+
+  return true;
+}
+
+// Return this RuleMap to its empty state: no register rules, and an INVALID
+// CFA rule.
+void CallFrameInfo::RuleMap::Clear() {
+  registers_.clear();
+  cfa_rule_ = Rule::mkINVALID();
+}
+
+// Interpreter state used while executing the call frame instructions of a
+// CIE and then of an FDE.
+class CallFrameInfo::State {
+ public:
+  // Set up an interpreter that reads with READER, reports rules to HANDLER
+  // and problems to REPORTER, and whose first instruction applies to code
+  // address ADDRESS.
+  State(ByteReader* reader, Handler* handler, Reporter* reporter,
+        uint64 address)
+      : reader_(reader),
+        handler_(handler),
+        reporter_(reporter),
+        address_(address),
+        entry_(nullptr),
+        cursor_(nullptr),
+        saved_rules_(nullptr) {}
+
+  ~State() {
+    // Deleting a null pointer is a no-op, so no check is needed.
+    delete saved_rules_;
+  }
+
+  // Run the instructions of CIE and keep the resulting rule set for later
+  // use by DW_CFA_restore instructions.  Returns true on success; on error,
+  // the problem is reported to reporter_ and false is returned.
+  bool InterpretCIE(const CIE& cie);
+
+  // Run the instructions of FDE.  Returns true on success; on error, the
+  // problem is reported to reporter_ and false is returned.
+  bool InterpretFDE(const FDE& fde);
+
+ private:
+  // Holds the operands of one CFI instruction, as filled in by
+  // ParseOperands.
+  struct Operands {
+    unsigned register_number;  // A register number.
+    uint64 offset;             // An offset or address.
+    long signed_offset;        // A signed offset.
+    ImageSlice expression;     // A DWARF expression.
+  };
+
+  // Read CFI instruction operands from this State's instruction stream as
+  // directed by FORMAT, storing them in OPERANDS.  Returns true on success;
+  // on failure, reports the problem and returns false.
+  //
+  // FORMAT is a string in which each character selects one operand:
+  //
+  //   'r'  unsigned LEB128 register number (OPERANDS->register_number)
+  //   'o'  unsigned LEB128 offset          (OPERANDS->offset)
+  //   's'  signed LEB128 offset            (OPERANDS->signed_offset)
+  //   'a'  machine-size address            (OPERANDS->offset)
+  //        (If the CIE has a 'z' augmentation string, 'a' uses the
+  //        encoding specified by the 'R' argument.)
+  //   '1'  a one-byte offset               (OPERANDS->offset)
+  //   '2'  a two-byte offset               (OPERANDS->offset)
+  //   '4'  a four-byte offset              (OPERANDS->offset)
+  //   '8'  an eight-byte offset            (OPERANDS->offset)
+  //   'e'  a DW_FORM_block holding a       (OPERANDS->expression)
+  //        DWARF expression
+  bool ParseOperands(const char* format, Operands* operands);
+
+  // Execute a single CFI instruction from this State's instruction stream:
+  // update the state, pass any rule changes on to handler_, and return true.
+  // On failure, report the problem and return false.
+  MOZ_ALWAYS_INLINE bool DoInstruction();
+
+  // Call `DoInstruction` over and over until one of these happens:
+  // * it returns `false`, signalling a failure of some kind; `false` is then
+  //   returned from here as well, or
+  // * the instructions are used up (that is, `cursor_ >= entry_->end`);
+  //   `true` is then returned.
+  // This routine is the sole caller of `DoInstruction`.  It is marked
+  // never-inline to maximise the chances that `DoInstruction` gets inlined
+  // into it.
+  MOZ_NEVER_INLINE bool DoInstructions();
+
+  // The Do* member functions below are helpers for DoInstruction.  Each one
+  // carries out the real work of an operation that comes in several
+  // encodings.
+
+  // Make the CFA rule "value of BASE_REGISTER plus OFFSET".  Returns true on
+  // success; reports and returns false on failure.  (Serves DW_CFA_def_cfa
+  // and DW_CFA_def_cfa_sf.)
+  bool DoDefCFA(unsigned base_register, long offset);
+
+  // Replace the offset in the CFA rule with OFFSET.  Returns true on
+  // success; reports and returns false on failure.  (Serves
+  // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
+  bool DoDefCFAOffset(long offset);
+
+  // Record that REG is recovered using RULE.  Returns true on success;
+  // reports and returns false on failure.
+  bool DoRule(unsigned reg, Rule rule);
+
+  // Record that REG is saved at OFFSET from the CFA.  Returns true on
+  // success; reports and returns false on failure.  (Serves DW_CFA_offset,
+  // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
+  bool DoOffset(unsigned reg, long offset);
+
+  // Record that the caller's value of REG equals the CFA plus OFFSET.
+  // Returns true on success; reports and returns false on failure.  (Serves
+  // DW_CFA_val_offset and DW_CFA_val_offset_sf.)
+  bool DoValOffset(unsigned reg, long offset);
+
+  // Put REG back to the rule the CIE established for it.  Returns true on
+  // success; reports and returns false on failure.  (Serves DW_CFA_restore
+  // and DW_CFA_restore_extended.)
+  bool DoRestore(unsigned reg);
+
+  // Section offset of the instruction at the cursor, for error messages.
+  uint64 CursorOffset() {
+    return entry_->offset + (cursor_ - entry_->start);
+  }
+
+  // Shorthand: report entry_ as incomplete and return false.
+  bool ReportIncomplete() {
+    reporter_->Incomplete(entry_->offset, entry_->kind);
+    return false;
+  }
+
+  // Reads multi-byte values with the appropriate endianness.
+  ByteReader* reader_;
+
+  // Receives the data we find.
+  Handler* handler_;
+
+  // Receives reports of problems in the info being parsed.
+  Reporter* reporter_;
+
+  // Code address to which the next instruction in the stream applies.
+  uint64 address_;
+
+  // Entry whose instructions are being processed: a CIE at first, and an
+  // FDE after that.
+  const Entry* entry_;
+
+  // Next instruction to process.
+  const char* cursor_;
+
+  // Rule set currently in force.
+  RuleMap rules_;
+
+  // Rule set established by the CIE, consulted by DW_CFA_restore and
+  // DW_CFA_restore_extended.  It is set once the CIE's instructions have
+  // been interpreted.
+  RuleMap cie_rules_;
+
+  // Stack of saved rule sets for DW_CFA_remember_state and
+  // DW_CFA_restore_state.  Null until the first DW_CFA_remember_state.
+  std::stack<RuleMap>* saved_rules_;
+};
+
+// Run CIE's instructions.  On success, snapshot the resulting rules into
+// cie_rules_ for later use by DW_CFA_restore and DW_CFA_restore_extended,
+// and return true.  Returns false if any instruction fails.
+bool CallFrameInfo::State::InterpretCIE(const CIE& cie) {
+  entry_ = &cie;
+  cursor_ = entry_->instructions;
+  const bool ok = DoInstructions();
+  if (ok) {
+    cie_rules_ = rules_;
+  }
+  return ok;
+}
+
+// Run FDE's instructions, returning whether all of them succeeded.
+bool CallFrameInfo::State::InterpretFDE(const FDE& fde) {
+  entry_ = &fde;
+  cursor_ = entry_->instructions;
+  const bool ok = DoInstructions();
+  return ok;
+}
+
+// Read the operands described by FORMAT from the instruction stream at
+// cursor_, storing them in OPERANDS and advancing cursor_ past each one.
+// Each character of FORMAT selects one operand kind (see the declaration
+// for the list).  Returns true on success.  If an operand would extend
+// beyond entry_->end, reports the entry as incomplete and returns false.
+bool CallFrameInfo::State::ParseOperands(const char* format,
+                                         Operands* operands) {
+  size_t len;
+  const char* operand;
+
+  for (operand = format; *operand; operand++) {
+    // Bytes remaining in this entry; every operand is checked against it.
+    size_t bytes_left = entry_->end - cursor_;
+    switch (*operand) {
+      case 'r':
+        operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
+        if (len > bytes_left) return ReportIncomplete();
+        cursor_ += len;
+        break;
+
+      case 'o':
+        operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
+        if (len > bytes_left) return ReportIncomplete();
+        cursor_ += len;
+        break;
+
+      case 's':
+        operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
+        if (len > bytes_left) return ReportIncomplete();
+        cursor_ += len;
+        break;
+
+      case 'a':
+        operands->offset = reader_->ReadEncodedPointer(
+            cursor_, entry_->cie->pointer_encoding, &len);
+        if (len > bytes_left) return ReportIncomplete();
+        cursor_ += len;
+        break;
+
+      case '1':
+        if (1 > bytes_left) return ReportIncomplete();
+        operands->offset = static_cast<unsigned char>(*cursor_++);
+        break;
+
+      case '2':
+        if (2 > bytes_left) return ReportIncomplete();
+        operands->offset = reader_->ReadTwoBytes(cursor_);
+        cursor_ += 2;
+        break;
+
+      case '4':
+        if (4 > bytes_left) return ReportIncomplete();
+        operands->offset = reader_->ReadFourBytes(cursor_);
+        cursor_ += 4;
+        break;
+
+      case '8':
+        if (8 > bytes_left) return ReportIncomplete();
+        operands->offset = reader_->ReadEightBytes(cursor_);
+        cursor_ += 8;
+        break;
+
+      case 'e': {
+        // Hold the decoded length in 64 bits until it has been
+        // bounds-checked.  Elsewhere in this file the result of
+        // ReadUnsignedLEB128 is stored in 64-bit variables; narrowing it to
+        // size_t first would let an oversized length wrap around on targets
+        // with a 32-bit size_t and slip past the check below.
+        uint64 expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
+        if (len > bytes_left || expression_length > bytes_left - len)
+          return ReportIncomplete();
+        // The check above guarantees that the length fits in a size_t.
+        const size_t checked_length = static_cast<size_t>(expression_length);
+        cursor_ += len;
+        operands->expression = ImageSlice(cursor_, checked_length);
+        cursor_ += checked_length;
+        break;
+      }
+
+      default:
+        // FORMAT strings are written by this file's own code, so an unknown
+        // character is a programming error.
+        MOZ_ASSERT(0);
+    }
+  }
+
+  return true;
+}
+
+// Decode and execute the single CFI instruction at cursor_.  The opcode byte
+// is consumed, operands are parsed as the opcode requires, rules_ / address_
+// are updated, and rule changes are passed on to handler_.  Returns true on
+// success.  Returns false if the operands are incomplete, if the opcode is
+// not recognized, if the instruction is not valid in the current state, or
+// if a handler call returns false.
+MOZ_ALWAYS_INLINE
+bool CallFrameInfo::State::DoInstruction() {
+  CIE* cie = entry_->cie;
+  Operands ops;
+
+  // Our entry's kind should have been set by now.
+  MOZ_ASSERT(entry_->kind != kUnknown);
+
+  // We shouldn't have been invoked unless there were more
+  // instructions to parse.
+  MOZ_ASSERT(cursor_ < entry_->end);
+
+  unsigned opcode = *cursor_++;
+  // Opcodes with either of bits 0xc0 set carry their first operand in the
+  // low six bits.
+  if ((opcode & 0xc0) != 0) {
+    switch (opcode & 0xc0) {
+      // Advance the address.
+      case DW_CFA_advance_loc: {
+        size_t code_offset = opcode & 0x3f;
+        address_ += code_offset * cie->code_alignment_factor;
+        break;
+      }
+
+      // Find a register at an offset from the CFA.
+      case DW_CFA_offset:
+        if (!ParseOperands("o", &ops) ||
+            !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
+          return false;
+        break;
+
+      // Restore the rule established for a register by the CIE.
+      case DW_CFA_restore:
+        if (!DoRestore(opcode & 0x3f)) return false;
+        break;
+
+      // The 'if' above should have excluded this possibility.
+      default:
+        MOZ_ASSERT(0);
+    }
+
+    // Return here, so the big switch below won't be indented.
+    return true;
+  }
+
+  switch (opcode) {
+    // Set the address.
+    case DW_CFA_set_loc:
+      if (!ParseOperands("a", &ops)) return false;
+      address_ = ops.offset;
+      break;
+
+    // Advance the address.
+    case DW_CFA_advance_loc1:
+      if (!ParseOperands("1", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address.
+    case DW_CFA_advance_loc2:
+      if (!ParseOperands("2", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address.
+    case DW_CFA_advance_loc4:
+      if (!ParseOperands("4", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address.
+    case DW_CFA_MIPS_advance_loc8:
+      if (!ParseOperands("8", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Compute the CFA by adding an offset to a register.
+    case DW_CFA_def_cfa:
+      if (!ParseOperands("ro", &ops) ||
+          !DoDefCFA(ops.register_number, ops.offset))
+        return false;
+      break;
+
+    // Compute the CFA by adding an offset to a register.
+    case DW_CFA_def_cfa_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoDefCFA(ops.register_number,
+                    ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // Change the base register used to compute the CFA.
+    case DW_CFA_def_cfa_register: {
+      Rule* cfa_rule = rules_.CFARuleRef();
+      if (!cfa_rule->isVALID()) {
+        reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+        return false;
+      }
+      if (!ParseOperands("r", &ops)) return false;
+      cfa_rule->SetBaseRegister(ops.register_number);
+      if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister))
+        return false;
+      break;
+    }
+
+    // Change the offset used to compute the CFA.
+    case DW_CFA_def_cfa_offset:
+      if (!ParseOperands("o", &ops) || !DoDefCFAOffset(ops.offset))
+        return false;
+      break;
+
+    // Change the offset used to compute the CFA.
+    case DW_CFA_def_cfa_offset_sf:
+      if (!ParseOperands("s", &ops) ||
+          !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // Specify an expression whose value is the CFA.
+    case DW_CFA_def_cfa_expression: {
+      if (!ParseOperands("e", &ops)) return false;
+      Rule rule = Rule::mkValExpressionRule(ops.expression);
+      rules_.SetCFARule(rule);
+      if (!rule.Handle(handler_, address_, Handler::kCFARegister)) return false;
+      break;
+    }
+
+    // The register's value cannot be recovered.
+    case DW_CFA_undefined: {
+      if (!ParseOperands("r", &ops) ||
+          !DoRule(ops.register_number, Rule::mkUndefinedRule()))
+        return false;
+      break;
+    }
+
+    // The register's value is unchanged from its value in the caller.
+    case DW_CFA_same_value: {
+      if (!ParseOperands("r", &ops) ||
+          !DoRule(ops.register_number, Rule::mkSameValueRule()))
+        return false;
+      break;
+    }
+
+    // Find a register at an offset from the CFA.
+    case DW_CFA_offset_extended:
+      if (!ParseOperands("ro", &ops) ||
+          !DoOffset(ops.register_number,
+                    ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register is saved at an offset from the CFA.
+    case DW_CFA_offset_extended_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoOffset(ops.register_number,
+                    ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register is saved at an offset from the CFA.
+    case DW_CFA_GNU_negative_offset_extended:
+      if (!ParseOperands("ro", &ops) ||
+          !DoOffset(ops.register_number,
+                    -ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register's value is the sum of the CFA plus an offset.
+    case DW_CFA_val_offset:
+      if (!ParseOperands("ro", &ops) ||
+          !DoValOffset(ops.register_number,
+                       ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register's value is the sum of the CFA plus an offset.
+    case DW_CFA_val_offset_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoValOffset(ops.register_number,
+                       ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register has been saved in another register.
+    case DW_CFA_register: {
+      if (!ParseOperands("ro", &ops) ||
+          !DoRule(ops.register_number, Rule::mkRegisterRule(ops.offset)))
+        return false;
+      break;
+    }
+
+    // An expression yields the address at which the register is saved.
+    case DW_CFA_expression: {
+      if (!ParseOperands("re", &ops) ||
+          !DoRule(ops.register_number, Rule::mkExpressionRule(ops.expression)))
+        return false;
+      break;
+    }
+
+    // An expression yields the caller's value for the register.
+    case DW_CFA_val_expression: {
+      if (!ParseOperands("re", &ops) ||
+          !DoRule(ops.register_number,
+                  Rule::mkValExpressionRule(ops.expression)))
+        return false;
+      break;
+    }
+
+    // Restore the rule established for a register by the CIE.
+    case DW_CFA_restore_extended:
+      if (!ParseOperands("r", &ops) || !DoRestore(ops.register_number))
+        return false;
+      break;
+
+    // Save the current set of rules on a stack.
+    case DW_CFA_remember_state:
+      // The stack is allocated lazily, on first use.
+      if (!saved_rules_) {
+        saved_rules_ = new std::stack<RuleMap>();
+      }
+      saved_rules_->push(rules_);
+      break;
+
+    // Pop the current set of rules off the stack.
+    case DW_CFA_restore_state: {
+      if (!saved_rules_ || saved_rules_->empty()) {
+        reporter_->EmptyStateStack(entry_->offset, entry_->kind,
+                                   CursorOffset());
+        return false;
+      }
+      const RuleMap& new_rules = saved_rules_->top();
+      if (rules_.CFARule().isVALID() && !new_rules.CFARule().isVALID()) {
+        reporter_->ClearingCFARule(entry_->offset, entry_->kind,
+                                   CursorOffset());
+        return false;
+      }
+      // Propagate a handler failure, as every other handler call in this
+      // function does; HandleTransitionTo returns false when a handler call
+      // returned false.
+      if (!rules_.HandleTransitionTo(handler_, address_, new_rules)) {
+        return false;
+      }
+      // Copy before popping: new_rules refers to the top stack element.
+      rules_ = new_rules;
+      saved_rules_->pop();
+      break;
+    }
+
+    // No operation.  (Padding instruction.)
+    case DW_CFA_nop:
+      break;
+
+    // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
+    // are saved in registers 24 through 31 (%i0-%i7), and registers
+    // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
+    // (0-15 * the register size). The register numbers must be
+    // hard-coded. A GNU extension, and not a pretty one.
+    case DW_CFA_GNU_window_save: {
+      // Save %o0-%o7 in %i0-%i7.
+      for (int i = 8; i < 16; i++)
+        if (!DoRule(i, Rule::mkRegisterRule(i + 16))) return false;
+      // Save %l0-%l7 and %i0-%i7 at the CFA.
+      for (int i = 16; i < 32; i++)
+        // Assume that the byte reader's address size is the same as
+        // the architecture's register size. !@#%*^ hilarious.
+        if (!DoRule(i, Rule::mkOffsetRule(Handler::kCFARegister,
+                                          (i - 16) * reader_->AddressSize())))
+          return false;
+      break;
+    }
+
+    // I'm not sure what this is. GDB doesn't use it for unwinding.
+    case DW_CFA_GNU_args_size:
+      if (!ParseOperands("o", &ops)) return false;
+      break;
+
+    // An opcode we don't recognize.
+    default: {
+      reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Execute instructions until the entry is exhausted (return true) or one of
+// them fails (return false).  The declaration explains why this routine is
+// marked never-inline.
+MOZ_NEVER_INLINE
+bool CallFrameInfo::State::DoInstructions() {
+  for (;;) {
+    if (cursor_ >= entry_->end) {
+      return true;
+    }
+    if (!DoInstruction()) {
+      return false;
+    }
+  }
+}
+
+// Install "BASE_REGISTER plus OFFSET" as the CFA rule and report it to the
+// handler.  Returns whatever the handler call returns.
+bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
+  Rule new_cfa = Rule::mkValOffsetRule(base_register, offset);
+  rules_.SetCFARule(new_cfa);
+  const bool handled =
+      new_cfa.Handle(handler_, address_, Handler::kCFARegister);
+  return handled;
+}
+
+// Change the offset of the existing CFA rule to OFFSET and report the
+// updated rule to the handler.  If there is no valid CFA rule yet, report
+// that and return false.
+bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
+  Rule* cfa_rule = rules_.CFARuleRef();
+  if (cfa_rule->isVALID()) {
+    // Modify the stored rule in place.
+    cfa_rule->SetOffset(offset);
+    return cfa_rule->Handle(handler_, address_, Handler::kCFARegister);
+  }
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Record RULE for register REG in the current rule set, then report it to
+// the handler.  Returns whatever the handler call returns.
+bool CallFrameInfo::State::DoRule(unsigned reg, Rule rule) {
+  rules_.SetRegisterRule(reg, rule);
+  const bool handled = rule.Handle(handler_, address_, reg);
+  return handled;
+}
+
+// Record that REG is saved at OFFSET from the CFA.  This needs a valid CFA
+// rule; without one, report the problem and return false.
+bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
+  if (rules_.CFARule().isVALID()) {
+    return DoRule(reg, Rule::mkOffsetRule(Handler::kCFARegister, offset));
+  }
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Record that the caller's value of REG is the CFA plus OFFSET.  This needs
+// a valid CFA rule; without one, report the problem and return false.
+bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
+  if (rules_.CFARule().isVALID()) {
+    Rule rule = Rule::mkValOffsetRule(Handler::kCFARegister, offset);
+    return DoRule(reg, rule);
+  }
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Give REG back the rule the CIE established for it.  Restoring makes no
+// sense while a CIE itself is being interpreted, so that case is reported
+// and fails.
+bool CallFrameInfo::State::DoRestore(unsigned reg) {
+  const bool in_cie = (entry_->kind == kCIE);
+  if (in_cie) {
+    // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
+    reporter_->RestoreInCIE(entry_->offset, CursorOffset());
+    return false;
+  }
+  Rule cie_rule = cie_rules_.RegisterRule(reg);
+  // If the CIE has no rule for REG, fall back to "same value".  That isn't
+  // really the right thing to do, but since CFI generally only mentions
+  // callee-saves registers, and GCC's convention for callee-saves registers
+  // is that they are unchanged, it's a good approximation.
+  return DoRule(reg, cie_rule.isVALID() ? cie_rule : Rule::mkSameValueRule());
+}
+
+// Parse the part common to CIEs and FDEs of the entry starting at CURSOR:
+// the initial length and the id field.  Fills in ENTRY's offset, start, kind
+// and end, and for non-terminator entries also id, fields and cie (set to
+// null).  Returns true on success; if the entry does not fit in the buffer,
+// reports it as incomplete and returns false.
+bool CallFrameInfo::ReadEntryPrologue(const char* cursor, Entry* entry) {
+  const char* buffer_end = buffer_ + buffer_length_;
+
+  // Fill in what error reporting needs before anything can fail.
+  entry->offset = cursor - buffer_;
+  entry->start = cursor;
+  entry->kind = kUnknown;
+  entry->end = nullptr;
+
+  // The initial length comes first.  Reading it sets reader_'s offset size.
+  size_t length_size;
+  uint64 length = reader_->ReadInitialLength(cursor, &length_size);
+  const size_t avail_for_length = size_t(buffer_end - cursor);
+  if (length_size > avail_for_length) {
+    return ReportIncomplete(entry);
+  }
+  cursor += length_size;
+
+  // A zero length in a .eh_frame section marks the end of the series of
+  // entries.
+  if (eh_frame_ && length == 0) {
+    entry->kind = kTerminator;
+    entry->end = cursor;
+    return true;
+  }
+
+  // The entry body must fit in what is left of the buffer.
+  const size_t avail_for_body = size_t(buffer_end - cursor);
+  if (length > avail_for_body) {
+    return ReportIncomplete(entry);
+  }
+
+  // The length counts the bytes after the initial length field, which is
+  // exactly where the cursor now is, so the end can be computed here.  (If
+  // we're parsing 64-bit-offset DWARF on a 32-bit machine and the length
+  // didn't fit in a size_t, it has already been rejected above.)
+  entry->end = cursor + length;
+
+  // Next comes either the offset of a CIE or a CIE id.
+  size_t offset_size = reader_->OffsetSize();
+  const size_t avail_for_id = size_t(entry->end - cursor);
+  if (offset_size > avail_for_id) {
+    return ReportIncomplete(entry);
+  }
+  entry->id = reader_->ReadOffset(cursor);
+
+  // The cursor is deliberately left on the id field for now: in .eh_frame
+  // data the id's position is needed to compute the section offset of an
+  // FDE's CIE.
+
+  // Classify the entry.
+  if (eh_frame_) {
+    // .eh_frame: an id of zero means CIE; any other value is the distance
+    // from the FDE's id field back to the start of its CIE.
+    if (entry->id != 0) {
+      entry->kind = kFDE;
+      // Convert that distance into an offset from the start of the buffer.
+      entry->id = (cursor - buffer_) - entry->id;
+    } else {
+      entry->kind = kCIE;
+    }
+  } else {
+    // DWARF CFI: an id of ~0 (of the width given by the entry's offset
+    // size) means CIE; any other value is the offset of the CIE from the
+    // beginning of the section.
+    bool is_cie;
+    if (offset_size == 4) {
+      is_cie = (entry->id == 0xffffffff);
+    } else {
+      MOZ_ASSERT(offset_size == 8);
+      is_cie = (entry->id == 0xffffffffffffffffULL);
+    }
+    entry->kind = is_cie ? kCIE : kFDE;
+  }
+
+  // The kind-specific fields begin immediately after the id.
+  entry->fields = cursor + offset_size;
+
+  entry->cie = nullptr;
+
+  return true;
+}
+
+bool CallFrameInfo::ReadCIEFields(CIE* cie) {
+  const char* cursor = cie->fields;
+  size_t len;
+
+  MOZ_ASSERT(cie->kind == kCIE);
+
+  // Prepare for early exit.
+  cie->version = 0;
+  cie->augmentation.clear();
+  cie->code_alignment_factor = 0;
+  cie->data_alignment_factor = 0;
+  cie->return_address_register = 0;
+  cie->has_z_augmentation = false;
+  cie->pointer_encoding = DW_EH_PE_absptr;
+  cie->instructions = 0;
+
+  // Parse the version number.
+  if (cie->end - cursor < 1) return ReportIncomplete(cie);
+  cie->version = reader_->ReadOneByte(cursor);
+  cursor++;
+
+  // If we don't recognize the version, we can't parse any more fields of the
+  // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
+  // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
+  // the difference between those versions seems to be the same as for
+  // .debug_frame.
+  if (cie->version < 1 || cie->version > 4) {
+    reporter_->UnrecognizedVersion(cie->offset, cie->version);
+    return false;
+  }
+
+  const char* augmentation_start = cursor;
+  const void* augmentation_end =
+      memchr(augmentation_start, '\0', cie->end - augmentation_start);
+  if (!augmentation_end) return ReportIncomplete(cie);
+  cursor = static_cast<const char*>(augmentation_end);
+  cie->augmentation = string(augmentation_start, cursor - augmentation_start);
+  // Skip the terminating '\0'.
+  cursor++;
+
+  // Is this CFI augmented?
+  if (!cie->augmentation.empty()) {
+    // Is it an augmentation we recognize?
+    if (cie->augmentation[0] == DW_Z_augmentation_start) {
+      // Linux C++ ABI 'z' augmentation, used for exception handling data.
+      cie->has_z_augmentation = true;
+    } else {
+      // Not an augmentation we recognize. Augmentations can have arbitrary
+      // effects on the form of rest of the content, so we have to give up.
+      reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+      return false;
+    }
+  }
+
+  if (cie->version >= 4) {
+    // Check that the address_size and segment_size fields are plausible.
+    if (cie->end - cursor < 2) {
+      return ReportIncomplete(cie);
+    }
+    uint8_t address_size = reader_->ReadOneByte(cursor);
+    cursor++;
+    if (address_size != sizeof(void*)) {
+      // This is not per-se invalid CFI.  But we can reasonably expect to
+      // be running on a target of the same word size as the CFI is for,
+      // so we reject this case.
+      reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid address_size");
+      return false;
+    }
+    uint8_t segment_size = reader_->ReadOneByte(cursor);
+    cursor++;
+    if (segment_size != 0) {
+      // This is also not per-se invalid CFI, but we don't currently handle
+      // the case of non-zero |segment_size|.
+      reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid segment_size");
+      return false;
+    }
+    // We only continue parsing if |segment_size| is zero.  If this routine
+    // is ever changed to allow non-zero |segment_size|, then
+    // ReadFDEFields() below will have to be changed to match, per comments
+    // there.
+  }
+
+  // Parse the code alignment factor.
+  cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the data alignment factor.
+  cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the return address register. This is a ubyte in version 1, and
+  // a ULEB128 in version 3.
+  if (cie->version == 1) {
+    if (cursor >= cie->end) return ReportIncomplete(cie);
+    cie->return_address_register = uint8(*cursor++);
+  } else {
+    cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
+    if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+    cursor += len;
+  }
+
+  // If we have a 'z' augmentation string, find the augmentation data and
+  // use the augmentation string to parse it.
+  if (cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
+    if (size_t(cie->end - cursor) < len + data_size)
+      return ReportIncomplete(cie);
+    cursor += len;
+    const char* data = cursor;
+    cursor += data_size;
+    const char* data_end = cursor;
+
+    cie->has_z_lsda = false;
+    cie->has_z_personality = false;
+    cie->has_z_signal_frame = false;
+
+    // Walk the augmentation string, and extract values from the
+    // augmentation data as the string directs.
+    for (size_t i = 1; i < cie->augmentation.size(); i++) {
+      switch (cie->augmentation[i]) {
+        case DW_Z_has_LSDA:
+          // The CIE's augmentation data holds the language-specific data
+          // area pointer's encoding, and the FDE's augmentation data holds
+          // the pointer itself.
+          cie->has_z_lsda = true;
+          // Fetch the LSDA encoding from the augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->lsda_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->lsda_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
+            return false;
+          }
+          // Don't check if the encoding is usable here --- we haven't
+          // read the FDE's fields yet, so we're not prepared for
+          // DW_EH_PE_funcrel, although that's a fine encoding for the
+          // LSDA to use, since it appears in the FDE.
+          break;
+
+        case DW_Z_has_personality_routine:
+          // The CIE's augmentation data holds the personality routine
+          // pointer's encoding, followed by the pointer itself.
+          cie->has_z_personality = true;
+          // Fetch the personality routine pointer's encoding from the
+          // augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->personality_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->personality_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->personality_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->personality_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->personality_encoding);
+            return false;
+          }
+          // Fetch the personality routine's pointer itself from the data.
+          cie->personality_address = reader_->ReadEncodedPointer(
+              data, cie->personality_encoding, &len);
+          if (len > size_t(data_end - data)) return ReportIncomplete(cie);
+          data += len;
+          break;
+
+        case DW_Z_has_FDE_address_encoding:
+          // The CIE's augmentation data holds the pointer encoding to use
+          // for addresses in the FDE.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->pointer_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->pointer_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->pointer_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->pointer_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->pointer_encoding);
+            return false;
+          }
+          break;
+
+        case DW_Z_is_signal_trampoline:
+          // Frames using this CIE are signal delivery frames.
+          cie->has_z_signal_frame = true;
+          break;
+
+        default:
+          // An augmentation we don't recognize.
+          reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+          return false;
+      }
+    }
+  }
+
+  // The CIE's instructions start here.
+  cie->instructions = cursor;
+
+  return true;
+}
+
+// Parse the fields of |fde| that follow its prologue, starting at
+// fde->fields: the start address, the length of the covered range and, when
+// the owning CIE has a 'z' augmentation, the FDE's augmentation data.
+//
+// On success this fills in fde->address, fde->size, fde->instructions and
+// (only if the CIE declares an LSDA) fde->lsda_address, and returns true.
+// On failure the problem is reported through reporter_ and false is
+// returned.
+//
+// Note: this calls reader_->SetFunctionBase(fde->address) and does not undo
+// it, on either the success or the failure paths that follow that call.
+bool CallFrameInfo::ReadFDEFields(FDE* fde) {
+  const char* cursor = fde->fields;
+  size_t size;
+
+  // At this point, for Dwarf 4 and above, we are assuming that the
+  // associated CIE has its |segment_size| field equal to zero.  This is
+  // checked for in ReadCIEFields() above.  If ReadCIEFields() is ever
+  // changed to allow non-zero |segment_size| CIEs then we will have to read
+  // the segment_selector value at this point.
+
+  fde->address =
+      reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, &size);
+  if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde);
+  cursor += size;
+  reader_->SetFunctionBase(fde->address);
+
+  // For the length, we strip off the upper nybble of the encoding used for
+  // the starting address.
+  DwarfPointerEncoding length_encoding =
+      DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
+  fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
+  if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde);
+  cursor += size;
+
+  // If the CIE has a 'z' augmentation string, then augmentation data
+  // appears here.
+  if (fde->cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
+    // |data_size| comes straight from the input and may be arbitrarily
+    // large.  Compare it against the space left after the length field
+    // rather than computing |size + data_size|, which could wrap around and
+    // let an oversized value slip past the check.
+    const size_t remaining = size_t(fde->end - cursor);
+    if (size > remaining || data_size > remaining - size)
+      return ReportIncomplete(fde);
+    cursor += size;
+
+    // In the abstract, we should walk the augmentation string, and extract
+    // items from the FDE's augmentation data as we encounter augmentation
+    // string characters that specify their presence: the ordering of items
+    // in the augmentation string determines the arrangement of values in
+    // the augmentation data.
+    //
+    // In practice, there's only ever one value in FDE augmentation data
+    // that we support --- the LSDA pointer --- and we have to bail if we
+    // see any unrecognized augmentation string characters. So if there is
+    // anything here at all, we know what it is, and where it starts.
+    if (fde->cie->has_z_lsda) {
+      // Check whether the LSDA's pointer encoding is usable now: only once
+      // we've parsed the FDE's starting address do we call reader_->
+      // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
+      // usable.
+      if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
+        reporter_->UnusablePointerEncoding(fde->cie->offset,
+                                           fde->cie->lsda_encoding);
+        return false;
+      }
+
+      fde->lsda_address =
+          reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
+      if (size > data_size) return ReportIncomplete(fde);
+      // Ideally, we would also complain here if there were unconsumed
+      // augmentation data.
+    }
+
+    // Skip the whole augmentation data block, whether or not we consumed
+    // any of it.  The check above guarantees this stays within the entry.
+    cursor += data_size;
+  }
+
+  // The FDE's instructions start after those.
+  fde->instructions = cursor;
+
+  return true;
+}
+
+// Walk every entry in buffer_, parsing each FDE together with the CIE it
+// refers to and feeding the results to handler_.  Problems are reported
+// through reporter_.
+//
+// Returns true if every entry that was visited was processed successfully
+// (entries the handler declines count as successful), false otherwise.
+// An .eh_frame terminator stops the walk without affecting the result.
+bool CallFrameInfo::Start() {
+  const char* buffer_end = buffer_ + buffer_length_;
+  const char* cursor;
+  bool all_ok = true;
+  const char* entry_end;
+  bool ok;
+
+  // Traverse all the entries in buffer_, skipping CIEs and offering
+  // FDEs to the handler.  Note that the loop's increment expression, which
+  // folds |ok| into |all_ok|, runs on 'continue' but not on 'break'.
+  for (cursor = buffer_; cursor < buffer_end;
+       cursor = entry_end, all_ok = all_ok && ok) {
+    FDE fde;
+
+    // Make it easy to skip this entry with 'continue': assume that
+    // things are not okay until we've checked all the data, and
+    // prepare the address of the next entry.
+    ok = false;
+
+    // Read the entry's prologue.
+    if (!ReadEntryPrologue(cursor, &fde)) {
+      if (!fde.end) {
+        // If we couldn't even figure out this entry's extent, then we
+        // must stop processing entries altogether.
+        all_ok = false;
+        break;
+      }
+      entry_end = fde.end;
+      continue;
+    }
+
+    // The next iteration picks up after this entry.
+    entry_end = fde.end;
+
+    // Did we see an .eh_frame terminating mark?
+    if (fde.kind == kTerminator) {
+      // If there appears to be more data left in the section after the
+      // terminating mark, warn the user. But this is just a warning;
+      // we leave all_ok true.
+      if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
+      break;
+    }
+
+    // In this loop, we skip CIEs. We only parse them fully when we
+    // parse an FDE that refers to them. This limits our memory
+    // consumption (beyond the buffer itself) to that needed to
+    // process the largest single entry.
+    if (fde.kind != kFDE) {
+      ok = true;
+      continue;
+    }
+
+    // Validate the CIE pointer.
+    if (fde.id > buffer_length_) {
+      reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
+      continue;
+    }
+
+    CIE cie;
+
+    // Parse this FDE's CIE header.
+    if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) continue;
+    // This had better be an actual CIE.
+    if (cie.kind != kCIE) {
+      reporter_->BadCIEId(fde.offset, fde.id);
+      continue;
+    }
+    if (!ReadCIEFields(&cie)) continue;
+
+    // We now have the values that govern both the CIE and the FDE.
+    cie.cie = &cie;
+    fde.cie = &cie;
+
+    // Parse the FDE's header.  ReadFDEFields may have registered this
+    // FDE's start address with the ByteReader before failing, so from here
+    // on every path that abandons the entry must clear the function base
+    // again; otherwise it would remain in effect while the next entry's
+    // CIE is parsed.
+    if (!ReadFDEFields(&fde)) {
+      reader_->ClearFunctionBase();
+      continue;
+    }
+
+    // Call Entry to ask the consumer if they're interested.
+    if (!handler_->Entry(fde.offset, fde.address, fde.size, cie.version,
+                         cie.augmentation, cie.return_address_register)) {
+      // The handler isn't interested in this entry. That's not an error.
+      reader_->ClearFunctionBase();
+      ok = true;
+      continue;
+    }
+
+    if (cie.has_z_augmentation) {
+      // Report the personality routine address, if we have one.
+      if (cie.has_z_personality) {
+        if (!handler_->PersonalityRoutine(
+                cie.personality_address,
+                IsIndirectEncoding(cie.personality_encoding))) {
+          reader_->ClearFunctionBase();
+          continue;
+        }
+      }
+
+      // Report the language-specific data area address, if we have one.
+      if (cie.has_z_lsda) {
+        if (!handler_->LanguageSpecificDataArea(
+                fde.lsda_address, IsIndirectEncoding(cie.lsda_encoding))) {
+          reader_->ClearFunctionBase();
+          continue;
+        }
+      }
+
+      // If this is a signal-handling frame, report that.
+      if (cie.has_z_signal_frame) {
+        if (!handler_->SignalHandler()) {
+          reader_->ClearFunctionBase();
+          continue;
+        }
+      }
+    }
+
+    // Interpret the CIE's instructions, and then the FDE's instructions.
+    State state(reader_, handler_, reporter_, fde.address);
+    ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
+
+    // Tell the ByteReader that the function start address from the
+    // FDE header is no longer valid.
+    reader_->ClearFunctionBase();
+
+    // Report the end of the entry.
+    handler_->End();
+  }
+
+  return all_ok;
+}
+
+// Return a human-readable description of |kind| for use in diagnostics.
+// Any value other than kUnknown, kCIE and kFDE is asserted to be
+// kTerminator.
+const char* CallFrameInfo::KindName(EntryKind kind) {
+  switch (kind) {
+    case CallFrameInfo::kUnknown:
+      return "entry";
+    case CallFrameInfo::kCIE:
+      return "common information entry";
+    case CallFrameInfo::kFDE:
+      return "frame description entry";
+    default:
+      break;
+  }
+  MOZ_ASSERT(kind == CallFrameInfo::kTerminator);
+  return ".eh_frame sequence terminator";
+}
+
+// Tell the reporter that |entry| ended before all of its fields could be
+// read.  Always returns false, so parsers can write
+// "return ReportIncomplete(entry);".
+bool CallFrameInfo::ReportIncomplete(Entry* entry) {
+  const auto entry_offset = entry->offset;
+  const auto entry_kind = entry->kind;
+  reporter_->Incomplete(entry_offset, entry_kind);
+  return false;
+}
+
+// Log that the entry of the given |kind| at |offset| ended before all of
+// its fields could be read.
+void CallFrameInfo::Reporter::Incomplete(uint64 offset,
+                                         CallFrameInfo::EntryKind kind) {
+  const char* kind_name = CallFrameInfo::KindName(kind);
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in '%s':"
+                 " entry ends early\n",
+                 file, kind_name, offset, section);
+  log_(message);
+}
+
+// Log that an end-of-data marker was seen at |offset| although the section
+// has more contents after it.
+void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI at offset 0x%llx in '%s':"
+                 " saw end-of-data marker before end of"
+                 " section contents\n",
+                 file, offset, section);
+  log_(message);
+}
+
+// Log that the FDE at |offset| carries a CIE pointer, |cie_offset|, that is
+// out of range.
+void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
+                                                   uint64 cie_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry"
+                 " at offset 0x%llx in '%s':"
+                 " CIE pointer is out of range: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Log that the FDE at |offset| has a CIE pointer, |cie_offset|, that does
+// not refer to a CIE.
+void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry"
+                 " at offset 0x%llx in '%s':"
+                 " CIE pointer does not point to a CIE: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Log that the entry at |offset| specifies a CIE |version| that is not
+// recognized.
+void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry"
+                 " at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized version: %d\n",
+                 file, offset, section, version);
+  log_(message);
+}
+
+// Log that the entry at |offset| specifies an augmentation string, |aug|,
+// that is not recognized.
+void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
+                                                       const string& aug) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+  const char* augmentation = aug.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry"
+                 " at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized augmentation: '%s'\n",
+                 file, offset, section, augmentation);
+  log_(message);
+}
+
+// Log that the CIE at |offset| contains a DWARF 4 field we reject; |what|
+// describes the problem.  At most the first 100 characters of |what| are
+// included in the message.
+void CallFrameInfo::Reporter::InvalidDwarf4Artefact(uint64 offset,
+                                                    const char* what) {
+  char buf[300];
+  // The "%.100s" precision caps how much of |what| is copied, which avoids
+  // making a heap copy that could fail and hand a null pointer to "%s".
+  SprintfLiteral(buf,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies invalid Dwarf4 artefact: %.100s\n",
+                 filename_.c_str(), offset, section_.c_str(), what);
+  log_(buf);
+}
+
+// Log that the 'z' augmentation of the CIE at |offset| specifies a pointer
+// |encoding| that is not valid.
+void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
+                                                     uint8 encoding) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry"
+                 " at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies invalid pointer"
+                 " encoding: 0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Log that the 'z' augmentation of the CIE at |offset| specifies a pointer
+// |encoding| for which no base address is available.
+void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
+                                                      uint8 encoding) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry"
+                 " at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies a pointer encoding"
+                 " for which we have no base address: 0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Log that the CIE at |offset| contains a DW_CFA_restore instruction, at
+// |insn_offset|, which is not allowed in a CIE.
+void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry"
+                 " at offset 0x%llx in '%s':"
+                 " the DW_CFA_restore instruction at offset 0x%llx cannot"
+                 " be used in a common information entry\n",
+                 file, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the entry of the given |kind| at |offset| contains an
+// unrecognized instruction at |insn_offset|.
+void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
+                                             CallFrameInfo::EntryKind kind,
+                                             uint64 insn_offset) {
+  const char* kind_name = CallFrameInfo::KindName(kind);
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx"
+                 " in section '%s':"
+                 " the instruction at offset 0x%llx is unrecognized\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the instruction at |insn_offset|, in the entry of the given
+// |kind| at |offset|, needs a CFA rule but none has been set.
+void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
+                                        CallFrameInfo::EntryKind kind,
+                                        uint64 insn_offset) {
+  const char* kind_name = CallFrameInfo::KindName(kind);
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx"
+                 " in section '%s':"
+                 " the instruction at offset 0x%llx assumes that"
+                 " a CFA rule has been set, but none has been set\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the DW_CFA_restore_state instruction at |insn_offset|, in the
+// entry of the given |kind| at |offset|, found no saved state to pop.
+void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  const char* kind_name = CallFrameInfo::KindName(kind);
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx"
+                 " in section '%s':"
+                 " the DW_CFA_restore_state instruction"
+                 " at offset 0x%llx should pop a saved state from the"
+                 " stack, but the stack is empty\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the DW_CFA_restore_state instruction at |insn_offset|, in the
+// entry of the given |kind| at |offset|, would clear the current CFA rule.
+void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  const char* kind_name = CallFrameInfo::KindName(kind);
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx"
+                 " in section '%s':"
+                 " the DW_CFA_restore_state instruction"
+                 " at offset 0x%llx would clear the CFA rule in effect\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Return the number of entries in the i386 register table.  The table,
+// with the number of names on each row given first, is:
+//
+//   8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi",
+//   3 "$eip", "$eflags", "$unused1",
+//   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+//   2 "$unused2", "$unused3",
+//   8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+//   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+//   3 "$fcw", "$fsw", "$mxcsr",
+//   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", "$unused5",
+//   2 "$tr", "$ldtr"
+unsigned int DwarfCFIToModule::RegisterNames::I386() {
+  // One element per row of the table above.
+  static const unsigned int kRowSizes[] = {8, 3, 8, 2, 8, 8, 3, 8, 2};
+
+  unsigned int total = 0;
+  for (unsigned int row_size : kRowSizes) {
+    total += row_size;
+  }
+  return total;
+}
+
+// Return the number of entries in the x86_64 register table.  The table,
+// with the number of names on each row given first, is:
+//
+//   8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp",
+//   8 "$r8",  "$r9",  "$r10", "$r11", "$r12", "$r13", "$r14", "$r15",
+//   1 "$rip",
+//   8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+//   8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15",
+//   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+//   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+//   1 "$rflags",
+//   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2",
+//   4 "$fs.base", "$gs.base", "$unused3", "$unused4",
+//   2 "$tr", "$ldtr",
+//   3 "$mxcsr", "$fcw", "$fsw"
+unsigned int DwarfCFIToModule::RegisterNames::X86_64() {
+  // One element per row of the table above.
+  static const unsigned int kRowSizes[] = {8, 8, 1, 8, 8, 8,
+                                           8, 1, 8, 4, 2, 3};
+
+  unsigned int total = 0;
+  for (unsigned int row_size : kRowSizes) {
+    total += row_size;
+  }
+  return total;
+}
+
+// Per ARM IHI 0040A, section 3.1
+unsigned int DwarfCFIToModule::RegisterNames::ARM() {
+  // The ARM register table has thirteen rows of eight names each; names
+  // shown as "" are unnamed slots:
+  //
+  //   8 "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+  //   8 "r8",  "r9",  "r10", "r11", "r12", "sp",  "lr",  "pc",
+  //   8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+  //   8 "fps", "cpsr", "",   "",    "",    "",    "",    "",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
+  //   8 "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
+  //   8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+  //   8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+  //   8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
+  const unsigned int kRows = 13;
+  const unsigned int kNamesPerRow = 8;
+  return kRows * kNamesPerRow;
+}
+
+// Per ARM IHI 0057A, section 3.1
+unsigned int DwarfCFIToModule::RegisterNames::ARM64() {
+  // The ARM64 register table has twelve rows of eight names each; names
+  // shown as "" are unnamed slots:
+  //
+  //   8 "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
+  //   8 "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
+  //   8 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+  //   8 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "",    "",    "",    "",    "",    "",    "",    "",
+  //   8 "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+  //   8 "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+  //   8 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+  //   8 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+  const unsigned int kRows = 12;
+  const unsigned int kNamesPerRow = 8;
+  return kRows * kNamesPerRow;
+}
+
+// Return the number of entries in the MIPS register table.  The table,
+// with the number of names on each row given first, is:
+//
+//   8 "$zero", "$at",  "$v0",  "$v1",  "$a0",   "$a1",  "$a2",  "$a3",
+//   8 "$t0",   "$t1",  "$t2",  "$t3",  "$t4",   "$t5",  "$t6",  "$t7",
+//   8 "$s0",   "$s1",  "$s2",  "$s3",  "$s4",   "$s5",  "$s6",  "$s7",
+//   8 "$t8",   "$t9",  "$k0",  "$k1",  "$gp",   "$sp",  "$fp",  "$ra",
+//   9 "$lo",   "$hi",  "$pc",  "$f0",  "$f1",   "$f2",  "$f3",  "$f4",  "$f5",
+//   8 "$f6",   "$f7",  "$f8",  "$f9",  "$f10",  "$f11", "$f12", "$f13",
+//   7 "$f14",  "$f15", "$f16", "$f17", "$f18",  "$f19", "$f20",
+//   7 "$f21",  "$f22", "$f23", "$f24", "$f25",  "$f26", "$f27",
+//   6 "$f28",  "$f29", "$f30", "$f31", "$fcsr", "$fir"
+unsigned int DwarfCFIToModule::RegisterNames::MIPS() {
+  // One element per row of the table above.
+  static const unsigned int kRowSizes[] = {8, 8, 8, 8, 9, 8, 7, 7, 6};
+
+  unsigned int total = 0;
+  for (unsigned int row_size : kRowSizes) {
+    total += row_size;
+  }
+  return total;
+}
+
+// See prototype for comments.
+// Translate the DWARF expression in |expr| into a sequence of PfxInstrs
+// appended to |summ|.  The sequence begins with a PX_Start marker (whose
+// operand records |pushCfaAtStart|) and, on success, ends with PX_End,
+// preceded by an extra PX_Deref when |derefAtEnd| is set.
+//
+// Only DW_OP_lit0..31, DW_OP_breg0..31, DW_OP_const4s, DW_OP_deref,
+// DW_OP_and, DW_OP_plus, DW_OP_minus, DW_OP_shl and DW_OP_ge are handled;
+// any other opcode, a DW_OP_breg offset that does not fit in 32 bits, or
+// an operand that runs past the end of the expression causes failure.
+//
+// Returns the index of the PX_Start instruction on success, or -1 on
+// failure.  On failure, instructions already appended to |summ| are left
+// in place.  When |debug| is set, progress is logged through summ->Log().
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader,
+                       ImageSlice expr, bool debug, bool pushCfaAtStart,
+                       bool derefAtEnd) {
+  const char* cursor = expr.start_;
+  const char* end1 = cursor + expr.length_;
+
+  char buf[100];
+  if (debug) {
+    SprintfLiteral(buf, "LUL.DW  << DwarfExpr, len is %d\n",
+                   (int)(end1 - cursor));
+    summ->Log(buf);
+  }
+
+  // Add a marker for the start of this expression.  In it, indicate
+  // whether or not the CFA should be pushed onto the stack prior to
+  // evaluation.
+  int32_t start_ix =
+      summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0));
+  MOZ_ASSERT(start_ix >= 0);
+
+  while (cursor < end1) {
+    uint8 opc = reader->ReadOneByte(cursor);
+    cursor++;
+
+    // Set by the operand-free opcodes before they jump to |no_operands|.
+    const char* nm = nullptr;
+    PfxExprOp pxop = PX_End;
+
+    switch (opc) {
+      case DW_OP_lit0 ... DW_OP_lit31: {
+        int32_t simm32 = (int32_t)(opc - DW_OP_lit0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_lit%d\n", (int)simm32);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32));
+        break;
+      }
+
+      case DW_OP_breg0 ... DW_OP_breg31: {
+        size_t len;
+        int64_t n = reader->ReadSignedLEB128(cursor, &len);
+        cursor += len;
+        DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_breg%d %lld\n", (int)reg,
+                         (long long int)n);
+          summ->Log(buf);
+        }
+        // PfxInstr only allows a 32 bit signed offset.  So we
+        // must fail if the immediate is out of range.
+        if (n < INT32_MIN || INT32_MAX < n) goto fail;
+        (void)summ->AddPfxInstr(PfxInstr(PX_DwReg, reg));
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n));
+        (void)summ->AddPfxInstr(PfxInstr(PX_Add));
+        break;
+      }
+
+      case DW_OP_const4s: {
+        // The operand is a fixed four bytes.  Make sure they all lie
+        // inside the expression before reading them, rather than reading
+        // beyond its end and only noticing afterwards.
+        if (end1 - cursor < 4) goto fail;
+        uint64_t u64 = reader->ReadFourBytes(cursor);
+        cursor += 4;
+        // u64 is guaranteed by |ReadFourBytes| to be in the
+        // range 0 .. FFFFFFFF inclusive.  But to be safe:
+        uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF);
+        int32_t s32 = (int32_t)u32;
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_const4s %d\n", (int)s32);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, s32));
+        break;
+      }
+
+      case DW_OP_deref:
+        nm = "deref";
+        pxop = PX_Deref;
+        goto no_operands;
+      case DW_OP_and:
+        nm = "and";
+        pxop = PX_And;
+        goto no_operands;
+      case DW_OP_plus:
+        nm = "plus";
+        pxop = PX_Add;
+        goto no_operands;
+      case DW_OP_minus:
+        nm = "minus";
+        pxop = PX_Sub;
+        goto no_operands;
+      case DW_OP_shl:
+        nm = "shl";
+        pxop = PX_Shl;
+        goto no_operands;
+      case DW_OP_ge:
+        nm = "ge";
+        pxop = PX_CmpGES;
+        goto no_operands;
+      no_operands:
+        // Common tail for opcodes that take no operands: emit the single
+        // corresponding PfxInstr.
+        MOZ_ASSERT(nm && pxop != PX_End);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_%s\n", nm);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(pxop));
+        break;
+
+      default:
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   unknown opc %d\n", (int)opc);
+          summ->Log(buf);
+        }
+        goto fail;
+
+    }  // switch (opc)
+
+  }  // while (cursor < end1)
+
+  MOZ_ASSERT(cursor >= end1);
+
+  if (cursor > end1) {
+    // We overran the Dwarf expression.  Give up.  (This can still happen
+    // for a LEB128 operand, whose length is only known after reading it.)
+    goto fail;
+  }
+
+  // For DW_CFA_expression, what the expression denotes is the address
+  // of where the previous value is located.  The caller of this routine
+  // may therefore request one last dereference before the end marker is
+  // inserted.
+  if (derefAtEnd) {
+    (void)summ->AddPfxInstr(PfxInstr(PX_Deref));
+  }
+
+  // Insert an end marker, and declare success.
+  (void)summ->AddPfxInstr(PfxInstr(PX_End));
+  if (debug) {
+    SprintfLiteral(buf,
+                   "LUL.DW   conversion of dwarf expression succeeded, "
+                   "ix = %d\n",
+                   (int)start_ix);
+    summ->Log(buf);
+    summ->Log("LUL.DW  >>\n");
+  }
+  return start_ix;
+
+fail:
+  if (debug) {
+    summ->Log("LUL.DW   conversion of dwarf expression failed\n");
+    summ->Log("LUL.DW  >>\n");
+  }
+  return -1;
+}
+
+// Begin a new call frame entry covering |length| bytes starting at
+// |address|.  |offset| is the entry's section offset and |return_address|
+// is the number of its return address column.  |version| and
+// |augmentation| are not examined here.  Always returns true.
+bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length,
+                             uint8 version, const string& augmentation,
+                             unsigned return_address) {
+  if (DEBUG_DWARF) {
+    char buf[100];
+    // |length| is unsigned, so print it with an unsigned conversion.
+    SprintfLiteral(buf, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%llu\n",
+                   address, length);
+    summ_->Log(buf);
+  }
+
+  // Remember where this entry lives: the rule callbacks pass
+  // entry_offset_ to the reporter when they have something to complain
+  // about, so it must describe the entry currently being processed.
+  entry_offset_ = offset;
+
+  summ_->Entry(address, length);
+
+  // If dwarf2reader::CallFrameInfo can handle this version and
+  // augmentation, then we should be okay with that, so there's no
+  // need to check them here.
+
+  // Get ready to collect entries.
+  return_address_ = return_address;
+
+  // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI
+  // may not establish any rule for .ra if the return address column
+  // is an ordinary register, and that register holds the return
+  // address on entry to the function. So establish an initial .ra
+  // rule citing the return address register.
+  if (return_address_ < num_dw_regs_) {
+    summ_->Rule(address, return_address_, NODEREF, return_address, 0);
+  }
+
+  return true;
+}
+
+// Return the unique string naming register |i|: ".cfa" for a negative
+// number (asserted to be kCFARegister), ".ra" for the current return
+// address column, and "dwarf_reg_<N>" for anything else.
+const UniqueString* DwarfCFIToModule::RegisterName(int i) {
+  if (i >= 0) {
+    const unsigned reg = i;
+    if (reg != return_address_) {
+      char name[30];
+      SprintfLiteral(name, "dwarf_reg_%u", reg);
+      return usu_->ToUniqueString(name);
+    }
+    return usu_->ToUniqueString(".ra");
+  }
+
+  MOZ_ASSERT(i == kCFARegister);
+  return usu_->ToUniqueString(".cfa");
+}
+
+bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) {
+  reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg));
+  // Treat this as a non-fatal error.
+  return true;
+}
+
+// Record, from |address| onwards, that |reg| keeps the value it already
+// has.  Always returns true.
+bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) {
+  if (DEBUG_DWARF) {
+    char trace[100];
+    SprintfLiteral(trace, "LUL.DW  0x%llx: old r%d = Same\n", address, reg);
+    summ_->Log(trace);
+  }
+
+  // Expressed as "reg + 0", without a dereference.
+  summ_->Rule(address, reg, NODEREF, reg, 0);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// stored in memory at |base_register| + |offset|.  Always returns true.
+bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, int base_register,
+                                  long offset) {
+  if (DEBUG_DWARF) {
+    char trace[100];
+    SprintfLiteral(trace, "LUL.DW  0x%llx: old r%d = *(r%d + %ld)\n", address,
+                   reg, base_register, offset);
+    summ_->Log(trace);
+  }
+
+  // Expressed as "*(base_register + offset)", i.e. with a dereference.
+  summ_->Rule(address, reg, DEREF, base_register, offset);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// |base_register| + |offset| itself.  Always returns true.
+bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, int base_register,
+                                     long offset) {
+  if (DEBUG_DWARF) {
+    char trace[100];
+    SprintfLiteral(trace, "LUL.DW  0x%llx: old r%d = r%d + %ld\n", address, reg,
+                   base_register, offset);
+    summ_->Log(trace);
+  }
+
+  // Expressed as "base_register + offset", without a dereference.
+  summ_->Rule(address, reg, NODEREF, base_register, offset);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is the
+// value of |base_register|.  Always returns true.
+bool DwarfCFIToModule::RegisterRule(uint64 address, int reg,
+                                    int base_register) {
+  if (DEBUG_DWARF) {
+    char trace[100];
+    SprintfLiteral(trace, "LUL.DW  0x%llx: old r%d = r%d\n", address, reg,
+                   base_register);
+    summ_->Log(trace);
+  }
+
+  // Expressed as "base_register + 0", without a dereference.
+  summ_->Rule(address, reg, NODEREF, base_register, 0);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// found by evaluating |expression| (with the CFA pushed first) and
+// dereferencing the result.  If the expression cannot be converted the
+// reporter is told and no rule is recorded; that is treated as non-fatal,
+// so the result is always true.
+bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg,
+                                      const ImageSlice& expression) {
+  const bool debug = !!DEBUG_DWARF;
+  const int32_t pfx_start =
+      parseDwarfExpr(summ_, reader_, expression, debug, true /*pushCfaAtStart*/,
+                     true /*derefAtEnd*/);
+
+  if (pfx_start < 0) {
+    // Conversion failed; complain, but carry on.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, pfx_start);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is the
+// result of evaluating |expression| (with the CFA pushed first), without a
+// final dereference.  If the expression cannot be converted the reporter
+// is told and no rule is recorded; that is treated as non-fatal, so the
+// result is always true.
+bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg,
+                                         const ImageSlice& expression) {
+  const bool debug = !!DEBUG_DWARF;
+  const int32_t pfx_start =
+      parseDwarfExpr(summ_, reader_, expression, debug, true /*pushCfaAtStart*/,
+                     false /*!derefAtEnd*/);
+
+  if (pfx_start < 0) {
+    // Conversion failed; complain, but carry on.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, pfx_start);
+  return true;
+}
+
+// Finish the current entry by notifying the summariser.  Always returns
+// true.
+bool DwarfCFIToModule::End() {
+  // module_->AddStackFrameEntry(entry_);
+  if (DEBUG_DWARF) summ_->Log("LUL.DW DwarfCFIToModule::End()\n");
+
+  summ_->End();
+  return true;
+}
+
+void DwarfCFIToModule::Reporter::UndefinedNotSupported(
+    size_t offset, const UniqueString* reg) {
+  char buf[300];
+  SprintfLiteral(buf, "DwarfCFIToModule::Reporter::UndefinedNotSupported()\n");
+  log_(buf);
+  // BPLOG(INFO) << file_ << ", section '" << section_
+  //  << "': the call frame entry at offset 0x"
+  //  << std::setbase(16) << offset << std::setbase(10)
+  //  << " sets the rule for register '" << FromUniqueString(reg)
+  //  << "' to 'undefined', but the Breakpad symbol file format cannot "
+  //  << " express this";
+}
+
+// FIXME: move this somewhere sensible
+// Returns true if |n| has at most one bit set.  Note that, despite the
+// name, this means zero is accepted as well as the exact powers of two.
+static bool is_power_of_2(uint64_t n) {
+  // Clearing the lowest set bit leaves nothing behind exactly when there
+  // was at most one bit set to begin with.
+  const uint64_t without_lowest_bit = n & (n - 1);
+  return without_lowest_bit == 0;
+}
+
+// Log that a DWARF expression could not be summarised.  To keep the log
+// quiet, a message is only emitted when the running count of complaints
+// passes the is_power_of_2() test.  Neither |offset| nor |reg| appears in
+// the message.
+void DwarfCFIToModule::Reporter::ExpressionCouldNotBeSummarised(
+    size_t offset, const UniqueString* reg) {
+  static uint64_t n_complaints = 0;  // This isn't threadsafe
+  ++n_complaints;
+
+  if (is_power_of_2(n_complaints)) {
+    char message[300];
+    SprintfLiteral(message,
+                   "DwarfCFIToModule::Reporter::"
+                   "ExpressionCouldNotBeSummarised(shown %llu times)\n",
+                   (unsigned long long int)n_complaints);
+    log_(message);
+  }
+}
+
+}  // namespace lul
diff --git a/tools/profiler/lul/LulDwarfExt.h b/tools/profiler/lul/LulDwarfExt.h
new file mode 100644
index 0000000000..4ee6fe17a8
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfExt.h
@@ -0,0 +1,1312 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright 2006, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/types.h
+//   src/common/dwarf/dwarf2enums.h
+//   src/common/dwarf/bytereader.h
+//   src/common/dwarf_cfi_to_module.h
+//   src/common/dwarf/dwarf2reader.h
+
+#ifndef LulDwarfExt_h
+#define LulDwarfExt_h
+
+#include "LulDwarfSummariser.h"
+
+#include "mozilla/Assertions.h"
+
+#include <stdint.h>
+#include <string>
+
+typedef signed char int8;  // Signed aliases; widths are assumed, not checked.
+typedef short int16;
+typedef int int32;
+typedef long long int64;
+
+typedef unsigned char uint8;  // Unsigned counterparts of the aliases above.
+typedef unsigned short uint16;
+typedef unsigned int uint32;
+typedef unsigned long long uint64;
+
+#ifdef __PTRDIFF_TYPE__
+typedef __PTRDIFF_TYPE__ intptr;  // Signed, pointer-difference sized.
+typedef unsigned __PTRDIFF_TYPE__ uintptr;  // Unsigned, same size.
+#else
+#  error "Can't find pointer-sized integral types."
+#endif
+
+namespace lul {
+
+class UniqueString;
+
+// This represents a read-only slice of the "image" (the temporarily mmaped-in
+// .so).  It is used for representing byte ranges containing Dwarf expressions.
+// Note that equality (operator==) is on slice contents, not slice locations.
+struct ImageSlice {
+  const char* start_;  // First byte of the slice (null if default-made).
+  size_t length_;      // Number of bytes in the slice.
+  // Make an empty slice.
+  ImageSlice() : start_(nullptr), length_(0) {}
+  ImageSlice(const char* start, size_t length)
+      : start_(start), length_(length) {}
+  // Make one from a C string (for testing only).  Note, the terminating zero
+  // is not included in the length.
+  explicit ImageSlice(const char* cstring)
+      : start_(cstring), length_(strlen(cstring)) {}
+  explicit ImageSlice(const std::string& str)
+      : start_(str.c_str()), length_(str.length()) {}
+  ImageSlice(const ImageSlice& other)
+      : start_(other.start_), length_(other.length_) {}
+  ImageSlice(ImageSlice& other)
+      : start_(other.start_), length_(other.length_) {}
+  // Compare slice contents, not locations.  Empty slices compare equal
+  // without calling memcmp: handing memcmp a null pointer is undefined
+  // behaviour even for a zero length, and empty slices may hold null.
+  bool operator==(const ImageSlice& other) const {
+    if (length_ != other.length_) return false;
+    return length_ == 0 || memcmp(start_, other.start_, length_) == 0;
+  }
+};
+
+// Exception handling frame description pointer formats, as described
+// by the Linux Standard Base Core Specification 4.0, section 11.5,
+// DWARF Extensions.
+enum DwarfPointerEncoding {
+  DW_EH_PE_absptr = 0x00,
+  DW_EH_PE_omit = 0xff,  // The pointer is omitted altogether.
+  DW_EH_PE_uleb128 = 0x01,
+  DW_EH_PE_udata2 = 0x02,
+  DW_EH_PE_udata4 = 0x03,
+  DW_EH_PE_udata8 = 0x04,
+  DW_EH_PE_sleb128 = 0x09,
+  DW_EH_PE_sdata2 = 0x0A,
+  DW_EH_PE_sdata4 = 0x0B,
+  DW_EH_PE_sdata8 = 0x0C,
+  DW_EH_PE_pcrel = 0x10,  // Relative to the encoded pointer's own address.
+  DW_EH_PE_textrel = 0x20,  // Relative to the base of the .text section.
+  DW_EH_PE_datarel = 0x30,  // Relative to an ABI-dependent data base.
+  DW_EH_PE_funcrel = 0x40,  // Relative to the current FDE's function base.
+  DW_EH_PE_aligned = 0x50,  // Padded to an alignment boundary.
+
+  // The GNU toolchain sources define this enum value as well,
+  // simply to help classify the lower nybble values into signed and
+  // unsigned groups.
+  DW_EH_PE_signed = 0x08,
+
+  // This is not documented in LSB 4.0, but it is used in both the
+  // Linux and OS X toolchains. It can be added to any other
+  // encoding (except DW_EH_PE_aligned), and indicates that the
+  // encoded value represents the address at which the true address
+  // is stored, not the true address itself.
+  DW_EH_PE_indirect = 0x80
+};
+
+// We can't use the obvious names LITTLE_ENDIAN and BIG_ENDIAN
+// because they conflict with existing macros.
+enum Endianness { ENDIANNESS_BIG, ENDIANNESS_LITTLE };
+
+// A ByteReader knows how to read single- and multi-byte values of
+// various endiannesses, sizes, and encodings, as used in DWARF
+// debugging information and Linux C++ exception handling data.
+class ByteReader {
+ public:
+  // Construct a ByteReader capable of reading one-, two-, four-, and
+  // eight-byte values according to ENDIANNESS, absolute machine-sized
+  // addresses, DWARF-style "initial length" values, signed and
+  // unsigned LEB128 numbers, and Linux C++ exception handling data's
+  // encoded pointers.
+  explicit ByteReader(enum Endianness endianness);
+  virtual ~ByteReader();
+
+  // Read a single byte from BUFFER and return it as an unsigned 8 bit
+  // number.
+  uint8 ReadOneByte(const char* buffer) const;
+
+  // Read two bytes from BUFFER and return them as an unsigned 16 bit
+  // number, using this ByteReader's endianness.
+  uint16 ReadTwoBytes(const char* buffer) const;
+
+  // Read four bytes from BUFFER and return them as an unsigned 32 bit
+  // number, using this ByteReader's endianness. This function returns
+  // a uint64 so that it is compatible with ReadAddress and
+  // ReadOffset. The number it returns will never be outside the range
+  // of an unsigned 32 bit integer.
+  uint64 ReadFourBytes(const char* buffer) const;
+
+  // Read eight bytes from BUFFER and return them as an unsigned 64
+  // bit number, using this ByteReader's endianness.
+  uint64 ReadEightBytes(const char* buffer) const;
+
+  // Read an unsigned LEB128 (Little Endian Base 128) number from
+  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
+  // the number of bytes read.
+  //
+  // The unsigned LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between 0 and 0x7f, then its unsigned LEB128
+  //   representation is a single byte whose value is N.
+  //
+  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the unsigned LEB128 representation of N /
+  //   128, rounded towards negative infinity.
+  //
+  // In other words, we break VALUE into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
+  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
+
+  // Read a signed LEB128 number from BUFFER and return it as a
+  // signed 64 bit integer. Set LEN to the number of bytes read.
+  //
+  // The signed LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between -0x40 and 0x3f, then its signed LEB128
+  //   representation is a single byte whose value is N in two's
+  //   complement.
+  //
+  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the signed LEB128 representation of N / 128,
+  //   rounded towards negative infinity.
+  //
+  // In other words, we break VALUE into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
+  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
+
+  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
+  // must be either 4 or 8. (DWARF allows addresses to be any number of
+  // bytes in length from 1 to 255, but we only support 32- and 64-bit
+  // addresses at the moment.) You must call this before using the
+  // ReadAddress member function.
+  //
+  // For data in a .debug_info section, or something that .debug_info
+  // refers to like line number or macro data, the compilation unit
+  // header's address_size field indicates the address size to use. Call
+  // frame information doesn't indicate its address size (a shortcoming of
+  // the spec); you must supply the appropriate size based on the
+  // architecture of the target machine.
+  void SetAddressSize(uint8 size);
+
+  // Return the current address size, in bytes. This is either 4,
+  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
+  uint8 AddressSize() const { return address_size_; }
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer, respecting this ByteReader's endianness and address size. You
+  // must call SetAddressSize before calling this function.
+  uint64 ReadAddress(const char* buffer) const;
+
+  // DWARF actually defines two slightly different formats: 32-bit DWARF
+  // and 64-bit DWARF. This is *not* related to the size of registers or
+  // addresses on the target machine; it refers only to the size of section
+  // offsets and data lengths appearing in the DWARF data. One only needs
+  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
+  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
+  // debugging data itself is very large.
+  //
+  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
+  // compilation unit and call frame information entry begins with an
+  // "initial length" field, which, in addition to giving the length of the
+  // data, also indicates the size of section offsets and lengths appearing
+  // in that data. The ReadInitialLength member function, below, reads an
+  // initial length and sets the ByteReader's offset size as a side effect.
+  // Thus, in the normal process of reading DWARF data, the appropriate
+  // offset size is set automatically. So, you should only need to call
+  // SetOffsetSize if you are using the same ByteReader to jump from the
+  // midst of one block of DWARF data into another.
+
+  // Read a DWARF "initial length" field from START, and return it as
+  // an unsigned 64 bit integer, respecting this ByteReader's
+  // endianness. Set *LEN to the length of the initial length in
+  // bytes, either four or twelve. As a side effect, set this
+  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
+  // initial length) or 8 (if we see a 64-bit DWARF initial length).
+  //
+  // A DWARF initial length is either:
+  //
+  // - a byte count stored as an unsigned 32-bit value less than
+  //   0xfffffff0, indicating that the data whose length is being
+  //   measured uses the 32-bit DWARF format, or
+  //
+  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
+  //   indicating that the data whose length is being measured uses
+  //   the 64-bit DWARF format.
+  uint64 ReadInitialLength(const char* start, size_t* len);
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
+  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
+  // long. You must call ReadInitialLength or SetOffsetSize before calling
+  // this function; see the comments above for details.
+  uint64 ReadOffset(const char* buffer) const;
+
+  // Return the current offset size, in bytes.
+  // A return value of 4 indicates that we are reading 32-bit DWARF.
+  // A return value of 8 indicates that we are reading 64-bit DWARF.
+  uint8 OffsetSize() const { return offset_size_; }
+
+  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
+  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
+  // Usually, you should not call this function yourself; instead, let a
+  // call to ReadInitialLength establish the data's offset size
+  // automatically.
+  void SetOffsetSize(uint8 size);
+
+  // The Linux C++ ABI uses a variant of DWARF call frame information
+  // for exception handling. This data is included in the program's
+  // address space as the ".eh_frame" section, and interpreted at
+  // runtime to walk the stack, find exception handlers, and run
+  // cleanup code. The format is mostly the same as DWARF CFI, with
+  // some adjustments made to provide the additional
+  // exception-handling data, and to make the data easier to work with
+  // in memory --- for example, to allow it to be placed in read-only
+  // memory even when describing position-independent code.
+  //
+  // In particular, exception handling data can select a number of
+  // different encodings for pointers that appear in the data, as
+  // described by the DwarfPointerEncoding enum. There are actually
+  // four axes(!) to the encoding:
+  //
+  // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
+  //   the DWARF LEB128 encoding.
+  //
+  // - The pointer's signedness: pointers can be signed or unsigned.
+  //
+  // - The pointer's base address: the data stored in the exception
+  //   handling data can be the actual address (that is, an absolute
+  //   pointer), or relative to one of a number of different base
+  //   addresses --- including that of the encoded pointer itself, for
+  //   a form of "pc-relative" addressing.
+  //
+  // - The pointer may be indirect: it may be the address where the
+  //   true pointer is stored. (This is used to refer to things via
+  //   global offset table entries, program linkage table entries, or
+  //   other tricks used in position-independent code.)
+  //
+  // There are also two options that fall outside that matrix
+  // altogether: the pointer may be omitted, or it may have padding to
+  // align it on an appropriate address boundary. (That last option
+  // may seem like it should be just another axis, but it is not.)
+
+  // Indicate that the exception handling data is loaded starting at
+  // SECTION_BASE, and that the start of its buffer in our own memory
+  // is BUFFER_BASE. This allows us to find the address that a given
+  // byte in our buffer would have when loaded into the program the
+  // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
+  void SetCFIDataBase(uint64 section_base, const char* buffer_base);
+
+  // Indicate that the base address of the program's ".text" section
+  // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
+  void SetTextBase(uint64 text_base);
+
+  // Indicate that the base address for DW_EH_PE_datarel pointers is
+  // DATA_BASE. The proper value depends on the ABI; it is usually the
+  // address of the global offset table, held in a designated register in
+  // position-independent code. You will need to look at the startup code
+  // for the target system to be sure. I tried; my eyes bled.
+  void SetDataBase(uint64 data_base);
+
+  // Indicate that the base address for the FDE we are processing is
+  // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
+  // pointers. (This encoding does not seem to be used by the GNU
+  // toolchain.)
+  void SetFunctionBase(uint64 function_base);
+
+  // Indicate that we are no longer processing any FDE, so any use of
+  // a DW_EH_PE_funcrel encoding is an error.
+  void ClearFunctionBase();
+
+  // Return true if ENCODING is a valid pointer encoding.
+  bool ValidEncoding(DwarfPointerEncoding encoding) const;
+
+  // Return true if we have all the information we need to read a
+  // pointer that uses ENCODING. This checks that the appropriate
+  // SetFooBase function for ENCODING has been called.
+  bool UsableEncoding(DwarfPointerEncoding encoding) const;
+
+  // Read an encoded pointer from BUFFER using ENCODING; return the
+  // absolute address it represents, and set *LEN to the pointer's
+  // length in bytes, including any padding for aligned pointers.
+  //
+  // This function calls 'abort' if ENCODING is invalid or refers to a
+  // base address this reader hasn't been given, so you should check
+  // with ValidEncoding and UsableEncoding first if you would rather
+  // die in a more helpful way.
+  uint64 ReadEncodedPointer(const char* buffer, DwarfPointerEncoding encoding,
+                            size_t* len) const;
+
+ private:
+  // Function pointer type for our address and offset readers.
+  typedef uint64 (ByteReader::*AddressReader)(const char*) const;
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
+  // generally depending on the amount of DWARF2/3 info present.
+  // This function pointer gets set by SetOffsetSize.
+  AddressReader offset_reader_;
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer.  DWARF2/3 allow addresses to be any size from 0-255
+  // bytes currently.  Internally we support 4 and 8 byte addresses,
+  // and will CHECK on anything else.
+  // This function pointer gets set by SetAddressSize.
+  AddressReader address_reader_;
+
+  Endianness endian_;  // Byte order used by the multi-byte readers.
+  uint8 address_size_;  // Reported by AddressSize().
+  uint8 offset_size_;  // Reported by OffsetSize().
+
+  // Base addresses for Linux C++ exception handling data's encoded pointers.
+  bool have_section_base_, have_text_base_, have_data_base_;
+  bool have_function_base_;  // Cleared again by ClearFunctionBase().
+  uint64 section_base_;
+  uint64 text_base_, data_base_, function_base_;
+  const char* buffer_base_;  // Start of the CFI data in our own memory.
+};
+
+inline uint8 ByteReader::ReadOneByte(const char* buffer) const {
+  return static_cast<uint8>(*buffer);  // Reinterpret the char as 0..255.
+}
+
+inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const {
+  // View the bytes as unsigned so that no sign extension can occur.
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+  // Decide which of the two bytes is the more significant one: the second
+  // for little-endian data, the first for big-endian data.
+  const bool little = (endian_ == ENDIANNESS_LITTLE);
+  const uint16 high = little ? bytes[1] : bytes[0];
+  const uint16 low = little ? bytes[0] : bytes[1];
+  return low | high << 8;
+}
+
+inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const {
+  // View the bytes as unsigned so that no sign extension can occur.
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+  // Accumulate most-significant byte first; for little-endian data that
+  // means walking the buffer from its last byte back to its first.
+  const bool little = (endian_ == ENDIANNESS_LITTLE);
+  uint32 value = 0;
+  for (int i = 0; i < 4; i++) {
+    value = (value << 8) | bytes[little ? 3 - i : i];
+  }
+  return value;
+}
+
+inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const {
+  // View the bytes as unsigned so that no sign extension can occur.
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+  // Accumulate the value one byte at a time, most significant byte first.
+  uint64 value = 0;
+  if (endian_ == ENDIANNESS_LITTLE) {
+    // Little-endian: the most significant byte is the last of the eight.
+    for (int i = 7; i >= 0; i--) {
+      value = (value << 8) | bytes[i];
+    }
+  } else {
+    // Big-endian: the most significant byte is the first of the eight.
+    for (int i = 0; i < 8; i++) {
+      value = (value << 8) | bytes[i];
+    }
+  }
+  return value;
+}
+
+// Read an unsigned LEB128 number.  Each byte contains 7 bits of
+// information, plus one bit saying whether the number continues or
+// not.
+
+inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer,
+                                             size_t* len) const {
+  // Decodes the unsigned LEB128 number starting at |buffer| and stores the
+  // number of bytes consumed in |*len|.  Each byte supplies seven payload
+  // bits, least significant group first; a set top bit means another byte
+  // follows.  No bounds check is made: the encoding must be terminated.
+  uint64 result = 0;
+  size_t num_read = 0;
+  unsigned int shift = 0;
+  unsigned char byte;
+  do {
+    byte = *buffer++;
+    num_read++;
+    // Shifting a 64-bit value by 64 or more is undefined behaviour, so the
+    // payload of over-long encodings is dropped, though still consumed.
+    if (shift < 64) result |= (static_cast<uint64>(byte & 0x7f)) << shift;
+    shift += 7;
+  } while (byte & 0x80);
+  *len = num_read;
+  return result;
+}
+
+// Read a signed LEB128 number.  These are like regular LEB128
+// numbers, except the last byte may have a sign bit set.
+
+inline int64 ByteReader::ReadSignedLEB128(const char* buffer,
+                                          size_t* len) const {
+  // Accumulate unsigned: signed shift/negate overflow would be undefined.
+  uint64 bits = 0;
+  unsigned int shift = 0;
+  size_t num_read = 0;
+  unsigned char byte;
+  do {
+    byte = *buffer++;
+    num_read++;
+    // Ignore payload bits that would land beyond bit 63.
+    if (shift < 64) bits |= static_cast<uint64>(byte & 0x7f) << shift;
+    shift += 7;
+  } while (byte & 0x80);
+  // Bit 6 of the final byte is the sign; extend it through bit 63.
+  if (shift < 64 && (byte & 0x40)) bits |= ~static_cast<uint64>(0) << shift;
+  *len = num_read;
+  return static_cast<int64>(bits);
+}
+
+inline uint64 ByteReader::ReadOffset(const char* buffer) const {
+  MOZ_ASSERT(this->offset_reader_);  // The reader must have been selected.
+  return (this->*offset_reader_)(buffer);  // Dispatch via member pointer.
+}
+
+inline uint64 ByteReader::ReadAddress(const char* buffer) const {
+  MOZ_ASSERT(this->address_reader_);  // The reader must have been selected.
+  return (this->*address_reader_)(buffer);  // Dispatch via member pointer.
+}
+
+inline void ByteReader::SetCFIDataBase(uint64 section_base,
+                                       const char* buffer_base) {
+  have_section_base_ = true;  // A section base has now been supplied.
+  buffer_base_ = buffer_base;
+  section_base_ = section_base;
+}
+
+inline void ByteReader::SetTextBase(uint64 text_base) {
+  have_text_base_ = true;  // A text base has now been supplied.
+  text_base_ = text_base;
+}
+
+inline void ByteReader::SetDataBase(uint64 data_base) {
+  have_data_base_ = true;  // A data base has now been supplied.
+  data_base_ = data_base;
+}
+
+inline void ByteReader::SetFunctionBase(uint64 function_base) {
+  have_function_base_ = true;  // Stays set until ClearFunctionBase().
+  function_base_ = function_base;
+}
+
+inline void ByteReader::ClearFunctionBase() { have_function_base_ = false; }  // Only the flag is reset; function_base_ keeps its old value.
+
+// (derived from)
+// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which
+// accepts parsed DWARF call frame info and adds it to a Summariser object.
+
+// This class is a reader for DWARF's Call Frame Information.  CFI
+// describes how to unwind stack frames --- even for functions that do
+// not follow fixed conventions for saving registers, whose frame size
+// varies as they execute, etc.
+//
+// CFI describes, at each machine instruction, how to compute the
+// stack frame's base address, how to find the return address, and
+// where to find the saved values of the caller's registers (if the
+// callee has stashed them somewhere to free up the registers for its
+// own use).
+//
+// For example, suppose we have a function whose machine code looks
+// like this (imagine an assembly language that looks like C, for a
+// machine with 32-bit registers, and a stack that grows towards lower
+// addresses):
+//
+// func:                                ; entry point; return address at sp
+// func+0:      sp = sp - 16            ; allocate space for stack frame
+// func+1:      sp[12] = r0             ; save r0 at sp+12
+// ...                                  ; other code, not frame-related
+// func+10:     sp -= 4; *sp = x        ; push some x on the stack
+// ...                                  ; other code, not frame-related
+// func+20:     r0 = sp[16]             ; restore saved r0
+// func+21:     sp += 20                ; pop whole stack frame
+// func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
+//
+// DWARF CFI is (a very compressed representation of) a table with a
+// row for each machine instruction address and a column for each
+// register showing how to restore it, if possible.
+//
+// A special column named "CFA", for "Canonical Frame Address", tells how
+// to compute the base address of the frame; registers' entries may
+// refer to the CFA in describing where the registers are saved.
+//
+// Another special column, named "RA", represents the return address.
+//
+// For example, here is a complete (uncompressed) table describing the
+// function above:
+//
+//     insn      cfa    r0      r1 ...  ra
+//     =======================================
+//     func+0:   sp                     cfa[0]
+//     func+1:   sp+16                  cfa[0]
+//     func+2:   sp+16  cfa[-4]         cfa[0]
+//     func+11:  sp+20  cfa[-4]         cfa[0]
+//     func+21:  sp+20                  cfa[0]
+//     func+22:  sp                     cfa[0]
+//
+// Some things to note here:
+//
+// - Each row describes the state of affairs *before* executing the
+//   instruction at the given address.  Thus, the row for func+0
+//   describes the state before we allocate the stack frame.  In the
+//   next row, the formula for computing the CFA has changed,
+//   reflecting that allocation.
+//
+// - The other entries are written in terms of the CFA; this allows
+//   them to remain unchanged as the stack pointer gets bumped around.
+//   For example, the rule for recovering the return address (the "ra"
+//   column) remains unchanged throughout the function, even as the
+//   stack pointer takes on three different offsets from the return
+//   address.
+//
+// - Although we haven't shown it, most calling conventions designate
+//   "callee-saves" and "caller-saves" registers. The callee must
+//   preserve the values of callee-saves registers; if it uses them,
+//   it must save their original values somewhere, and restore them
+//   before it returns. In contrast, the callee is free to trash
+//   caller-saves registers; if the callee uses these, it will
+//   probably not bother to save them anywhere, and the CFI will
+//   probably mark their values as "unrecoverable".
+//
+//   (However, since the caller cannot assume the callee was going to
+//   save them, caller-saves registers are probably dead in the caller
+//   anyway, so compilers usually don't generate CFI for caller-saves
+//   registers.)
+//
+// - Exactly where the CFA points is a matter of convention that
+//   depends on the architecture and ABI in use. In the example, the
+//   CFA is the value the stack pointer had upon entry to the
+//   function, pointing at the saved return address. But on the x86,
+//   the call frame information generated by GCC follows the
+//   convention that the CFA is the address *after* the saved return
+//   address.
+//
+//   But by definition, the CFA remains constant throughout the
+//   lifetime of the frame. This makes it a useful value for other
+//   columns to refer to. It also gives debuggers a useful handle
+//   for identifying a frame.
+//
+// If you look at the table above, you'll notice that a given entry is
+// often the same as the one immediately above it: most instructions
+// change only one or two aspects of the stack frame, if they affect
+// it at all. The DWARF format takes advantage of this fact, and
+// reduces the size of the data by mentioning only the addresses and
+// columns at which changes take place. So for the above, DWARF CFI
+// data would only actually mention the following:
+//
+//     insn      cfa    r0      r1 ...  ra
+//     =======================================
+//     func+0:   sp                     cfa[0]
+//     func+1:   sp+16
+//     func+2:          cfa[-4]
+//     func+11:  sp+20
+//     func+21:         r0
+//     func+22:  sp
+//
+// In fact, this is the way the parser reports CFI to the consumer: as
+// a series of statements of the form, "At address X, column Y changed
+// to Z," and related conventions for describing the initial state.
+//
+// Naturally, it would be impractical to have to scan the entire
+// program's CFI, noting changes as we go, just to recover the
+// unwinding rules in effect at one particular instruction. To avoid
+// this, CFI data is grouped into "entries", each of which covers a
+// specified range of addresses and begins with a complete statement
+// of the rules for all recoverable registers at that starting
+// address. Each entry typically covers a single function.
+//
+// Thus, to compute the contents of a given row of the table --- that
+// is, rules for recovering the CFA, RA, and registers at a given
+// instruction --- the consumer should find the entry that covers that
+// instruction's address, start with the initial state supplied at the
+// beginning of the entry, and work forward until it has processed all
+// the changes up to and including those for the present instruction.
+//
+// There are seven kinds of rules that can appear in an entry of the
+// table:
+//
+// - "undefined": The given register is not preserved by the callee;
+//   its value cannot be recovered.
+//
+// - "same value": This register has the same value it did in the callee.
+//
+// - offset(N): The register is saved at offset N from the CFA.
+//
+// - val_offset(N): The value the register had in the caller is the
+//   CFA plus offset N. (This is usually only useful for describing
+//   the stack pointer.)
+//
+// - register(R): The register's value was saved in another register R.
+//
+// - expression(E): Evaluating the DWARF expression E using the
+//   current frame's registers' values yields the address at which the
+//   register was saved.
+//
+// - val_expression(E): Evaluating the DWARF expression E using the
+//   current frame's registers' values yields the value the register
+//   had in the caller.
+
+class CallFrameInfo {
+ public:
+  // The different kinds of entries one finds in CFI. Used internally,
+  // and for error reporting.
+  enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
+
+  // The handler class to which the parser hands the parsed call frame
+  // information.  Defined below.
+  class Handler;
+
+  // A reporter class, which CallFrameInfo uses to report errors
+  // encountered while parsing call frame information.  Defined below.
+  class Reporter;
+
+  // Create a DWARF CFI parser. BUFFER points to the contents of the
+  // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
+  // REPORTER is an error reporter the parser should use to report
+  // problems. READER is a ByteReader instance that has the endianness and
+  // address size set properly. Report the data we find to HANDLER.
+  //
+  // This class can also parse Linux C++ exception handling data, as found
+  // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
+  // placed in loadable segments so that it is present in the program's
+  // address space, and is interpreted by the C++ runtime to search the
+  // call stack for a handler interested in the exception being thrown,
+  // actually pop the frames, and find cleanup code to run.
+  //
+  // There are two differences between the call frame information described
+  // in the DWARF standard and the exception handling data Linux places in
+  // the .eh_frame section:
+  //
+  // - Exception handling data uses a different format for call frame
+  //   information entry headers. The distinguished CIE id, the way FDEs
+  //   refer to their CIEs, and the way the end of the series of entries is
+  //   determined are all slightly different.
+  //
+  //   If the constructor's EH_FRAME argument is true, then the
+  //   CallFrameInfo parses the entry headers as Linux C++ exception
+  //   handling data. If EH_FRAME is false or omitted, the CallFrameInfo
+  //   parses standard DWARF call frame information.
+  //
+  // - Linux C++ exception handling data uses CIE augmentation strings
+  //   beginning with 'z' to specify the presence of additional data after
+  //   the CIE and FDE headers and special encodings used for addresses in
+  //   frame description entries.
+  //
+  //   CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
+  //   exception handling data if you have supplied READER with the base
+  //   addresses needed to interpret the pointer encodings that 'z'
+  //   augmentations can specify. See the ByteReader interface for details
+  //   about the base addresses. See the CallFrameInfo::Handler interface
+  //   for details about the additional information one might find in
+  //   'z'-augmented data.
+  //
+  // Thus:
+  //
+  // - If you are parsing standard DWARF CFI, as found in a .debug_frame
+  //   section, you should pass false for the EH_FRAME argument, or omit
+  //   it, and you need not worry about providing READER with the
+  //   additional base addresses.
+  //
+  // - If you want to parse Linux C++ exception handling data from a
+  //   .eh_frame section, you should pass EH_FRAME as true, and call
+  //   READER's Set*Base member functions before calling our Start method.
+  //
+  // - If you want to parse DWARF CFI that uses the 'z' augmentations
+  //   (although I don't think any toolchain ever emits such data), you
+  //   could pass false for EH_FRAME, but call READER's Set*Base members.
+  //
+  // The extensions the Linux C++ ABI makes to DWARF for exception
+  // handling are described here, rather poorly:
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+  //
+  // The mechanics of C++ exception handling, personality routines,
+  // and language-specific data areas are described here, rather nicely:
+  // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
+
+  CallFrameInfo(const char* buffer, size_t buffer_length, ByteReader* reader,
+                Handler* handler, Reporter* reporter, bool eh_frame = false)
+      : buffer_(buffer),  // Only the pointer is stored; no copy is made.
+        buffer_length_(buffer_length),
+        reader_(reader),
+        handler_(handler),
+        reporter_(reporter),
+        eh_frame_(eh_frame) {}  // No parsing happens here; see Start().
+
+  ~CallFrameInfo() {}  // Empty: nothing is released on destruction.
+
+  // Parse the entries in BUFFER, reporting what we find to HANDLER.
+  // Return true if we reach the end of the section successfully, or
+  // false if we encounter an error.
+  bool Start();
+
+  // Return the textual name of KIND. For error reporting.
+  static const char* KindName(EntryKind kind);
+
+ private:
+  struct CIE;
+
+  // A CFI entry, either an FDE or a CIE.
+  struct Entry {  // Common base of the CIE and FDE structs below.
+    // The starting offset of the entry in the section, for error
+    // reporting.
+    size_t offset;
+
+    // The start of this entry in the buffer.
+    const char* start;
+
+    // Which kind of entry this is.
+    //
+    // We want to be able to use this for error reporting even while we're
+    // in the midst of parsing. Error reporting code may assume that kind,
+    // offset, and start fields are valid, although kind may be kUnknown.
+    EntryKind kind;
+
+    // The end of this entry's common prologue (initial length and id), and
+    // the start of this entry's kind-specific fields.
+    const char* fields;
+
+    // The start of this entry's instructions.
+    const char* instructions;
+
+    // The address past the entry's last byte in the buffer. (Note that
+    // since offset points to the entry's initial length field, and the
+    // length field is the number of bytes after that field, this is not
+    // simply buffer_ + offset + length.)
+    const char* end;
+
+    // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
+    // CIE, and the offset of the associated CIE in an FDE.
+    uint64 id;
+
+    // The CIE that applies to this entry, if we've parsed it. If this is a
+    // CIE, then this field points to this structure.
+    CIE* cie;
+  };
+
+  // A common information entry (CIE).
+  struct CIE : public Entry {  // Adds the CIE-specific fields to Entry.
+    uint8 version;                     // CFI data version number
+    std::string augmentation;          // vendor format extension markers
+    uint64 code_alignment_factor;      // scale for code address adjustments
+    int data_alignment_factor;         // scale for stack pointer adjustments
+    unsigned return_address_register;  // which register holds the return addr
+
+    // True if this CIE includes Linux C++ ABI 'z' augmentation data.
+    bool has_z_augmentation;
+
+    // Parsed 'z' augmentation data. These are meaningful only if
+    // has_z_augmentation is true.
+    bool has_z_lsda;          // The 'z' augmentation included 'L'.
+    bool has_z_personality;   // The 'z' augmentation included 'P'.
+    bool has_z_signal_frame;  // The 'z' augmentation included 'S'.
+
+    // If has_z_lsda is true, this is the encoding to be used for language-
+    // specific data area pointers in FDEs.
+    DwarfPointerEncoding lsda_encoding;
+
+    // If has_z_personality is true, this is the encoding used for the
+    // personality routine pointer in the augmentation data.
+    DwarfPointerEncoding personality_encoding;
+
+    // If has_z_personality is true, this is the address of the personality
+    // routine --- or, if IsIndirectEncoding(personality_encoding), the
+    // address where the personality routine's address is stored.
+    uint64 personality_address;
+
+    // This is the encoding used for addresses in the FDE header and
+    // in DW_CFA_set_loc instructions. This is always valid, whether
+    // or not we saw a 'z' augmentation string; its default value is
+    // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
+    DwarfPointerEncoding pointer_encoding;  // presumably set by 'R' -- confirm
+  };
+
+  // A frame description entry (FDE).
+  struct FDE : public Entry {  // 'cie' below is the member inherited from Entry.
+    uint64 address;  // start address of described code
+    uint64 size;     // size of described code, in bytes
+
+    // If cie->has_z_lsda is true, then this is the language-specific data
+    // area's address --- or its address's address, if cie->lsda_encoding
+    // has the DW_EH_PE_indirect bit set.
+    uint64 lsda_address;
+  };
+
+  // Internal use.
+  class Rule;
+  class RuleMapLowLevel;
+  class RuleMap;
+  class State;
+
+  // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
+  // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
+  // data to parse. On success, populate ENTRY as appropriate, and return
+  // true. On failure, report the problem, and return false. Even if we
+  // return false, set ENTRY->end to the first byte after the entry if we
+  // were able to figure that out, or NULL if we weren't.
+  bool ReadEntryPrologue(const char* cursor, Entry* entry);
+
+  // Parse the fields of a CIE after the entry prologue, including any 'z'
+  // augmentation data. Assume that the 'Entry' fields of CIE are
+  // populated; use CIE->fields and CIE->end as the start and limit for
+  // parsing. On success, populate the rest of *CIE, and return true; on
+  // failure, report the problem and return false.
+  bool ReadCIEFields(CIE* cie);
+
+  // Parse the fields of an FDE after the entry prologue, including any 'z'
+  // augmentation data. Assume that the 'Entry' fields of *FDE are
+  // initialized; use FDE->fields and FDE->end as the start and limit for
+  // parsing. Assume that FDE->cie is fully initialized. On success,
+  // populate the rest of *FDE, and return true; on failure, report the
+  // problem and return false.
+  bool ReadFDEFields(FDE* fde);
+
+  // Report that ENTRY is incomplete, and return false. This is just a
+  // trivial wrapper for invoking reporter_->Incomplete; it provides a
+  // little brevity.
+  bool ReportIncomplete(Entry* entry);
+
+  // Return true if ENCODING has the DW_EH_PE_indirect bit set.
+  static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
+    return encoding & DW_EH_PE_indirect;  // non-zero mask result yields true
+  }
+
+  // The contents of the .debug_frame or .eh_frame section we're parsing.
+  const char* buffer_;
+  size_t buffer_length_;
+
+  // For reading multi-byte values with the appropriate endianness.
+  ByteReader* reader_;
+
+  // The handler to which we should report the data we find.
+  Handler* handler_;
+
+  // For reporting problems in the info we're parsing.
+  Reporter* reporter_;
+
+  // True if we are processing .eh_frame-format data.
+  bool eh_frame_;
+};
+
+// The handler class for CallFrameInfo.  The CFI parser calls the
+// member functions of a handler object to report the data it finds.
+class CallFrameInfo::Handler {
+ public:
+  // The pseudo-register number for the canonical frame address.
+  enum { kCFARegister = DW_REG_CFA };
+
+  Handler() {}
+  virtual ~Handler() {}
+
+  // The parser has found CFI for the machine code at ADDRESS,
+  // extending for LENGTH bytes. OFFSET is the offset of the frame
+  // description entry in the section, for use in error messages.
+  // VERSION is the version number of the CFI format. AUGMENTATION is
+  // a string describing any producer-specific extensions present in
+  // the data. RETURN_ADDRESS is the number of the register that holds
+  // the address to which the function should return.
+  //
+  // Entry should return true to process this CFI, or false to skip to
+  // the next entry.
+  //
+  // The parser invokes Entry for each Frame Description Entry (FDE)
+  // it finds.  The parser doesn't report Common Information Entries
+  // to the handler explicitly; instead, if the handler elects to
+  // process a given FDE, the parser reiterates the appropriate CIE's
+  // contents at the beginning of the FDE's rules.
+  virtual bool Entry(size_t offset, uint64 address, uint64 length,
+                     uint8 version, const std::string& augmentation,
+                     unsigned return_address) = 0;
+
+  // When the Entry function returns true, the parser calls these
+  // handler functions repeatedly to describe the rules for recovering
+  // registers at each instruction in the given range of machine code.
+  // Immediately after a call to Entry, the handler should assume that
+  // the rule for each callee-saves register is "unchanged" --- that
+  // is, that the register still has the value it had in the caller.
+  //
+  // If a *Rule function returns true, we continue processing this entry's
+  // instructions. If a *Rule function returns false, we stop evaluating
+  // instructions, and skip to the next entry. Either way, we call End
+  // before going on to the next entry.
+  //
+  // In all of these functions, if the REG parameter is kCFARegister, then
+  // the rule describes how to find the canonical frame address.
+  // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
+  // the canonical frame address should be used as the base address for the
+  // computation. All other REG values will be positive.
+
+  // At ADDRESS, register REG's value is not recoverable.
+  virtual bool UndefinedRule(uint64 address, int reg) = 0;
+
+  // At ADDRESS, register REG's value is the same as that it had in
+  // the caller.
+  virtual bool SameValueRule(uint64 address, int reg) = 0;
+
+  // At ADDRESS, register REG has been saved at offset OFFSET from
+  // BASE_REGISTER.
+  virtual bool OffsetRule(uint64 address, int reg, int base_register,
+                          long offset) = 0;
+
+  // At ADDRESS, the caller's value of register REG is the current
+  // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
+  // address at which the register's value is saved.)
+  virtual bool ValOffsetRule(uint64 address, int reg, int base_register,
+                             long offset) = 0;
+
+  // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
+  // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
+  // BASE_REGISTER is the "home" for REG's saved value: if you want to
+  // assign to a variable whose home is REG in the calling frame, you
+  // should put the value in BASE_REGISTER.
+  virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0;
+
+  // At ADDRESS, the DWARF expression EXPRESSION yields the address at
+  // which REG was saved.
+  virtual bool ExpressionRule(uint64 address, int reg,
+                              const ImageSlice& expression) = 0;
+
+  // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
+  // value for REG. (This rule doesn't provide an address at which the
+  // register's value is saved.)
+  virtual bool ValExpressionRule(uint64 address, int reg,
+                                 const ImageSlice& expression) = 0;
+
+  // Indicate that the rules for the address range reported by the
+  // last call to Entry are complete.  End should return true if
+  // everything is okay, or false if an error has occurred and parsing
+  // should stop.
+  virtual bool End() = 0;
+
+  // Handler functions for Linux C++ exception handling data. These are
+  // only called if the data includes 'z' augmentation strings.
+
+  // The Linux C++ ABI uses an extension of the DWARF CFI format to
+  // walk the stack to propagate exceptions from the throw to the
+  // appropriate catch, and do the appropriate cleanups along the way.
+  // CFI entries used for exception handling have two additional pieces
+  // of data associated with them:
+  //
+  // - The "language-specific data area" describes which exception
+  //   types the function has 'catch' clauses for, and indicates how
+  //   to go about re-entering the function at the appropriate catch
+  //   clause. If the exception is not caught, it describes the
+  //   destructors that must run before the frame is popped.
+  //
+  // - The "personality routine" is responsible for interpreting the
+  //   language-specific data area's contents, and deciding whether
+  //   the exception should continue to propagate down the stack,
+  //   perhaps after doing some cleanup for this frame, or whether the
+  //   exception will be caught here.
+  //
+  // In principle, the language-specific data area is opaque to
+  // everybody but the personality routine. In practice, these values
+  // may be useful or interesting to readers with extra context, and
+  // we have to at least skip them anyway, so we might as well report
+  // them to the handler.
+
+  // This entry's exception handling personality routine's address is
+  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+  // which the routine's address is stored. The default definition for
+  // this handler function simply returns true, allowing parsing of
+  // the entry to continue.
+  virtual bool PersonalityRoutine(uint64 address, bool indirect) {
+    return true;
+  }
+
+  // This entry's language-specific data area (LSDA) is located at
+  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+  // which the area's address is stored. The default definition for
+  // this handler function simply returns true, allowing parsing of
+  // the entry to continue.
+  virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) {
+    return true;
+  }
+
+  // This entry describes a signal trampoline --- this frame is the
+  // caller of a signal handler. The default definition for this
+  // handler function simply returns true, allowing parsing of the
+  // entry to continue.
+  //
+  // The best description of the rationale for and meaning of signal
+  // trampoline CFI entries seems to be in the GCC bug database:
+  // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
+  virtual bool SignalHandler() { return true; }
+};
+
+// The CallFrameInfo class makes calls on an instance of this class to
+// report errors or warn about problems in the data it is parsing.
+// These messages are sent to the message sink |aLog| provided to the
+// constructor.
+class CallFrameInfo::Reporter {
+ public:
+  // Create an error reporter which attributes troubles to the section
+  // named SECTION in FILENAME.
+  //
+  // Normally SECTION would be .debug_frame, but the Mac puts CFI data
+  // in a Mach-O section named __debug_frame. If we support
+  // Linux-style exception handling data, we could be reading an
+  // .eh_frame section.
+  Reporter(void (*aLog)(const char*), const std::string& filename,
+           const std::string& section = ".debug_frame")
+      : log_(aLog), filename_(filename), section_(section) {}
+  virtual ~Reporter() {}
+
+  // The CFI entry at OFFSET ends too early to be well-formed. KIND
+  // indicates what kind of entry it is; KIND can be kUnknown if we
+  // haven't parsed enough of the entry to tell yet.
+  virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind);
+
+  // The .eh_frame data has a four-byte zero at OFFSET where the next
+  // entry's length would be; this is a terminator. However, the buffer
+  // length as given to the CallFrameInfo constructor says there should be
+  // more data.
+  virtual void EarlyEHTerminator(uint64 offset);
+
+  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
+  // section is not that large.
+  virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset);
+
+  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
+  // there is not a CIE.
+  virtual void BadCIEId(uint64 offset, uint64 cie_offset);
+
+  // The FDE at OFFSET refers to a CIE with version number VERSION,
+  // which we don't recognize. We cannot parse DWARF CFI if it uses
+  // a version number we don't recognize.
+  virtual void UnrecognizedVersion(uint64 offset, int version);
+
+  // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
+  // which we don't recognize. We cannot parse DWARF CFI if it uses
+  // augmentations we don't recognize.
+  virtual void UnrecognizedAugmentation(uint64 offset,
+                                        const std::string& augmentation);
+
+  // The FDE at OFFSET contains an invalid or otherwise unusable Dwarf4
+  // specific field (currently, only "address_size" or "segment_size").
+  // Parsing DWARF CFI with unexpected values here seems dubious at best,
+  // so we stop.  WHAT gives a little more information about what is wrong.
+  virtual void InvalidDwarf4Artefact(uint64 offset, const char* what);
+
+  // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
+  // a valid encoding.
+  virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding);
+
+  // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
+  // on a base address which has not been supplied.
+  virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding);
+
+  // The CIE at OFFSET contains a DW_CFA_restore instruction at
+  // INSN_OFFSET, which may not appear in a CIE.
+  virtual void RestoreInCIE(uint64 offset, uint64 insn_offset);
+
+  // The entry at OFFSET, of kind KIND, has an unrecognized
+  // instruction at INSN_OFFSET.
+  virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind,
+                              uint64 insn_offset);
+
+  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+  // KIND, establishes a rule that cites the CFA, but we have not
+  // established a CFA rule yet.
+  virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+                         uint64 insn_offset);
+
+  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+  // KIND, is a DW_CFA_restore_state instruction, but the stack of
+  // saved states is empty.
+  virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind,
+                               uint64 insn_offset);
+
+  // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry
+  // at OFFSET, of kind KIND, would restore a state that has no CFA
+  // rule, whereas the current state does have a CFA rule. This is
+  // bogus input, which the CallFrameInfo::Handler interface doesn't
+  // (and shouldn't) have any way to report.
+  virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+                               uint64 insn_offset);
+
+ private:
+  // A logging sink function, as supplied by LUL's user.
+  void (*log_)(const char*);
+
+ protected:
+  // The name of the file whose CFI we're reading.
+  std::string filename_;
+
+  // The name of the CFI section in that file.
+  std::string section_;
+};
+
+using lul::CallFrameInfo;
+using lul::Summariser;
+
+// A class that accepts parsed call frame information from the DWARF
+// CFI parser and passes it on to a Summariser (see the constructor's
+// SUMM argument).
+class DwarfCFIToModule : public CallFrameInfo::Handler {
+ public:
+  // DwarfCFIToModule uses an instance of this class to report errors
+  // detected while processing the DWARF CFI handed to it.
+  class Reporter {
+   public:
+    // Create a reporter that writes messages to the message sink
+    // |aLog|. FILE is the name of the file we're processing, and
+    // SECTION is the name of the section within that file that we're
+    // looking at (.debug_frame, .eh_frame, etc.).
+    Reporter(void (*aLog)(const char*), const std::string& file,
+             const std::string& section)
+        : log_(aLog), file_(file), section_(section) {}
+    virtual ~Reporter() {}
+
+    // The DWARF CFI entry at OFFSET says that REG is undefined, but the
+    // Breakpad symbol file format cannot express this.
+    virtual void UndefinedNotSupported(size_t offset, const UniqueString* reg);
+
+    // The DWARF CFI entry at OFFSET says that REG uses a DWARF
+    // expression to find its value, but parseDwarfExpr could not
+    // convert it to a sequence of PfxInstrs.
+    virtual void ExpressionCouldNotBeSummarised(size_t offset,
+                                                const UniqueString* reg);
+
+   private:
+    // A logging sink function, as supplied by LUL's user.
+    void (*log_)(const char*);
+
+   protected:
+    std::string file_, section_;
+  };
+
+  // Per-architecture register information. NOTE(review): each function
+  // returns an unsigned int, not a vector of names; presumably the number
+  // of DWARF registers for that architecture (cf. num_dw_regs) -- confirm.
+  class RegisterNames {
+   public:
+    // Intel's "x86" or IA-32.
+    static unsigned int I386();
+
+    // AMD x86_64, AMD64, Intel EM64T, or Intel 64
+    static unsigned int X86_64();
+
+    // ARM.
+    static unsigned int ARM();
+
+    // AARCH64.
+    static unsigned int ARM64();
+
+    // MIPS.
+    static unsigned int MIPS();
+  };
+
+  // Create a handler for the CallFrameInfo parser that records the
+  // stack unwinding information it receives in SUMM.
+  //
+  // NUM_DW_REGS is the number of Dwarf-defined register names for this
+  // architecture. READER is used for parsing Dwarf expressions. USU is
+  // the universe in which UniqueStrings are created, if necessary.
+  //
+  // Use REPORTER for reporting problems encountered in the conversion
+  // process.
+  DwarfCFIToModule(const unsigned int num_dw_regs, Reporter* reporter,
+                   ByteReader* reader,
+                   /*MOD*/ UniqueStringUniverse* usu,
+                   /*OUT*/ Summariser* summ)
+      : summ_(summ),
+        usu_(usu),
+        num_dw_regs_(num_dw_regs),
+        reporter_(reporter),
+        reader_(reader),
+        return_address_(-1) {}  // NB: entry_offset_ is not initialised here.
+  virtual ~DwarfCFIToModule() {}
+
+  virtual bool Entry(size_t offset, uint64 address, uint64 length,
+                     uint8 version, const std::string& augmentation,
+                     unsigned return_address) override;
+  virtual bool UndefinedRule(uint64 address, int reg) override;
+  virtual bool SameValueRule(uint64 address, int reg) override;
+  virtual bool OffsetRule(uint64 address, int reg, int base_register,
+                          long offset) override;
+  virtual bool ValOffsetRule(uint64 address, int reg, int base_register,
+                             long offset) override;
+  virtual bool RegisterRule(uint64 address, int reg,
+                            int base_register) override;
+  virtual bool ExpressionRule(uint64 address, int reg,
+                              const ImageSlice& expression) override;
+  virtual bool ValExpressionRule(uint64 address, int reg,
+                                 const ImageSlice& expression) override;
+  virtual bool End() override;
+
+ private:
+  // Return the name to use for register I.
+  const UniqueString* RegisterName(int i);
+
+  // The Summariser to which we should give entries
+  Summariser* summ_;
+
+  // Universe for creating UniqueStrings in, should that be necessary.
+  UniqueStringUniverse* usu_;
+
+  // The number of Dwarf-defined register names for this architecture.
+  const unsigned int num_dw_regs_;
+
+  // The reporter to use to report problems.
+  Reporter* reporter_;
+
+  // The ByteReader to use for parsing Dwarf expressions.
+  ByteReader* reader_;
+
+  // The section offset of the current frame description entry, for
+  // use in error messages.
+  size_t entry_offset_;
+
+  // The return address column for that entry.
+  unsigned return_address_;
+};
+
+// Convert the Dwarf expression in |expr| into PfxInstrs stored in the
+// SecMap referred to by |summ|, and return the index of the starting
+// PfxInstr added, which must be >= 0.  In case of failure return -1.
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader,
+                       ImageSlice expr, bool debug, bool pushCfaAtStart,
+                       bool derefAtEnd);
+
+}  // namespace lul
+
+#endif  // LulDwarfExt_h
diff --git a/tools/profiler/lul/LulDwarfInt.h b/tools/profiler/lul/LulDwarfInt.h
new file mode 100644
index 0000000000..b72c6e08e3
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfInt.h
@@ -0,0 +1,193 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following file in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/dwarf2enums.h
+
+#ifndef LulDwarfInt_h
+#define LulDwarfInt_h
+
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+
+namespace lul {
+
+// These enums do not follow the google3 style only because they are
+// known universally (specs, other implementations) by the names in
+// exactly this capitalization.
+// Tag names and codes.
+
+// Call Frame Info instructions.
+enum DwarfCFI {
+  DW_CFA_advance_loc = 0x40,  // These first three values have only their
+  DW_CFA_offset = 0x80,       // top two bits set, unlike the opcodes
+  DW_CFA_restore = 0xc0,      // below, which all fit in the low six bits.
+  DW_CFA_nop = 0x00,
+  DW_CFA_set_loc = 0x01,
+  DW_CFA_advance_loc1 = 0x02,
+  DW_CFA_advance_loc2 = 0x03,
+  DW_CFA_advance_loc4 = 0x04,
+  DW_CFA_offset_extended = 0x05,
+  DW_CFA_restore_extended = 0x06,
+  DW_CFA_undefined = 0x07,
+  DW_CFA_same_value = 0x08,
+  DW_CFA_register = 0x09,
+  DW_CFA_remember_state = 0x0a,
+  DW_CFA_restore_state = 0x0b,
+  DW_CFA_def_cfa = 0x0c,
+  DW_CFA_def_cfa_register = 0x0d,
+  DW_CFA_def_cfa_offset = 0x0e,
+  DW_CFA_def_cfa_expression = 0x0f,
+  DW_CFA_expression = 0x10,
+  DW_CFA_offset_extended_sf = 0x11,
+  DW_CFA_def_cfa_sf = 0x12,
+  DW_CFA_def_cfa_offset_sf = 0x13,
+  DW_CFA_val_offset = 0x14,
+  DW_CFA_val_offset_sf = 0x15,
+  DW_CFA_val_expression = 0x16,
+
+  // Opcodes in this range are reserved for user extensions.
+  DW_CFA_lo_user = 0x1c,
+  DW_CFA_hi_user = 0x3f,
+
+  // SGI/MIPS specific.  (Value lies within the user-extension range.)
+  DW_CFA_MIPS_advance_loc8 = 0x1d,
+
+  // GNU extensions.  (Values lie within the user-extension range.)
+  DW_CFA_GNU_window_save = 0x2d,
+  DW_CFA_GNU_args_size = 0x2e,
+  DW_CFA_GNU_negative_offset_extended = 0x2f
+};
+
+// Letters that may appear in an exception handling 'z' augmentation string.
+enum DwarfZAugmentationCodes {
+  // If the CFI augmentation string begins with 'z', then the CIE and FDE
+  // have an augmentation data area just before the instructions, whose
+  // contents are determined by the subsequent augmentation letters.
+  DW_Z_augmentation_start = 'z',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding, and the FDE
+  // augmentation data includes a language-specific data area pointer,
+  // represented using that encoding.
+  DW_Z_has_LSDA = 'L',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding, followed by a pointer
+  // to a personality routine, represented using that encoding.
+  DW_Z_has_personality_routine = 'P',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding describing how the FDE's
+  // initial location, address range, and DW_CFA_set_loc operands are
+  // encoded.
+  DW_Z_has_FDE_address_encoding = 'R',
+
+  // If this letter is present in a 'z' augmentation string, then code
+  // addresses covered by FDEs that cite this CIE are signal delivery
+  // trampolines. Return addresses of frames in trampolines should not be
+  // adjusted as described in section 6.4.4 of the DWARF 3 spec.
+  DW_Z_is_signal_trampoline = 'S'
+};
+
+// Expression opcodes
+enum DwarfExpressionOpcodes {
+  DW_OP_addr = 0x03,
+  DW_OP_deref = 0x06,
+  DW_OP_const1s = 0x09,  // NOTE(review): 0x08 (DW_OP_const1u in the DWARF
+  DW_OP_const2u = 0x0a,  // spec) has no enumerator here -- confirm intended.
+  DW_OP_const2s = 0x0b,
+  DW_OP_const4u = 0x0c,
+  DW_OP_const4s = 0x0d,
+  DW_OP_const8u = 0x0e,
+  DW_OP_const8s = 0x0f,
+  DW_OP_constu = 0x10,
+  DW_OP_consts = 0x11,
+  DW_OP_dup = 0x12,
+  DW_OP_drop = 0x13,
+  DW_OP_over = 0x14,
+  DW_OP_pick = 0x15,
+  DW_OP_swap = 0x16,
+  DW_OP_rot = 0x17,
+  DW_OP_xderef = 0x18,
+  DW_OP_abs = 0x19,
+  DW_OP_and = 0x1a,
+  DW_OP_div = 0x1b,
+  DW_OP_minus = 0x1c,
+  DW_OP_mod = 0x1d,
+  DW_OP_mul = 0x1e,
+  DW_OP_neg = 0x1f,
+  DW_OP_not = 0x20,
+  DW_OP_or = 0x21,
+  DW_OP_plus = 0x22,
+  DW_OP_plus_uconst = 0x23,
+  DW_OP_shl = 0x24,
+  DW_OP_shr = 0x25,
+  DW_OP_shra = 0x26,
+  DW_OP_xor = 0x27,
+  DW_OP_skip = 0x2f,  // Listed out of numeric order; the value is 0x2f.
+  DW_OP_bra = 0x28,
+  DW_OP_eq = 0x29,
+  DW_OP_ge = 0x2a,
+  DW_OP_gt = 0x2b,
+  DW_OP_le = 0x2c,
+  DW_OP_lt = 0x2d,
+  DW_OP_ne = 0x2e,
+  DW_OP_lit0 = 0x30,   // Only the first and last of each 32-value run
+  DW_OP_lit31 = 0x4f,  // (lit, reg, breg) are given enumerators.
+  DW_OP_reg0 = 0x50,
+  DW_OP_reg31 = 0x6f,
+  DW_OP_breg0 = 0x70,
+  DW_OP_breg31 = 0x8f,
+  DW_OP_regx = 0x90,
+  DW_OP_fbreg = 0x91,
+  DW_OP_bregx = 0x92,
+  DW_OP_piece = 0x93,
+  DW_OP_deref_size = 0x94,
+  DW_OP_xderef_size = 0x95,
+  DW_OP_nop = 0x96,
+  DW_OP_push_object_address = 0x97,
+  DW_OP_call2 = 0x98,
+  DW_OP_call4 = 0x99,
+  DW_OP_call_ref = 0x9a,
+  DW_OP_form_tls_address = 0x9b,
+  DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
+  DW_OP_lo_user = 0xe0,
+  DW_OP_hi_user = 0xff
+};
+
+}  // namespace lul
+
+#endif  // LulDwarfInt_h
diff --git a/tools/profiler/lul/LulDwarfSummariser.cpp b/tools/profiler/lul/LulDwarfSummariser.cpp
new file mode 100644
index 0000000000..e9172c3e18
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfSummariser.cpp
@@ -0,0 +1,549 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulDwarfSummariser.h"
+
+#include "LulDwarfExt.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_SUMMARISER 0
+
+namespace lul {
+
+// Returns true iff sign-extending the low 32 bits of |s64| reproduces
+// |s64| itself, that is, iff |s64| is representable as a signed 32-bit
+// value.
+static inline bool fitsIn32Bits(int64 s64) {
+  // Keep only the low 32 bits ...
+  const int64 low32 = s64 & 0xffffffff;
+  // ... and sign extend them from bit 31 using the xor/subtract trick.
+  const int64 signExtended = (low32 ^ 0x80000000) - 0x80000000;
+  return signExtended == s64;
+}
+
+// Validate the LExpr prefix expression that begins at pfxInstrs[start]
+// and runs up to the next PX_End instruction (or the end of the vector).
+// The checks are:
+// * |start| is a valid index into |pfxInstrs|
+// * every PX_DwReg instruction names a register tracked on this target
+// Returns nullptr if the expression is acceptable.  Otherwise returns a
+// pointer to constant text describing the problem.
+static const char* checkPfxExpr(const vector<PfxInstr>* pfxInstrs,
+                                int64_t start) {
+  const size_t count = pfxInstrs->size();
+  const bool startInRange = start >= 0 && start < (ssize_t)count;
+  if (!startInRange) {
+    return "bogus start point";
+  }
+  for (size_t ix = start; ix < count; ++ix) {
+    const PfxInstr instr = (*pfxInstrs)[ix];
+    if (instr.mOpcode == PX_End) {
+      // Reached the terminator without finding a problem.
+      return nullptr;
+    }
+    const bool isRegRef = instr.mOpcode == PX_DwReg;
+    if (isRegRef && !registerIsTracked((DW_REG_NUMBER)instr.mOperand)) {
+      return "uses untracked reg";
+    }
+  }
+  return nullptr;  // success
+}
+
+// Construct a summariser that parks its results in |aSecMap|, biases all
+// incoming addresses by |aTextBias| and sends debug output to |aLog|.
+Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias,
+                       void (*aLog)(const char*))
+    : mSecMap(aSecMap),
+      mCurrAddr(0),
+      mMax1Addr(0),  // Equal to mCurrAddr, so the range is empty.
+      mTextBias(aTextBias),
+      mLog(aLog) {
+  // Initialise the running RuleSet to "haven't got a clue" status.
+  new (&mCurrRules) RuleSet();
+}
+
+// Begin summarising a new address range of |aLength| bytes starting at
+// |aAddress| (before biasing).
+void Summariser::Entry(uintptr_t aAddress, uintptr_t aLength) {
+  const uintptr_t biasedStart = aAddress + mTextBias;
+  if (DEBUG_SUMMARISER) {
+    char msg[100];
+    SprintfLiteral(msg, "LUL Entry(%llx, %llu)\n",
+                   (unsigned long long int)biasedStart,
+                   (unsigned long long int)aLength);
+    mLog(msg);
+  }
+  // Any previous summary is discarded here: the assumption is that it
+  // has already been properly finished by a call to End().
+  new (&mCurrRules) RuleSet();
+  mCurrAddr = biasedStart;
+  mMax1Addr = biasedStart + aLength;
+}
+
+void Summariser::Rule(uintptr_t aAddress, int aNewReg, LExprHow how,
+                      int16_t oldReg, int64_t offset) {
+  aAddress += mTextBias;  // Bias the incoming SVMA.
+  if (DEBUG_SUMMARISER) {
+    char buf[100];
+    if (how == NODEREF || how == DEREF) {
+      bool deref = how == DEREF;
+      SprintfLiteral(buf, "LUL  0x%llx  old-r%d = %sr%d + %lld%s\n",
+                     (unsigned long long int)aAddress, aNewReg,
+                     deref ? "*(" : "", (int)oldReg, (long long int)offset,
+                     deref ? ")" : "");
+    } else if (how == PFXEXPR) {
+      SprintfLiteral(buf, "LUL  0x%llx  old-r%d = pfx-expr-at %lld\n",
+                     (unsigned long long int)aAddress, aNewReg,
+                     (long long int)offset);
+    } else {
+      SprintfLiteral(buf, "LUL  0x%llx  old-r%d = (invalid LExpr!)\n",
+                     (unsigned long long int)aAddress, aNewReg);
+    }
+    mLog(buf);
+  }
+
+  if (mCurrAddr < aAddress) {
+    // Flush the summary so far; it covers [mCurrAddr, aAddress).
+    mSecMap->AddRuleSet(&mCurrRules, mCurrAddr, aAddress - mCurrAddr);
+    if (DEBUG_SUMMARISER) {
+      mLog("LUL  ");
+      mCurrRules.Print(mCurrAddr, aAddress - mCurrAddr, mLog);
+      mLog("\n");
+    }
+    mCurrAddr = aAddress;
+  }
+
+  // If for some reason summarisation fails, either or both of these
+  // become non-null and point at constant text describing the
+  // problem.  Using two rather than just one avoids complications of
+  // having to concatenate two strings to produce a complete error message.
+  const char* reason1 = nullptr;
+  const char* reason2 = nullptr;
+
+  // |offset| needs to be a 32 bit value that sign extends to 64 bits
+  // on a 64 bit target.  We will need to incorporate |offset| into
+  // any LExpr made here.  So we may as well check it right now.
+  if (!fitsIn32Bits(offset)) {
+    reason1 = "offset not in signed 32-bit range";
+    goto cant_summarise;
+  }
+
+  // FIXME: factor out common parts of the arch-dependent summarisers.
+
+#if defined(GP_ARCH_arm)
+
+  // ----------------- arm ----------------- //
+
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      // This is a rule that defines the CFA.  The only forms we
+      // choose to represent are: r7/11/12/13 + offset.  The offset
+      // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM,
+      // hence there is no need to check it for overflow.
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      switch (oldReg) {
+        case DW_REG_ARM_R7:
+        case DW_REG_ARM_R11:
+        case DW_REG_ARM_R12:
+        case DW_REG_ARM_R13:
+          break;
+        default:
+          reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_ARM_R7:
+    case DW_REG_ARM_R11:
+    case DW_REG_ARM_R12:
+    case DW_REG_ARM_R13:
+    case DW_REG_ARM_R14:
+    case DW_REG_ARM_R15: {
+      // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or
+      // R15 (the return address).
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for R7/11/12/13/14/15: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for R7/11/12/13/14/15: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_ARM_R7:
+          mCurrRules.mR7expr = expr;
+          break;
+        case DW_REG_ARM_R11:
+          mCurrRules.mR11expr = expr;
+          break;
+        case DW_REG_ARM_R12:
+          mCurrRules.mR12expr = expr;
+          break;
+        case DW_REG_ARM_R13:
+          mCurrRules.mR13expr = expr;
+          break;
+        case DW_REG_ARM_R14:
+          mCurrRules.mR14expr = expr;
+          break;
+        case DW_REG_ARM_R15:
+          mCurrRules.mR15expr = expr;
+          break;
+        default:
+          MOZ_ASSERT(0);
+      }
+      break;
+    }
+
+    default:
+      // Leave |reason1| and |reason2| unset here.  This program point
+      // is reached so often that it causes a flood of "Can't
+      // summarise" messages.  In any case, we don't really care about
+      // the fact that this summary would produce a new value for a
+      // register that we're not tracking.  We do on the other hand
+      // care if the summary's expression *uses* a register that we're
+      // not tracking.  But in that case one of the above failures
+      // should tell us which.
+      goto cant_summarise;
+  }
+
+  // Mark R7, R11 and R12 as unchanged, if there is no other
+  // information about them.  FIXME: do this just once, at
+  // the point where the ruleset is committed.
+  if (mCurrRules.mR7expr.mHow == UNKNOWN) {
+    mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0);
+  }
+  if (mCurrRules.mR11expr.mHow == UNKNOWN) {
+    mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0);
+  }
+  if (mCurrRules.mR12expr.mHow == UNKNOWN) {
+    mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0);
+  }
+
+  // The old r13 (SP) value before the call is always the same as the
+  // CFA, so this unconditionally overrides any R13 rule set above.
+  mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0);
+
+  // If there's no information about R15 (the return address), say
+  // it's a copy of R14 (the link register).
+  if (mCurrRules.mR15expr.mHow == UNKNOWN) {
+    mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0);
+  }
+
+#elif defined(GP_ARCH_arm64)
+
+  // ----------------- arm64 ----------------- //
+
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      switch (oldReg) {
+        case DW_REG_AARCH64_X29:
+        case DW_REG_AARCH64_SP:
+          break;
+        default:
+          reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_AARCH64_X29:
+    case DW_REG_AARCH64_X30:
+    case DW_REG_AARCH64_SP: {
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for X29/X30/SP: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for X29/X30/SP: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_AARCH64_X29:
+          mCurrRules.mX29expr = expr;
+          break;
+        case DW_REG_AARCH64_X30:
+          mCurrRules.mX30expr = expr;
+          break;
+        case DW_REG_AARCH64_SP:
+          mCurrRules.mSPexpr = expr;
+          break;
+        default:
+          MOZ_ASSERT(0);
+      }
+      break;
+    }
+    default:
+      // Leave |reason1| and |reason2| unset here, for the reasons
+      // explained in the analogous point in the ARM case just above.
+      goto cant_summarise;
+  }
+
+  if (mCurrRules.mX29expr.mHow == UNKNOWN) {
+    mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0);
+  }
+  if (mCurrRules.mX30expr.mHow == UNKNOWN) {
+    mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0);
+  }
+  // On aarch64, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+    mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
+#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+
+  // ---------------- x64/x86 ---------------- //
+
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+    case DW_REG_CFA: {
+      // This is a rule that defines the CFA.  The only forms we choose to
+      // represent are: = SP+offset, = FP+offset, or =prefix-expr.
+      switch (how) {
+        case NODEREF:
+          if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) {
+            reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+            goto cant_summarise;
+          }
+          break;
+        case DEREF:
+          reason1 = "rule for DW_REG_CFA: invalid |how|";
+          goto cant_summarise;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for CFA: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+    }
+
+    case DW_REG_INTEL_XSP:
+    case DW_REG_INTEL_XBP:
+    case DW_REG_INTEL_XIP: {
+      // This is a new rule for XSP, XBP or XIP (the return address).
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for XSP/XBP/XIP: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for XSP/XBP/XIP: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_INTEL_XBP:
+          mCurrRules.mXbpExpr = expr;
+          break;
+        case DW_REG_INTEL_XSP:
+          mCurrRules.mXspExpr = expr;
+          break;
+        case DW_REG_INTEL_XIP:
+          mCurrRules.mXipExpr = expr;
+          break;
+        default:
+          MOZ_CRASH("impossible value for aNewReg");
+      }
+      break;
+    }
+
+    default:
+      // Leave |reason1| and |reason2| unset here, for the reasons
+      // explained in the analogous point in the ARM case just above.
+      goto cant_summarise;
+  }
+
+  // On Intel, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mXspExpr.mHow == UNKNOWN) {
+    mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
+
+  // Also, gcc says "Undef" for BP when it is unchanged.
+  if (mCurrRules.mXbpExpr.mHow == UNKNOWN) {
+    mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0);
+  }
+
+#elif defined(GP_ARCH_mips64)
+  // ---------------- mips64 ---------------- //
+  //
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      // This is a rule that defines the CFA.  The only forms we can
+      // represent are: = SP+offset or = FP+offset.
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      if (oldReg != DW_REG_MIPS_SP && oldReg != DW_REG_MIPS_FP) {
+        reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+        goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_MIPS_SP:
+    case DW_REG_MIPS_FP:
+    case DW_REG_MIPS_PC: {
+      // This is a new rule for SP, FP or PC (the return address).
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for SP/FP/PC: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for SP/FP/PC: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_MIPS_FP:
+          mCurrRules.mFPexpr = expr;
+          break;
+        case DW_REG_MIPS_SP:
+          mCurrRules.mSPexpr = expr;
+          break;
+        case DW_REG_MIPS_PC:
+          mCurrRules.mPCexpr = expr;
+          break;
+        default:
+          MOZ_CRASH("impossible value for aNewReg");
+      }
+      break;
+    }
+    default:
+      // Leave |reason1| and |reason2| unset here, for the reasons
+      // explained in the analogous point in the ARM case just above.
+      goto cant_summarise;
+  }
+
+  // On MIPS, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+    mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
+
+  // Also, gcc says "Undef" for FP when it is unchanged.
+  if (mCurrRules.mFPexpr.mHow == UNKNOWN) {
+    mCurrRules.mFPexpr = LExpr(NODEREF, DW_REG_MIPS_FP, 0);
+  }
+
+#else
+
+#  error "Unsupported arch"
+#endif
+
+  return;  // The rule was accepted into mCurrRules.
+
+cant_summarise:
+  if (reason1 || reason2) {  // Stay silent when no reason was recorded.
+    char buf[200];
+    SprintfLiteral(buf,
+                   "LUL  can't summarise: "
+                   "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n",
+                   (unsigned long long int)(aAddress - mTextBias),
+                   reason1 ? reason1 : "", reason2 ? reason2 : "",
+                   NameOf_LExprHow(how), (unsigned int)oldReg,
+                   (long long int)offset);
+    mLog(buf);
+  }
+}
+
+// Forward |pfxi| to the SecMap and hand back whatever the SecMap returns.
+uint32_t Summariser::AddPfxInstr(PfxInstr pfxi) {
+  const uint32_t result = mSecMap->AddPfxInstr(pfxi);
+  return result;
+}
+
+// Finish the current summary: if the range [mCurrAddr, mMax1Addr) is
+// nonempty, hand the running RuleSet for it to the SecMap.
+void Summariser::End() {
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL End\n");
+  }
+  // Nothing is pending when the remaining range is empty.
+  if (mCurrAddr >= mMax1Addr) {
+    return;
+  }
+  const uintptr_t pendingLen = mMax1Addr - mCurrAddr;
+  mSecMap->AddRuleSet(&mCurrRules, mCurrAddr, pendingLen);
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL  ");
+    mCurrRules.Print(mCurrAddr, pendingLen, mLog);
+    mLog("\n");
+  }
+}
+
+}  // namespace lul
diff --git a/tools/profiler/lul/LulDwarfSummariser.h b/tools/profiler/lul/LulDwarfSummariser.h
new file mode 100644
index 0000000000..30f1ba23c1
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfSummariser.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulDwarfSummariser_h
+#define LulDwarfSummariser_h
+
+#include "LulMainInt.h"
+
+namespace lul {
+
+// Receives unwind rules for a range of addresses, builds summaries
+// (RuleSets) from them and parks the finished summaries in a SecMap.
+class Summariser {
+ public:
+  Summariser(SecMap* aSecMap, uintptr_t aTextBias, void (*aLog)(const char*));
+
+  // The class has virtual methods, so give it a virtual destructor: that
+  // makes destroying a derived object through a Summariser* well defined.
+  virtual ~Summariser() = default;
+
+  // Start a new summary for the |aLength| bytes beginning at |aAddress|.
+  virtual void Entry(uintptr_t aAddress, uintptr_t aLength);
+
+  // Finish the summary started by the most recent call to Entry().
+  virtual void End();
+
+  // Tell the summariser that the value for |aNewReg| at |aAddress| is
+  // recovered using the LExpr that can be constructed using the
+  // components |how|, |oldReg| and |offset|.  The summariser will
+  // inspect the components and may reject them for various reasons,
+  // but the hope is that it will find them acceptable and record this
+  // rule permanently.
+  virtual void Rule(uintptr_t aAddress, int aNewReg, LExprHow how,
+                    int16_t oldReg, int64_t offset);
+
+  // Add |pfxi| to the SecMap's PfxInstrs and return the SecMap's result.
+  virtual uint32_t AddPfxInstr(PfxInstr pfxi);
+
+  // Send output to the logging sink, for debugging.
+  virtual void Log(const char* str) { mLog(str); }
+
+ private:
+  // The SecMap in which we park the finished summaries (RuleSets) and
+  // also any PfxInstrs derived from Dwarf expressions.
+  SecMap* mSecMap;
+
+  // Running state for the current summary (RuleSet) under construction.
+  RuleSet mCurrRules;
+
+  // The start of the address range to which the RuleSet under
+  // construction applies.
+  uintptr_t mCurrAddr;
+
+  // The highest address, plus one, for which the RuleSet under
+  // construction could possibly apply.  If there are no further
+  // incoming events then mCurrRules will eventually be emitted
+  // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is
+  // nonempty.
+  uintptr_t mMax1Addr;
+
+  // The bias value (to add to the SVMAs, to get AVMAs) to be used
+  // when adding entries into mSecMap.
+  uintptr_t mTextBias;
+
+  // A logging sink, for debugging.
+  void (*mLog)(const char* aFmt);
+};
+
+}  // namespace lul
+
+#endif  // LulDwarfSummariser_h
diff --git a/tools/profiler/lul/LulElf.cpp b/tools/profiler/lul/LulElf.cpp
new file mode 100644
index 0000000000..28980a1349
--- /dev/null
+++ b/tools/profiler/lul/LulElf.cpp
@@ -0,0 +1,887 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// (derived from)
+// dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
+// Find all the debugging info in a file and dump it as a Breakpad symbol file.
+//
+// dump_symbols.h: Read debugging information from an ELF file, and write
+// it out as a Breakpad symbol file.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/linux/dump_symbols.cc
+//   src/common/linux/elfutils.cc
+//   src/common/linux/file_id.cc
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "PlatformMacros.h"
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+#include "LulElfInt.h"
+#include "LulMainInt.h"
+
+#if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
+// bionic and older glibc don't define it
+#  define SHT_ARM_EXIDX (SHT_LOPROC + 1)
+#endif
+
+#if (defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)) && \
+    !defined(SHT_X86_64_UNWIND)
+// This is sometimes necessary on x86_64-android and x86_64-linux.
+#  define SHT_X86_64_UNWIND 0x70000001
+#endif
+
+// Old Linux header doesn't define EM_AARCH64
+#ifndef EM_AARCH64
+#  define EM_AARCH64 183
+#endif
+
+// This namespace contains helper functions.
+namespace {
+
+using lul::DwarfCFIToModule;
+using lul::FindElfSectionByName;
+using lul::GetOffset;
+using lul::IsValidElf;
+using lul::Module;
+using lul::scoped_ptr;
+using lul::Summariser;
+using lul::UniqueStringUniverse;
+using std::set;
+using std::string;
+using std::vector;
+
+//
+// FDWrapper
+//
+// Wrapper class to make sure opened file is closed.  The wrapper owns
+// the descriptor until release() is called.  It is deliberately
+// non-copyable: a copy would close the same descriptor a second time.
+//
+class FDWrapper {
+ public:
+  explicit FDWrapper(int fd) : fd_(fd) {}
+  FDWrapper(const FDWrapper&) = delete;
+  FDWrapper& operator=(const FDWrapper&) = delete;
+  ~FDWrapper() {
+    if (fd_ != -1) close(fd_);
+  }
+
+  // Returns the wrapped descriptor; ownership stays with the wrapper.
+  int get() const { return fd_; }
+
+  // Gives up ownership: returns the descriptor and forgets it, so that
+  // the destructor will no longer close it.
+  int release() {
+    int fd = fd_;
+    fd_ = -1;
+    return fd;
+  }
+
+ private:
+  int fd_;  // -1 means "nothing to close".
+};
+
+//
+// MmapWrapper
+//
+// Wrapper class to make sure mapped regions are unmapped.  The wrapper
+// owns the region passed to set() until release() is called.  It is
+// deliberately non-copyable: a copy would unmap the same region twice.
+//
+class MmapWrapper {
+ public:
+  MmapWrapper() : is_set_(false), base_(nullptr), size_(0) {}
+  MmapWrapper(const MmapWrapper&) = delete;
+  MmapWrapper& operator=(const MmapWrapper&) = delete;
+  ~MmapWrapper() {
+    if (is_set_ && base_ != nullptr) {
+      MOZ_ASSERT(size_ > 0);
+      munmap(base_, size_);
+    }
+  }
+
+  // Take ownership of the |mapped_size| bytes mapped at |mapped_address|.
+  void set(void* mapped_address, size_t mapped_size) {
+    is_set_ = true;
+    base_ = mapped_address;
+    size_ = mapped_size;
+  }
+
+  // Give up ownership without unmapping.  Must only be called after set().
+  void release() {
+    MOZ_ASSERT(is_set_);
+    is_set_ = false;
+    base_ = nullptr;
+    size_ = 0;
+  }
+
+ private:
+  bool is_set_;  // True while a region is owned.
+  void* base_;   // Start of the owned region.
+  size_t size_;  // Length of the owned region, in bytes.
+};
+
+// Look up the number of Dwarf register names appropriate to the machine
+// architecture given in ELF_HEADER and store it in *NUM_DW_REGNAMES.
+// Returns true on success.  If the architecture is not supported,
+// returns false and leaves *NUM_DW_REGNAMES untouched.
+template <typename ElfClass>
+bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
+                           unsigned int* num_dw_regnames) {
+  unsigned int count;
+  switch (elf_header->e_machine) {
+    case EM_386:
+      count = DwarfCFIToModule::RegisterNames::I386();
+      break;
+    case EM_ARM:
+      count = DwarfCFIToModule::RegisterNames::ARM();
+      break;
+    case EM_X86_64:
+      count = DwarfCFIToModule::RegisterNames::X86_64();
+      break;
+    case EM_MIPS:
+      count = DwarfCFIToModule::RegisterNames::MIPS();
+      break;
+    case EM_AARCH64:
+      count = DwarfCFIToModule::RegisterNames::ARM64();
+      break;
+    default:
+      // Unknown machine type.
+      MOZ_ASSERT(0);
+      return false;
+  }
+  *num_dw_regnames = count;
+  return true;
+}
+
+// Parse the call frame information held in SECTION of the ELF image at
+// ELF_HEADER and feed it, through a Summariser, into SMAP.  EH_FRAME
+// says whether the section is in .eh_frame format.  GOT_SECTION and
+// TEXT_SECTION may be null; when present they supply base addresses for
+// encoded pointers.  Returns false only if the machine architecture is
+// not recognised.
+template <typename ElfClass>
+bool LoadDwarfCFI(const string& dwarf_filename,
+                  const typename ElfClass::Ehdr* elf_header,
+                  const char* section_name,
+                  const typename ElfClass::Shdr* section, const bool eh_frame,
+                  const typename ElfClass::Shdr* got_section,
+                  const typename ElfClass::Shdr* text_section,
+                  const bool big_endian, SecMap* smap, uintptr_t text_bias,
+                  UniqueStringUniverse* usu, void (*log)(const char*)) {
+  // Find the appropriate set of register names for this file's
+  // architecture.
+  unsigned int reg_count = 0;
+  const bool arch_known =
+      DwarfCFIRegisterNames<ElfClass>(elf_header, &reg_count);
+  if (!arch_known) {
+    fprintf(stderr,
+            "%s: unrecognized ELF machine architecture '%d';"
+            " cannot convert DWARF call frame information\n",
+            dwarf_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  // Locate the call frame information and note its size.
+  const char* cfi_data =
+      GetOffset<ElfClass, char>(elf_header, section->sh_offset);
+  const size_t cfi_bytes = section->sh_size;
+
+  // Plug together the parser, handler, and their entourages.
+
+  // The summariser receives the parser's output, creates summaries and
+  // adds them to |smap|.
+  Summariser summariser(smap, text_bias, log);
+
+  lul::ByteReader byte_reader(big_endian ? lul::ENDIANNESS_BIG
+                                         : lul::ENDIANNESS_LITTLE);
+  byte_reader.SetAddressSize(ElfClass::kAddrSize);
+
+  DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
+  DwarfCFIToModule cfi_handler(reg_count, &module_reporter, &byte_reader, usu,
+                               &summariser);
+
+  // Provide the base addresses for .eh_frame encoded pointers, if
+  // possible.
+  byte_reader.SetCFIDataBase(section->sh_addr, cfi_data);
+  if (got_section) {
+    byte_reader.SetDataBase(got_section->sh_addr);
+  }
+  if (text_section) {
+    byte_reader.SetTextBase(text_section->sh_addr);
+  }
+
+  lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
+                                              section_name);
+  lul::CallFrameInfo cfi_parser(cfi_data, cfi_bytes, &byte_reader,
+                                &cfi_handler, &dwarf_reporter, eh_frame);
+  cfi_parser.Start();
+
+  return true;
+}
+
+// Open OBJ_FILE and map it read-only into memory.  On success, store the
+// base of the mapping in *ELF_HEADER, hand the mapping to MAP_WRAPPER and
+// return true.  On failure, print a message to stderr and return false.
+// If the file maps successfully but is not a valid ELF file, the mapping
+// has already been handed to MAP_WRAPPER when false is returned.  The
+// file descriptor is closed before returning in every case.
+bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
+             void** elf_header) {
+  int obj_fd = open(obj_file.c_str(), O_RDONLY);
+  if (obj_fd < 0) {
+    fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  FDWrapper obj_fd_wrapper(obj_fd);
+  struct stat st;
+  // These two checks must be separate.  If fstat() fails, |st| is not
+  // filled in, so its fields must not be read at all.
+  if (fstat(obj_fd, &st) != 0) {
+    fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  // A zero-length mapping is not possible, so reject empty files here
+  // rather than letting mmap() fail with a less helpful message.
+  if (st.st_size <= 0) {
+    fprintf(stderr, "ELF file '%s' is empty\n", obj_file.c_str());
+    return false;
+  }
+  // Mapping it read-only is good enough.  In any case, mapping it
+  // read-write confuses Valgrind's debuginfo acquire/discard
+  // heuristics, making it hard to profile the profiler.
+  void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0);
+  if (obj_base == MAP_FAILED) {
+    fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  map_wrapper->set(obj_base, st.st_size);
+  *elf_header = obj_base;
+  if (!IsValidElf(*elf_header)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
+    return false;
+  }
+  return true;
+}
+
+// Determine the byte order recorded in ELF_HEADER and store it in
+// *BIG_ENDIAN.  Returns true on success.  If the data encoding byte is
+// neither ELFDATA2LSB nor ELFDATA2MSB, prints a message to stderr,
+// leaves *BIG_ENDIAN untouched and returns false.
+template <typename ElfClass>
+bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
+                   bool* big_endian) {
+  switch (elf_header->e_ident[EI_DATA]) {
+    case ELFDATA2LSB:
+      *big_endian = false;
+      return true;
+    case ELFDATA2MSB:
+      *big_endian = true;
+      return true;
+    default:
+      fprintf(stderr, "bad data encoding in ELF header: %d\n",
+              elf_header->e_ident[EI_DATA]);
+      return false;
+  }
+}
+
+//
+// LoadSymbolsInfo
+//
+// Carries state across the two calls to LoadSymbols() that are made when
+// the .gnu_debuglink section has to be followed and debug information
+// loaded from a different file.
+//
+template <typename ElfClass>
+class LoadSymbolsInfo {
+ public:
+  using Addr = typename ElfClass::Addr;
+
+  explicit LoadSymbolsInfo(const vector<string>& dbg_dirs)
+      : debug_dirs_(dbg_dirs), has_loading_addr_(false) {}
+
+  // Record that |section| has been loaded.  If it had already been
+  // recorded, complain on stderr: sections should not accidentally get
+  // loaded twice from two different files.
+  void LoadedSection(const string& section) {
+    const bool newly_added = loaded_sections_.insert(section).second;
+    if (!newly_added) {
+      fprintf(stderr, "Section %s has already been loaded.\n", section.c_str());
+    }
+  }
+
+  // Returns a copy of the stored debug ELF file path.
+  string debuglink_file() const { return debuglink_file_; }
+
+ private:
+  // Directories in which to search for the debug ELF file.
+  const vector<string>& debug_dirs_;
+
+  // Full path to the debug ELF file.
+  string debuglink_file_;
+
+  // Indicate if LOADING_ADDR_ is valid.
+  bool has_loading_addr_;
+
+  // Tracks the loaded ELF sections between calls to LoadSymbols().
+  set<string> loaded_sections_;
+};
+
+// Return the preferred loading address of the binary whose NHEADER program
+// headers start at PROGRAM_HEADERS, or 0 if none of them is PT_LOAD.
+template <typename ElfClass>
+typename ElfClass::Addr GetLoadingAddress(
+    const typename ElfClass::Phdr* program_headers, int nheader) {
+  typedef typename ElfClass::Phdr Phdr;
+
+  // For non-PIC executables (e_type == ET_EXEC), the load address is
+  // the start address of the first PT_LOAD segment.  (ELF requires
+  // the segments to be sorted by load address.)  For PIC executables
+  // and dynamic libraries (e_type == ET_DYN), this address will
+  // normally be zero.
+  const Phdr* const headers_end = program_headers + nheader;
+  for (const Phdr* ph = program_headers; ph < headers_end; ++ph) {
+    if (ph->p_type == PT_LOAD) {
+      return ph->p_vaddr;
+    }
+  }
+  return 0;
+}
+
+// Read call-frame unwind info for the ELF image at ELF_HEADER (the mapped
+// contents of OBJ_FILE) into SMAP.  .debug_frame and then .eh_frame are
+// looked up and each one found is handed to LoadDwarfCFI(), together with a
+// text bias computed as RX_AVMA minus the image's preferred load address.
+// Progress messages go to LOG.  Returns true if at least one of the two
+// sections yielded CFI.  READ_GNU_DEBUG_LINK and RX_SIZE are accepted but
+// not used by this function.
+template <typename ElfClass>
+bool LoadSymbols(const string& obj_file, const bool big_endian,
+                 const typename ElfClass::Ehdr* elf_header,
+                 const bool read_gnu_debug_link,
+                 LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma,
+                 size_t rx_size, UniqueStringUniverse* usu,
+                 void (*log)(const char*)) {
+  typedef typename ElfClass::Phdr Phdr;
+  typedef typename ElfClass::Shdr Shdr;
+
+  char msg[500];
+  SprintfLiteral(msg, "LoadSymbols: BEGIN   %s\n", obj_file.c_str());
+  msg[sizeof(msg) - 1] = 0;
+  log(msg);
+
+  // This is how the text bias is calculated.
+  // BEGIN CALCULATE BIAS
+  const Phdr* program_headers =
+      GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+  uintptr_t loading_addr =
+      GetLoadingAddress<ElfClass>(program_headers, elf_header->e_phnum);
+  uintptr_t text_bias = reinterpret_cast<uintptr_t>(rx_avma) - loading_addr;
+  SprintfLiteral(
+      msg, "LoadSymbols:   rx_avma=%llx, text_bias=%llx",
+      static_cast<unsigned long long int>(reinterpret_cast<uintptr_t>(rx_avma)),
+      static_cast<unsigned long long int>(text_bias));
+  msg[sizeof(msg) - 1] = 0;
+  log(msg);
+  // END CALCULATE BIAS
+
+  // Locate the section header table and the section-name string table.
+  const Shdr* sections =
+      GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+  const Shdr* name_table_header = sections + elf_header->e_shstrndx;
+  const char* names =
+      GetOffset<ElfClass, char>(elf_header, name_table_header->sh_offset);
+  const char* names_end = names + name_table_header->sh_size;
+
+  // Shorthand for looking a section up by name and type in this image.
+  auto find_section = [&](const char* name, typename ElfClass::Word type) {
+    return FindElfSectionByName<ElfClass>(name, type, sections, names,
+                                          names_end, elf_header->e_shnum);
+  };
+
+  bool found_usable_info = false;
+
+  // Dwarf Call Frame Information (CFI) is actually independent from
+  // the other DWARF debugging information, and can be used alone.
+  const Shdr* dwarf_cfi_section = find_section(".debug_frame", SHT_PROGBITS);
+  if (dwarf_cfi_section) {
+    info->LoadedSection(".debug_frame");
+    // A failure here is not fatal: .eh_frame below may still provide
+    // usable unwind data.
+    if (LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
+                               dwarf_cfi_section, false, 0, 0, big_endian,
+                               smap, text_bias, usu, log)) {
+      found_usable_info = true;
+      log("LoadSymbols:   read CFI from .debug_frame");
+    }
+  }
+
+  // Linux C++ exception handling information can also provide
+  // unwinding data.
+  const Shdr* eh_frame_section = find_section(".eh_frame", SHT_PROGBITS);
+#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+  if (!eh_frame_section) {
+    // Possibly depending on which linker created libxul.so, on x86_64-linux
+    // and -android, .eh_frame may instead have the SHT_X86_64_UNWIND type.
+    eh_frame_section = find_section(".eh_frame", SHT_X86_64_UNWIND);
+  }
+#endif
+  if (eh_frame_section) {
+    // Pointers in .eh_frame data may be relative to the base addresses of
+    // certain sections. Provide those sections if present.
+    const Shdr* got_section = find_section(".got", SHT_PROGBITS);
+    const Shdr* text_section = find_section(".text", SHT_PROGBITS);
+    info->LoadedSection(".eh_frame");
+    // As above, a failure to load is tolerated.
+    if (LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
+                               eh_frame_section, true, got_section,
+                               text_section, big_endian, smap, text_bias, usu,
+                               log)) {
+      found_usable_info = true;
+      log("LoadSymbols:   read CFI from .eh_frame");
+    }
+  }
+
+  SprintfLiteral(msg, "LoadSymbols: END     %s\n", obj_file.c_str());
+  msg[sizeof(msg) - 1] = 0;
+  log(msg);
+
+  return found_usable_info;
+}
+
+// Map ELF_HEADER's e_machine value to the breakpad symbol file identifier
+// for that architecture.  Returns NULL for a machine type not listed below.
+template <typename ElfClass>
+const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
+  typedef typename ElfClass::Half Half;
+
+  // e_machine value -> architecture name.
+  static const struct {
+    Half machine;
+    const char* name;
+  } kKnownMachines[] = {
+      {EM_386, "x86"},
+      {EM_ARM, "arm"},
+      {EM_AARCH64, "arm64"},
+      {EM_MIPS, "mips"},
+      {EM_PPC64, "ppc64"},
+      {EM_PPC, "ppc"},
+      {EM_S390, "s390"},
+      {EM_SPARC, "sparc"},
+      {EM_SPARCV9, "sparcv9"},
+      {EM_X86_64, "x86_64"},
+  };
+
+  const Half arch = elf_header->e_machine;
+  for (const auto& entry : kKnownMachines) {
+    if (entry.machine == arch) {
+      return entry.name;
+    }
+  }
+  return NULL;
+}
+
+// Render the 16-byte ELF file identifier IDENTIFIER as UUID text with the
+// dashes dropped and a trailing '0' appended.
+string FormatIdentifier(unsigned char identifier[16]) {
+  char uuid_text[40];
+  lul::FileID::ConvertIdentifierToString(identifier, uuid_text,
+                                         sizeof(uuid_text));
+
+  string compact;
+  for (const char* p = uuid_text; *p != '\0'; ++p) {
+    if (*p == '-') {
+      continue;
+    }
+    compact += *p;
+  }
+
+  // Add an extra "0" by the end.  PDB files on Windows have an 'age'
+  // number appended to the end of the file identifier; this isn't
+  // really used or necessary on other platforms, but be consistent.
+  compact += '0';
+  return compact;
+}
+
+// Return the non-directory portion of FILENAME, as computed by basename().
+string BaseFileName(const string& filename) {
+  // basename() takes a non-const char* (and may modify it), so hand it a
+  // private, NUL-terminated copy of the name.
+  const char* name_begin = filename.c_str();
+  vector<char> scratch(name_begin, name_begin + strlen(name_begin) + 1);
+  return string(basename(scratch.data()));
+}
+
+// Read unwind info for the ELF image at ELF_HEADER (of class ElfClass, taken
+// from OBJ_FILENAME) into SMAP via LoadSymbols().  Before loading, the image
+// must yield a file identifier, a recognised machine architecture and a
+// valid data encoding; each failure is reported on stderr and returns false.
+// RX_AVMA, RX_SIZE, USU and LOG are passed through to LoadSymbols().
+// Returns true if LoadSymbols() found usable info.
+template <typename ElfClass>
+bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
+                            const string& obj_filename,
+                            const vector<string>& debug_dirs, SecMap* smap,
+                            void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+  typedef typename ElfClass::Ehdr Ehdr;
+
+  unsigned char identifier[16];
+  if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
+    fprintf(stderr, "%s: unable to generate file identifier\n",
+            obj_filename.c_str());
+    return false;
+  }
+
+  const char* architecture = ElfArchitecture<ElfClass>(elf_header);
+  if (!architecture) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+            obj_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  // Figure out what endianness this file is.
+  bool big_endian;
+  if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) return false;
+
+  // NOTE(review): |name|, |os| and |id| are computed here but not referenced
+  // again anywhere in this function.
+  string name = BaseFileName(obj_filename);
+  string os = "Linux";
+  string id = FormatIdentifier(identifier);
+
+  LoadSymbolsInfo<ElfClass> info(debug_dirs);
+  if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
+                             !debug_dirs.empty(), &info, smap, rx_avma, rx_size,
+                             usu, log)) {
+    // Nothing usable in the object itself: fall back to a separate debug
+    // file, if one is known.  LoadSymbolsInfo as defined in this file never
+    // assigns its debuglink path, so this check returns false and the
+    // remainder of this branch does not run.
+    const string debuglink_file = info.debuglink_file();
+    if (debuglink_file.empty()) return false;
+
+    // Load debuglink ELF file.
+    fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
+    MmapWrapper debug_map_wrapper;
+    Ehdr* debug_elf_header = NULL;
+    if (!LoadELF(debuglink_file, &debug_map_wrapper,
+                 reinterpret_cast<void**>(&debug_elf_header)))
+      return false;
+    // Sanity checks to make sure everything matches up.
+    const char* debug_architecture =
+        ElfArchitecture<ElfClass>(debug_elf_header);
+    if (!debug_architecture) {
+      fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+              debuglink_file.c_str(), debug_elf_header->e_machine);
+      return false;
+    }
+    // strcmp() returns non-zero when the two architecture names differ.
+    if (strcmp(architecture, debug_architecture)) {
+      fprintf(stderr,
+              "%s with ELF machine architecture %s does not match "
+              "%s with ELF architecture %s\n",
+              debuglink_file.c_str(), debug_architecture, obj_filename.c_str(),
+              architecture);
+      return false;
+    }
+
+    bool debug_big_endian;
+    if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
+      return false;
+    if (debug_big_endian != big_endian) {
+      fprintf(stderr, "%s and %s does not match in endianness\n",
+              obj_filename.c_str(), debuglink_file.c_str());
+      return false;
+    }
+
+    // Second LoadSymbols() pass, reusing |info| so that sections already
+    // loaded from the main object are reported if they are seen again.
+    if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
+                               debug_elf_header, false, &info, smap, rx_avma,
+                               rx_size, usu, log)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+}  // namespace
+
+namespace lul {
+
+// Read unwind info from the ELF image mapped at OBJ_FILE (the contents of
+// OBJ_FILENAME) into SMAP, dispatching on the image's ELF class.  Returns
+// false if the image lacks the ELF magic, has a class other than
+// ELFCLASS32/ELFCLASS64, or the class-specific reader fails.
+bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename,
+                            const vector<string>& debug_dirs, SecMap* smap,
+                            void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+  if (!IsValidElf(obj_file)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
+    return false;
+  }
+
+  switch (ElfClass(obj_file)) {
+    case ELFCLASS32:
+      return ReadSymbolDataElfClass<ElfClass32>(
+          reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename,
+          debug_dirs, smap, rx_avma, rx_size, usu, log);
+    case ELFCLASS64:
+      return ReadSymbolDataElfClass<ElfClass64>(
+          reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename,
+          debug_dirs, smap, rx_avma, rx_size, usu, log);
+    default:
+      return false;
+  }
+}
+
+// Map OBJ_FILE with LoadELF() and read its unwind info into SMAP through
+// ReadSymbolDataInternal().  Returns false if the file cannot be loaded or
+// the reader fails.
+bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs,
+                    SecMap* smap, void* rx_avma, size_t rx_size,
+                    UniqueStringUniverse* usu, void (*log)(const char*)) {
+  // |mapping| is handed the mapped region by LoadELF() and stays in scope
+  // for the whole read below.
+  MmapWrapper mapping;
+  void* image = NULL;
+  if (!LoadELF(obj_file, &mapping, &image)) {
+    return false;
+  }
+
+  const uint8_t* image_bytes = static_cast<uint8_t*>(image);
+  return ReadSymbolDataInternal(image_bytes, obj_file, debug_dirs, smap,
+                                rx_avma, rx_size, usu, log);
+}
+
+namespace {
+
+// Look up the section named SECTION_NAME with type SECTION_TYPE in the
+// ElfClass image mapped at ELF_BASE.  If it exists and is non-empty, store
+// the address of its data in *SECTION_START and its size in *SECTION_SIZE;
+// otherwise leave both outputs as the caller set them.
+template <typename ElfClass>
+void FindElfClassSection(const char* elf_base, const char* section_name,
+                         typename ElfClass::Word section_type,
+                         const void** section_start, int* section_size) {
+  typedef typename ElfClass::Ehdr Ehdr;
+  typedef typename ElfClass::Shdr Shdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  // Section header table, and the string table holding the section names.
+  const Shdr* section_table =
+      GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+  const Shdr* name_table_header = section_table + elf_header->e_shstrndx;
+  const char* names =
+      GetOffset<ElfClass, char>(elf_header, name_table_header->sh_offset);
+  const char* names_end = names + name_table_header->sh_size;
+
+  const Shdr* match = FindElfSectionByName<ElfClass>(
+      section_name, section_type, section_table, names, names_end,
+      elf_header->e_shnum);
+  if (match == NULL || match->sh_size == 0) {
+    return;
+  }
+
+  *section_start = elf_base + match->sh_offset;
+  *section_size = match->sh_size;
+}
+
+// Find the first program header of type SEGMENT_TYPE in the ElfClass image
+// mapped at ELF_BASE.  If there is one, store the address of its file data
+// in *SEGMENT_START and its p_filesz in *SEGMENT_SIZE; otherwise leave both
+// outputs as the caller set them.
+template <typename ElfClass>
+void FindElfClassSegment(const char* elf_base,
+                         typename ElfClass::Word segment_type,
+                         const void** segment_start, int* segment_size) {
+  typedef typename ElfClass::Ehdr Ehdr;
+  typedef typename ElfClass::Phdr Phdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  const Phdr* const first =
+      GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+  const Phdr* const last = first + elf_header->e_phnum;
+
+  for (const Phdr* ph = first; ph != last; ++ph) {
+    if (ph->p_type != segment_type) {
+      continue;
+    }
+    *segment_start = elf_base + ph->p_offset;
+    *segment_size = ph->p_filesz;
+    return;
+  }
+}
+
+}  // namespace
+
+// True if the bytes at ELF_BASE start with the ELF magic number (ELFMAG).
+bool IsValidElf(const void* elf_base) {
+  const char* leading_bytes = reinterpret_cast<const char*>(elf_base);
+  const bool magic_matches = strncmp(leading_bytes, ELFMAG, SELFMAG) == 0;
+  return magic_matches;
+}
+
+// Return the EI_CLASS identification byte of the ELF image at ELF_BASE.
+int ElfClass(const void* elf_base) {
+  // e_ident is the leading byte array of the ELF header, so the class byte
+  // sits at offset EI_CLASS from the start of the image.
+  const unsigned char* ident = static_cast<const unsigned char*>(elf_base);
+  return ident[EI_CLASS];
+}
+
+// Find the non-empty section SECTION_NAME of type SECTION_TYPE in the ELF
+// image at ELF_MAPPED_BASE.  On success returns true with *SECTION_START and
+// *SECTION_SIZE describing its data; on failure returns false with them set
+// to NULL and 0.  If ELFCLASS is non-NULL and the image has the ELF magic,
+// *ELFCLASS receives the image's class byte.
+bool FindElfSection(const void* elf_mapped_base, const char* section_name,
+                    uint32_t section_type, const void** section_start,
+                    int* section_size, int* elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+
+  // Start from the "not found" state.
+  *section_start = NULL;
+  *section_size = 0;
+
+  if (!IsValidElf(elf_mapped_base)) {
+    return false;
+  }
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass != NULL) {
+    *elfclass = cls;
+  }
+
+  const char* image = static_cast<const char*>(elf_mapped_base);
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSection<ElfClass32>(image, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSection<ElfClass64>(image, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    default:
+      return false;
+  }
+
+  // The class-specific helper only writes the outputs when it finds a match.
+  return *section_start != NULL;
+}
+
+// Find the first segment of type SEGMENT_TYPE in the ELF image at
+// ELF_MAPPED_BASE.  On success returns true with *SEGMENT_START and
+// *SEGMENT_SIZE describing its file data; on failure returns false with them
+// set to NULL and 0.  If ELFCLASS is non-NULL and the image has the ELF
+// magic, *ELFCLASS receives the image's class byte.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
+                    const void** segment_start, int* segment_size,
+                    int* elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+
+  // Start from the "not found" state.
+  *segment_start = NULL;
+  *segment_size = 0;
+
+  if (!IsValidElf(elf_mapped_base)) {
+    return false;
+  }
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass != NULL) {
+    *elfclass = cls;
+  }
+
+  const char* image = static_cast<const char*>(elf_mapped_base);
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSegment<ElfClass32>(image, segment_type, segment_start,
+                                      segment_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSegment<ElfClass64>(image, segment_type, segment_start,
+                                      segment_size);
+      break;
+    default:
+      return false;
+  }
+
+  // The class-specific helper only writes the outputs when it finds a match.
+  return *segment_start != NULL;
+}
+
+// (derived from)
+// file_id.cc: Return a unique identifier for a file
+//
+// See file_id.h for documentation
+//
+
+// ELF note name and desc are 32-bits word padded: round |a| up to the next
+// multiple of 4.  The argument is parenthesised so that an expression such
+// as `x | y` is padded as a whole.
+#define NOTE_PADDING(a) (((a) + 3) & ~3)
+
+// These functions are also used inside the crashed process, so be safe
+// and use the syscall/libc wrappers instead of direct syscalls or libc.
+
+// Scan the LENGTH bytes of ELF note data at SECTION for the first note whose
+// type is NT_GNU_BUILD_ID.  If it has a non-empty descriptor, zero-fill
+// IDENTIFIER, copy in as many descriptor bytes as fit (at most kMDGUIDSize)
+// and return true.  Returns false, leaving IDENTIFIER untouched, when no such
+// note is found or when a note's header, name or descriptor would extend
+// past the end of the data.
+template <typename ElfClass>
+static bool ElfClassBuildIDNoteIdentifier(const void* section, int length,
+                                          uint8_t identifier[kMDGUIDSize]) {
+  typedef typename ElfClass::Nhdr Nhdr;
+
+  if (section == NULL || length <= 0) return false;
+
+  const char* cursor = reinterpret_cast<const char*>(section);
+  size_t remaining = static_cast<size_t>(length);
+
+  // Every size is validated against |remaining| before the corresponding
+  // bytes are touched, so truncated or corrupt note data cannot cause a read
+  // outside [section, section + length).
+  while (remaining >= sizeof(Nhdr)) {
+    const Nhdr* note_header = reinterpret_cast<const Nhdr*>(cursor);
+    const size_t payload_size = remaining - sizeof(Nhdr);
+
+    // Widen to 64 bits before padding so a huge 32-bit size cannot wrap
+    // around to a small one.
+    const uint64_t name_size = note_header->n_namesz;
+    const uint64_t desc_size = note_header->n_descsz;
+    const uint64_t padded_name_size = NOTE_PADDING(name_size);
+    const uint64_t padded_desc_size = NOTE_PADDING(desc_size);
+
+    if (padded_name_size > payload_size) return false;  // name is cut off
+    const size_t after_name =
+        payload_size - static_cast<size_t>(padded_name_size);
+
+    if (note_header->n_type == NT_GNU_BUILD_ID) {
+      // Only the unpadded descriptor has to be present in full.
+      if (desc_size == 0 || desc_size > after_name) return false;
+
+      const char* build_id =
+          cursor + sizeof(Nhdr) + static_cast<size_t>(padded_name_size);
+      // Copy as many bits of the build ID as will fit
+      // into the GUID space.
+      memset(identifier, 0, kMDGUIDSize);
+      memcpy(identifier, build_id,
+             std::min(kMDGUIDSize, static_cast<size_t>(desc_size)));
+      return true;
+    }
+
+    // Some other note: step over it, unless it runs off the end.
+    if (padded_desc_size > after_name) return false;
+    const size_t note_size = sizeof(Nhdr) +
+                             static_cast<size_t>(padded_name_size) +
+                             static_cast<size_t>(padded_desc_size);
+    cursor += note_size;
+    remaining -= note_size;
+  }
+
+  return false;
+}
+
+// Run the build-ID note scanner for ELFCLASS over the NOTE_SIZE bytes of
+// note data at NOTES.  Returns false for an unrecognised class or when the
+// data contains no usable build ID.
+static bool BuildIDFromNoteData(const void* notes, int note_size, int elfclass,
+                                uint8_t identifier[kMDGUIDSize]) {
+  if (elfclass == ELFCLASS32) {
+    return ElfClassBuildIDNoteIdentifier<ElfClass32>(notes, note_size,
+                                                     identifier);
+  }
+  if (elfclass == ELFCLASS64) {
+    return ElfClassBuildIDNoteIdentifier<ElfClass64>(notes, note_size,
+                                                     identifier);
+  }
+  return false;
+}
+
+// Attempt to locate a GNU build ID note in an ELF binary and copy as many
+// bytes of it as will fit into |identifier|.  The first PT_NOTE segment is
+// searched first; if it is missing, empty, or holds no build ID, the
+// .note.gnu.build-id section is searched instead.
+static bool FindElfBuildIDNote(const void* elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* note_data = NULL;
+  int note_size = 0;
+  int elfclass = 0;
+
+  // FindElfSegment() only reports the first PT_NOTE segment, and a binary
+  // may carry several, so a failed scan here must not end the search.
+  if (FindElfSegment(elf_mapped_base, PT_NOTE, &note_data, &note_size,
+                     &elfclass) &&
+      note_size != 0 &&
+      BuildIDFromNoteData(note_data, note_size, elfclass, identifier)) {
+    return true;
+  }
+
+  if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
+                     &note_data, &note_size, &elfclass) &&
+      note_size != 0) {
+    return BuildIDFromNoteData(note_data, note_size, elfclass, identifier);
+  }
+
+  return false;
+}
+
+// Attempt to locate the .text section of an ELF binary and generate
+// a simple hash by XORing the first page worth of bytes into |identifier|.
+// Byte i of the hashed range is folded into identifier[i % kMDGUIDSize].
+// Returns false if there is no non-empty .text section.
+static bool HashElfTextSection(const void* elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* text_section = NULL;
+  int text_size = 0;
+  if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, &text_section,
+                      &text_size, NULL) ||
+      text_size == 0) {
+    return false;
+  }
+
+  memset(identifier, 0, kMDGUIDSize);
+
+  // Hash at most one page, and never more than the section holds.  Walking
+  // byte by byte (rather than in whole kMDGUIDSize-byte rows) keeps the last
+  // partial row from reading past the end of .text.  When the hashed length
+  // is a multiple of kMDGUIDSize the result equals the row-wise XOR.
+  // A negative |text_size| hashes nothing.
+  const size_t hash_len =
+      text_size > 0 ? static_cast<size_t>(std::min(text_size, 4096)) : 0;
+  const uint8_t* text = static_cast<const uint8_t*>(text_section);
+  for (size_t i = 0; i < hash_len; ++i) {
+    identifier[i % kMDGUIDSize] ^= text[i];
+  }
+  return true;
+}
+
+// static
+bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
+                                             uint8_t identifier[kMDGUIDSize]) {
+  // Prefer a build id note; only if none is found, fall back on hashing the
+  // first page of the text section.
+  return FindElfBuildIDNote(base, identifier) ||
+         HashElfTextSection(base, identifier);
+}
+
+// static
+// Write |identifier| to |buffer| as upper-case UUID text
+// (XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX) followed by a NUL.  At most
+// |buffer_length| bytes are written, terminator included; a buffer shorter
+// than 37 bytes receives a truncated, still NUL-terminated string.  Nothing
+// is written if |buffer| is NULL or |buffer_length| is not positive.
+void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+                                       char* buffer, int buffer_length) {
+  if (buffer == NULL || buffer_length <= 0) return;
+
+  uint8_t identifier_swapped[kMDGUIDSize];
+  memcpy(identifier_swapped, identifier, kMDGUIDSize);
+
+  // Endian-ness swap to match dump processor expectation: the leading
+  // 4-byte field and the two 2-byte fields after it go through
+  // htonl()/htons().  The fields are moved with memcpy so the byte array is
+  // never accessed through a wider (and possibly misaligned) pointer type.
+  uint32_t data1;
+  memcpy(&data1, identifier_swapped, sizeof(data1));
+  data1 = htonl(data1);
+  memcpy(identifier_swapped, &data1, sizeof(data1));
+  for (size_t offset = 4; offset <= 6; offset += 2) {
+    uint16_t field;
+    memcpy(&field, identifier_swapped + offset, sizeof(field));
+    field = htons(field);
+    memcpy(identifier_swapped + offset, &field, sizeof(field));
+  }
+
+  static const char kHexDigits[] = "0123456789ABCDEF";
+  const int max_chars = buffer_length - 1;  // keep one byte for the NUL
+  int buffer_idx = 0;
+  // Append one character, silently dropping it once the buffer is full.
+  auto append = [&](char c) {
+    if (buffer_idx < max_chars) buffer[buffer_idx++] = c;
+  };
+
+  for (unsigned int idx = 0; idx < kMDGUIDSize; ++idx) {
+    if (idx == 4 || idx == 6 || idx == 8 || idx == 10) append('-');
+    append(kHexDigits[(identifier_swapped[idx] >> 4) & 0x0F]);
+    append(kHexDigits[identifier_swapped[idx] & 0x0F]);
+  }
+
+  // NULL terminate
+  buffer[buffer_idx] = '\0';
+}
+
+}  // namespace lul
diff --git a/tools/profiler/lul/LulElfExt.h b/tools/profiler/lul/LulElfExt.h
new file mode 100644
index 0000000000..73d9ff7f15
--- /dev/null
+++ b/tools/profiler/lul/LulElfExt.h
@@ -0,0 +1,69 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/linux/dump_symbols.h
+
+#ifndef LulElfExt_h
+#define LulElfExt_h
+
+// These two functions are the external interface to the
+// ELF/Dwarf/EXIDX reader.
+
+#include "LulMainInt.h"
+
+using lul::SecMap;
+
+namespace lul {
+
+class UniqueStringUniverse;
+
+// Find all the unwind information in OBJ_FILE, an ELF executable
+// or shared library, and add it to SMAP.  RX_AVMA is the address from which
+// the object's preferred load address is subtracted to obtain the text bias
+// applied to the unwind data (presumably where its executable mapping
+// starts -- confirm against callers).  LOG receives
+// progress messages.  Returns true if unwind information was read from at
+// least one of the object's call-frame-info sections, false otherwise.
+bool ReadSymbolData(const std::string& obj_file,
+                    const std::vector<std::string>& debug_dirs, SecMap* smap,
+                    void* rx_avma, size_t rx_size, UniqueStringUniverse* usu,
+                    void (*log)(const char*));
+
+// The same as ReadSymbolData, except that OBJ_FILE is assumed to
+// point to a mapped-in image of OBJ_FILENAME.  Returns false straight away
+// if that image does not start with the ELF magic number or is neither a
+// 32-bit nor a 64-bit ELF object.
+bool ReadSymbolDataInternal(const uint8_t* obj_file,
+                            const std::string& obj_filename,
+                            const std::vector<std::string>& debug_dirs,
+                            SecMap* smap, void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*));
+
+}  // namespace lul
+
+#endif  // LulElfExt_h
diff --git a/tools/profiler/lul/LulElfInt.h b/tools/profiler/lul/LulElfInt.h
new file mode 100644
index 0000000000..31ffba8ff0
--- /dev/null
+++ b/tools/profiler/lul/LulElfInt.h
@@ -0,0 +1,218 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2012, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/android/include/elf.h
+//   src/common/linux/elfutils.h
+//   src/common/linux/file_id.h
+//   src/common/linux/elfutils-inl.h
+
+#ifndef LulElfInt_h
+#define LulElfInt_h
+
+// This header defines functions etc internal to the ELF reader.  It
+// should not be included outside of LulElf.cpp.
+
+#include <elf.h>
+#include <stdlib.h>
+
+#include "mozilla/Assertions.h"
+
+#include "PlatformMacros.h"
+
+// (derived from)
+// elfutils.h: Utilities for dealing with ELF files.
+//
+#include <link.h>
+
+#if defined(GP_OS_android)
+
+// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h
+// The Android headers don't always define this constant.
+#  ifndef EM_X86_64
+#    define EM_X86_64 62
+#  endif
+
+#  ifndef EM_PPC64
+#    define EM_PPC64 21
+#  endif
+
+#  ifndef EM_S390
+#    define EM_S390 22
+#  endif
+
+#  ifndef NT_GNU_BUILD_ID
+#    define NT_GNU_BUILD_ID 3
+#  endif
+
+#  ifndef ElfW
+#    define ElfW(type) _ElfW(Elf, ELFSIZE, type)
+#    define _ElfW(e, w, t) _ElfW_1(e, w, _##t)
+#    define _ElfW_1(e, w, t) e##w##t
+#  endif
+
+#endif
+
+#if defined(GP_OS_freebsd)
+
+#  ifndef ElfW
+#    define ElfW(type) Elf_##type
+#  endif
+
+#endif
+
+namespace lul {
+
+// Traits classes so consumers can write templatized code to deal
+// with specific ELF bits.  This one names the 32-bit ELF types.
+struct ElfClass32 {
+  using Addr = Elf32_Addr;
+  using Ehdr = Elf32_Ehdr;
+  using Nhdr = Elf32_Nhdr;
+  using Phdr = Elf32_Phdr;
+  using Shdr = Elf32_Shdr;
+  using Half = Elf32_Half;
+  using Off = Elf32_Off;
+  using Word = Elf32_Word;
+  // Value of e_ident[EI_CLASS] for images described by this traits class.
+  static const int kClass = ELFCLASS32;
+  // Size in bytes of an address in such an image.
+  static const size_t kAddrSize = sizeof(Elf32_Addr);
+};
+
+// Traits class naming the 64-bit ELF types.
+struct ElfClass64 {
+  using Addr = Elf64_Addr;
+  using Ehdr = Elf64_Ehdr;
+  using Nhdr = Elf64_Nhdr;
+  using Phdr = Elf64_Phdr;
+  using Shdr = Elf64_Shdr;
+  using Half = Elf64_Half;
+  using Off = Elf64_Off;
+  using Word = Elf64_Word;
+  // Value of e_ident[EI_CLASS] for images described by this traits class.
+  static const int kClass = ELFCLASS64;
+  // Size in bytes of an address in such an image.
+  static const size_t kAddrSize = sizeof(Elf64_Addr);
+};
+
+// Returns true if the bytes at |elf_header| begin with the ELF magic number.
+bool IsValidElf(const void* elf_header);
+// Returns the EI_CLASS identification byte of the ELF image at |elf_base|.
+int ElfClass(const void* elf_base);
+
+// Attempt to find a section named |section_name| of type |section_type|
+// in the ELF binary data at |elf_mapped_base|. On success, returns true
+// and sets |*section_start| to point to the start of the section data,
+// and |*section_size| to the size of the section's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+// On failure, returns false with |*section_start| set to NULL and
+// |*section_size| set to 0; a section whose size is zero counts as not found.
+bool FindElfSection(const void* elf_mapped_base, const char* section_name,
+                    uint32_t section_type, const void** section_start,
+                    int* section_size, int* elfclass);
+
+// Internal helper method, exposed for convenience for callers
+// that already have more info.  |sections| is the section header table with
+// |nsection| entries; |section_names| and |names_end| delimit the string
+// table holding the section names.  Returns NULL if nothing matches.
+template <typename ElfClass>
+const typename ElfClass::Shdr* FindElfSectionByName(
+    const char* name, typename ElfClass::Word section_type,
+    const typename ElfClass::Shdr* sections, const char* section_names,
+    const char* names_end, int nsection);
+
+// Attempt to find the first segment of type |segment_type| in the ELF
+// binary data at |elf_mapped_base|. On success, returns true and sets
+// |*segment_start| to point to the start of the segment data, and
+// |*segment_size| to the size of the segment's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+// On failure, returns false with |*segment_start| set to NULL and
+// |*segment_size| set to 0.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
+                    const void** segment_start, int* segment_size,
+                    int* elfclass);
+
+// Convert an offset from an Elf header into a pointer to the mapped
+// address in the current process. Takes an extra template parameter
+// to specify the return type to avoid having to cast the result at
+// every call site.  No bounds checking is done on |offset|.
+template <typename ElfClass, typename T>
+const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                   typename ElfClass::Off offset);
+
+// (derived from)
+// file_id.h: Return a unique identifier for a file
+//
+
+// Size in bytes of a file identifier.  NOTE(review): MDGUID is not declared
+// by anything this header includes; it must already be visible where this
+// header is included -- confirm.
+static const size_t kMDGUIDSize = sizeof(MDGUID);
+
+// Static helpers for computing and formatting ELF file identifiers.
+class FileID {
+ public:
+  // Load the identifier for the elf file mapped into memory at |base| into
+  // |identifier|.  Return false if the identifier could not be created for the
+  // file.  A GNU build ID note is used when one can be found; otherwise the
+  // identifier is a hash of the start of the .text section.
+  static bool ElfFileIdentifierFromMappedFile(const void* base,
+                                              uint8_t identifier[kMDGUIDSize]);
+
+  // Convert the |identifier| data to a NULL terminated string.  The string will
+  // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE).
+  // The |buffer| should be at least 37 bytes long to receive all of the data
+  // and termination.  Shorter buffers will contain truncated data.
+  // Hexadecimal digits are written in upper case.
+  static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+                                        char* buffer, int buffer_length);
+};
+
+// Return a pointer of type const T* to the byte |offset| bytes past the
+// start of the ELF image whose header is |elf_header|.
+template <typename ElfClass, typename T>
+const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                   typename ElfClass::Off offset) {
+  // Do the arithmetic on integers rather than on typed pointers.
+  const uintptr_t image_base = reinterpret_cast<uintptr_t>(elf_header);
+  const uintptr_t target = image_base + offset;
+  return reinterpret_cast<const T*>(target);
+}
+
+// Return the first of the |nsection| headers in |sections| whose type is
+// |section_type| and whose name, read from the string table spanning
+// [section_names, names_end), equals |name|.  Returns NULL if |name| is
+// empty or no header matches.
+template <typename ElfClass>
+const typename ElfClass::Shdr* FindElfSectionByName(
+    const char* name, typename ElfClass::Word section_type,
+    const typename ElfClass::Shdr* sections, const char* section_names,
+    const char* names_end, int nsection) {
+  MOZ_ASSERT(name != NULL);
+  MOZ_ASSERT(sections != NULL);
+  MOZ_ASSERT(nsection > 0);
+
+  const int wanted_len = strlen(name);
+  if (wanted_len == 0) {
+    return NULL;
+  }
+
+  for (int idx = 0; idx < nsection; ++idx) {
+    const typename ElfClass::Shdr& candidate = sections[idx];
+    if (candidate.sh_type != section_type) {
+      continue;
+    }
+    const char* candidate_name = section_names + candidate.sh_name;
+    // The table must hold at least |wanted_len| characters plus the NUL at
+    // this position before it is safe to compare.
+    if (names_end - candidate_name < wanted_len + 1) {
+      continue;
+    }
+    if (strcmp(name, candidate_name) == 0) {
+      return &candidate;
+    }
+  }
+  return NULL;
+}
+
+}  // namespace lul
+
+// And finally, the external interface, offered to LulMain.cpp
+#include "LulElfExt.h"
+
+#endif  // LulElfInt_h
diff --git a/tools/profiler/lul/LulMain.cpp b/tools/profiler/lul/LulMain.cpp
new file mode 100644
index 0000000000..7cf5508234
--- /dev/null
+++ b/tools/profiler/lul/LulMain.cpp
@@ -0,0 +1,2079 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulMain.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>  // write(), only for testing LUL
+
+#include <algorithm>  // std::sort
+#include <string>
+#include <utility>
+
+#include "GeckoProfiler.h"  // for profiler_current_thread_id()
+#include "LulCommonExt.h"
+#include "LulElfExt.h"
+#include "LulMainInt.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryChecking.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_MAIN 0
+
+namespace lul {
+
+using mozilla::CheckedInt;
+using mozilla::DebugOnly;
+using mozilla::MallocSizeOf;
+using mozilla::Unused;
+using std::pair;
+using std::string;
+using std::vector;
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
+// Any such function -- and, hence, the transitive closure of those
+// reachable from it -- must not do any dynamic memory allocation.
+// Doing so risks deadlock.  There is exactly one root function for
+// the transitive closure: LUL::Unwind.
+//
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+
+////////////////////////////////////////////////////////////////
+// RuleSet                                                    //
+////////////////////////////////////////////////////////////////
+
+// Map a DW_REG_* register number to a short printable name, for use in debug
+// output.  Only DW_REG_CFA and the registers handled for the architecture
+// selected at compile time are recognised; any other value yields "???".
+// The returned pointer always refers to a string literal.
+static const char* NameOf_DW_REG(int16_t aReg) {
+  switch (aReg) {
+    case DW_REG_CFA:
+      return "cfa";
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    case DW_REG_INTEL_XBP:
+      return "xbp";
+    case DW_REG_INTEL_XSP:
+      return "xsp";
+    case DW_REG_INTEL_XIP:
+      return "xip";
+#elif defined(GP_ARCH_arm)
+    case DW_REG_ARM_R7:
+      return "r7";
+    case DW_REG_ARM_R11:
+      return "r11";
+    case DW_REG_ARM_R12:
+      return "r12";
+    case DW_REG_ARM_R13:
+      return "r13";
+    case DW_REG_ARM_R14:
+      return "r14";
+    case DW_REG_ARM_R15:
+      return "r15";
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:
+      return "x29";
+    case DW_REG_AARCH64_X30:
+      return "x30";
+    case DW_REG_AARCH64_SP:
+      return "sp";
+#elif defined(GP_ARCH_mips64)
+    case DW_REG_MIPS_SP:
+      return "sp";
+    case DW_REG_MIPS_FP:
+      return "fp";
+    case DW_REG_MIPS_PC:
+      return "pc";
+#else
+#  error "Unsupported arch"
+#endif
+    default:
+      // Not a register this build knows how to name.
+      return "???";
+  }
+}
+
+// Render this expression as text of the form "<aNewReg>=<rule>", where <rule>
+// depends on |mHow|.  Intended for debug printing.
+string LExpr::ShowRule(const char* aNewReg) const {
+  string text(aNewReg);
+  text += "=";
+
+  char scratch[64];
+  switch (mHow) {
+    case UNKNOWN:
+      text += "Unknown";
+      break;
+    case NODEREF:
+      // Value is register-plus-offset.
+      SprintfLiteral(scratch, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
+      text += scratch;
+      break;
+    case DEREF:
+      // Value is loaded from memory at register-plus-offset.
+      SprintfLiteral(scratch, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
+      text += scratch;
+      break;
+    case PFXEXPR:
+      SprintfLiteral(scratch, "PfxExpr-at-%d", (int)mOffset);
+      text += scratch;
+      break;
+    default:
+      text += "???";
+      break;
+  }
+  return text;
+}
+
+// Format this RuleSet as a single line of text and hand it to |aLog|.  The
+// line starts with the address range [avma, avma + len - 1], followed by the
+// CFA rule and then the recovery rule for each register tracked on the
+// architecture selected at compile time.
+void RuleSet::Print(uintptr_t avma, uintptr_t len,
+                    void (*aLog)(const char*)) const {
+  char header[96];
+  SprintfLiteral(header, "[%llx .. %llx]: let ", (unsigned long long int)avma,
+                 (unsigned long long int)(avma + len - 1));
+  string line(header);
+  line += mCfaExpr.ShowRule("cfa");
+  line += " in";
+  // Append the recovery expression of every register we care about.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  line += mXipExpr.ShowRule(" RA");
+  line += mXspExpr.ShowRule(" SP");
+  line += mXbpExpr.ShowRule(" BP");
+#elif defined(GP_ARCH_arm)
+  line += mR15expr.ShowRule(" R15");
+  line += mR7expr.ShowRule(" R7");
+  line += mR11expr.ShowRule(" R11");
+  line += mR12expr.ShowRule(" R12");
+  line += mR13expr.ShowRule(" R13");
+  line += mR14expr.ShowRule(" R14");
+#elif defined(GP_ARCH_arm64)
+  line += mX29expr.ShowRule(" X29");
+  line += mX30expr.ShowRule(" X30");
+  line += mSPexpr.ShowRule(" SP");
+#elif defined(GP_ARCH_mips64)
+  line += mPCexpr.ShowRule(" PC");
+  line += mSPexpr.ShowRule(" SP");
+  line += mFPexpr.ShowRule(" FP");
+#else
+#  error "Unsupported arch"
+#endif
+  aLog(line.c_str());
+}
+
+// Return a pointer to the LExpr member of this RuleSet that holds the rule
+// for register |aRegno|.  Only DW_REG_CFA and the registers handled for the
+// architecture selected at compile time are recognised; for any other value
+// nullptr is returned.
+LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
+  switch (aRegno) {
+    case DW_REG_CFA:
+      return &mCfaExpr;
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    case DW_REG_INTEL_XIP:
+      return &mXipExpr;
+    case DW_REG_INTEL_XSP:
+      return &mXspExpr;
+    case DW_REG_INTEL_XBP:
+      return &mXbpExpr;
+#elif defined(GP_ARCH_arm)
+    case DW_REG_ARM_R15:
+      return &mR15expr;
+    case DW_REG_ARM_R14:
+      return &mR14expr;
+    case DW_REG_ARM_R13:
+      return &mR13expr;
+    case DW_REG_ARM_R12:
+      return &mR12expr;
+    case DW_REG_ARM_R11:
+      return &mR11expr;
+    case DW_REG_ARM_R7:
+      return &mR7expr;
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:
+      return &mX29expr;
+    case DW_REG_AARCH64_X30:
+      return &mX30expr;
+    case DW_REG_AARCH64_SP:
+      return &mSPexpr;
+#elif defined(GP_ARCH_mips64)
+    case DW_REG_MIPS_SP:
+      return &mSPexpr;
+    case DW_REG_MIPS_FP:
+      return &mFPexpr;
+    case DW_REG_MIPS_PC:
+      return &mPCexpr;
+#else
+#  error "Unknown arch"
+#endif
+    default:
+      // No rule is stored for this register.
+      return nullptr;
+  }
+}
+
+// Default constructor.  It has no work of its own to do.
+RuleSet::RuleSet() {
+  // All fields are of type LExpr and so are initialised by LExpr::LExpr().
+}
+
+////////////////////////////////////////////////////////////////
+// SecMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// See header file LulMainInt.h for comments about invariants.
+
+// Construct a not-yet-searchable SecMap describing the |mapLen| bytes of
+// address space starting at |mapStartAVMA|.  |aLog| is kept as the logging
+// sink.  A fresh, empty uniqifier table is allocated here.
+SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen,
+               void (*aLog)(const char*))
+    : mUsable(false),
+      mUniqifier(new mozilla::HashMap<RuleSet, uint32_t, RuleSet,
+                                      InfallibleAllocPolicy>),
+      mLog(aLog) {
+  if (mapLen != 0) {
+    // Normal case: an inclusive [min, max] range.
+    mMapMinAVMA = mapStartAVMA;
+    mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1;
+    return;
+  }
+  // Degenerate case: a zero-length map is encoded as min > max.
+  mMapMinAVMA = 1;
+  mMapMaxAVMA = 0;
+}
+
+// Empty the extent and dictionary vectors and, if the uniqifier table is
+// still present, clear it and drop it.
+SecMap::~SecMap() {
+  mExtents.clear();
+  mDictionary.clear();
+  if (!mUniqifier) {
+    return;
+  }
+  mUniqifier->clear();
+  mUniqifier = nullptr;
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Return a pointer to the RuleSet associated with the Extent that covers
+// instruction address |ia|, or nullptr if no Extent in this SecMap covers
+// it.  The map must already have been made searchable (|mUsable| set).
+RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
+  // Binary search mExtents to find one that brackets |ia|.
+  // lo and hi need to be signed, else the loop termination tests
+  // don't work properly.  Note that this works correctly even when
+  // mExtents.size() == 0.
+
+  // Can't do this until the array has been sorted and preened.
+  MOZ_ASSERT(mUsable);
+
+  long int lo = 0;
+  long int hi = (long int)mExtents.size() - 1;
+  while (true) {
+    // current unsearched space is from lo to hi, inclusive.
+    if (lo > hi) {
+      // not found
+      return nullptr;
+    }
+    long int mid = lo + ((hi - lo) / 2);
+    Extent* mid_extent = &mExtents[mid];
+    uintptr_t mid_offset = mid_extent->offset();
+    uintptr_t mid_len = mid_extent->len();
+    // Extents store offsets relative to the start of the map; convert to
+    // an inclusive absolute address range before comparing with |ia|.
+    uintptr_t mid_minAddr = mMapMinAVMA + mid_offset;
+    uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1;
+    if (ia < mid_minAddr) {
+      hi = mid - 1;
+      continue;
+    }
+    if (ia > mid_maxAddr) {
+      lo = mid + 1;
+      continue;
+    }
+    MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
+    uint32_t mid_extent_dictIx = mid_extent->dictIx();
+    // The index selects an entry of mDictionary, so that is the container it
+    // has to be bounds-checked against.  The number of Extents is unrelated:
+    // several Extents may share one RuleSet, and preening may drop Extents.
+    MOZ_RELEASE_ASSERT(mid_extent_dictIx < mDictionary.size());
+    return &mDictionary[mid_extent_dictIx];
+  }
+  // NOTREACHED
+}
+
+// Add a RuleSet to the collection.  The rule is copied in.  Calling
+// this makes the map non-searchable.
+//
+// |rs| is the rule to add and [avma, avma + len - 1] is the address range it
+// applies to.  Requests with len == 0, or whose range is not wholly inside
+// [mMapMinAVMA, mMapMaxAVMA], are silently ignored.
+void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) {
+  mUsable = false;
+
+  // Zero length RuleSet?  Meaningless, but ignore it anyway.
+  if (len == 0) {
+    return;
+  }
+
+  // Ignore attempts to add RuleSets whose address range doesn't fall within
+  // the declared address range for the SecMap.  Maybe we should print some
+  // kind of error message rather than silently ignoring them.
+  //
+  // The upper-bound test is phrased as a comparison of distances rather than
+  // as `avma + len - 1 <= mMapMaxAVMA`, because that sum can wrap around and
+  // make an out-of-range request look acceptable.  `len - 1` cannot
+  // underflow (len != 0) and `mMapMaxAVMA - avma` cannot underflow once
+  // `avma <= mMapMaxAVMA` is established.
+  if (avma < mMapMinAVMA || avma > mMapMaxAVMA ||
+      len - 1 > mMapMaxAVMA - avma) {
+    return;
+  }
+
+  // Because `mMapMinAVMA` .. `mMapMaxAVMA` can specify at most a 2^32-1 byte
+  // chunk of address space, the following must now hold.
+  MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF);
+
+  // See if `mUniqifier` already has `rs`.  If so set `dictIx` to the assigned
+  // dictionary index; if not, add `rs` to `mUniqifier` and assign a new
+  // dictionary index.  This is the core of the RuleSet-de-duplication process.
+  uint32_t dictIx = 0;
+  mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>::AddPtr
+      p = mUniqifier->lookupForAdd(*rs);
+  if (!p) {
+    dictIx = mUniqifier->count();
+    // If this ever fails, Extents::dictIx will need to be changed to be a
+    // type wider than the current uint16_t.
+    MOZ_RELEASE_ASSERT(dictIx < (1 << 16));
+    // This returns `false` on OOM.  We ignore the return value since we asked
+    // for it to use the InfallibleAllocPolicy.
+    DebugOnly<bool> addedOK = mUniqifier->add(p, *rs, dictIx);
+    MOZ_ASSERT(addedOK);
+  } else {
+    dictIx = p->value();
+  }
+
+  uint32_t offset = (uint32_t)(avma - mMapMinAVMA);
+  while (len > 0) {
+    // Because Extents::len is a uint16_t, we have to add multiple `mExtents`
+    // entries to cover the case where `len` is equal to or greater than 2^16.
+    // This happens only exceedingly rarely.  In order to get more test
+    // coverage on what would otherwise be a very low probability (less than
+    // 0.0002%) corner case, we do this in steps of 4095.  On libxul.so as of
+    // Sept 2020, this increases the number of `mExtents` entries by about
+    // 0.05%, hence has no meaningful effect on space use, but increases the
+    // use of this corner case, and hence its test coverage, by a factor of 250.
+    uint32_t this_step_len = (len > 4095) ? 4095 : len;
+    mExtents.emplace_back(offset, this_step_len, dictIx);
+    offset += this_step_len;
+    len -= this_step_len;
+  }
+}
+
+// Append |pfxi| to the vector of prefix instructions and return the index at
+// which it was stored.  As a side effect the map is marked non-searchable.
+uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
+  mUsable = false;
+  // The new element lands at the current end of the vector.
+  const size_t newIndex = mPfxInstrs.size();
+  mPfxInstrs.push_back(pfxi);
+  return newIndex;
+}
+
+// Prepare the map for searching, by sorting it, de-overlapping entries and
+// removing any resulting zero-length entries.  At the start of this routine,
+// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero
+// length, as a result of the checks in AddRuleSet().
+//
+// On the normal path this also builds `mDictionary` from `mUniqifier`,
+// releases `mUniqifier`, logs a one-line summary and sets `mUsable`.  On the
+// two early-return paths below (no Extents, or a degenerate map) the
+// uniqifier is released but `mUsable` is left unchanged.
+void SecMap::PrepareRuleSets() {
+  // At this point, the de-duped RuleSets are in `mUniqifier`, and
+  // `mDictionary` is empty.  This method will, amongst other things, copy
+  // them into `mDictionary` in order of their assigned dictionary-index
+  // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`;
+  // after this method, it has no further use.
+  MOZ_RELEASE_ASSERT(mUniqifier);
+  MOZ_RELEASE_ASSERT(mDictionary.empty());
+
+  if (mExtents.empty()) {
+    // Nothing was added, so there is nothing to sort or to look up.
+    mUniqifier->clear();
+    mUniqifier = nullptr;
+    return;
+  }
+
+  if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) {
+    // The map is empty.  This should never happen.
+    mExtents.clear();
+    mUniqifier->clear();
+    mUniqifier = nullptr;
+    return;
+  }
+  MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA);
+
+  // We must have at least one Extent, and as a consequence there must be at
+  // least one entry in the uniqifier.
+  MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty());
+
+#ifdef DEBUG
+  // Check invariants on incoming Extents.
+  for (size_t i = 0; i < mExtents.size(); ++i) {
+    Extent* ext = &mExtents[i];
+    uint32_t len = ext->len();
+    MOZ_ASSERT(len > 0);
+    MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */);
+    uint32_t offset = ext->offset();
+    uintptr_t avma = mMapMinAVMA + (uintptr_t)offset;
+    // Upper bounds test.  There's no lower bounds test because `offset` is a
+    // positive displacement from `mMapMinAVMA`, so a small underrun will
+    // manifest as `len` being close to 2^32.
+    MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA);
+  }
+#endif
+
+  // Sort by start addresses.
+  std::sort(mExtents.begin(), mExtents.end(),
+            [](const Extent& ext1, const Extent& ext2) {
+              return ext1.offset() < ext2.offset();
+            });
+
+  // Iteratively truncate any overlaps and remove any zero length
+  // entries that might result, or that may have been present
+  // initially.  Unless the input is seriously screwy, this is
+  // expected to iterate only once.
+  while (true) {
+    size_t i;
+    size_t n = mExtents.size();
+    size_t nZeroLen = 0;
+
+    if (n == 0) {
+      break;
+    }
+
+    // Pass 1: shorten each entry that runs into its successor, and count
+    // the zero-length entries among entries 0 .. n-2.
+    for (i = 1; i < n; ++i) {
+      Extent* prev = &mExtents[i - 1];
+      Extent* here = &mExtents[i];
+      MOZ_ASSERT(prev->offset() <= here->offset());
+      if (prev->offset() + prev->len() > here->offset()) {
+        prev->setLen(here->offset() - prev->offset());
+      }
+      if (prev->len() == 0) {
+        nZeroLen++;
+      }
+    }
+
+    // The loop above never inspects the last entry as a |prev|, so account
+    // for it separately.
+    if (mExtents[n - 1].len() == 0) {
+      nZeroLen++;
+    }
+
+    // At this point, the entries are in-order and non-overlapping.
+    // If none of them are zero-length, we are done.
+    if (nZeroLen == 0) {
+      break;
+    }
+
+    // Slide back the entries to remove the zero length ones.
+    size_t j = 0;  // The write-point.
+    for (i = 0; i < n; ++i) {
+      if (mExtents[i].len() == 0) {
+        continue;
+      }
+      if (j != i) {
+        mExtents[j] = mExtents[i];
+      }
+      ++j;
+    }
+    MOZ_ASSERT(i == n);
+    MOZ_ASSERT(nZeroLen <= n);
+    MOZ_ASSERT(j == n - nZeroLen);
+    // Drop the now-unused tail, one element per removed entry.
+    while (nZeroLen > 0) {
+      mExtents.pop_back();
+      nZeroLen--;
+    }
+
+    MOZ_ASSERT(mExtents.size() == j);
+  }
+
+  size_t nExtents = mExtents.size();
+
+#ifdef DEBUG
+  // Do a final check on the extents: their address ranges must be
+  // ascending, non overlapping, non zero sized.
+  if (nExtents > 0) {
+    MOZ_ASSERT(mExtents[0].len() > 0);
+    for (size_t i = 1; i < nExtents; ++i) {
+      const Extent* prev = &mExtents[i - 1];
+      const Extent* here = &mExtents[i];
+      MOZ_ASSERT(prev->offset() < here->offset());
+      MOZ_ASSERT(here->len() > 0);
+      MOZ_ASSERT(prev->offset() + prev->len() <= here->offset());
+    }
+  }
+#endif
+
+  // Create the final dictionary by enumerating the uniqifier.
+  size_t nUniques = mUniqifier->count();
+
+  // Pre-fill the dictionary with zeroed placeholders so that each RuleSet
+  // can then be stored directly at its assigned index.
+  RuleSet dummy;
+  mozilla::PodZero(&dummy);
+
+  mDictionary.reserve(nUniques);
+  for (size_t i = 0; i < nUniques; i++) {
+    mDictionary.push_back(dummy);
+  }
+
+  for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) {
+    MOZ_RELEASE_ASSERT(iter.get().value() < nUniques);
+    mDictionary[iter.get().value()] = iter.get().key();
+  }
+
+  // The uniqifier has served its purpose; drop it.
+  mUniqifier = nullptr;
+
+  char buf[150];
+  SprintfLiteral(
+      buf,
+      "PrepareRuleSets: %lu extents, %lu rulesets, "
+      "avma min/max 0x%llx, 0x%llx\n",
+      (unsigned long int)nExtents, (unsigned long int)mDictionary.size(),
+      (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA);
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  // Is now usable for binary search.
+  mUsable = true;
+
+#if 0
+  mLog("\nRulesets after preening\n");
+  for (size_t i = 0; i < nExtents; ++i) {
+    const Extent* extent = &mExtents[i];
+    uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset();
+    mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog);
+    mLog("\n");
+  }
+  mLog("\n");
+#endif
+}
+
+// Return true if this SecMap currently holds no Extents.
+bool SecMap::IsEmpty() { return mExtents.empty(); }
+
+// Report the heap memory attributed to this SecMap, as measured by
+// |aMallocSizeOf|: the object itself, the backing stores of its three
+// vectors and, if it still exists, the uniqifier table.
+size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  size_t total = aMallocSizeOf(this);
+
+  // Measuring a std::vector's buffer through data() might not be safe with
+  // every std::vector implementation, but it appears to work for now.
+  total += aMallocSizeOf(mPfxInstrs.data());
+
+  if (mUniqifier) {
+    total += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf);
+  }
+
+  total += aMallocSizeOf(mDictionary.data());
+  total += aMallocSizeOf(mExtents.data());
+  return total;
+}
+
+////////////////////////////////////////////////////////////////
+// SegArray                                                   //
+////////////////////////////////////////////////////////////////
+
+// A SegArray holds a set of address ranges that together exactly
+// cover an address range, with no overlaps or holes.  Each range has
+// an associated value, which in this case has been specialised to be
+// a simple boolean.  The representation is kept to minimal canonical
+// form in which adjacent ranges with the same associated value are
+// merged together.  Each range is represented by a |struct Seg|.
+//
+// SegArrays are used to keep track of which parts of the address
+// space are known to contain instructions.
+class SegArray {
+ public:
+  // Set the value associated with every address in [lo, hi] (both ends
+  // inclusive) to |val|.  A request with lo > hi is ignored.
+  void add(uintptr_t lo, uintptr_t hi, bool val) {
+    if (lo > hi) {
+      return;
+    }
+    // Make sure a segment starts exactly at |lo| and another one starts just
+    // after |hi|, so that [lo, hi] is made up of whole segments.  There is
+    // nothing after UINTPTR_MAX, hence the guard.
+    split_at(lo);
+    if (hi < UINTPTR_MAX) {
+      split_at(hi + 1);
+    }
+    std::vector<Seg>::size_type iLo, iHi, i;
+    iLo = find(lo);
+    iHi = find(hi);
+    for (i = iLo; i <= iHi; ++i) {
+      mSegs[i].val = val;
+    }
+    // Restore the canonical form: merge equal-valued neighbours.
+    preen();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // If |addr| lies in a segment whose value is true, store that segment's
+  // inclusive bounds in |*rx_min| and |*rx_max| and return true.  Otherwise
+  // return false and leave the outputs untouched.
+  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
+                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
+    std::vector<Seg>::size_type i = find(addr);
+    // find() reports failure as size_type(-1); never index with that.
+    if (i >= mSegs.size() || !mSegs[i].val) {
+      return false;
+    }
+    *rx_min = mSegs[i].lo;
+    *rx_max = mSegs[i].hi;
+    return true;
+  }
+
+  // Start out with one segment covering the whole address space, with value
+  // false.
+  SegArray() {
+    Seg s(0, UINTPTR_MAX, false);
+    mSegs.push_back(s);
+  }
+
+ private:
+  // One address range [lo, hi], both ends inclusive, and its value.
+  struct Seg {
+    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
+    uintptr_t lo;
+    uintptr_t hi;
+    bool val;
+  };
+
+  // Merge every run of adjacent segments that carry the same value into a
+  // single segment.
+  void preen() {
+    // An index is used rather than an iterator: the loop must stay on the
+    // same element after an erase, and stepping an iterator back from
+    // begin() in order to do that would be undefined behaviour.
+    std::vector<Seg>::size_type i = 0;
+    while (i + 1 < mSegs.size()) {
+      if (mSegs[i].val != mSegs[i + 1].val) {
+        ++i;
+        continue;
+      }
+      mSegs[i].hi = mSegs[i + 1].hi;
+      mSegs.erase(mSegs.begin() + i + 1);
+      // Do not advance, so as not to miss an opportunity to merge
+      // with the entry after this one.
+    }
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Return the index of the segment containing address |a|, or
+  // size_type(-1) if there is none.
+  std::vector<Seg>::size_type find(uintptr_t a) {
+    // lo and hi are signed so that |hi = mid - 1| can go below zero and
+    // terminate the loop.
+    long int lo = 0;
+    long int hi = (long int)mSegs.size() - 1;
+    while (true) {
+      // The unsearched space is lo .. hi inclusive.
+      if (lo > hi) {
+        // Not found.  This can't happen.
+        return (std::vector<Seg>::size_type)(-1);
+      }
+      long int mid = lo + ((hi - lo) / 2);
+      uintptr_t mid_lo = mSegs[mid].lo;
+      uintptr_t mid_hi = mSegs[mid].hi;
+      if (a < mid_lo) {
+        hi = mid - 1;
+        continue;
+      }
+      if (a > mid_hi) {
+        lo = mid + 1;
+        continue;
+      }
+      return (std::vector<Seg>::size_type)mid;
+    }
+  }
+
+  // Ensure that some segment starts exactly at |a|, by splitting the segment
+  // that contains |a| in two if necessary.  Both halves keep the value of
+  // the segment they came from.
+  void split_at(uintptr_t a) {
+    std::vector<Seg>::size_type i = find(a);
+    if (mSegs[i].lo == a) {
+      return;
+    }
+    // Take a copy first so that insert() is not handed a reference into the
+    // vector it is about to modify.
+    const Seg original = mSegs[i];
+    mSegs.insert(mSegs.begin() + i + 1, original);
+    mSegs[i].hi = a - 1;
+    mSegs[i + 1].lo = a;
+  }
+
+  // Debugging aid: print all segments to stdout.
+  void show() {
+    printf("<< %d entries:\n", (int)mSegs.size());
+    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
+         ++iter) {
+      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
+             (unsigned long long int)(*iter).hi,
+             (*iter).val ? "true" : "false");
+    }
+    printf(">>\n");
+  }
+
+  // The segments, in ascending address order.
+  std::vector<Seg> mSegs;
+};
+
+////////////////////////////////////////////////////////////////
+// PriMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// The primary map: an address-ordered collection of SecMaps, searchable by
+// instruction address.
+class PriMap {
+ public:
+  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Find the SecMap covering |ia| and return the RuleSet it has for |ia|
+  // together with that SecMap's PfxInstr vector.  Both halves of the pair
+  // are null when no SecMap covers |ia|.
+  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
+    typedef pair<const RuleSet*, const vector<PfxInstr>*> Result;
+    SecMap* owner = FindSecMap(ia);
+    if (!owner) {
+      return Result(nullptr, nullptr);
+    }
+    return Result(owner->FindRuleSet(ia), owner->GetPfxInstrs());
+  }
+
+  // Take ownership of |aSecMap| and insert it at its place in the ordered
+  // vector.  Overlaps with SecMaps already present are not allowed.  An
+  // empty SecMap is ignored and left with the caller.
+  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
+    // An empty SecMap can't be added to the PriMap.  That is harmless,
+    // since nothing could ever be found in it anyway.
+    if (aSecMap->IsEmpty()) {
+      return;
+    }
+
+    // Walk the existing SecMaps to find the insertion point, checking as we
+    // go that each one has a sane address range.
+    // FIXME: this gives a cost that is O(N^2) in the total number of
+    // shared objects in the system.  ToDo: better.
+    MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA);
+
+    const size_t oldCount = mSecMaps.size();
+    size_t insertAt = 0;
+    while (insertAt < oldCount) {
+      const mozilla::UniquePtr<SecMap>& existing = mSecMaps[insertAt];
+      MOZ_ASSERT(existing->mMapMinAVMA <= existing->mMapMaxAVMA);
+      if (aSecMap->mMapMinAVMA < existing->mMapMaxAVMA) {
+        // |aSecMap| belongs immediately before mSecMaps[insertAt].
+        break;
+      }
+      ++insertAt;
+    }
+    MOZ_ASSERT(insertAt <= oldCount);
+
+    // When no such element was found, |insertAt| equals the old size and
+    // this appends at the end.
+    mSecMaps.insert(mSecMaps.begin() + insertAt, std::move(aSecMap));
+
+    char msg[100];
+    SprintfLiteral(msg, "AddSecMap: now have %d SecMaps\n",
+                   (int)mSecMaps.size());
+    msg[sizeof(msg) - 1] = 0;
+    mLog(msg);
+  }
+
+  // Remove and delete every SecMap whose address range intersects
+  // [avma_min, avma_max].
+  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
+    MOZ_ASSERT(avma_min <= avma_max);
+    // Walk from the last element towards the first, so that the special
+    // case where the range is the entire address space completes in time
+    // proportional to the number of elements.
+    for (size_t remaining = mSecMaps.size(); remaining > 0; --remaining) {
+      const size_t idx = remaining - 1;
+      const mozilla::UniquePtr<SecMap>& candidate = mSecMaps[idx];
+      const bool disjoint = candidate->mMapMaxAVMA < avma_min ||
+                            avma_max < candidate->mMapMinAVMA;
+      if (disjoint) {
+        continue;
+      }
+      // Erasing slides the elements above |idx| down over the hole.
+      mSecMaps.erase(mSecMaps.begin() + idx);
+    }
+  }
+
+  // Return the number of currently contained SecMaps.
+  size_t CountSecMaps() { return mSecMaps.size(); }
+
+  // Report the heap memory attributed to this PriMap, as measured by
+  // |aMallocSizeOf|: the object, the vector's buffer and every SecMap.
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    size_t total = aMallocSizeOf(this);
+
+    // Measuring a std::vector's buffer through data() might not be safe
+    // with every std::vector implementation, but it appears to work for now.
+    total += aMallocSizeOf(mSecMaps.data());
+
+    for (const mozilla::UniquePtr<SecMap>& secMap : mSecMaps) {
+      total += secMap->SizeOfIncludingThis(aMallocSizeOf);
+    }
+
+    return total;
+  }
+
+ private:
+  // RUNS IN NO-MALLOC CONTEXT
+  // Binary search mSecMaps for the SecMap whose [min, max] address range
+  // contains |ia|.  Returns nullptr if there is none.
+  SecMap* FindSecMap(uintptr_t ia) {
+    // The bounds are signed so that |hi = mid - 1| can drop below zero and
+    // end the loop.
+    long int lo = 0;
+    long int hi = (long int)mSecMaps.size() - 1;
+    // The unsearched space is lo .. hi, inclusive.
+    while (lo <= hi) {
+      long int mid = lo + ((hi - lo) / 2);
+      SecMap* probe = mSecMaps[mid].get();
+      if (ia < probe->mMapMinAVMA) {
+        hi = mid - 1;
+      } else if (ia > probe->mMapMaxAVMA) {
+        lo = mid + 1;
+      } else {
+        return probe;
+      }
+    }
+    // not found
+    return nullptr;
+  }
+
+ private:
+  // sorted array of per-object ranges, non overlapping, non empty
+  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;
+
+  // a logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+////////////////////////////////////////////////////////////////
+// LUL                                                        //
+////////////////////////////////////////////////////////////////
+
+// Log the string |_str| through |mLog|, prefixed with the current process
+// id, the current thread id and the address of the object.  The expansion
+// refers to |this| and |mLog|, so it can only be used where both are in
+// scope.  The message is formatted into a 200-byte local buffer.
+#define LUL_LOG(_str)                                                          \
+  do {                                                                         \
+    char buf[200];                                                             \
+    SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
+                   uint64_t(profiler_current_process_id().ToNumber()),         \
+                   uint64_t(profiler_current_thread_id().ToNumber()), this,    \
+                   (_str));                                                    \
+    buf[sizeof(buf) - 1] = 0;                                                  \
+    mLog(buf);                                                                 \
+  } while (0)
+
+// Construct a LUL object that logs through |aLog|.  The object starts in
+// admin mode, records the constructing thread as the admin thread, and
+// allocates an empty PriMap, SegArray and UniqueStringUniverse.
+LUL::LUL(void (*aLog)(const char*))
+    : mLog(aLog),
+      mAdminMode(true),
+      mAdminThreadId(profiler_current_thread_id()),
+      mPriMap(new PriMap(aLog)),
+      mSegArray(new SegArray()),
+      mUSU(new UniqueStringUniverse()) {
+  LUL_LOG("LUL::LUL: Created object");
+}
+
+// Log the destruction, then delete the PriMap, SegArray and
+// UniqueStringUniverse allocated by the constructor.
+LUL::~LUL() {
+  // Log first: LUL_LOG uses mLog, which is cleared below.
+  LUL_LOG("LUL::~LUL: Destroyed object");
+  delete mPriMap;
+  delete mSegArray;
+  mLog = nullptr;
+  delete mUSU;
+}
+
+// If the difference between |mStats| and |mStatsPrevious| has reached 5000,
+// log a one-line breakdown of that difference and make the current stats
+// the new baseline.  Otherwise do nothing.
+void LUL::MaybeShowStats() {
+  // This is racy: if another thread updates mStats between the reads below,
+  // the total and the per-kind figures may not add up
+  //   (total == Context + CFI + FP).
+  // It's only stats printing, so that is tolerated.
+  const uint32_t newTotal = mStats - mStatsPrevious;
+  if (newTotal < 5000) {
+    return;
+  }
+
+  const uint32_t newContext = mStats.mContext - mStatsPrevious.mContext;
+  const uint32_t newCFI = mStats.mCFI - mStatsPrevious.mCFI;
+  const uint32_t newFP = mStats.mFP - mStatsPrevious.mFP;
+  mStatsPrevious = mStats;
+
+  char line[200];
+  SprintfLiteral(line,
+                 "LUL frame stats: TOTAL %5u"
+                 "    CTX %4u    CFI %4u    FP %4u",
+                 newTotal, newContext, newCFI, newFP);
+  line[sizeof(line) - 1] = 0;
+  mLog(line);
+}
+
+// Report the heap memory attributed to this object, as measured by
+// |aMallocSizeOf|: the object itself plus its PriMap.
+size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  size_t total = aMallocSizeOf(this);
+  total += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
+
+  // Not measured (may be added later if DMD finds it is worthwhile):
+  // - mSegArray
+  // - mUSU
+
+  return total;
+}
+
+// Leave admin mode.  Must be called on the admin thread (release-asserted).
+void LUL::EnableUnwinding() {
+  LUL_LOG("LUL::EnableUnwinding");
+  // Don't assert for Admin mode here.  That is, tolerate a call here
+  // if we are already in Unwinding mode.
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mAdminMode = false;
+}
+
+// Tell LUL that |aSize| bytes starting at |aRXavma| now map the object
+// |aFileName|.  Unwind data is read from |aMappedImage| if that is non-null,
+// and by file name otherwise.  The result is added to the primary map and
+// the range is recorded as code in the segment array.  Only allowed in
+// admin mode and on the admin thread.  A size of zero is logged and ignored.
+void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
+                         const void* aMappedImage) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  char msg[200];
+  SprintfLiteral(msg, "NotifyMap %llx %llu %s\n",
+                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
+                 aFileName);
+  msg[sizeof(msg) - 1] = 0;
+  mLog(msg);
+
+  // A SecMap can't cover more than 2^32-1 bytes of address space; see the
+  // definition of SecMap for why.  Rather than crash the system, clamp the
+  // size.  This is never actually expected to happen.
+  if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) {
+    aSize = (uintptr_t)0xFFFFFFFF;
+  }
+  MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF);
+
+  // Ignore obviously-stupid notifications.
+  if (aSize == 0) {
+    return;
+  }
+
+  // Here's a new mapping, for this object.
+  mozilla::UniquePtr<SecMap> secMap =
+      mozilla::MakeUnique<SecMap>(aRXavma, (uint32_t)aSize, mLog);
+
+  // Read CFI or EXIDX unwind data into |secMap|.
+  if (aMappedImage) {
+    (void)lul::ReadSymbolDataInternal(
+        (const uint8_t*)aMappedImage, string(aFileName),
+        std::vector<string>(), secMap.get(), (void*)aRXavma, aSize, mUSU, mLog);
+  } else {
+    (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
+                              secMap.get(), (void*)aRXavma, aSize, mUSU, mLog);
+  }
+
+  mLog("NotifyMap .. preparing entries\n");
+
+  secMap->PrepareRuleSets();
+
+  SprintfLiteral(msg, "NotifyMap got %lld entries\n",
+                 (long long int)secMap->Size());
+  msg[sizeof(msg) - 1] = 0;
+  mLog(msg);
+
+  // Hand it over to the primary map (the top level set of mapped objects).
+  mPriMap->AddSecMap(std::move(secMap));
+
+  // Record the mapping in the segment array too, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+}
+
+// Tell LUL that the |aSize| bytes starting at |aRXavma| contain code, without
+// supplying any unwind data for them.  Only allowed in admin mode and on the
+// admin thread.  A size of zero is logged and otherwise ignored.
+void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  char msg[200];
+  SprintfLiteral(msg, "NotifyExecutableArea %llx %llu\n",
+                 (unsigned long long int)aRXavma,
+                 (unsigned long long int)aSize);
+  msg[sizeof(msg) - 1] = 0;
+  mLog(msg);
+
+  // Ignore obviously-stupid notifications.
+  if (aSize == 0) {
+    return;
+  }
+
+  // Record the range in the segment array, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+}
+
+// Tell LUL that the address range [aRXavmaMin, aRXavmaMax] is about to be
+// unmapped.  Every SecMap intersecting the range is removed from the primary
+// map, and the range is marked as not containing code in the segment array.
+// Only allowed in admin mode and on the admin thread.
+void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  char buf[100];
+  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
+                 (unsigned long long int)aRXavmaMin,
+                 (unsigned long long int)aRXavmaMax);
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
+
+  // Remove from the primary map, any secondary maps that intersect
+  // with the address range.  Also delete the secondary maps.
+  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
+
+  // Tell the segment array that the address range no longer
+  // contains valid code.
+  mSegArray->add(aRXavmaMin, aRXavmaMax, false);
+
+  // |buf| is reused for the second log message.
+  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
+                 (int)mPriMap->CountSecMaps());
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+}
+
+// Return the number of SecMaps currently held by the primary map.  Only
+// allowed in admin mode and on the admin thread (both release-asserted).
+size_t LUL::CountMappings() {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  return mPriMap->CountSecMaps();
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+//
+// Reads one machine word from the stack image |aStackImg| at address |aAddr|.
+// Returns an invalid TaggedUWord if |aAddr| is itself invalid, or if the word
+// [aAddr, aAddr + sizeof(uintptr_t)) does not lie entirely inside the image,
+// that is, inside [mStartAvma, mStartAvma + mLen).
+static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
+  if (!aAddr.Valid()) {
+    return TaggedUWord();
+  }
+
+  // Lower limit check.  |aAddr.Value()| is the lowest requested address
+  // and |aStackImg->mStartAvma| is the lowest address we actually have,
+  // so the comparison is straightforward.
+  if (aAddr.Value() < aStackImg->mStartAvma) {
+    return TaggedUWord();
+  }
+
+  // Upper limit check.  We must compute the highest requested address
+  // and the highest address we actually have, but being careful to
+  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
+  // 3/7 values below that, then we will get overflow.  See bug #1245477.
+  typedef CheckedInt<uintptr_t> CheckedUWord;
+  CheckedUWord highest_requested_plus_one =
+      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
+  CheckedUWord highest_available_plus_one =
+      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
+  if (!highest_requested_plus_one.isValid()     // overflow?
+      || !highest_available_plus_one.isValid()  // overflow?
+      || (highest_requested_plus_one.value() >
+          highest_available_plus_one.value())) {  // in range?
+    return TaggedUWord();
+  }
+
+  // Fetch the word with memcpy rather than by casting the byte address to
+  // uintptr_t*.  Nothing above requires |aAddr| to be word-aligned, and a
+  // direct load through a misaligned, type-punned pointer is undefined
+  // behaviour.  memcpy does not allocate, so this stays no-malloc safe.
+  uintptr_t word;
+  memcpy(&word, &aStackImg->mContents[aAddr.Value() - aStackImg->mStartAvma],
+         sizeof(word));
+  return TaggedUWord(word);
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+//
+// Maps the register number |aReg| to its value: |aCFA| for DW_REG_CFA,
+// otherwise the matching field of |aOldRegs| for the architecture being
+// built.  An unrecognised register number asserts in debug builds and yields
+// an invalid value.
+static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
+                               TaggedUWord aCFA) {
+  // Stays invalid unless one of the cases below recognises |aReg|.
+  TaggedUWord value = TaggedUWord();
+
+  switch (aReg) {
+    case DW_REG_CFA:
+      value = aCFA;
+      break;
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    case DW_REG_INTEL_XBP:
+      value = aOldRegs->xbp;
+      break;
+    case DW_REG_INTEL_XSP:
+      value = aOldRegs->xsp;
+      break;
+    case DW_REG_INTEL_XIP:
+      value = aOldRegs->xip;
+      break;
+#elif defined(GP_ARCH_arm)
+    case DW_REG_ARM_R7:
+      value = aOldRegs->r7;
+      break;
+    case DW_REG_ARM_R11:
+      value = aOldRegs->r11;
+      break;
+    case DW_REG_ARM_R12:
+      value = aOldRegs->r12;
+      break;
+    case DW_REG_ARM_R13:
+      value = aOldRegs->r13;
+      break;
+    case DW_REG_ARM_R14:
+      value = aOldRegs->r14;
+      break;
+    case DW_REG_ARM_R15:
+      value = aOldRegs->r15;
+      break;
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:
+      value = aOldRegs->x29;
+      break;
+    case DW_REG_AARCH64_X30:
+      value = aOldRegs->x30;
+      break;
+    case DW_REG_AARCH64_SP:
+      value = aOldRegs->sp;
+      break;
+#elif defined(GP_ARCH_mips64)
+    case DW_REG_MIPS_SP:
+      value = aOldRegs->sp;
+      break;
+    case DW_REG_MIPS_FP:
+      value = aOldRegs->fp;
+      break;
+    case DW_REG_MIPS_PC:
+      value = aOldRegs->pc;
+      break;
+#else
+#  error "Unsupported arch"
+#endif
+    default:
+      MOZ_ASSERT(0);
+      break;
+  }
+
+  return value;
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// See prototype for comment.
+//
+// Runs the PfxInstr sequence in |aPfxInstrs| that begins at index |start| on
+// a small fixed-size operand stack, and returns the value left on top of that
+// stack.  Any malformed input (bad start index, missing PX_Start, running off
+// the end, operand stack overflow or underflow, unknown opcode) produces an
+// invalid value instead.
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
+                            TaggedUWord aCFA, const StackImage* aStackImg,
+                            const vector<PfxInstr>& aPfxInstrs) {
+  // The operand stack.  |top| indexes the highest in-use slot and is -1 when
+  // the stack is empty.
+  const int N_STACK = 10;
+  TaggedUWord stack[N_STACK];
+  int top = -1;
+  for (int slot = 0; slot < N_STACK; slot++) {
+    stack[slot] = TaggedUWord();
+  }
+
+  // Both helpers abandon the whole evaluation, returning an invalid value,
+  // rather than step outside the operand stack.
+#define PUSH(_tuw)                                               \
+  do {                                                           \
+    if (top >= N_STACK - 1) return TaggedUWord(); /* overflow */ \
+    stack[++top] = (_tuw);                                       \
+  } while (0)
+
+#define POP(_lval)                                     \
+  do {                                                 \
+    if (top < 0) return TaggedUWord(); /* underflow */ \
+    _lval = stack[top--];                              \
+  } while (0)
+
+  // The start index must refer to an existing instruction.
+  const size_t nInstrs = aPfxInstrs.size();
+  if (start < 0 || (size_t)start >= nInstrs) {
+    return TaggedUWord();
+  }
+
+  // The sequence must open with PX_Start.  If it doesn't, something is
+  // seriously wrong.  A nonzero operand on PX_Start asks for the CFA to be
+  // pushed before evaluation begins, as required by the original
+  // DW_OP_*expression* CFI.
+  const PfxInstr first = aPfxInstrs[start];
+  if (first.mOpcode != PX_Start) {
+    return TaggedUWord();
+  }
+  if (first.mOperand != 0) {
+    PUSH(aCFA);
+  }
+
+  // Step through the instructions that follow PX_Start until PX_End.
+  size_t cursor = (size_t)start + 1;
+  for (;;) {
+    if (cursor >= nInstrs) {
+      return TaggedUWord();  // ran off the end of the sequence
+    }
+
+    const PfxInstr insn = aPfxInstrs[cursor++];
+    if (insn.mOpcode == PX_End) {
+      break;  // we're done
+    }
+
+    switch (insn.mOpcode) {
+      case PX_Start:
+        // Only legal as the very first instruction.
+        return TaggedUWord();
+      case PX_End:
+        // Already handled above, so this can't be reached.
+        MOZ_ASSERT(0);
+        return TaggedUWord();
+      case PX_SImm32:
+        PUSH(TaggedUWord((intptr_t)insn.mOperand));
+        break;
+      case PX_DwReg: {
+        DW_REG_NUMBER regNo = (DW_REG_NUMBER)insn.mOperand;
+        MOZ_ASSERT(regNo != DW_REG_CFA);
+        PUSH(EvaluateReg(regNo, aOldRegs, aCFA));
+        break;
+      }
+      case PX_Deref: {
+        TaggedUWord addr;
+        POP(addr);
+        PUSH(DerefTUW(addr, aStackImg));
+        break;
+      }
+      case PX_Add:
+      case PX_Sub:
+      case PX_And:
+      case PX_Or:
+      case PX_CmpGES:
+      case PX_Shl: {
+        // Binary operators: the right operand is on top of the stack, the
+        // left operand just beneath it.
+        TaggedUWord rhs, lhs;
+        POP(rhs);
+        POP(lhs);
+        switch (insn.mOpcode) {
+          case PX_Add:
+            PUSH(lhs + rhs);
+            break;
+          case PX_Sub:
+            PUSH(lhs - rhs);
+            break;
+          case PX_And:
+            PUSH(lhs & rhs);
+            break;
+          case PX_Or:
+            PUSH(lhs | rhs);
+            break;
+          case PX_CmpGES:
+            PUSH(lhs.CmpGEs(rhs));
+            break;
+          case PX_Shl:
+            PUSH(lhs << rhs);
+            break;
+          default:
+            // Not reachable: the enclosing case labels list exactly the
+            // opcodes handled above.
+            break;
+        }
+        break;
+      }
+      default:
+        MOZ_ASSERT(0);
+        return TaggedUWord();
+    }
+  }
+
+  // Evaluation finished.  The result is whatever is on top of the stack; an
+  // empty stack yields an invalid value.
+  return top >= 0 ? stack[top] : TaggedUWord();
+
+#undef PUSH
+#undef POP
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+//
+// Evaluates this expression according to |mHow|:
+//   UNKNOWN  - always invalid.
+//   NODEREF  - the value of register |mReg| plus |mOffset|.
+//   DEREF    - the word in |aStackImg| at (register |mReg| plus |mOffset|).
+//   PFXEXPR  - the result of running |*aPfxInstrs| from index |mOffset|.
+// Anything that cannot be evaluated yields an invalid value.
+TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
+                                const StackImage* aStackImg,
+                                const vector<PfxInstr>* aPfxInstrs) const {
+  switch (mHow) {
+    case UNKNOWN:
+      break;
+    case NODEREF:
+    case DEREF: {
+      // Both forms start from register-plus-offset; DEREF then loads from it.
+      TaggedUWord addr = EvaluateReg(mReg, aOldRegs, aCFA);
+      addr = addr + TaggedUWord((intptr_t)mOffset);
+      return mHow == DEREF ? DerefTUW(addr, aStackImg) : addr;
+    }
+    case PFXEXPR:
+      // Without the instruction vector there is nothing to evaluate.
+      MOZ_ASSERT(aPfxInstrs);
+      return aPfxInstrs ? EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg,
+                                          *aPfxInstrs)
+                        : TaggedUWord();
+    default:
+      MOZ_ASSERT(0);
+      break;
+  }
+  return TaggedUWord();
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+//
+// Applies the rule set |aRS| to |*aRegs|, replacing the registers of the
+// current frame with those of its caller.  Every tracked register that could
+// not be recomputed is left invalid, so the caller can tell which ones were
+// recovered.  If not even a new PC was recovered, the caller will have to
+// abandon the unwind.
+static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
+                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
+  // The new values are all computed from the incoming ones, so keep a
+  // snapshot of those before anything in |*aRegs| is overwritten.
+  const UnwindRegs snapshot = *aRegs;
+
+  // An invalid value, used both to reset the registers and as the "old CFA".
+  const TaggedUWord inval = TaggedUWord();
+
+  // Start with every tracked register invalid.
+  // FIXME: Create and use instead: aRegs->SetAllInvalid();
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  aRegs->xbp = inval;
+  aRegs->xsp = inval;
+  aRegs->xip = inval;
+#elif defined(GP_ARCH_arm)
+  aRegs->r7 = inval;
+  aRegs->r11 = inval;
+  aRegs->r12 = inval;
+  aRegs->r13 = inval;
+  aRegs->r14 = inval;
+  aRegs->r15 = inval;
+#elif defined(GP_ARCH_arm64)
+  aRegs->x29 = inval;
+  aRegs->x30 = inval;
+  aRegs->sp = inval;
+  aRegs->pc = inval;
+#elif defined(GP_ARCH_mips64)
+  aRegs->sp = inval;
+  aRegs->fp = inval;
+  aRegs->pc = inval;
+#else
+#  error "Unsupported arch"
+#endif
+
+  // The CFA comes first, because the register rules may refer to it.  There
+  // is no previous CFA to hand in, hence |inval|.
+  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&snapshot, inval /*old cfa*/,
+                                               aStackImg, aPfxInstrs);
+
+  // A failed CFA computation is not fatal by itself: it only matters to the
+  // rules that actually mention the CFA.  So carry on regardless and
+  // evaluate the rule for each tracked register.
+
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  aRegs->xbp =
+      aRS->mXbpExpr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->xsp =
+      aRS->mXspExpr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->xip =
+      aRS->mXipExpr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_arm)
+  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->r11 =
+      aRS->mR11expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->r12 =
+      aRS->mR12expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->r13 =
+      aRS->mR13expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->r14 =
+      aRS->mR14expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->r15 =
+      aRS->mR15expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_arm64)
+  // Note that no rule is evaluated for |pc| here, so it stays invalid.
+  aRegs->x29 =
+      aRS->mX29expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->x30 =
+      aRS->mX30expr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_mips64)
+  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&snapshot, cfa, aStackImg, aPfxInstrs);
+#else
+#  error "Unsupported arch"
+#endif
+
+  // Done.  Whatever could not be computed is still marked invalid.
+}
+
+/* RUNS IN NO-MALLOC CONTEXT
+ *
+ * Unwinds the stack described by |aStartRegs| and |aStackImg|, writing one
+ * PC/SP pair per recovered frame, innermost frame first.
+ *
+ *   aFramePCs, aFrameSPs  Output arrays, each with room for |aFramesAvail|
+ *                         entries.  A frame whose SP is not valid is recorded
+ *                         with an SP of 0.
+ *   aFramesUsed           Set to the number of frames written.
+ *   aFramePointerFramesAcquired
+ *                         Incremented once for every frame recovered by
+ *                         frame-pointer chasing rather than from a RuleSet.
+ *                         It is not zeroed by this function, so the caller is
+ *                         expected to initialise it.
+ *   aFramesAvail          Capacity of the two output arrays.
+ *   aStartRegs            Register values for the innermost frame.
+ *   aStackImg             Copy of the stack, consulted for every memory read.
+ *
+ * The walk stops when the output arrays are full, when no valid PC is
+ * available, when SP moves in the wrong direction, or when a frame can be
+ * recovered neither from a RuleSet nor by the platform-specific fallbacks.
+ * Release-asserts that LUL is not in admin mode.
+ */
+void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
+                 /*OUT*/ uintptr_t* aFrameSPs,
+                 /*OUT*/ size_t* aFramesUsed,
+                 /*OUT*/ size_t* aFramePointerFramesAcquired,
+                 size_t aFramesAvail, UnwindRegs* aStartRegs,
+                 StackImage* aStackImg) {
+  MOZ_RELEASE_ASSERT(!mAdminMode);
+
+  /////////////////////////////////////////////////////////
+  // BEGIN UNWIND
+
+  *aFramesUsed = 0;
+
+  UnwindRegs regs = *aStartRegs;
+  TaggedUWord last_valid_sp = TaggedUWord();
+
+  while (true) {
+    if (DEBUG_MAIN) {
+      char buf[300];
+      mLog("\n");
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+      SprintfLiteral(
+          buf, "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
+          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
+          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
+          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#elif defined(GP_ARCH_arm)
+      SprintfLiteral(
+          buf,
+          "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
+          "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
+          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
+          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
+          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
+          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
+          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
+          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#elif defined(GP_ARCH_arm64)
+      SprintfLiteral(
+          buf,
+          "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
+          "  sp %d/%llx\n",
+          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
+          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
+          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#elif defined(GP_ARCH_mips64)
+      SprintfLiteral(
+          buf, "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
+          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
+          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#else
+#  error "Unsupported arch"
+#endif
+    }
+
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    TaggedUWord ia = regs.xip;
+    TaggedUWord sp = regs.xsp;
+#elif defined(GP_ARCH_arm)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
+    TaggedUWord sp = regs.r13;
+#elif defined(GP_ARCH_arm64)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
+    TaggedUWord sp = regs.sp;
+#elif defined(GP_ARCH_mips64)
+    TaggedUWord ia = regs.pc;
+    TaggedUWord sp = regs.sp;
+#else
+#  error "Unsupported arch"
+#endif
+
+    if (*aFramesUsed >= aFramesAvail) {
+      break;
+    }
+
+    // If we don't have a valid value for the PC, give up.
+    if (!ia.Valid()) {
+      break;
+    }
+
+    // If this is the innermost frame, record the SP value, which
+    // presumably is valid.  If this isn't the innermost frame, and we
+    // have a valid SP value, check that its SP value isn't less than
+    // the one we've seen so far, so as to catch potential SP value
+    // cycles.
+    if (*aFramesUsed == 0) {
+      last_valid_sp = sp;
+    } else {
+      MOZ_ASSERT(last_valid_sp.Valid());
+      if (sp.Valid()) {
+        if (sp.Value() < last_valid_sp.Value()) {
+          // Hmm, SP going in the wrong direction.  Let's stop.
+          break;
+        }
+        // Remember where we got to.
+        last_valid_sp = sp;
+      }
+    }
+
+    aFramePCs[*aFramesUsed] = ia.Value();
+    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
+    (*aFramesUsed)++;
+
+    // Find the RuleSet for the current IA, if any.  This will also
+    // query the backing (secondary) maps if it isn't found in the
+    // thread-local cache.
+
+    // If this isn't the innermost frame, back up into the calling insn.
+    if (*aFramesUsed > 1) {
+      ia = ia + TaggedUWord((uintptr_t)(-1));
+    }
+
+    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
+        mPriMap->Lookup(ia.Value());
+    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
+    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
+
+    if (DEBUG_MAIN) {
+      char buf[100];
+      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
+                     (unsigned long long int)ia.Value(), ruleset);
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+    }
+
+#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
+    /////////////////////////////////////////////
+    ////
+    // On 32 bit x86-linux, syscalls are often done via the VDSO
+    // function __kernel_vsyscall, which doesn't have a corresponding
+    // object that we can read debuginfo from.  That effectively kills
+    // off all stack traces for threads blocked in syscalls.  Hence
+    // special-case by looking at the code surrounding the program
+    // counter.
+    //
+    // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
+    // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
+    // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
+    // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
+    // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
+    // 0xf7757427 <__kernel_vsyscall+7>:	nop
+    // 0xf7757428 <__kernel_vsyscall+8>:	nop
+    // 0xf7757429 <__kernel_vsyscall+9>:	nop
+    // 0xf775742a <__kernel_vsyscall+10>:	nop
+    // 0xf775742b <__kernel_vsyscall+11>:	nop
+    // 0xf775742c <__kernel_vsyscall+12>:	nop
+    // 0xf775742d <__kernel_vsyscall+13>:	nop
+    // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
+    // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
+    // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
+    // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
+    // 0xf7757433 <__kernel_vsyscall+19>:	ret
+    //
+    // In cases where the sampled thread is blocked in a syscall, its
+    // program counter will point at "pop %ebp".  Hence we look for
+    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
+    // the corresponding register-recovery actions are:
+    //    new_ebp = *(old_esp + 0)
+    //    new_eip = *(old_esp + 12)
+    //    new_esp = old_esp + 16
+    //
+    // It may also be the case that the program counter points two
+    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
+    // the case where the syscall has been restarted but the thread
+    // hasn't been rescheduled.  The code below doesn't handle that;
+    // it could easily be made to.
+    //
+    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
+      uintptr_t insns_min, insns_max;
+      uintptr_t eip = ia.Value();
+      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
+      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
+        uint8_t* eipC = (uint8_t*)eip;
+        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
+            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
+          TaggedUWord sp_plus_0 = sp;
+          TaggedUWord sp_plus_12 = sp;
+          TaggedUWord sp_plus_16 = sp;
+          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
+          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
+          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
+          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
+          TaggedUWord new_esp = sp_plus_16;
+          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
+            regs.xbp = new_ebp;
+            regs.xip = new_eip;
+            regs.xsp = new_esp;
+            continue;
+          }
+        }
+      }
+    }
+    ////
+    /////////////////////////////////////////////
+#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
+
+    // So, do we have a ruleset for this address?  If so, use it now.
+    if (ruleset) {
+      if (DEBUG_MAIN) {
+        ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog);
+        mLog("\n");
+      }
+      // Use the RuleSet to compute the registers for the previous
+      // frame.  |regs| is modified in-place.
+      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
+      continue;
+    }
+
+#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
+    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
+    defined(GP_PLAT_amd64_freebsd)
+    // There's no RuleSet for the specified address.  On amd64/x86_linux, see if
+    // it's possible to recover the caller's frame by using the frame pointer.
+
+    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
+    // and assume the following layout:
+    //
+    //                 <--- new_SP
+    //   +----------+
+    //   |  new_IP  |  (return address)
+    //   +----------+
+    //   |  new_BP  |  <--- old_BP
+    //   +----------+
+    //   |   ....   |
+    //   |   ....   |
+    //   |   ....   |
+    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)
+
+    const size_t wordSzB = sizeof(uintptr_t);
+    TaggedUWord old_xsp = regs.xsp;
+
+    // points at new_BP ?
+    TaggedUWord old_xbp = regs.xbp;
+    // points at new_IP ?
+    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
+    // is the new_SP ?
+    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
+
+    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
+        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
+      // We don't need to do any range, alignment or validity checks for
+      // addresses passed to DerefTUW, since that performs them itself, and
+      // returns an invalid value on failure.  Any such value will poison
+      // subsequent uses, and we do a final check for validity before putting
+      // the computed values into |regs|.
+      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
+      if (new_xbp.Valid() && new_xbp.IsAligned() &&
+          old_xbp.Value() < new_xbp.Value()) {
+        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
+        TaggedUWord new_xsp = old_xbp_plus2;
+        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
+          regs.xbp = new_xbp;
+          regs.xip = new_xip;
+          regs.xsp = new_xsp;
+          (*aFramePointerFramesAcquired)++;
+          continue;
+        }
+      }
+    }
+#elif defined(GP_ARCH_arm64)
+    // Here is an example of generated code for prologue and epilogue..
+    //
+    // stp     x29, x30, [sp, #-16]!
+    // mov     x29, sp
+    // ...
+    // ldp     x29, x30, [sp], #16
+    // ret
+    //
+    // Next is another example of generated code.
+    //
+    // stp     x20, x19, [sp, #-32]!
+    // stp     x29, x30, [sp, #16]
+    // add     x29, sp, #0x10
+    // ...
+    // ldp     x29, x30, [sp, #16]
+    // ldp     x20, x19, [sp], #32
+    // ret
+    //
+    // Previous x29 and x30 register are stored in the address of x29 register.
+    // But since sp register value depends on local variables, we cannot compute
+    // previous sp register from current sp/fp/lr register and there is no
+    // regular rule for sp register in prologue. But since return address is lr
+    // register, if x29 is valid, we will get return address without sp
+    // register.
+    //
+    // So we assume the following layout that if no rule set. x29 is frame
+    // pointer, so we will be able to compute x29 and x30 .
+    //
+    //   +----------+  <--- new_sp (cannot compute)
+    //   |   ....   |
+    //   +----------+
+    //   |  new_lr  |  (return address)
+    //   +----------+
+    //   |  new_fp  |  <--- old_fp
+    //   +----------+
+    //   |   ....   |
+    //   |   ....   |
+    //   +----------+  <---- old_sp (arbitrary, but unused)
+
+    TaggedUWord old_fp = regs.x29;
+    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
+        last_valid_sp.Value() <= old_fp.Value()) {
+      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
+      if (new_fp.Valid() && new_fp.IsAligned() &&
+          old_fp.Value() < new_fp.Value()) {
+        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
+        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
+        if (new_lr.Valid()) {
+          regs.x29 = new_fp;
+          regs.x30 = new_lr;
+          // When using frame pointer to walk stack, we cannot compute sp
+          // register since we cannot compute sp register from fp/lr/sp
+          // register, and there is no regular rule to compute previous sp
+          // register. So mark as invalid.
+          regs.sp = TaggedUWord();
+          (*aFramePointerFramesAcquired)++;
+          continue;
+        }
+      }
+    }
+#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
+        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||
+        // defined(GP_PLAT_amd64_freebsd)
+
+    // We failed to recover a frame either using CFI or FP chasing, and we
+    // have no other ways to recover the frame.  So we have to give up.
+    break;
+
+  }  // top level unwind loop
+
+  // END UNWIND
+  /////////////////////////////////////////////////////////
+}
+
+////////////////////////////////////////////////////////////////
+// LUL Unit Testing                                           //
+////////////////////////////////////////////////////////////////
+
+// Number of bytes of stack that the unit test presents to LUL; see
+// GetAndCheckStackTrace.
+static const int LUL_UNIT_TEST_STACK_SIZE = 32768;
+
+#if defined(GP_ARCH_mips64)
+// Returns the contents of register $31 as seen inside this function.  The
+// function is never inlined.  NOTE(review): $31 is presumably the
+// return-address register, which would make the result a code address inside
+// the caller -- confirm against the MIPS ABI.
+static __attribute__((noinline)) unsigned long __getpc(void) {
+  unsigned long ra;
+  __asm__ volatile("move %0, $31" : "=r"(ra));
+  return ra;
+}
+#endif
+
+// This function is innermost in the test call sequence.  It uses LUL
+// to unwind, and compares the result with the sequence specified in
+// the director string.  These need to agree in order for the test to
+// pass.  In order not to screw up the results, this function needs
+// to have a not-very big stack frame, since we're only presenting
+// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
+// that chunk unavoidably includes the frame for this function.
+//
+// This function must not be inlined into its callers.  Doing so will
+// cause the expected-vs-actual backtrace consistency checking to
+// fail.  Prints summary results to |aLUL|'s logging sink and also
+// returns a boolean indicating whether or not the test failed.
+//
+// |dstring| is the director string: a sequence of the characters '1' .. '8'.
+// A director string containing any other character is treated as a failed
+// test (and asserts in debug builds).
+static __attribute__((noinline)) bool GetAndCheckStackTrace(
+    LUL* aLUL, const char* dstring) {
+  // Get hold of the current unwind-start registers.  In every variant below,
+  // |start| must be derived from the captured stack pointer, since it is the
+  // lowest address of the stack image handed to LUL.
+  UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+#if defined(GP_ARCH_amd64)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "leaq 0(%%rip), %%r15"
+      "\n\t"
+      "movq %%r15, 0(%0)"
+      "\n\t"
+      "movq %%rsp, 8(%0)"
+      "\n\t"
+      "movq %%rbp, 16(%0)"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "r15");
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 128;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 12);
+  __asm__ __volatile__(
+      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
+      "\n\t"
+      "popl %%edi"
+      "\n\t"
+      "movl %%edi, 0(%0)"
+      "\n\t"
+      "movl %%esp, 4(%0)"
+      "\n\t"
+      "movl %%ebp, 8(%0)"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "edi");
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  volatile uintptr_t block[6];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "mov r0, r15"
+      "\n\t"
+      "str r0,  [%0, #0]"
+      "\n\t"
+      "str r14, [%0, #4]"
+      "\n\t"
+      "str r13, [%0, #8]"
+      "\n\t"
+      "str r12, [%0, #12]"
+      "\n\t"
+      "str r11, [%0, #16]"
+      "\n\t"
+      "str r7,  [%0, #20]"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "r0");
+  startRegs.r15 = TaggedUWord(block[0]);
+  startRegs.r14 = TaggedUWord(block[1]);
+  startRegs.r13 = TaggedUWord(block[2]);
+  startRegs.r12 = TaggedUWord(block[3]);
+  startRegs.r11 = TaggedUWord(block[4]);
+  startRegs.r7 = TaggedUWord(block[5]);
+  const uintptr_t REDZONE_SIZE = 0;
+  // The stack pointer (r13) was stored in block[2]; block[1] holds r14, the
+  // link register, which is not a stack address.
+  uintptr_t start = block[2] - REDZONE_SIZE;
+#elif defined(GP_ARCH_arm64)
+  volatile uintptr_t block[4];
+  MOZ_ASSERT(sizeof(block) == 32);
+  __asm__ __volatile__(
+      "adr x0, . \n\t"
+      "str x0, [%0, #0] \n\t"
+      "str x29, [%0, #8] \n\t"
+      "str x30, [%0, #16] \n\t"
+      "mov x0, sp \n\t"
+      "str x0, [%0, #24] \n\t"
+      :
+      : "r"(&block[0])
+      : "memory", "x0");
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.x29 = TaggedUWord(block[1]);
+  startRegs.x30 = TaggedUWord(block[2]);
+  startRegs.sp = TaggedUWord(block[3]);
+  const uintptr_t REDZONE_SIZE = 0;
+  // The stack pointer was stored in block[3]; block[1] holds x29, the frame
+  // pointer, which may lie above the stack pointer.
+  uintptr_t start = block[3] - REDZONE_SIZE;
+#elif defined(GP_ARCH_mips64)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "sd $29, 8(%0)     \n"
+      "sd $30, 16(%0)    \n"
+      :
+      : "r"(block)
+      : "memory");
+  block[0] = __getpc();
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.sp = TaggedUWord(block[1]);
+  startRegs.fp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;
+#else
+#  error "Unsupported platform"
+#endif
+
+  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
+  // stack.  Both ends are rounded down to a word boundary, and the copy is
+  // capped at the capacity of a StackImage.
+  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
+  uintptr_t ws = sizeof(void*);
+  start &= ~(ws - 1);
+  end &= ~(ws - 1);
+  uintptr_t nToCopy = end - start;
+  if (nToCopy > lul::N_STACK_BYTES) {
+    nToCopy = lul::N_STACK_BYTES;
+  }
+  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+  StackImage* stackImg = new StackImage();
+  stackImg->mLen = nToCopy;
+  stackImg->mStartAvma = start;
+  if (nToCopy > 0) {
+    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
+    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
+  }
+
+  // Unwind it.
+  const int MAX_TEST_FRAMES = 64;
+  uintptr_t framePCs[MAX_TEST_FRAMES];
+  uintptr_t frameSPs[MAX_TEST_FRAMES];
+  size_t framesAvail = mozilla::ArrayLength(framePCs);
+  size_t framesUsed = 0;
+  size_t framePointerFramesAcquired = 0;
+  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
+               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);
+
+  delete stackImg;
+
+  // if (0) {
+  //  // Show what we have.
+  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
+  //  for (size_t i = 0; i < framesUsed; i++) {
+  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
+  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
+  //  }
+  //  fprintf(stderr, "\n");
+  //}
+
+  // Check to see if there's a consistent binding between digits in
+  // the director string ('1' .. '8') and the PC values acquired by
+  // the unwind.  If there isn't, the unwinding has failed somehow.
+  uintptr_t binding[8];  // binding for '1' .. binding for '8'
+  memset((void*)binding, 0, sizeof(binding));
+
+  // The general plan is to work backwards along the director string
+  // and forwards along the framePCs array.  Doing so corresponds to
+  // working outwards from the innermost frame of the recursive test set.
+  const size_t dstringLen = strlen(dstring);
+
+  // Counts the number of consistent frames.
+  size_t nConsistent = 0;
+
+  // Iterate back to the start of the director string.  The starting
+  // points are a bit complex.  We can't use framePCs[0] because that
+  // contains the PC in this frame (above).  We can't use framePCs[1]
+  // because that will contain the PC at return point in the recursive
+  // test group (TestFn[1-8]) for their call "out" to this function,
+  // GetAndCheckStackTrace.  Although LUL will compute a correct
+  // return address, that will not be the same return address as for a
+  // recursive call out of the function to another function in the
+  // group.  Hence we can only start consistency checking at
+  // framePCs[2].
+  //
+  // To be consistent, then, we must ignore the last element in the
+  // director string as that corresponds to framePCs[1].  Hence the
+  // start points are: framePCs[2] and the director string's
+  // second-to-last character.
+  //
+  // Also as a result of this, the number of consistent frames counted
+  // will always be one less than the length of the director string
+  // (not including its terminating zero).
+  //
+  // |remaining| is the number of director characters still to be examined;
+  // the one examined on each iteration is dstring[remaining - 1].  Counting
+  // this way avoids forming a pointer before the start of |dstring| when the
+  // string has fewer than two characters.
+  size_t frameIx = 2;
+  for (size_t remaining = (dstringLen >= 2 ? dstringLen - 1 : 0);
+       remaining > 0 && frameIx < framesUsed; remaining--, frameIx++) {
+    char c = dstring[remaining - 1];
+    uintptr_t pc = framePCs[frameIx];
+    // If this doesn't hold, the director string is ill-formed.
+    MOZ_ASSERT(c >= '1' && c <= '8');
+    if (c < '1' || c > '8') {
+      // Never index outside |binding|, even when assertions are compiled
+      // out.  Stopping here makes the test fail.
+      break;
+    }
+    int n = ((int)c) - ((int)'1');
+    if (binding[n] == 0) {
+      // There's no binding for |c| yet, so install |pc| and carry on.
+      binding[n] = pc;
+      nConsistent++;
+      continue;
+    }
+    // There's a pre-existing binding for |c|.  Check it's consistent.
+    if (binding[n] != pc) {
+      // Not consistent.  Give up now.
+      break;
+    }
+    // Consistent.  Keep going.
+    nConsistent++;
+  }
+
+  // So, did we succeed?
+  bool passed = nConsistent + 1 == dstringLen;
+
+  // Show the results.
+  char buf[200];
+  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
+  buf[sizeof(buf) - 1] = 0;
+  aLUL->mLog(buf);
+  SprintfLiteral(buf, "LULUnitTest:     %d consistent, %d in dstring: %s\n",
+                 (int)nConsistent, (int)dstringLen, passed ? "PASS" : "FAIL");
+  buf[sizeof(buf) - 1] = 0;
+  aLUL->mLog(buf);
+
+  return !passed;
+}
+
+// Macro magic to create a set of 8 mutually recursive functions with
+// varying frame sizes.  These will recurse amongst themselves as
+// specified by |strP|, the director string, and call
+// GetAndCheckStackTrace when the string becomes empty, passing it the
+// original value of the string.  This checks the result, printing
+// results on |aLUL|'s logging sink, and also returns a boolean
+// indicating whether or not the results are acceptable (correct).
+
+#define DECL_TEST_FN(NAME) \
+  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
+
+#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
+  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
+    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
+    /* 8 functions created by this macro offer some variation in frame */     \
+    /* sizes.  This isn't as simple as it might seem, since a clever */       \
+    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
+    /* and removes it.  We try to defeat this by passing it to a function */  \
+    /* in a different compilation unit, and hoping that clang does not */     \
+    /* notice that the call is a no-op. */                                    \
+    char space[FRAMESIZE];                                                    \
+    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
+                                                                              \
+    if (*strP == '\0') {                                                      \
+      /* We've come to the end of the director string. */                     \
+      /* Take a stack snapshot. */                                            \
+      /* We purposefully use a negation to avoid tail-call optimization */    \
+      return !GetAndCheckStackTrace(aLUL, strPorig);                          \
+    } else {                                                                  \
+      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
+      /* thing to do here is call onwards directly, from within the */        \
+      /* arms of the case statement.  That gives a problem in that */         \
+      /* there will be multiple return points inside each function when */    \
+      /* unwinding, so it will be difficult to check for consistency */       \
+      /* against the director string.  Instead, we make an indirect */        \
+      /* call, so as to guarantee that there is only one call site */         \
+      /* within each function.  This does assume that the compiler */         \
+      /* won't transform it back to the simple direct-call form. */           \
+      /* To discourage it from doing so, the call is bracketed with */        \
+      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
+      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
+      switch (*strP) {                                                        \
+        case '1':                                                             \
+          nextFn = TestFn1;                                                   \
+          break;                                                              \
+        case '2':                                                             \
+          nextFn = TestFn2;                                                   \
+          break;                                                              \
+        case '3':                                                             \
+          nextFn = TestFn3;                                                   \
+          break;                                                              \
+        case '4':                                                             \
+          nextFn = TestFn4;                                                   \
+          break;                                                              \
+        case '5':                                                             \
+          nextFn = TestFn5;                                                   \
+          break;                                                              \
+        case '6':                                                             \
+          nextFn = TestFn6;                                                   \
+          break;                                                              \
+        case '7':                                                             \
+          nextFn = TestFn7;                                                   \
+          break;                                                              \
+        case '8':                                                             \
+          nextFn = TestFn8;                                                   \
+          break;                                                              \
+        default:                                                              \
+          nextFn = TestFn8;                                                   \
+          break;                                                              \
+      }                                                                       \
+      /* "use" |space| immediately after the recursive call, */               \
+      /* so as to dissuade clang from deallocating the space while */         \
+      /* the call is active, or otherwise messing with the stack frame. */    \
+      __asm__ __volatile__("" ::: "cc", "memory");                            \
+      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
+      Unused << write(1, space, 0);                                           \
+      __asm__ __volatile__("" ::: "cc", "memory");                            \
+      return passed;                                                          \
+    }                                                                         \
+  }
+
+// The test functions are mutually recursive, so it is necessary to
+// declare them before defining them.
+DECL_TEST_FN(TestFn1)
+DECL_TEST_FN(TestFn2)
+DECL_TEST_FN(TestFn3)
+DECL_TEST_FN(TestFn4)
+DECL_TEST_FN(TestFn5)
+DECL_TEST_FN(TestFn6)
+DECL_TEST_FN(TestFn7)
+DECL_TEST_FN(TestFn8)
+
+GEN_TEST_FN(TestFn1, 123)
+GEN_TEST_FN(TestFn2, 456)
+GEN_TEST_FN(TestFn3, 789)
+GEN_TEST_FN(TestFn4, 23)
+GEN_TEST_FN(TestFn5, 47)
+GEN_TEST_FN(TestFn6, 117)
+GEN_TEST_FN(TestFn7, 1)
+GEN_TEST_FN(TestFn8, 99)
+
+// This starts the test sequence going.  Call here to generate a
+// sequence of calls as directed by the string |dstring|.  The call
+// sequence will, from its innermost frame, finish by calling
+// GetAndCheckStackTrace() and passing it |dstring|.
+// GetAndCheckStackTrace() will unwind the stack, check consistency
+// of those results against |dstring|, and print a pass/fail message
+// to aLUL's logging sink.  It also updates the counters in *aNTests
+// and *aNTestsPassed.
+__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
+                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
+                                       const char* dstring) {
+  // Ensure that the stack has at least this much space on it.  This
+  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
+  // and hand it to LUL.  Safe in the sense that no segfault can
+  // happen because the stack is at least this big.  This is all
+  // somewhat dubious in the sense that a sufficiently clever compiler
+  // (clang, for one) can figure out that space[] is unused and delete
+  // it from the frame.  Hence the somewhat elaborate hoop jumping to
+  // fill it up before the call and to at least appear to use the
+  // value afterwards.
+  int i;
+  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
+  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
+    space[i] = (char)(i & 0x7F);
+  }
+
+  // Really run the test.
+  bool passed = TestFn1(aLUL, dstring, dstring);
+
+  // Appear to use space[], by visiting the value to compute some kind
+  // of checksum, and then (apparently) using the checksum.
+  int sum = 0;
+  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
+    // If this doesn't fool LLVM, I don't know what will.
+    sum += space[i] - 3 * i;
+  }
+  __asm__ __volatile__("" : : "r"(sum));
+
+  // Update the counters.
+  (*aNTests)++;
+  if (passed) {
+    (*aNTestsPassed)++;
+  }
+}
+
+void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
+                     LUL* aLUL) {
+  aLUL->mLog(":\n");
+  aLUL->mLog("LULUnitTest: BEGIN\n");
+  *aNTests = *aNTestsPassed = 0;
+  TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
+  TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
+  TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
+  TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
+  TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
+  TestUnw(aNTests, aNTestsPassed, aLUL,
+          "123456781122334455667788777777777777777777777");
+  aLUL->mLog("LULUnitTest: END\n");
+  aLUL->mLog(":\n");
+}
+
+}  // namespace lul
diff --git a/tools/profiler/lul/LulMain.h b/tools/profiler/lul/LulMain.h
new file mode 100644
index 0000000000..d386bd5c4f
--- /dev/null
+++ b/tools/profiler/lul/LulMain.h
@@ -0,0 +1,378 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMain_h
+#define LulMain_h
+
+#include "PlatformMacros.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/ProfilerUtils.h"
+
+// LUL: A Lightweight Unwind Library.
+// This file provides the end-user (external) interface for LUL.
+
+// Some comments about naming in the implementation.  These are safe
+// to ignore if you are merely using LUL, but are important if you
+// hack on its internals.
+//
+// Debuginfo readers in general have tended to use the word "address"
+// to mean several different things.  This sometimes makes them
+// difficult to understand and maintain.  LUL tries hard to avoid
+// using the word "address" and instead uses the following more
+// precise terms:
+//
+// * SVMA ("Stated Virtual Memory Address"): this is an address of a
+//   symbol (etc) as it is stated in the symbol table, or other
+//   metadata, of an object.  Such values are typically small and
+//   start from zero or thereabouts, unless the object has been
+//   prelinked.
+//
+// * AVMA ("Actual Virtual Memory Address"): this is the address of a
+//   symbol (etc) in a running process, that is, once the associated
+//   object has been mapped into a process.  Such values are typically
+//   much larger than SVMAs, since objects can get mapped arbitrarily
+//   far along the address space.
+//
+// * "Bias": the difference between AVMA and SVMA for a given symbol
+//   (specifically, AVMA - SVMA).  The bias is always an integral
+//   number of pages.  Once we know the bias for a given object's
+//   text section (for example), we can compute the AVMAs of all of
+//   its text symbols by adding the bias to their SVMAs.
+//
+// * "Image address": typically, to read debuginfo from an object we
+//   will temporarily mmap in the file so as to read symbol tables
+//   etc.  Addresses in this temporary mapping are called "Image
+//   addresses".  Note that the temporary mapping is entirely
+//   unrelated to the mappings of the file that the dynamic linker
+//   must perform merely in order to get the program to run.  Hence
+//   image addresses are unrelated to either SVMAs or AVMAs.
+
+namespace lul {
+
+// A machine word plus validity tag.
+class TaggedUWord {
+ public:
+  // RUNS IN NO-MALLOC CONTEXT
+  // Construct a valid one.
+  explicit TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Construct an invalid one.
+  TaggedUWord() : mValue(0), mValid(false) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator+(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() + rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator-(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() - rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator&(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() & rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator|(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() | rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord CmpGEs(TaggedUWord rhs) const {
+    if (Valid() && rhs.Valid()) {
+      intptr_t s1 = (intptr_t)Value();
+      intptr_t s2 = (intptr_t)rhs.Value();
+      return TaggedUWord(s1 >= s2 ? 1 : 0);
+    }
+    return TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator<<(TaggedUWord rhs) const {
+    if (Valid() && rhs.Valid()) {
+      uintptr_t shift = rhs.Value();
+      if (shift < 8 * sizeof(uintptr_t)) return TaggedUWord(Value() << shift);
+    }
+    return TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Is equal?  Note: non-validity on either side gives non-equality.
+  bool operator==(TaggedUWord other) const {
+    return (mValid && other.Valid()) ? (mValue == other.Value()) : false;
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Is it word-aligned?
+  bool IsAligned() const {
+    return mValid && (mValue & (sizeof(uintptr_t) - 1)) == 0;
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  uintptr_t Value() const { return mValue; }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  bool Valid() const { return mValid; }
+
+ private:
+  uintptr_t mValue;
+  bool mValid;
+};
+
+// The registers, with validity tags, that will be unwound.
+
+struct UnwindRegs {
+#if defined(GP_ARCH_arm)
+  TaggedUWord r7;
+  TaggedUWord r11;
+  TaggedUWord r12;
+  TaggedUWord r13;
+  TaggedUWord r14;
+  TaggedUWord r15;
+#elif defined(GP_ARCH_arm64)
+  TaggedUWord x29;
+  TaggedUWord x30;
+  TaggedUWord sp;
+  TaggedUWord pc;
+#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  TaggedUWord xbp;
+  TaggedUWord xsp;
+  TaggedUWord xip;
+#elif defined(GP_ARCH_mips64)
+  TaggedUWord sp;
+  TaggedUWord fp;
+  TaggedUWord pc;
+#else
+#  error "Unknown plat"
+#endif
+};
+
+// The maximum number of bytes in a stack snapshot.  This value can be increased
+// if necessary, but testing showed that 160k is enough to obtain good
+// backtraces on x86_64 Linux.  Most backtraces fit comfortably into 4-8k of
+// stack space, but we do have some very deep stacks occasionally.  Please see
+// the comments in DoNativeBacktrace as to why it's OK to have this value be so
+// large.
+static const size_t N_STACK_BYTES = 160 * 1024;
+
+// The stack chunk image that will be unwound.
+struct StackImage {
+  // [mStartAvma, +mLen) specifies the address range in the buffer.
+  // Obviously we require 0 <= mLen <= N_STACK_BYTES.
+  uintptr_t mStartAvma;
+  size_t mLen;
+  uint8_t mContents[N_STACK_BYTES];
+};
+
+// Statistics collection for the unwinder.
+template <typename T>
+class LULStats {
+ public:
+  LULStats() : mContext(0), mCFI(0), mFP(0) {}
+
+  template <typename S>
+  explicit LULStats(const LULStats<S>& aOther)
+      : mContext(aOther.mContext), mCFI(aOther.mCFI), mFP(aOther.mFP) {}
+
+  template <typename S>
+  LULStats<T>& operator=(const LULStats<S>& aOther) {
+    mContext = aOther.mContext;
+    mCFI = aOther.mCFI;
+    mFP = aOther.mFP;
+    return *this;
+  }
+
+  template <typename S>
+  uint32_t operator-(const LULStats<S>& aOther) {
+    return (mContext - aOther.mContext) + (mCFI - aOther.mCFI) +
+           (mFP - aOther.mFP);
+  }
+
+  T mContext;  // Number of context frames
+  T mCFI;      // Number of CFI/EXIDX frames
+  T mFP;       // Number of frame-pointer recovered frames
+};
+
+// The core unwinder library class.  Just one of these is needed, and
+// it can be shared by multiple unwinder threads.
+//
+// The library operates in one of two modes.
+//
+// * Admin mode.  The library is in this state after creation.  In Admin
+//   mode, no unwinding may be performed.  It is however allowable to
+//   perform administrative tasks -- primarily, loading of unwind info
+//   -- in this mode.  In particular, it is safe for the library to
+//   perform dynamic memory allocation in this mode.  Safe in the
+//   sense that there is no risk of deadlock against unwinding threads
+//   that might -- because of where they have been sampled -- hold the
+//   system's malloc lock.
+//
+// * Unwind mode.  In this mode, calls to ::Unwind may be made, but
+//   nothing else.  ::Unwind guarantees not to make any dynamic memory
+//   requests, so as to guarantee that the calling thread won't
+//   deadlock in the case where it already holds the system's malloc lock.
+//
+// The library is created in Admin mode.  After debuginfo is loaded,
+// the caller must switch it into Unwind mode by calling
+// ::EnableUnwinding.  There is no way to switch it back to Admin mode
+// after that.  To safely switch back to Admin mode would require the
+// caller (or other external agent) to guarantee that there are no
+// pending ::Unwind calls.
+
+class PriMap;
+class SegArray;
+class UniqueStringUniverse;
+
+class LUL {
+ public:
+  // Create; supply a logging sink.  Sets the object in Admin mode.
+  explicit LUL(void (*aLog)(const char*));
+
+  // Destroy.  Caller is responsible for ensuring that no other
+  // threads are in Unwind calls.  All resources are freed and all
+  // registered unwinder threads are deregistered.  Can be called
+  // either in Admin or Unwind mode.
+  ~LUL();
+
+  // Notify the library that unwinding is now allowed and so
+  // admin-mode calls are no longer allowed.  The object is initially
+  // created in admin mode.  The only possible transition is
+  // admin->unwinding, therefore.
+  void EnableUnwinding();
+
+  // Notify of a new r-x mapping, and load the associated unwind info.
+  // The filename is strdup'd and used for debug printing.  If
+  // aMappedImage is NULL, this function will mmap/munmap the file
+  // itself, so as to be able to read the unwind info.  If
+  // aMappedImage is non-NULL then it is assumed to point to a
+  // caller-supplied and caller-managed mapped image of the file.
+  // May only be called in Admin mode.
+  void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
+                      const void* aMappedImage);
+
+  // In rare cases we know an executable area exists but don't know
+  // what the associated file is.  This call notifies LUL of such
+  // areas.  This is important for correct functioning of stack
+  // scanning and of the x86-{linux,android} special-case
+  // __kernel_syscall function handling.
+  // This must be called only after the code area in
+  // question really has been mapped.
+  // May only be called in Admin mode.
+  void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
+
+  // Notify that a mapped area has been unmapped; discard any
+  // associated unwind info.  Acquires mRWlock for writing.  Note that
+  // to avoid segfaulting the stack-scan unwinder, which inspects code
+  // areas, this must be called before the code area in question is
+  // really unmapped.  Note that, unlike NotifyAfterMap(), this
+  // function takes the start and end addresses of the range to be
+  // unmapped, rather than a start and a length parameter.  This is so
+  // as to make it possible to notify an unmap for the entire address
+  // space using a single call.
+  // May only be called in Admin mode.
+  void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax);
+
+  // Apply NotifyBeforeUnmap to the entire address space.  This causes
+  // LUL to discard all unwind and executable-area information for the
+  // entire address space.
+  // May only be called in Admin mode.
+  void NotifyBeforeUnmapAll() { NotifyBeforeUnmap(0, UINTPTR_MAX); }
+
+  // Returns the number of mappings currently registered.
+  // May only be called in Admin mode.
+  size_t CountMappings();
+
+  // Unwind |aStackImg| starting with the context in |aStartRegs|.
+  // Write the number of frames recovered in *aFramesUsed.  Put
+  // the PC values in aFramePCs[0 .. *aFramesUsed-1] and
+  // the SP values in aFrameSPs[0 .. *aFramesUsed-1].
+  // |aFramesAvail| is the size of the two output arrays and hence the
+  // largest possible value of *aFramesUsed.  PC values are always
+  // valid, and the unwind will stop when the PC becomes invalid, but
+  // the SP values might be invalid, in which case the value zero will
+  // be written in the relevant aFrameSPs[] slot.
+  //
+  // This function assumes that the SP values increase as it unwinds
+  // away from the innermost frame -- that is, that the stack grows
+  // down.  It monitors SP values as it unwinds to check they
+  // increase, so as to avoid looping on corrupted stacks.
+  //
+  // May only be called in Unwind mode.  Multiple threads may unwind
+  // at once.  LUL user is responsible for ensuring that no thread makes
+  // any Admin calls whilst in Unwind mode.
+  // MOZ_CRASHes if the calling thread is not registered for unwinding.
+  //
+  // The calling thread must previously have been registered via a call to
+  // RegisterSampledThread.
+  void Unwind(/*OUT*/ uintptr_t* aFramePCs,
+              /*OUT*/ uintptr_t* aFrameSPs,
+              /*OUT*/ size_t* aFramesUsed,
+              /*OUT*/ size_t* aFramePointerFramesAcquired, size_t aFramesAvail,
+              UnwindRegs* aStartRegs, StackImage* aStackImg);
+
+  // The logging sink.  Call to send debug strings to the caller-
+  // specified destination.  Can only be called by the Admin thread.
+  void (*mLog)(const char*);
+
+  // Statistics relating to unwinding.  These have to be atomic since
+  // unwinding can occur on different threads simultaneously.
+  LULStats<mozilla::Atomic<uint32_t>> mStats;
+
+  // Possibly show the statistics.  This may not be called from any
+  // registered sampling thread, since it involves I/O.
+  void MaybeShowStats();
+
+  size_t SizeOfIncludingThis(mozilla::MallocSizeOf) const;
+
+ private:
+  // The statistics counters at the point where they were last printed.
+  LULStats<uint32_t> mStatsPrevious;
+
+  // Are we in admin mode?  Initially |true| but changes to |false|
+  // once unwinding begins.
+  bool mAdminMode;
+
+  // The thread ID associated with admin mode.  This is the only thread
+  // that is allowed to perform non-Unwind calls on this object.  Conversely,
+  // no registered Unwinding thread may be the admin thread.  This is so
+  // as to clearly partition the one thread that may do dynamic memory
+  // allocation from the threads that are being sampled, since the latter
+  // absolutely may not do dynamic memory allocation.
+  ProfilerThreadId mAdminThreadId;
+
+  // The top level mapping from code address ranges to postprocessed
+  // unwind info.  Basically a sorted array of (addr, len, info)
+  // records.  This field is updated by NotifyAfterMap and NotifyBeforeUnmap.
+  PriMap* mPriMap;
+
+  // An auxiliary structure that records which address ranges are
+  // mapped r-x, for the benefit of the stack scanner.
+  SegArray* mSegArray;
+
+  // A UniqueStringUniverse that holds all the strdup'd strings created
+  // whilst reading unwind information.  This is included so as to make
+  // it possible to free them in ~LUL.
+  UniqueStringUniverse* mUSU;
+};
+
+// Run unit tests on an initialised, loaded-up LUL instance, and print
+// summary results on |aLUL|'s logging sink.  Also return the number
+// of tests run in *aNTests and the number that passed in
+// *aNTestsPassed.
+void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
+                     LUL* aLUL);
+
+}  // namespace lul
+
+#endif  // LulMain_h
diff --git a/tools/profiler/lul/LulMainInt.h b/tools/profiler/lul/LulMainInt.h
new file mode 100644
index 0000000000..001a4aecfb
--- /dev/null
+++ b/tools/profiler/lul/LulMainInt.h
@@ -0,0 +1,631 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMainInt_h
+#define LulMainInt_h
+
+#include "PlatformMacros.h"
+#include "LulMain.h"  // for TaggedUWord
+
+#include <string>
+#include <vector>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/Sprintf.h"
+
+// This file provides an internal interface inside LUL.  If you are an
+// end-user of LUL, do not include it in your code.  The end-user
+// interface is in LulMain.h.
+
+namespace lul {
+
+using std::vector;
+
+////////////////////////////////////////////////////////////////
+// DW_REG_ constants                                          //
+////////////////////////////////////////////////////////////////
+
+// These are the Dwarf CFI register numbers, as (presumably) defined
+// in the ELF ABI supplements for each architecture.
+
+enum DW_REG_NUMBER {
+  // No real register has this number.  It's convenient to be able to
+  // treat the CFA (Canonical Frame Address) as "just another
+  // register", though.
+  DW_REG_CFA = -1,
+#if defined(GP_ARCH_arm)
+  // ARM registers
+  DW_REG_ARM_R7 = 7,
+  DW_REG_ARM_R11 = 11,
+  DW_REG_ARM_R12 = 12,
+  DW_REG_ARM_R13 = 13,
+  DW_REG_ARM_R14 = 14,
+  DW_REG_ARM_R15 = 15,
+#elif defined(GP_ARCH_arm64)
+  // aarch64 registers
+  DW_REG_AARCH64_X29 = 29,
+  DW_REG_AARCH64_X30 = 30,
+  DW_REG_AARCH64_SP = 31,
+#elif defined(GP_ARCH_amd64)
+  // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are
+  // combined, a merged set of register constants is needed.
+  DW_REG_INTEL_XBP = 6,
+  DW_REG_INTEL_XSP = 7,
+  DW_REG_INTEL_XIP = 16,
+#elif defined(GP_ARCH_x86)
+  DW_REG_INTEL_XBP = 5,
+  DW_REG_INTEL_XSP = 4,
+  DW_REG_INTEL_XIP = 8,
+#elif defined(GP_ARCH_mips64)
+  DW_REG_MIPS_SP = 29,
+  DW_REG_MIPS_FP = 30,
+  DW_REG_MIPS_PC = 34,
+#else
+#  error "Unknown arch"
+#endif
+};
+
+////////////////////////////////////////////////////////////////
+// PfxExpr                                                    //
+////////////////////////////////////////////////////////////////
+
+enum PfxExprOp {
+  //             meaning of mOperand     effect on stack
+  PX_Start,   // bool start-with-CFA?    start, with CFA on stack, or not
+  PX_End,     // none                    stop; result is at top of stack
+  PX_SImm32,  // int32                   push signed int32
+  PX_DwReg,   // DW_REG_NUMBER           push value of the specified reg
+  PX_Deref,   // none                    pop X ; push *X
+  PX_Add,     // none                    pop X ; pop Y ; push Y + X
+  PX_Sub,     // none                    pop X ; pop Y ; push Y - X
+  PX_And,     // none                    pop X ; pop Y ; push Y & X
+  PX_Or,      // none                    pop X ; pop Y ; push Y | X
+  PX_CmpGES,  // none                    pop X ; pop Y ; push (Y >=s X) ? 1 : 0
+  PX_Shl      // none                    pop X ; pop Y ; push Y << X
+};
+
+struct PfxInstr {
+  PfxInstr(PfxExprOp opcode, int32_t operand)
+      : mOpcode(opcode), mOperand(operand) {}
+  explicit PfxInstr(PfxExprOp opcode) : mOpcode(opcode), mOperand(0) {}
+  bool operator==(const PfxInstr& other) const {
+    return mOpcode == other.mOpcode && mOperand == other.mOperand;
+  }
+  PfxExprOp mOpcode;
+  int32_t mOperand;
+};
+
+static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly");
+
+// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start].
+// In the case of any mishap (stack over/underflow, running off the end of
+// the instruction vector, obviously malformed sequences),
+// return an invalid TaggedUWord.
+// RUNS IN NO-MALLOC CONTEXT
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
+                            TaggedUWord aCFA, const StackImage* aStackImg,
+                            const vector<PfxInstr>& aPfxInstrs);
+
+////////////////////////////////////////////////////////////////
+// LExpr                                                      //
+////////////////////////////////////////////////////////////////
+
+// An expression -- very primitive.  Denotes either "register +
+// offset", a dereferenced version of the same, or a reference to a
+// prefix expression stored elsewhere.  So as to allow convenient
+// handling of Dwarf-derived unwind info, the register may also denote
+// the CFA.  A large number of these need to be stored, so we ensure
+// it fits into 8 bytes.  See comment below on RuleSet to see how
+// expressions fit into the bigger picture.
+
+enum LExprHow {
+  UNKNOWN = 0,  // This LExpr denotes no value.
+  NODEREF,      // Value is  (mReg + mOffset).
+  DEREF,        // Value is *(mReg + mOffset).
+  PFXEXPR       // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset])
+};
+
+inline static const char* NameOf_LExprHow(LExprHow how) {
+  switch (how) {
+    case UNKNOWN:
+      return "UNKNOWN";
+    case NODEREF:
+      return "NODEREF";
+    case DEREF:
+      return "DEREF";
+    case PFXEXPR:
+      return "PFXEXPR";
+    default:
+      return "LExpr-??";
+  }
+}
+
+struct LExpr {
+  // Denotes an expression with no value.
+  LExpr() : mHow(UNKNOWN), mReg(0), mOffset(0) {}
+
+  // Denotes any expressible expression.  An UNKNOWN expression must carry
+  // no register and no offset, and a PFXEXPR must carry no register and a
+  // nonnegative index; both are asserted.  An unrecognised |how| is
+  // release-asserted.
+  LExpr(LExprHow how, int16_t reg, int32_t offset)
+      : mHow(how), mReg(reg), mOffset(offset) {
+    switch (how) {
+      case UNKNOWN:
+        MOZ_ASSERT(reg == 0 && offset == 0);
+        break;
+      case NODEREF:
+        break;
+      case DEREF:
+        break;
+      case PFXEXPR:
+        MOZ_ASSERT(reg == 0 && offset >= 0);
+        break;
+      default:
+        MOZ_RELEASE_ASSERT(0, "LExpr::LExpr: invalid how");
+    }
+  }
+
+  // Hash it, carefully looking only at defined parts: nothing beyond the
+  // tag for UNKNOWN, register and offset for NODEREF/DEREF, and only the
+  // offset for PFXEXPR.  This mirrors what equals() compares.
+  mozilla::HashNumber hash() const {
+    mozilla::HashNumber h = mHow;
+    switch (mHow) {
+      case UNKNOWN:
+        break;
+      case NODEREF:
+      case DEREF:
+        h = mozilla::AddToHash(h, mReg);
+        h = mozilla::AddToHash(h, mOffset);
+        break;
+      case PFXEXPR:
+        h = mozilla::AddToHash(h, mOffset);
+        break;
+      default:
+        MOZ_RELEASE_ASSERT(0, "LExpr::hash: invalid how");
+    }
+    return h;
+  }
+
+  // And structural equality, again looking only at the parts that are
+  // defined for the expression's kind.
+  bool equals(const LExpr& other) const {
+    if (mHow != other.mHow) {
+      return false;
+    }
+    switch (mHow) {
+      case UNKNOWN:
+        return true;
+      case NODEREF:
+      case DEREF:
+        return mReg == other.mReg && mOffset == other.mOffset;
+      case PFXEXPR:
+        return mOffset == other.mOffset;
+      default:
+        MOZ_RELEASE_ASSERT(0, "LExpr::equals: invalid how");
+    }
+  }
+
+  // Return a copy of this NODEREF expression with |delta| added to its
+  // offset.  |this| is not modified.
+  LExpr add_delta(long delta) const {
+    MOZ_ASSERT(mHow == NODEREF);
+    // If this is a non-debug build and the above assertion would have
+    // failed, at least return LExpr() so that the machinery that uses
+    // the resulting expression fails in a repeatable way.
+    if (mHow != NODEREF) {
+      return LExpr();  // Gone bad
+    }
+    // Form the sum in 64 bits: where |long| is only 32 bits wide, adding
+    // directly could overflow a signed type, which is undefined behaviour.
+    // The result is then narrowed explicitly to the 32-bit field.
+    const int64_t sum =
+        static_cast<int64_t>(mOffset) + static_cast<int64_t>(delta);
+    return LExpr(mHow, mReg, static_cast<int32_t>(sum));
+  }
+
+  // Return the dereferenced form of this NODEREF expression, which must
+  // denote a memory address.  |this| is not modified.
+  LExpr deref() const {
+    MOZ_ASSERT(mHow == NODEREF);
+    // Same rationale as for add_delta().
+    return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset)
+                             : LExpr();  // Gone bad
+  }
+
+  // Print a rule for recovery of |aNewReg| whose recovered value
+  // is this LExpr.
+  std::string ShowRule(const char* aNewReg) const;
+
+  // Evaluate this expression, producing a TaggedUWord.  |aOldRegs|
+  // holds register values that may be referred to by the expression.
+  // |aCFA| holds the CFA value, if any, that applies.  |aStackImg|
+  // contains a chunk of stack that will be consulted if the expression
+  // references memory.  |aPfxInstrs| holds the vector of PfxInstrs
+  // that will be consulted if this is a PFXEXPR.
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
+                           const StackImage* aStackImg,
+                           const vector<PfxInstr>* aPfxInstrs) const;
+
+  // Representation of expressions.  If |mReg| is DW_REG_CFA (-1) then
+  // it denotes the CFA.  All other allowed values for |mReg| are
+  // nonnegative and are DW_REG_ values.
+  LExprHow mHow : 8;
+  int16_t mReg;     // A DW_REG_ value
+  int32_t mOffset;  // 32-bit signed offset should be more than enough.
+};
+
+static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly");
+
+////////////////////////////////////////////////////////////////
+// RuleSet                                                    //
+////////////////////////////////////////////////////////////////
+
+// This is platform-dependent.  It describes how to recover the CFA and then
+// how to recover the registers for the previous frame.  Such "recipes" are
+// specific to particular ranges of machine code, but the associated range
+// is not stored in RuleSet, because in general each RuleSet may be used
+// for many such range fragments ("extents").  See the comments below for
+// Extent and SecMap.
+//
+// The set of LExprs contained in a given RuleSet describe a DAG which
+// says how to compute the caller's registers ("new registers") from
+// the callee's registers ("old registers").  The DAG can contain a
+// single internal node, which is the value of the CFA for the callee.
+// It would be possible to construct a DAG that omits the CFA, but
+// including it makes the summarisers simpler, and the Dwarf CFI spec
+// has the CFA as a central concept.
+//
+// For this to make sense, |mCfaExpr| can't have
+// |mReg| == DW_REG_CFA since we have no previous value for the CFA.
+// All of the other |Expr| fields can -- and usually do -- specify
+// |mReg| == DW_REG_CFA.
+//
+// With that in place, the unwind algorithm proceeds as follows.
+//
+// (0) Initially: we have values for the old registers, and a memory
+//     image.
+//
+// (1) Compute the CFA by evaluating |mCfaExpr|.  Add the computed
+//     value to the set of "old registers".
+//
+// (2) Compute values for the registers by evaluating all of the other
+//     |Expr| fields in the RuleSet.  These can depend on both the old
+//     register values and the just-computed CFA.
+//
+// If we are unwinding without computing a CFA, perhaps because the
+// RuleSets are derived from EXIDX instead of Dwarf, then
+// |mCfaExpr.mHow| will be UNKNOWN, so the computed value will
+// be invalid -- that is, TaggedUWord() -- and so any attempt to use
+// that will result in the same value.  But that's OK because the
+// RuleSet would make no sense if it depended on the CFA but specified
+// no way to compute it.
+//
+// A RuleSet is not allowed to cover zero address range.  Having zero
+// length would break binary searching in SecMaps and PriMaps.
+
+class RuleSet {
+ public:
+  RuleSet();
+  // Print this RuleSet, for the address range starting at |avma| and of
+  // length |len|, to the logging sink |aLog|.
+  void Print(uintptr_t avma, uintptr_t len, void (*aLog)(const char*)) const;
+
+  // Find the LExpr* for a given DW_REG_ value in this class.
+  LExpr* ExprForRegno(DW_REG_NUMBER aRegno);
+
+  // How to compute the CFA.
+  LExpr mCfaExpr;
+  // How to compute caller register values.  These may reference the
+  // value defined by |mCfaExpr|.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  LExpr mXipExpr;  // return address
+  LExpr mXspExpr;
+  LExpr mXbpExpr;
+#elif defined(GP_ARCH_arm)
+  LExpr mR15expr;  // return address
+  LExpr mR14expr;
+  LExpr mR13expr;
+  LExpr mR12expr;
+  LExpr mR11expr;
+  LExpr mR7expr;
+#elif defined(GP_ARCH_arm64)
+  LExpr mX29expr;  // frame pointer register
+  LExpr mX30expr;  // link register
+  LExpr mSPexpr;
+#elif defined(GP_ARCH_mips64)
+  LExpr mPCexpr;
+  LExpr mFPexpr;
+  LExpr mSPexpr;
+#else
+#  error "Unknown arch"
+#endif
+
+  // Machinery in support of hashing.  These three members let RuleSet act
+  // as its own hash policy when it is used as a hash-table key.
+  typedef RuleSet Lookup;
+
+  // Combine the hashes of the CFA expression and of every per-register
+  // expression for this architecture.  The argument is taken by const
+  // reference so that hashing does not copy the whole RuleSet.
+  static mozilla::HashNumber hash(const RuleSet& rs) {
+    mozilla::HashNumber h = rs.mCfaExpr.hash();
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    h = mozilla::AddToHash(h, rs.mXipExpr.hash());
+    h = mozilla::AddToHash(h, rs.mXspExpr.hash());
+    h = mozilla::AddToHash(h, rs.mXbpExpr.hash());
+#elif defined(GP_ARCH_arm)
+    h = mozilla::AddToHash(h, rs.mR15expr.hash());
+    h = mozilla::AddToHash(h, rs.mR14expr.hash());
+    h = mozilla::AddToHash(h, rs.mR13expr.hash());
+    h = mozilla::AddToHash(h, rs.mR12expr.hash());
+    h = mozilla::AddToHash(h, rs.mR11expr.hash());
+    h = mozilla::AddToHash(h, rs.mR7expr.hash());
+#elif defined(GP_ARCH_arm64)
+    h = mozilla::AddToHash(h, rs.mX29expr.hash());
+    h = mozilla::AddToHash(h, rs.mX30expr.hash());
+    h = mozilla::AddToHash(h, rs.mSPexpr.hash());
+#elif defined(GP_ARCH_mips64)
+    h = mozilla::AddToHash(h, rs.mPCexpr.hash());
+    h = mozilla::AddToHash(h, rs.mFPexpr.hash());
+    h = mozilla::AddToHash(h, rs.mSPexpr.hash());
+#else
+#  error "Unknown arch"
+#endif
+    return h;
+  }
+
+  // Two RuleSets match iff the CFA expression and every per-register
+  // expression for this architecture are structurally equal.
+  static bool match(const RuleSet& rs1, const RuleSet& rs2) {
+    return rs1.mCfaExpr.equals(rs2.mCfaExpr) &&
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+           rs1.mXipExpr.equals(rs2.mXipExpr) &&
+           rs1.mXspExpr.equals(rs2.mXspExpr) &&
+           rs1.mXbpExpr.equals(rs2.mXbpExpr);
+#elif defined(GP_ARCH_arm)
+           rs1.mR15expr.equals(rs2.mR15expr) &&
+           rs1.mR14expr.equals(rs2.mR14expr) &&
+           rs1.mR13expr.equals(rs2.mR13expr) &&
+           rs1.mR12expr.equals(rs2.mR12expr) &&
+           rs1.mR11expr.equals(rs2.mR11expr) && rs1.mR7expr.equals(rs2.mR7expr);
+#elif defined(GP_ARCH_arm64)
+           rs1.mX29expr.equals(rs2.mX29expr) &&
+           rs1.mX30expr.equals(rs2.mX30expr) && rs1.mSPexpr.equals(rs2.mSPexpr);
+#elif defined(GP_ARCH_mips64)
+           rs1.mPCexpr.equals(rs2.mPCexpr) && rs1.mFPexpr.equals(rs2.mFPexpr) &&
+           rs1.mSPexpr.equals(rs2.mSPexpr);
+#else
+#  error "Unknown arch"
+#endif
+  }
+};
+
+// Returns |true| for Dwarf register numbers which are members
+// of the set of registers that LUL unwinds on this target.
+static inline bool registerIsTracked(DW_REG_NUMBER reg) {
+  // One membership test per architecture; any register not named in the
+  // selected test is untracked.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  return reg == DW_REG_INTEL_XBP || reg == DW_REG_INTEL_XSP ||
+         reg == DW_REG_INTEL_XIP;
+#elif defined(GP_ARCH_arm)
+  return reg == DW_REG_ARM_R7 || reg == DW_REG_ARM_R11 ||
+         reg == DW_REG_ARM_R12 || reg == DW_REG_ARM_R13 ||
+         reg == DW_REG_ARM_R14 || reg == DW_REG_ARM_R15;
+#elif defined(GP_ARCH_arm64)
+  return reg == DW_REG_AARCH64_X29 || reg == DW_REG_AARCH64_X30 ||
+         reg == DW_REG_AARCH64_SP;
+#elif defined(GP_ARCH_mips64)
+  return reg == DW_REG_MIPS_FP || reg == DW_REG_MIPS_SP ||
+         reg == DW_REG_MIPS_PC;
+#else
+#  error "Unknown arch"
+#endif
+}
+
+////////////////////////////////////////////////////////////////
+// Extent                                                     //
+////////////////////////////////////////////////////////////////
+
+struct Extent {
+  // Exactly three fields, laid out so that they total 8 bytes.
+  uint32_t mOffset;
+  uint16_t mLen;
+  uint16_t mDictIx;
+
+  // How these are used: say we want the unwind rules for a code address
+  // (AVMA) `avma`, and there is a SecMap `secmap` for which `avma` lies in
+  //
+  //   `[secmap.mMapMinAVMA, secmap.mMapMaxAVMA]`
+  //
+  // Then `secmap.mDictionary[dictIx]` is the RuleSet to use iff
+  // `secmap.mExtents` holds an `extent` for which `avma` lies in
+  //
+  //   `[secmap.mMapMinAVMA + extent.offset(),
+  //     secmap.mMapMinAVMA + extent.offset() + extent.len())`.
+  //
+  // Keeping Extent as small as possible matters, because there are huge
+  // numbers of them -- roughly 3 million for libxul.so as of Sept 2020.
+  // The target is 8 bytes, with field widths picked as follows:
+  //
+  // `offset` is a byte offset within the text section of some shared
+  // object.  libxul.so is much the biggest; as of Sept 2020 its text size
+  // is up to about 120MB, i.e. nearly 2^27 bytes.  A 32-bit `offset`
+  // therefore leaves headroom of about a factor of 32 (== 2 ^(32 - 27)).
+  //
+  // `dictIx` names a unique `RuleSet` for some code address range.
+  // Experiments on x86_64-linux show only about 300 distinct `RuleSet`s
+  // for libxul.so.  16 bits allow up to 65536 of them, which is a generous
+  // margin.
+  //
+  // `len` is the length of the associated address range.
+  //
+  // The representation stops working if `offset` overflows 32 bits or
+  // `dictIx` overflows 16 bits.  In contrast, `len` overflowing 16 bits is
+  // undesirable but harmless, since the whole range can then be covered by
+  // adding several size-65535 entries to `secmap.mExtents`.  That is why
+  // the widths favour `offset` and `dictIx` at the expense of `len`.
+  // Nearly every `len` seen in practice is 65535 or less anyway, so the
+  // stolen bits cost little.
+  //
+  // Should more compression be needed, Extent could be implemented with 29
+  // bits of offset, 8 bits of length and 11 bits of dictionary index -- 6
+  // bytes in all, provided the data is packed into 3 uint16_t's.  The bit
+  // packing would make that somewhat slower, and it would be more fragile:
+  // it would fail for any object with more than 512MB of text segment or
+  // more than 2048 different `RuleSet`s.  It would nevertheless work fine
+  // for libxul.so as it stands (Sept 2020).
+
+  Extent(uint32_t offset, uint32_t len, uint32_t dictIx) {
+    // |len| and |dictIx| are stored in 16 bits, so insist that they fit.
+    MOZ_RELEASE_ASSERT(len < (1 << 16));
+    MOZ_RELEASE_ASSERT(dictIx < (1 << 16));
+    mDictIx = static_cast<uint16_t>(dictIx);
+    mLen = static_cast<uint16_t>(len);
+    mOffset = offset;
+  }
+  // Widening accessors: every field is handed back as a uint32_t.
+  uint32_t offset() const { return mOffset; }
+  uint32_t len() const { return mLen; }
+  uint32_t dictIx() const { return mDictIx; }
+  // Replace the length; it must still fit in 16 bits.
+  void setLen(uint32_t len) {
+    MOZ_RELEASE_ASSERT(len < (1 << 16));
+    mLen = static_cast<uint16_t>(len);
+  }
+  // Format this Extent as text and pass it to the |aLog| sink.
+  void Print(void (*aLog)(const char*)) const {
+    char text[64];
+    SprintfLiteral(text, "Extent(offs=0x%x, len=%u, dictIx=%u)", offset(),
+                   len(), dictIx());
+    aLog(text);
+  }
+};
+
+static_assert(sizeof(Extent) == 8);
+
+////////////////////////////////////////////////////////////////
+// SecMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// A SecMap may have zero address range, temporarily, whilst RuleSets
+// are being added to it.  But adding a zero-range SecMap to a PriMap
+// will make it impossible to maintain the total order of the PriMap
+// entries, and so that can't be allowed to happen.
+
+class SecMap {
+ public:
+  // In the constructor, `mapStartAVMA` and `mapLen` define the actual
+  // (in-process) virtual addresses covered by the SecMap.  All RuleSets
+  // subsequently added to it by calling `AddRuleSet` must fall into this
+  // address range, and attempts to add ones outside the range will be
+  // ignored.  This restriction exists because the type Extent (see above)
+  // indicates an address range for a RuleSet, but for reasons of compactness,
+  // it does not contain the start address of the range.  Instead, it contains
+  // a 32-bit offset from the base address of the SecMap.  This is also the
+  // reason why the map's size is a `uint32_t` and not a `uintptr_t`.
+  //
+  // The effect is to limit this mechanism to shared objects / executables
+  // whose text section size does not exceed 4GB (2^32 bytes).  Given that, as
+  // of Sept 2020, libxul.so's text section size is around 120MB, this does
+  // not seem like much of a limitation.
+  //
+  // From the supplied `mapStartAVMA` and `mapLen`, fields `mMapMinAVMA` and
+  // `mMapMaxAVMA` are calculated.  It is intended that no two SecMaps owned
+  // by the same PriMap contain overlapping address ranges, and the PriMap
+  // logic enforces that.
+  //
+  // Some invariants:
+  //
+  // mExtents is nonempty
+  //    <=> mMapMinAVMA <= mMapMaxAVMA
+  //        && mMapMinAVMA <= apply_delta(mExtents[0].offset())
+  //        && apply_delta(mExtents[#rulesets-1].offset()
+  //             + mExtents[#rulesets-1].len() - 1) <= mMapMaxAVMA
+  //        where
+  //           apply_delta(off) = off + mMapMinAVMA
+  //
+  //        This requires that no RuleSet has zero length.
+  //
+  // mExtents is empty
+  //    <=> mMapMinAVMA > mMapMaxAVMA
+  //
+  // This doesn't constrain mMapMinAVMA and mMapMaxAVMA uniquely, so let's use
+  // mMapMinAVMA == 1 and mMapMaxAVMA == 0 to denote this case.
+
+  SecMap(uintptr_t mapStartAVMA, uint32_t mapLen, void (*aLog)(const char*));
+  ~SecMap();
+
+  // Binary search mExtents for one that brackets |ia| and return its
+  // RuleSet, or nullptr if none is found.  It's not allowable to do this
+  // until PrepareRuleSets has been called first.
+  RuleSet* FindRuleSet(uintptr_t ia);
+
+  // Add a RuleSet to the collection.  The rule is copied in.  Calling
+  // this makes the map non-searchable.
+  void AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len);
+
+  // Add a PfxInstr to the vector of such instrs, and return the index
+  // in the vector.  Calling this makes the map non-searchable.
+  uint32_t AddPfxInstr(PfxInstr pfxi);
+
+  // Returns the entire vector of PfxInstrs.
+  const vector<PfxInstr>* GetPfxInstrs() { return &mPfxInstrs; }
+
+  // Prepare the map for searching, by sorting it, de-overlapping entries and
+  // removing any resulting zero-length entries.  At the start of this
+  // routine, all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and
+  // not have zero length, as a result of the checks in AddRuleSet().
+  void PrepareRuleSets();
+
+  bool IsEmpty();
+
+  size_t Size() { return mExtents.size() + mDictionary.size(); }
+
+  size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+  // The extent of this SecMap as a whole.  The extents of all contained
+  // RuleSets must fall inside this.  See comment above for details.
+  uintptr_t mMapMinAVMA;
+  uintptr_t mMapMaxAVMA;
+
+ private:
+  // False whilst adding entries; true once it is safe to call FindRuleSet.
+  // Transition (false->true) is caused by calling PrepareRuleSets().
+  bool mUsable;
+
+  // This is used to find and remove duplicate RuleSets while we are adding
+  // them to the SecMap.  Almost all RuleSets are duplicates, so de-duping
+  // them is a huge space win.  This is non-null while `mUsable` is false, and
+  // becomes null (is discarded) after the call to PrepareRuleSets, which
+  // copies all the entries into `mDictionary`.
+  mozilla::UniquePtr<
+      mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>>
+      mUniqifier;
+
+  // This will contain final contents of `mUniqifier`, but ordered
+  // (implicitly) by the `uint32_t` value fields, for fast access.
+  vector<RuleSet> mDictionary;
+
+  // A vector of Extents, sorted by offset value, nonoverlapping (post
+  // PrepareRuleSets()).
+  vector<Extent> mExtents;
+
+  // A vector of PfxInstrs, which are referred to by the RuleSets.
+  // These are provided as a representation of Dwarf expressions
+  // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression),
+  // are relatively expensive to evaluate, and are therefore
+  // expected to be used only occasionally.
+  //
+  // The vector holds a bunch of separate PfxInstr programs, each one
+  // starting with a PX_Start and terminated by a PX_End, all
+  // concatenated together.  When a RuleSet can't recover a value
+  // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is
+  // the index in this vector of start of the necessary PfxInstr program.
+  vector<PfxInstr> mPfxInstrs;
+
+  // A logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+}  // namespace lul
+
+#endif  // ndef LulMainInt_h
diff --git a/tools/profiler/lul/platform-linux-lul.cpp b/tools/profiler/lul/platform-linux-lul.cpp
new file mode 100644
index 0000000000..4027905c60
--- /dev/null
+++ b/tools/profiler/lul/platform-linux-lul.cpp
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "mozilla/ProfilerState.h"
+#include "platform.h"
+#include "PlatformMacros.h"
+#include "LulMain.h"
+#include "shared-libraries.h"
+#include "AutoObjectMapper.h"
+
+// Contains miscellaneous helpers that are used to connect the Gecko Profiler
+// and LUL.
+
+// Find out, in a platform-dependent way, where the code modules got
+// mapped in the process' virtual address space, and get |aLUL| to
+// load unwind info for them.
+void read_procmaps(lul::LUL* aLUL) {
+  MOZ_ASSERT(aLUL->CountMappings() == 0);
+
+#if !defined(GP_OS_linux) && !defined(GP_OS_android) && !defined(GP_OS_freebsd)
+#  error "Unknown platform"
+#endif
+
+  // Visit each entry that SharedLibraryInfo reports for this process.
+  SharedLibraryInfo libs = SharedLibraryInfo::GetInfoForSelf();
+  for (size_t ix = 0; ix < libs.GetSize(); ix++) {
+    const SharedLibrary& lib = libs.GetEntry(ix);
+    std::string debugPath = lib.GetNativeDebugPath();
+
+    // The standard POSIX-based mapper is sufficient.  It is scoped to this
+    // iteration, so its destructor unmaps the object before we move on to
+    // the next library.
+    AutoObjectMapperPOSIX mapper(aLUL->mLog);
+
+    void* image = nullptr;
+    size_t size = 0;
+    if (mapper.Map(&image, &size, debugPath)) {
+      // The object was mapped.  Provided the mapping is non-empty, hand its
+      // mapped address to NotifyAfterMap().
+      if (image && size > 0) {
+        aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd() - lib.GetStart(),
+                             debugPath.c_str(), image);
+      }
+      continue;
+    }
+
+    if (lib.GetDebugName().IsEmpty()) {
+      // The object has no name and (as a consequence) the mapper failed to
+      // map it.  This happens on Linux, where GetInfoForSelf() produces such
+      // a mapping for the VDSO.  That matters on x86-{linux,android}, because
+      // without knowledge of the mapped area LUL's special __kernel_syscall
+      // handling is inhibited.  So tell |aLUL| about the mapping at least,
+      // even though no unwind information can be read for the area.
+      aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd() - lib.GetStart());
+    }
+  }
+}
+
+// LUL needs a callback for its logging sink.
+void logging_sink_for_LUL(const char* str) {
+  // LUL's logging is much more verbose than the rest of the profiler's,
+  // which occurs at the Info (3) and Debug (4) levels.  Its messages are
+  // therefore only printed when Verbose logging is enabled (e.g. with
+  // MOZ_LOG="prof:5").  Each message is prefixed with the current process
+  // id.
+  MOZ_LOG(gProfilerLog, mozilla::LogLevel::Verbose,
+          ("[%" PRIu64 "] %s",
+           static_cast<uint64_t>(profiler_current_process_id().ToNumber()),
+           str));
+}
diff --git a/tools/profiler/lul/platform-linux-lul.h b/tools/profiler/lul/platform-linux-lul.h
new file mode 100644
index 0000000000..7c94299961
--- /dev/null
+++ b/tools/profiler/lul/platform-linux-lul.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PLATFORM_LINUX_LUL_H
+#define MOZ_PLATFORM_LINUX_LUL_H
+
+#include "platform.h"
+
+// Find out, in a platform-dependent way, where the code modules got
+// mapped in the process' virtual address space, and get |aLUL| to
+// load unwind info for them.
+void read_procmaps(lul::LUL* aLUL);
+
+// LUL needs a callback for its logging sink.
+void logging_sink_for_LUL(const char* str);
+
+#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */
diff --git a/tools/profiler/moz.build b/tools/profiler/moz.build
new file mode 100644
index 0000000000..8b185195f8
--- /dev/null
+++ b/tools/profiler/moz.build
@@ -0,0 +1,227 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG["MOZ_GECKO_PROFILER"]:  # Everything in this branch needs the Gecko Profiler.
+    DEFINES["MOZ_REPLACE_MALLOC_PREFIX"] = "profiler"
+    XPIDL_MODULE = "profiler"
+    XPIDL_SOURCES += [
+        "gecko/nsIProfiler.idl",
+    ]
+    EXPORTS += [
+        "public/GeckoProfilerReporter.h",
+        "public/ProfilerChild.h",
+        "public/ProfilerCodeAddressService.h",
+        "public/shared-libraries.h",
+    ]
+    UNIFIED_SOURCES += [
+        "core/PageInformation.cpp",
+        "core/platform.cpp",
+        "core/ProfileBuffer.cpp",
+        "core/ProfileBufferEntry.cpp",
+        "core/ProfiledThreadData.cpp",
+        "core/ProfilerBacktrace.cpp",
+        "core/ProfilerCodeAddressService.cpp",
+        "core/ProfilerMarkers.cpp",
+        "gecko/ChildProfilerController.cpp",
+        "gecko/nsProfilerStartParams.cpp",
+        "gecko/ProfilerChild.cpp",
+        "gecko/ProfilerIOInterposeObserver.cpp",
+    ]
+    if CONFIG["MOZ_REPLACE_MALLOC"] and CONFIG["MOZ_PROFILER_MEMORY"]:
+        SOURCES += [
+            "core/memory_hooks.cpp",  # Non-unified because of order of #includes
+        ]
+
+    XPCOM_MANIFESTS += [
+        "gecko/components.conf",
+    ]
+
+    if CONFIG["OS_TARGET"] == "Darwin":
+        # This file cannot be built in unified mode because it includes
+        # "nsLocalFile.h", which pulls in a system header which uses a type
+        # called TextRange, which conflicts with mozilla::TextRange due to
+        # a "using namespace mozilla;" declaration from a different file.
+        SOURCES += [
+            "gecko/nsProfiler.cpp",
+        ]
+    else:
+        UNIFIED_SOURCES += [
+            "gecko/nsProfiler.cpp",
+        ]
+
+    if CONFIG["OS_TARGET"] in ("Android", "Linux", "FreeBSD"):
+        if CONFIG["CPU_ARCH"] in ("arm", "aarch64", "x86", "x86_64", "mips64"):
+            UNIFIED_SOURCES += [  # The LUL sources (lul/), for these CPUs only.
+                "lul/AutoObjectMapper.cpp",
+                "lul/LulCommon.cpp",
+                "lul/LulDwarf.cpp",
+                "lul/LulDwarfSummariser.cpp",
+                "lul/LulElf.cpp",
+                "lul/LulMain.cpp",
+                "lul/platform-linux-lul.cpp",
+            ]
+        # This file cannot be built in unified mode because of name clashes with mozglue headers on Android.
+        SOURCES += [
+            "core/shared-libraries-linux.cc",
+        ]
+        if not CONFIG["MOZ_CRASHREPORTER"]:  # Build these Breakpad sources here instead.
+            SOURCES += [
+                "/toolkit/crashreporter/google-breakpad/src/common/linux/elfutils.cc",
+                "/toolkit/crashreporter/google-breakpad/src/common/linux/file_id.cc",
+                "/toolkit/crashreporter/google-breakpad/src/common/linux/linux_libc_support.cc",
+                "/toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.cc",
+            ]
+            if not CONFIG["HAVE_GETCONTEXT"]:
+                SOURCES += [
+                    "/toolkit/crashreporter/google-breakpad/src/common/linux/breakpad_getcontext.S"
+                ]
+        if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+            UNIFIED_SOURCES += [
+                "core/PowerCounters-linux.cpp",
+            ]
+        if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] != "FreeBSD":
+            SOURCES += [
+                "core/EHABIStackWalk.cpp",
+            ]
+    elif CONFIG["OS_TARGET"] == "Darwin":
+        UNIFIED_SOURCES += [
+            "core/shared-libraries-macos.cc",
+        ]
+        if CONFIG["CPU_ARCH"] == "aarch64":
+            UNIFIED_SOURCES += [
+                "core/PowerCounters-mac-arm64.cpp",
+            ]
+        if CONFIG["CPU_ARCH"] == "x86_64":
+            UNIFIED_SOURCES += [
+                "core/PowerCounters-mac-amd64.cpp",
+            ]
+    elif CONFIG["OS_TARGET"] == "WINNT":
+        if CONFIG["CC_TYPE"] == "clang-cl":
+            UNIFIED_SOURCES += [
+                "core/PowerCounters-win.cpp",
+            ]
+        SOURCES += [
+            "core/shared-libraries-win32.cc",
+        ]
+
+    LOCAL_INCLUDES += [
+        "/caps",
+        "/docshell/base",
+        "/ipc/chromium/src",
+        "/mozglue/linker",
+        "/netwerk/base",
+        "/netwerk/protocol/http",
+        "/toolkit/components/jsoncpp/include",
+        "/toolkit/crashreporter/google-breakpad/src",
+        "/tools/profiler/core/",
+        "/tools/profiler/gecko/",
+        "/xpcom/base",
+    ]
+
+    if CONFIG["OS_TARGET"] == "Android":
+        DEFINES["ANDROID_NDK_MAJOR_VERSION"] = CONFIG["ANDROID_NDK_MAJOR_VERSION"]
+        DEFINES["ANDROID_NDK_MINOR_VERSION"] = CONFIG["ANDROID_NDK_MINOR_VERSION"]
+        LOCAL_INCLUDES += [
+            # We need access to Breakpad's getcontext(3) which is suitable for Android
+            "/toolkit/crashreporter/google-breakpad/src/common/android/include",
+        ]
+
+    if CONFIG["MOZ_VTUNE"]:
+        DEFINES["MOZ_VTUNE_INSTRUMENTATION"] = True
+        UNIFIED_SOURCES += [
+            "core/VTuneProfiler.cpp",
+        ]
+
+    XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell/xpcshell.ini"]
+    MOCHITEST_CHROME_MANIFESTS += ["tests/chrome/chrome.ini"]
+    BROWSER_CHROME_MANIFESTS += ["tests/browser/browser.ini"]
+
+UNIFIED_SOURCES += [  # Added whether or not MOZ_GECKO_PROFILER is set.
+    "core/MicroGeckoProfiler.cpp",
+    "core/ProfileAdditionalInformation.cpp",
+    "core/ProfilerBindings.cpp",
+    "core/ProfilerThreadRegistration.cpp",
+    "core/ProfilerThreadRegistrationData.cpp",
+    "core/ProfilerThreadRegistry.cpp",
+    "core/ProfilerUtils.cpp",
+    "gecko/ProfilerParent.cpp",
+]
+
+IPDL_SOURCES += [
+    "gecko/PProfiler.ipdl",
+    "gecko/ProfilerTypes.ipdlh",
+]
+
+include("/ipc/chromium/chromium-config.mozbuild")
+
+EXPORTS += [  # Exported whether or not MOZ_GECKO_PROFILER is set.
+    "public/ChildProfilerController.h",
+    "public/GeckoProfiler.h",
+    "public/MicroGeckoProfiler.h",
+    "public/ProfileAdditionalInformation.h",
+    "public/ProfilerBindings.h",
+    "public/ProfilerControl.h",
+    "public/ProfilerParent.h",
+    "public/ProfilerRustBindings.h",
+]
+
+EXPORTS.mozilla += [
+    "public/ProfileBufferEntrySerializationGeckoExtensions.h",
+    "public/ProfileJSONWriter.h",
+    "public/ProfilerCounts.h",
+    "public/ProfilerLabels.h",
+    "public/ProfilerMarkers.h",
+    "public/ProfilerMarkersDetail.h",
+    "public/ProfilerMarkersPrerequisites.h",
+    "public/ProfilerMarkerTypes.h",
+    "public/ProfilerRunnable.h",
+    "public/ProfilerState.h",
+    "public/ProfilerThreadPlatformData.h",
+    "public/ProfilerThreadRegistration.h",
+    "public/ProfilerThreadRegistrationData.h",
+    "public/ProfilerThreadRegistrationInfo.h",
+    "public/ProfilerThreadRegistry.h",
+    "public/ProfilerThreadSleep.h",
+    "public/ProfilerThreadState.h",
+    "public/ProfilerUtils.h",
+]
+
+GeneratedFile(  # Produces profiling_categories.rs from the categories YAML file.
+    "rust-api/src/gecko_bindings/profiling_categories.rs",
+    script="../../mozglue/baseprofiler/build/generate_profiling_categories.py",
+    entry_point="generate_rust_enums",
+    inputs=["../../mozglue/baseprofiler/build/profiling_categories.yaml"],
+)
+
+CONFIGURE_SUBST_FILES += [
+    "rust-api/extra-bindgen-flags",
+]
+
+
+if CONFIG["COMPILE_ENVIRONMENT"]:
+    CbindgenHeader("profiler_ffi_generated.h", inputs=["rust-api"])
+
+    EXPORTS.mozilla += [
+        "!profiler_ffi_generated.h",  # The header produced by CbindgenHeader above.
+    ]
+
+USE_LIBS += [
+    "jsoncpp",
+]
+
+FINAL_LIBRARY = "xul"
+
+if CONFIG["ENABLE_TESTS"]:
+    DIRS += ["tests/gtest"]
+
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+    CXXFLAGS += [
+        "-Wno-error=stack-protector",
+        "-Wno-ignored-qualifiers",  # due to use of breakpad headers
+    ]
+
+with Files("**"):
+    BUG_COMPONENT = ("Core", "Gecko Profiler")
diff --git a/tools/profiler/public/ChildProfilerController.h b/tools/profiler/public/ChildProfilerController.h
new file mode 100644
index 0000000000..8febc25b65
--- /dev/null
+++ b/tools/profiler/public/ChildProfilerController.h
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ChildProfilerController_h
+#define ChildProfilerController_h
+
+#include "base/process.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/ipc/ProtocolUtils.h"
+#include "mozilla/DataMutex.h"
+#include "mozilla/RefPtr.h"
+#include "nsISupportsImpl.h"
+#include "nsStringFwd.h"
+#include "ProfileAdditionalInformation.h"
+
+namespace mozilla {
+
+class ProfilerChild;
+class PProfilerChild;
+class PProfilerParent;
+
+// ChildProfilerController manages the setup and teardown of ProfilerChild.
+// It's used on the main thread.
+// It manages a background thread that ProfilerChild runs on.
+class ChildProfilerController final {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ChildProfilerController)
+
+#ifdef MOZ_GECKO_PROFILER
+  static already_AddRefed<ChildProfilerController> Create(
+      mozilla::ipc::Endpoint<PProfilerChild>&& aEndpoint);
+
+  [[nodiscard]] ProfileAndAdditionalInformation
+  GrabShutdownProfileAndShutdown();
+  void Shutdown();
+
+ private:
+  ChildProfilerController();
+  ~ChildProfilerController();
+  void Init(mozilla::ipc::Endpoint<PProfilerChild>&& aEndpoint);
+  void ShutdownAndMaybeGrabShutdownProfileFirst(
+      ProfileAndAdditionalInformation* aOutShutdownProfileInformation);
+
+  // Called on mThread:
+  void SetupProfilerChild(mozilla::ipc::Endpoint<PProfilerChild>&& aEndpoint);
+  void ShutdownProfilerChild(
+      ProfileAndAdditionalInformation* aOutShutdownProfileInformation);
+
+  RefPtr<ProfilerChild> mProfilerChild;  // only accessed on mThread
+  DataMutex<RefPtr<nsIThread>> mThread;
+#else
+  static already_AddRefed<ChildProfilerController> Create(
+      mozilla::ipc::Endpoint<PProfilerChild>&& aEndpoint) {
+    return nullptr;
+  }
+  [[nodiscard]] ProfileAndAdditionalInformation
+  GrabShutdownProfileAndShutdown() {
+    return ProfileAndAdditionalInformation(std::move(EmptyCString()));
+  }
+  void Shutdown() {}
+
+ private:
+  ~ChildProfilerController() {}
+#endif  // MOZ_GECKO_PROFILER
+};
+
+}  // namespace mozilla
+
+#endif  // ChildProfilerController_h
diff --git a/tools/profiler/public/GeckoProfiler.h b/tools/profiler/public/GeckoProfiler.h
new file mode 100644
index 0000000000..f7c045297e
--- /dev/null
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -0,0 +1,435 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The Gecko Profiler is an always-on profiler that takes fast and low overhead
+// samples of the program execution using only userspace functionality for
+// portability. The goal of this module is to provide performance data in a
+// generic cross-platform way without requiring custom tools or kernel support.
+//
+// Samples are collected to form a timeline with optional timeline events
+// (markers) used for filtering. The samples include both native stacks and
+// platform-independent "label stack" frames.
+
+#ifndef GeckoProfiler_h
+#define GeckoProfiler_h
+
+// Everything in here is also safe to include unconditionally, and only defines
+// empty macros if MOZ_GECKO_PROFILER is unset.
+// If your file only uses particular APIs (e.g., only markers), please consider
+// including only the needed headers instead of this one, to reduce compilation
+// dependencies.
+#include "BaseProfiler.h"
+#include "ProfileAdditionalInformation.h"
+#include "mozilla/ProfilerCounts.h"
+#include "mozilla/ProfilerLabels.h"
+#include "mozilla/ProfilerMarkers.h"
+#include "mozilla/ProfilerState.h"
+#include "mozilla/ProfilerThreadSleep.h"
+#include "mozilla/ProfilerThreadState.h"
+#include "mozilla/ProgressLogger.h"
+#include "mozilla/Result.h"
+#include "mozilla/ResultVariant.h"
+
+#ifndef MOZ_GECKO_PROFILER
+
+#  include "mozilla/UniquePtr.h"
+
+// This file can be #included unconditionally. However, everything within this
+// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the
+// following macros and functions, which encapsulate the most common operations
+// and thus avoid the need for many #ifdefs.
+
+#  define PROFILER_REGISTER_THREAD(name)
+#  define PROFILER_UNREGISTER_THREAD()
+#  define AUTO_PROFILER_REGISTER_THREAD(name)
+
+#  define PROFILER_JS_INTERRUPT_CALLBACK()
+
+#  define PROFILER_SET_JS_CONTEXT(cx)
+#  define PROFILER_CLEAR_JS_CONTEXT()
+
+// Function stubs for when MOZ_GECKO_PROFILER is not defined.
+
+// This won't be used, it's just there to allow the empty definition of
+// `profiler_get_backtrace`.
+struct ProfilerBacktrace {};
+using UniqueProfilerBacktrace = mozilla::UniquePtr<ProfilerBacktrace>;
+
+// Get/Capture-backtrace functions can return nullptr or false, the result
+// should be fed to another empty macro or stub anyway.
+
+static inline UniqueProfilerBacktrace profiler_get_backtrace() {
+  return nullptr;
+}
+
+// This won't be used, it's just there to allow the empty definitions of
+// `profiler_capture_backtrace_into` and `profiler_capture_backtrace`.
+struct ProfileChunkedBuffer {};
+
+static inline bool profiler_capture_backtrace_into(
+    mozilla::ProfileChunkedBuffer& aChunkedBuffer,
+    mozilla::StackCaptureOptions aCaptureOptions) {
+  return false;
+}
+static inline mozilla::UniquePtr<mozilla::ProfileChunkedBuffer>
+profiler_capture_backtrace() {
+  return nullptr;
+}
+
+static inline void profiler_set_process_name(
+    const nsACString& aProcessName, const nsACString* aETLDplus1 = nullptr) {}
+
+static inline void profiler_received_exit_profile(
+    const nsACString& aExitProfile) {}
+
+static inline void profiler_register_page(uint64_t aTabID,
+                                          uint64_t aInnerWindowID,
+                                          const nsCString& aUrl,
+                                          uint64_t aEmbedderInnerWindowID,
+                                          bool aIsPrivateBrowsing) {}
+static inline void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
+}
+
+static inline void GetProfilerEnvVarsForChildProcess(
+    std::function<void(const char* key, const char* value)>&& aSetEnv) {}
+
+static inline void profiler_record_wakeup_count(
+    const nsACString& aProcessType) {}
+
+#else  // !MOZ_GECKO_PROFILER
+
+#  include "js/ProfilingStack.h"
+#  include "mozilla/Assertions.h"
+#  include "mozilla/Atomics.h"
+#  include "mozilla/Attributes.h"
+#  include "mozilla/BaseProfilerRAIIMacro.h"
+#  include "mozilla/Maybe.h"
+#  include "mozilla/PowerOfTwo.h"
+#  include "mozilla/ThreadLocal.h"
+#  include "mozilla/TimeStamp.h"
+#  include "mozilla/UniquePtr.h"
+#  include "nscore.h"
+#  include "nsINamed.h"
+#  include "nsString.h"
+#  include "nsThreadUtils.h"
+
+#  include <functional>
+#  include <stdint.h>
+
+class ProfilerBacktrace;
+class ProfilerCodeAddressService;
+struct JSContext;
+
+namespace mozilla {
+class ProfileBufferControlledChunkManager;
+class ProfileChunkedBuffer;
+namespace baseprofiler {
+class SpliceableJSONWriter;
+}  // namespace baseprofiler
+}  // namespace mozilla
+class nsIURI;
+
+enum class ProfilerError {
+  IsInactive,
+  JsonGenerationFailed,
+};
+
+template <typename T>
+using ProfilerResult = mozilla::Result<T, ProfilerError>;
+
+//---------------------------------------------------------------------------
+// Give information to the profiler
+//---------------------------------------------------------------------------
+
+// Register/unregister threads with the profiler. Both functions operate the
+// same whether the profiler is active or inactive.
+#  define PROFILER_REGISTER_THREAD(name)         \
+    do {                                         \
+      char stackTop;                             \
+      profiler_register_thread(name, &stackTop); \
+    } while (0)
+#  define PROFILER_UNREGISTER_THREAD() profiler_unregister_thread()
+ProfilingStack* profiler_register_thread(const char* name, void* guessStackTop);
+void profiler_unregister_thread();
+
+// Registers a DOM Window (the JS global `window`) with the profiler. Each
+// Window _roughly_ corresponds to a single document loaded within a
+// browsing context. Both the Window Id and Browser Id are recorded to allow
+// correlating different Windows loaded within the same tab or frame element.
+//
+// We register pages for each navigation but we do not register
+// history.pushState or history.replaceState since they correspond to the same
+// Inner Window ID. When a browsing context is first loaded, the first url
+// loaded in it will be about:blank. Because of that, this call keeps the first
+// non-about:blank registration of window and discards the previous one.
+//
+//   "aTabID"                 is the BrowserId that the document belongs to.
+//                            That's used to determine the tab of that page.
+//   "aInnerWindowID"         is the ID of the `window` global object of that
+//                            document.
+//   "aUrl"                   is the URL of the page.
+//   "aEmbedderInnerWindowID" is the inner window id of embedder. It's used to
+//                            determine sub documents of a page.
+//   "aIsPrivateBrowsing"     is true if this browsing context happens in a
+//                            private browsing context.
+void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID,
+                            const nsCString& aUrl,
+                            uint64_t aEmbedderInnerWindowID,
+                            bool aIsPrivateBrowsing);
+// Unregister page with the profiler.
+//
+// Takes an Inner Window ID and unregisters the page entry that has the same ID.
+void profiler_unregister_page(uint64_t aRegisteredInnerWindowID);
+
+// Remove all registered and unregistered pages in the profiler.
+void profiler_clear_all_pages();
+
+class BaseProfilerCount;
+void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+// Register and unregister a thread within a scope.
+#  define AUTO_PROFILER_REGISTER_THREAD(name) \
+    mozilla::AutoProfilerRegisterThread PROFILER_RAII(name)
+
+enum class SamplingState {
+  JustStopped,  // Sampling loop has just stopped without sampling, between the
+                // callback registration and now.
+  SamplingPaused,  // Profiler is active but sampling loop has gone through a
+                   // pause.
+  NoStackSamplingCompleted,  // A full sampling loop has completed in
+                             // no-stack-sampling mode.
+  SamplingCompleted          // A full sampling loop has completed.
+};
+
+using PostSamplingCallback = std::function<void(SamplingState)>;
+
+// Install a callback to be invoked at the end of the next sampling loop.
+// - `false` if profiler is not active, `aCallback` will stay untouched.
+// - `true` if `aCallback` was successfully moved-from into internal storage,
+//   and *will* be invoked at the end of the next sampling cycle. Note that this
+//   will happen on the Sampler thread, and will block further sampling, so
+//   please be mindful not to block for a long time (e.g., just dispatch a
+//   runnable to another thread.) Calling profiler functions from the callback
+//   is allowed.
+[[nodiscard]] bool profiler_callback_after_sampling(
+    PostSamplingCallback&& aCallback);
+
+// Called by the JSRuntime's operation callback. This is used to start profiling
+// on auxiliary threads. Operates the same whether the profiler is active or
+// not.
+#  define PROFILER_JS_INTERRUPT_CALLBACK() profiler_js_interrupt_callback()
+void profiler_js_interrupt_callback();
+
+// Set and clear the current thread's JSContext.
+#  define PROFILER_SET_JS_CONTEXT(cx) profiler_set_js_context(cx)
+#  define PROFILER_CLEAR_JS_CONTEXT() profiler_clear_js_context()
+void profiler_set_js_context(JSContext* aCx);
+void profiler_clear_js_context();
+
+//---------------------------------------------------------------------------
+// Get information from the profiler
+//---------------------------------------------------------------------------
+
+// Get the chunk manager used in the current profiling session, or null.
+mozilla::ProfileBufferControlledChunkManager*
+profiler_get_controlled_chunk_manager();
+
+// The number of milliseconds since the process started. Operates the same
+// whether the profiler is active or inactive.
+double profiler_time();
+
+// An object of this class is passed to profiler_suspend_and_sample_thread().
+// For each stack frame, one of the Collect methods will be called.
+class ProfilerStackCollector {
+ public:
+  // Some collectors need to worry about possibly overwriting previous
+  // generations of data. If that's not an issue, this can return Nothing,
+  // which is the default behaviour.
+  virtual mozilla::Maybe<uint64_t> SamplePositionInBuffer() {
+    return mozilla::Nothing();
+  }
+  virtual mozilla::Maybe<uint64_t> BufferRangeStart() {
+    return mozilla::Nothing();
+  }
+
+  // This method will be called once if the thread being suspended is the main
+  // thread. Default behaviour is to do nothing.
+  virtual void SetIsMainThread() {}
+
+  // WARNING: The target thread is suspended when the Collect methods are
+  // called. Do not try to allocate or acquire any locks, or you could
+  // deadlock. The target thread will have resumed by the time this function
+  // returns.
+
+  virtual void CollectNativeLeafAddr(void* aAddr) = 0;
+
+  virtual void CollectJitReturnAddr(void* aAddr) = 0;
+
+  virtual void CollectWasmFrame(const char* aLabel) = 0;
+
+  virtual void CollectProfilingStackFrame(
+      const js::ProfilingStackFrame& aFrame) = 0;
+};
+
+// This method suspends the thread identified by aThreadId, samples its
+// profiling stack, JS stack, and (optionally) native stack, passing the
+// collected frames into aCollector. aFeatures dictates which compiler features
+// are used. |Leaf| is the only relevant one.
+// Use `ProfilerThreadId{}` (unspecified) to sample the current thread.
+void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId,
+                                        uint32_t aFeatures,
+                                        ProfilerStackCollector& aCollector,
+                                        bool aSampleNative = true);
+
+struct ProfilerBacktraceDestructor {
+  void operator()(ProfilerBacktrace*);
+};
+
+using UniqueProfilerBacktrace =
+    mozilla::UniquePtr<ProfilerBacktrace, ProfilerBacktraceDestructor>;
+
+// Immediately capture the current thread's call stack, store it in the provided
+// buffer (usually to avoid allocations if you can construct the buffer on the
+// stack). Returns false if unsuccessful, or if the profiler is inactive.
+bool profiler_capture_backtrace_into(
+    mozilla::ProfileChunkedBuffer& aChunkedBuffer,
+    mozilla::StackCaptureOptions aCaptureOptions);
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfileChunkedBuffer (usually for later use in MarkerStack::TakeBacktrace()).
+// May be null if unsuccessful, or if the profiler is inactive.
+mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> profiler_capture_backtrace();
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfilerBacktrace (usually for later use in marker functions that take a
+// ProfilerBacktrace). May be null if unsuccessful, or if the profiler is
+// inactive.
+UniqueProfilerBacktrace profiler_get_backtrace();
+
+// Accumulates the count, sum, minimum and maximum of a series of samples.
+struct ProfilerStats {
+  unsigned n = 0;
+  double sum = 0;
+  double min = std::numeric_limits<double>::max();
+  double max = 0;
+  // Folds one sample `v` into the running count, sum, minimum and maximum.
+  void Count(double v) {
+    n += 1;
+    sum += v;
+    // `min` starts at the largest double, so the first sample replaces it.
+    min = (v < min) ? v : min;
+    // `max` starts at 0, so it only tracks samples greater than zero.
+    max = (v > max) ? v : max;
+  }
+};
+
+struct ProfilerBufferInfo {
+  // Index of the oldest entry.
+  uint64_t mRangeStart;
+  // Index of the newest entry.
+  uint64_t mRangeEnd;
+  // Buffer capacity in number of 8-byte entries.
+  uint32_t mEntryCount;
+  // Sampling stats: Interval between successive samplings.
+  ProfilerStats mIntervalsUs;
+  // Sampling stats: Total sampling duration. (Split detail below.)
+  ProfilerStats mOverheadsUs;
+  // Sampling stats: Time to acquire the lock before sampling.
+  ProfilerStats mLockingsUs;
+  // Sampling stats: Time to discard expired data.
+  ProfilerStats mCleaningsUs;
+  // Sampling stats: Time to collect counter data.
+  ProfilerStats mCountersUs;
+  // Sampling stats: Time to sample thread stacks.
+  ProfilerStats mThreadsUs;
+};
+
+// Get information about the current buffer status.
+// Returns Nothing() if the profiler is inactive.
+//
+// This information may be useful to a user-interface displaying the current
+// status of the profiler, allowing the user to get a sense for how fast the
+// buffer is being written to, and how much data is visible.
+mozilla::Maybe<ProfilerBufferInfo> profiler_get_buffer_info();
+
+// Record through glean how many times profiler_thread_wake has been
+// called.
+void profiler_record_wakeup_count(const nsACString& aProcessType);
+
+//---------------------------------------------------------------------------
+// Output profiles
+//---------------------------------------------------------------------------
+
+// Set a user-friendly process name, used in JSON stream.  Allows an optional
+// detailed name which may include private info (eTLD+1 in fission)
+void profiler_set_process_name(const nsACString& aProcessName,
+                               const nsACString* aETLDplus1 = nullptr);
+
+// Record an exit profile from a child process.
+void profiler_received_exit_profile(const nsACString& aExitProfile);
+
+// Get the profile encoded as a JSON string. A no-op (returning nullptr) if the
+// profiler is inactive.
+// If aIsShuttingDown is true, the current time is included as the process
+// shutdown time in the JSON's "meta" object.
+mozilla::UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0,
+                                                bool aIsShuttingDown = false);
+
+// Write the profile for this process (excluding subprocesses) into aWriter.
+// Returns a failed result if the profiler is inactive.
+ProfilerResult<mozilla::ProfileGenerationAdditionalInformation>
+profiler_stream_json_for_this_process(
+    mozilla::baseprofiler::SpliceableJSONWriter& aWriter, double aSinceTime = 0,
+    bool aIsShuttingDown = false,
+    ProfilerCodeAddressService* aService = nullptr,
+    mozilla::ProgressLogger aProgressLogger = {});
+
+// Get the profile and write it into a file. A no-op if the profiler is
+// inactive.
+//
+// This function is 'extern "C"' so that it is easily callable from a debugger
+// in a build without debugging information (a workaround for
+// http://llvm.org/bugs/show_bug.cgi?id=22211).
+extern "C" {
+void profiler_save_profile_to_file(const char* aFilename);
+}
+
+//---------------------------------------------------------------------------
+// RAII classes
+//---------------------------------------------------------------------------
+
+namespace mozilla {
+
+// Convenience class to register and unregister a thread with the profiler.
+// Needs to be the first object on the stack of the thread.
+class MOZ_RAII AutoProfilerRegisterThread final {
+ public:
+  explicit AutoProfilerRegisterThread(const char* aName) {
+    profiler_register_thread(aName, this);
+  }
+
+  ~AutoProfilerRegisterThread() { profiler_unregister_thread(); }
+
+ private:
+  AutoProfilerRegisterThread(const AutoProfilerRegisterThread&) = delete;
+  AutoProfilerRegisterThread& operator=(const AutoProfilerRegisterThread&) =
+      delete;
+};
+
+// Get the MOZ_PROFILER_STARTUP* environment variables that should be
+// supplied to a child process that is about to be launched, in order
+// to make that child process start with the same profiler settings as
+// in the current process.  The given function is invoked once for
+// each variable to be set.
+void GetProfilerEnvVarsForChildProcess(
+    std::function<void(const char* key, const char* value)>&& aSetEnv);
+
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // GeckoProfiler_h
diff --git a/tools/profiler/public/GeckoProfilerReporter.h b/tools/profiler/public/GeckoProfilerReporter.h
new file mode 100644
index 0000000000..f5bf41f223
--- /dev/null
+++ b/tools/profiler/public/GeckoProfilerReporter.h
@@ -0,0 +1,26 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef GeckoProfilerReporter_h
+#define GeckoProfilerReporter_h
+
+#include "nsIMemoryReporter.h"
+
+class GeckoProfilerReporter final : public nsIMemoryReporter {
+ public:
+  NS_DECL_ISUPPORTS
+
+  GeckoProfilerReporter() {}
+
+  NS_IMETHOD
+  CollectReports(nsIHandleReportCallback* aHandleReport, nsISupports* aData,
+                 bool aAnonymize) override;
+
+ private:
+  ~GeckoProfilerReporter() {}
+};
+
+#endif
diff --git a/tools/profiler/public/GeckoTraceEvent.h b/tools/profiler/public/GeckoTraceEvent.h
new file mode 100644
index 0000000000..75affaf9c8
--- /dev/null
+++ b/tools/profiler/public/GeckoTraceEvent.h
@@ -0,0 +1,1060 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file under third_party_mods/chromium or at:
+// http://src.chromium.org/svn/trunk/src/LICENSE
+
+#ifndef GECKO_TRACE_EVENT_H_
+#define GECKO_TRACE_EVENT_H_
+
+#include "MicroGeckoProfiler.h"
+
+// Extracted from Chromium's src/base/debug/trace_event.h, modified to talk to
+// the Gecko profiler.
+
+#if defined(RTC_DISABLE_TRACE_EVENTS)
+#  define RTC_TRACE_EVENTS_ENABLED 0
+#else
+#  define RTC_TRACE_EVENTS_ENABLED 1
+#endif
+
+// Type values for identifying types in the TraceValue union.
+#define TRACE_VALUE_TYPE_BOOL (static_cast<unsigned char>(1))
+#define TRACE_VALUE_TYPE_UINT (static_cast<unsigned char>(2))
+#define TRACE_VALUE_TYPE_INT (static_cast<unsigned char>(3))
+#define TRACE_VALUE_TYPE_DOUBLE (static_cast<unsigned char>(4))
+#define TRACE_VALUE_TYPE_POINTER (static_cast<unsigned char>(5))
+#define TRACE_VALUE_TYPE_STRING (static_cast<unsigned char>(6))
+#define TRACE_VALUE_TYPE_COPY_STRING (static_cast<unsigned char>(7))
+
+#if RTC_TRACE_EVENTS_ENABLED
+
+// This header is designed to give you trace_event macros without specifying
+// how the events actually get collected and stored. If you need to expose trace
+// event to some other universe, you can copy-and-paste this file,
+// implement the TRACE_EVENT_API macros, and do any other necessary fixup for
+// the target platform. The end result is that multiple libraries can funnel
+// events through to a shared trace event collector.
+
+// Trace events are for tracking application performance and resource usage.
+// Macros are provided to track:
+//    Begin and end of function calls
+//    Counters
+//
+// Events are issued against categories. Whereas RTC_LOG's
+// categories are statically defined, TRACE categories are created
+// implicitly with a string. For example:
+//   TRACE_EVENT_INSTANT0("MY_SUBSYSTEM", "SomeImportantEvent")
+//
+// Events can be INSTANT, or can be pairs of BEGIN and END in the same scope:
+//   TRACE_EVENT_BEGIN0("MY_SUBSYSTEM", "SomethingCostly")
+//   doSomethingCostly()
+//   TRACE_EVENT_END0("MY_SUBSYSTEM", "SomethingCostly")
+// Note: our tools can't always determine the correct BEGIN/END pairs unless
+// these are used in the same scope. Use ASYNC_BEGIN/ASYNC_END macros if you
+// need them to be in separate scopes.
+//
+// A common use case is to trace entire function scopes. This
+// issues a trace BEGIN and END automatically:
+//   void doSomethingCostly() {
+//     TRACE_EVENT0("MY_SUBSYSTEM", "doSomethingCostly");
+//     ...
+//   }
+//
+// Additional parameters can be associated with an event:
+//   void doSomethingCostly2(int howMuch) {
+//     TRACE_EVENT1("MY_SUBSYSTEM", "doSomethingCostly",
+//         "howMuch", howMuch);
+//     ...
+//   }
+//
+// The trace system will automatically add to this information the
+// current process id, thread id, and a timestamp in microseconds.
+//
+// To trace an asynchronous procedure such as an IPC send/receive, use
+// ASYNC_BEGIN and ASYNC_END:
+//   [single threaded sender code]
+//     static int send_count = 0;
+//     ++send_count;
+//     TRACE_EVENT_ASYNC_BEGIN0("ipc", "message", send_count);
+//     Send(new MyMessage(send_count));
+//   [receive code]
+//     void OnMyMessage(send_count) {
+//       TRACE_EVENT_ASYNC_END0("ipc", "message", send_count);
+//     }
+// The third parameter is a unique ID to match ASYNC_BEGIN/ASYNC_END pairs.
+// ASYNC_BEGIN and ASYNC_END can occur on any thread of any traced process.
+// Pointers can be used for the ID parameter, and they will be mangled
+// internally so that the same pointer on two different processes will not
+// match. For example:
+//   class MyTracedClass {
+//    public:
+//     MyTracedClass() {
+//       TRACE_EVENT_ASYNC_BEGIN0("category", "MyTracedClass", this);
+//     }
+//     ~MyTracedClass() {
+//       TRACE_EVENT_ASYNC_END0("category", "MyTracedClass", this);
+//     }
+//   }
+//
+// Trace event also supports counters, which is a way to track a quantity
+// as it varies over time. Counters are created with the following macro:
+//   TRACE_COUNTER1("MY_SUBSYSTEM", "myCounter", g_myCounterValue);
+//
+// Counters are process-specific. The macro itself can be issued from any
+// thread, however.
+//
+// Sometimes, you want to track two counters at once. You can do this with two
+// counter macros:
+//   TRACE_COUNTER1("MY_SUBSYSTEM", "myCounter0", g_myCounterValue[0]);
+//   TRACE_COUNTER1("MY_SUBSYSTEM", "myCounter1", g_myCounterValue[1]);
+// Or you can do it with a combined macro:
+//   TRACE_COUNTER2("MY_SUBSYSTEM", "myCounter",
+//       "bytesPinned", g_myCounterValue[0],
+//       "bytesAllocated", g_myCounterValue[1]);
+// This indicates to the tracing UI that these counters should be displayed
+// in a single graph, as a summed area chart.
+//
+// Since counters are in a global namespace, you may want to disambiguate with a
+// unique ID, by using the TRACE_COUNTER_ID* variations.
+//
+// By default, trace collection is compiled in, but turned off at runtime.
+// Collecting trace data is the responsibility of the embedding
+// application. In Chrome's case, navigating to about:tracing will turn on
+// tracing and display data collected across all active processes.
+//
+//
+// Memory scoping note:
+// Tracing copies the pointers, not the string content, of the strings passed
+// in for category, name, and arg_names.  Thus, the following code will
+// cause problems:
+//     char* str = strdup("importantName");
+//     TRACE_EVENT_INSTANT0("SUBSYSTEM", str);  // BAD!
+//     free(str);                   // Trace system now has dangling pointer
+//
+// To avoid this issue with the `name` and `arg_name` parameters, use the
+// TRACE_EVENT_COPY_XXX overloads of the macros at additional runtime overhead.
+// Notes: The category must always be in a long-lived char* (i.e. static const).
+//        The `arg_values`, when used, are always deep copied with the _COPY
+//        macros.
+//
+// When are string argument values copied:
+// const char* arg_values are only referenced by default:
+//     TRACE_EVENT1("category", "name",
+//                  "arg1", "literal string is only referenced");
+// Use TRACE_STR_COPY to force copying of a const char*:
+//     TRACE_EVENT1("category", "name",
+//                  "arg1", TRACE_STR_COPY("string will be copied"));
+// std::string arg_values are always copied:
+//     TRACE_EVENT1("category", "name",
+//                  "arg1", std::string("string will be copied"));
+//
+//
+// Thread Safety:
+// Thread safety is provided by methods defined in event_tracer.h. See the file
+// for details.
+
+// By default, const char* argument values are assumed to have long-lived scope
+// and will not be copied. Use this macro to force a const char* to be copied.
+#  define TRACE_STR_COPY(str) \
+    webrtc::trace_event_internal::TraceStringWithCopy(str)
+
+// This will mark the trace event as disabled by default. The user will need
+// to explicitly enable the event.
+#  define TRACE_DISABLED_BY_DEFAULT(name) "disabled-by-default-" name
+
+// By default, uint64 ID argument values are not mangled with the Process ID in
+// TRACE_EVENT_ASYNC macros. Use this macro to force Process ID mangling.
+#  define TRACE_ID_MANGLE(id) \
+    webrtc::trace_event_internal::TraceID::ForceMangle(id)
+
+// Records a pair of begin and end events called "name" for the current
+// scope, with 0, 1 or 2 associated arguments. If the category is not
+// enabled, then this does nothing.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+#  define TRACE_EVENT0(category, name) \
+    INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name)
+#  define TRACE_EVENT1(category, name, arg1_name, arg1_val) \
+    INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name, arg1_name, arg1_val)
+#  define TRACE_EVENT2(category, name, arg1_name, arg1_val, arg2_name,   \
+                       arg2_val)                                         \
+    INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name, arg1_name, arg1_val, \
+                                    arg2_name, arg2_val)
+
+// Records a single event called "name" immediately, with 0, 1 or 2
+// associated arguments. If the category is not enabled, then this
+// does nothing.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+#  define TRACE_EVENT_INSTANT0(category, name)                          \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
+                             TRACE_EVENT_FLAG_NONE)
+#  define TRACE_EVENT_INSTANT1(category, name, arg1_name, arg1_val)     \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
+                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val)
+#  define TRACE_EVENT_INSTANT2(category, name, arg1_name, arg1_val, arg2_name, \
+                               arg2_val)                                       \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name,        \
+                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val,       \
+                             arg2_name, arg2_val)
+#  define TRACE_EVENT_COPY_INSTANT0(category, name)                     \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name, \
+                             TRACE_EVENT_FLAG_COPY)
+#  define TRACE_EVENT_COPY_INSTANT1(category, name, arg1_name, arg1_val) \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name,  \
+                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_INSTANT2(category, name, arg1_name, arg1_val, \
+                                    arg2_name, arg2_val)                 \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_INSTANT, category, name,  \
+                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val, \
+                             arg2_name, arg2_val)
+
+// Records a single BEGIN event called "name" immediately, with 0, 1 or 2
+// associated arguments. If the category is not enabled, then this does
+// nothing. The COPY variants pass TRACE_EVENT_FLAG_COPY as the event flags.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+#  define TRACE_EVENT_BEGIN0(category, name)                          \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
+                             TRACE_EVENT_FLAG_NONE)
+#  define TRACE_EVENT_BEGIN1(category, name, arg1_name, arg1_val)     \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
+                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val)
+#  define TRACE_EVENT_BEGIN2(category, name, arg1_name, arg1_val, arg2_name, \
+                             arg2_val)                                       \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name,        \
+                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val,     \
+                             arg2_name, arg2_val)
+#  define TRACE_EVENT_COPY_BEGIN0(category, name)                     \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name, \
+                             TRACE_EVENT_FLAG_COPY)
+#  define TRACE_EVENT_COPY_BEGIN1(category, name, arg1_name, arg1_val) \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name,  \
+                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_BEGIN2(category, name, arg1_name, arg1_val,   \
+                                  arg2_name, arg2_val)                   \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_BEGIN, category, name,    \
+                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val, \
+                             arg2_name, arg2_val)
+
+// Records a single END event for "name" immediately, with 0, 1 or 2
+// arguments. If the category is not enabled, then this does nothing.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+#  define TRACE_EVENT_END0(category, name)                          \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
+                             TRACE_EVENT_FLAG_NONE)
+#  define TRACE_EVENT_END1(category, name, arg1_name, arg1_val)     \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
+                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val)
+#  define TRACE_EVENT_END2(category, name, arg1_name, arg1_val, arg2_name, \
+                           arg2_val)                                       \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name,        \
+                             TRACE_EVENT_FLAG_NONE, arg1_name, arg1_val,   \
+                             arg2_name, arg2_val)
+#  define TRACE_EVENT_COPY_END0(category, name)                     \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name, \
+                             TRACE_EVENT_FLAG_COPY)
+#  define TRACE_EVENT_COPY_END1(category, name, arg1_name, arg1_val) \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name,  \
+                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_END2(category, name, arg1_name, arg1_val,     \
+                                arg2_name, arg2_val)                     \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_END, category, name,      \
+                             TRACE_EVENT_FLAG_COPY, arg1_name, arg1_val, \
+                             arg2_name, arg2_val)
+
+// Records the value of a counter called "name" immediately. The value must
+// be representable as an int: it is recorded via static_cast<int>(value).
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+#  define TRACE_COUNTER1(category, name, value)                         \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name, \
+                             TRACE_EVENT_FLAG_NONE, "value",            \
+                             static_cast<int>(value))
+#  define TRACE_COPY_COUNTER1(category, name, value)                    \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name, \
+                             TRACE_EVENT_FLAG_COPY, "value",            \
+                             static_cast<int>(value))
+
+// Records the values of a multi-parted counter called "name" immediately.
+// The UI will treat value1 and value2 as parts of a whole, displaying their
+// values as a stacked-bar chart. Both values are recorded as int.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+#  define TRACE_COUNTER2(category, name, value1_name, value1_val, value2_name, \
+                         value2_val)                                           \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name,        \
+                             TRACE_EVENT_FLAG_NONE, value1_name,               \
+                             static_cast<int>(value1_val), value2_name,        \
+                             static_cast<int>(value2_val))
+#  define TRACE_COPY_COUNTER2(category, name, value1_name, value1_val,  \
+                              value2_name, value2_val)                  \
+    INTERNAL_TRACE_EVENT_ADD(TRACE_EVENT_PHASE_COUNTER, category, name, \
+                             TRACE_EVENT_FLAG_COPY, value1_name,        \
+                             static_cast<int>(value1_val), value2_name, \
+                             static_cast<int>(value2_val))
+
+// Records the value of a counter called "name" immediately. The value must
+// be representable as an int: it is recorded via static_cast<int>(value).
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+// - `id` is used to disambiguate counters with the same name. It must either
+//   be a pointer or an integer value up to 64 bits. If it's a pointer, the bits
+//   will be xored with a hash of the process ID so that the same pointer on
+//   two different processes will not collide.
+#  define TRACE_COUNTER_ID1(category, name, id, value)                         \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_COUNTER, category,      \
+                                     name, id, TRACE_EVENT_FLAG_NONE, "value", \
+                                     static_cast<int>(value))
+#  define TRACE_COPY_COUNTER_ID1(category, name, id, value)                    \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_COUNTER, category,      \
+                                     name, id, TRACE_EVENT_FLAG_COPY, "value", \
+                                     static_cast<int>(value))
+
+// Records the values of a multi-parted counter called "name" immediately.
+// The UI will treat value1 and value2 as parts of a whole, displaying their
+// values as a stacked-bar chart. Both values are recorded as int.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+// - `id` is used to disambiguate counters with the same name. It must either
+//   be a pointer or an integer value up to 64 bits. If it's a pointer, the bits
+//   will be xored with a hash of the process ID so that the same pointer on
+//   two different processes will not collide.
+#  define TRACE_COUNTER_ID2(category, name, id, value1_name, value1_val,      \
+                            value2_name, value2_val)                          \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(                                         \
+        TRACE_EVENT_PHASE_COUNTER, category, name, id, TRACE_EVENT_FLAG_NONE, \
+        value1_name, static_cast<int>(value1_val), value2_name,               \
+        static_cast<int>(value2_val))
+#  define TRACE_COPY_COUNTER_ID2(category, name, id, value1_name, value1_val, \
+                                 value2_name, value2_val)                     \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(                                         \
+        TRACE_EVENT_PHASE_COUNTER, category, name, id, TRACE_EVENT_FLAG_COPY, \
+        value1_name, static_cast<int>(value1_val), value2_name,               \
+        static_cast<int>(value2_val))
+
+// Records a single ASYNC_BEGIN event called "name" immediately, with 0, 1 or 2
+// associated arguments. If the category is not enabled, then this does
+// nothing. The COPY variants pass TRACE_EVENT_FLAG_COPY as the event flags.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+// - `id` is used to match the ASYNC_BEGIN event with the ASYNC_END event. ASYNC
+//   events are considered to match if their category, name and id values all
+//   match. `id` must either be a pointer or an integer value up to 64 bits. If
+//   it's a pointer, the bits will be xored with a hash of the process ID so
+//   that the same pointer on two different processes will not collide.
+// An asynchronous operation can consist of multiple phases. The first phase is
+// defined by the ASYNC_BEGIN calls. Additional phases can be defined using the
+// ASYNC_STEP macros. When the operation completes, call ASYNC_END.
+// An ASYNC trace typically occurs on a single thread (if not, it will only be
+// drawn on the thread defined in the ASYNC_BEGIN event), but all events in that
+// operation must use the same `name` and `id`. Each event can have its own
+// args.
+#  define TRACE_EVENT_ASYNC_BEGIN0(category, name, id)                        \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE)
+#  define TRACE_EVENT_ASYNC_BEGIN1(category, name, id, arg1_name, arg1_val)   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,         \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_ASYNC_BEGIN2(category, name, id, arg1_name, arg1_val,   \
+                                   arg2_name, arg2_val)                       \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,         \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+#  define TRACE_EVENT_COPY_ASYNC_BEGIN0(category, name, id)                   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY)
+#  define TRACE_EVENT_COPY_ASYNC_BEGIN1(category, name, id, arg1_name,        \
+                                        arg1_val)                             \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY,         \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_ASYNC_BEGIN2(category, name, id, arg1_name,        \
+                                        arg1_val, arg2_name, arg2_val)        \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY,         \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+
+// Records a single ASYNC_STEP event for `step` immediately. If the category
+// is not enabled, then this does nothing. The `name` and `id` must match the
+// ASYNC_BEGIN event above. The `step` param identifies this step within the
+// async event and is recorded as an argument named "step". Call this at the
+// beginning of the next phase of an asynchronous operation.
+#  define TRACE_EVENT_ASYNC_STEP0(category, name, id, step)                   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category,  \
+                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
+                                     step)
+#  define TRACE_EVENT_ASYNC_STEP1(category, name, id, step, arg1_name,        \
+                                  arg1_val)                                   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category,  \
+                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
+                                     step, arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_ASYNC_STEP0(category, name, id, step)              \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category,  \
+                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
+                                     step)
+#  define TRACE_EVENT_COPY_ASYNC_STEP1(category, name, id, step, arg1_name,   \
+                                       arg1_val)                              \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_STEP, category,  \
+                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
+                                     step, arg1_name, arg1_val)
+
+// Records a single ASYNC_END event for "name" immediately; `name` and `id`
+// must match ASYNC_BEGIN. If the category is not enabled, this does nothing.
+#  define TRACE_EVENT_ASYNC_END0(category, name, id)                        \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE)
+#  define TRACE_EVENT_ASYNC_END1(category, name, id, arg1_name, arg1_val)   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,       \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_ASYNC_END2(category, name, id, arg1_name, arg1_val,   \
+                                 arg2_name, arg2_val)                       \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,       \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+#  define TRACE_EVENT_COPY_ASYNC_END0(category, name, id)                   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY)
+#  define TRACE_EVENT_COPY_ASYNC_END1(category, name, id, arg1_name, arg1_val) \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category,    \
+                                     name, id, TRACE_EVENT_FLAG_COPY,          \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_ASYNC_END2(category, name, id, arg1_name, arg1_val, \
+                                      arg2_name, arg2_val)                     \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_ASYNC_END, category,    \
+                                     name, id, TRACE_EVENT_FLAG_COPY,          \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+
+// Records a single FLOW_BEGIN event called "name" immediately, with 0, 1 or 2
+// associated arguments. If the category is not enabled, then this does
+// nothing. The COPY variants pass TRACE_EVENT_FLAG_COPY as the event flags.
+// - category and name strings must have application lifetime (statics or
+//   literals). They may not include " chars.
+// - `id` is used to match the FLOW_BEGIN event with the FLOW_END event. FLOW
+//   events are considered to match if their category, name and id values all
+//   match. `id` must either be a pointer or an integer value up to 64 bits. If
+//   it's a pointer, the bits will be xored with a hash of the process ID so
+//   that the same pointer on two different processes will not collide.
+// FLOW events are different from ASYNC events in how they are drawn by the
+// tracing UI. A FLOW defines asynchronous data flow, such as posting a task
+// (FLOW_BEGIN) and later executing that task (FLOW_END). Expect FLOWs to be
+// drawn as lines or arrows from FLOW_BEGIN scopes to FLOW_END scopes. Similar
+// to ASYNC, a FLOW can consist of multiple phases. The first phase is defined
+// by the FLOW_BEGIN calls. Additional phases can be defined using the FLOW_STEP
+// macros. When the operation completes, call FLOW_END. An async operation can
+// span threads and processes, but all events in that operation must use the
+// same `name` and `id`. Each event can have its own args.
+#  define TRACE_EVENT_FLOW_BEGIN0(category, name, id)                        \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE)
+#  define TRACE_EVENT_FLOW_BEGIN1(category, name, id, arg1_name, arg1_val)   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,        \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_FLOW_BEGIN2(category, name, id, arg1_name, arg1_val,   \
+                                  arg2_name, arg2_val)                       \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,        \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+#  define TRACE_EVENT_COPY_FLOW_BEGIN0(category, name, id)                   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY)
+#  define TRACE_EVENT_COPY_FLOW_BEGIN1(category, name, id, arg1_name,        \
+                                       arg1_val)                             \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY,        \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_FLOW_BEGIN2(category, name, id, arg1_name,        \
+                                       arg1_val, arg2_name, arg2_val)        \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_BEGIN, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY,        \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+
+// Records a single FLOW_STEP event for `step` immediately. If the category
+// is not enabled, then this does nothing. The `name` and `id` must match the
+// FLOW_BEGIN event above. The `step` param identifies this step within the
+// flow and is recorded as an argument named "step". Call this at the
+// beginning of the next phase of an asynchronous operation.
+#  define TRACE_EVENT_FLOW_STEP0(category, name, id, step)                    \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category,   \
+                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
+                                     step)
+#  define TRACE_EVENT_FLOW_STEP1(category, name, id, step, arg1_name,         \
+                                 arg1_val)                                    \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category,   \
+                                     name, id, TRACE_EVENT_FLAG_NONE, "step", \
+                                     step, arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_FLOW_STEP0(category, name, id, step)               \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category,   \
+                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
+                                     step)
+#  define TRACE_EVENT_COPY_FLOW_STEP1(category, name, id, step, arg1_name,    \
+                                      arg1_val)                               \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_STEP, category,   \
+                                     name, id, TRACE_EVENT_FLAG_COPY, "step", \
+                                     step, arg1_name, arg1_val)
+
+// Records a single FLOW_END event for "name" immediately; `name` and `id`
+// must match FLOW_BEGIN. If the category is not enabled, this does nothing.
+#  define TRACE_EVENT_FLOW_END0(category, name, id)                        \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE)
+#  define TRACE_EVENT_FLOW_END1(category, name, id, arg1_name, arg1_val)   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,      \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_FLOW_END2(category, name, id, arg1_name, arg1_val,   \
+                                arg2_name, arg2_val)                       \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_NONE,      \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+#  define TRACE_EVENT_COPY_FLOW_END0(category, name, id)                   \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category, \
+                                     name, id, TRACE_EVENT_FLAG_COPY)
+#  define TRACE_EVENT_COPY_FLOW_END1(category, name, id, arg1_name, arg1_val) \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category,    \
+                                     name, id, TRACE_EVENT_FLAG_COPY,         \
+                                     arg1_name, arg1_val)
+#  define TRACE_EVENT_COPY_FLOW_END2(category, name, id, arg1_name, arg1_val, \
+                                     arg2_name, arg2_val)                     \
+    INTERNAL_TRACE_EVENT_ADD_WITH_ID(TRACE_EVENT_PHASE_FLOW_END, category,    \
+                                     name, id, TRACE_EVENT_FLAG_COPY,         \
+                                     arg1_name, arg1_val, arg2_name, arg2_val)
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementation specific tracing API definitions.
+
+// Get a pointer to the enabled state of the given trace category. Only
+// long-lived literal strings should be given as the category name. The returned
+// pointer can be held permanently in a local static for example. If the
+// unsigned char is non-zero, tracing is enabled and
+// TRACE_EVENT_API_ADD_TRACE_EVENT can be called. It's OK if tracing is disabled
+// between the load of the tracing state and that call, because this flag only
+// provides an early out for best performance when tracing is disabled.
+// NOTE: INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO below does not use this macro.
+// const unsigned char*
+//     TRACE_EVENT_API_GET_CATEGORY_ENABLED(const char* category_name)
+#  define TRACE_EVENT_API_GET_CATEGORY_ENABLED \
+    webrtc::EventTracer::GetCategoryEnabled
+
+// Add a trace event to the platform tracing system (here: the uprofiler).
+// void TRACE_EVENT_API_ADD_TRACE_EVENT(
+//                    char phase,
+//                    const unsigned char* category_enabled,  (ignored here)
+//                    const char* name,
+//                    unsigned long long id,  (ignored here)
+//                    int num_args,
+//                    const char** arg_names,
+//                    const unsigned char* arg_types,
+//                    const unsigned long long* arg_values,
+//                    unsigned char flags)  (ignored here)
+#  define TRACE_EVENT_API_ADD_TRACE_EVENT MOZ_INTERNAL_UPROFILER_SIMPLE_EVENT
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Implementation detail: trace event macros create temporary variables
+// to keep instrumentation overhead low. These macros give each temporary
+// variable a unique name based on the line number to prevent name collisions.
+#  define INTERNAL_TRACE_EVENT_UID3(a, b) trace_event_unique_##a##b
+#  define INTERNAL_TRACE_EVENT_UID2(a, b) INTERNAL_TRACE_EVENT_UID3(a, b)
+#  define INTERNAL_TRACE_EVENT_UID(name_prefix) \
+    INTERNAL_TRACE_EVENT_UID2(name_prefix, __LINE__)
+
+#  if WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS  // non-static category pointer
+#    define INTERNAL_TRACE_EVENT_INFO_TYPE const unsigned char*
+#  else  // default: static category pointer
+#    define INTERNAL_TRACE_EVENT_INFO_TYPE static const unsigned char*
+#  endif  // WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS
+
+// Implementation detail: the category pointer is the category string itself.
+#  define INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category)               \
+    INTERNAL_TRACE_EVENT_INFO_TYPE INTERNAL_TRACE_EVENT_UID(catstatic) = \
+        reinterpret_cast<const unsigned char*>(category);
+
+// Implementation detail: creates the category pointer and, if the byte it
+// points to is non-zero, adds the event with kNoEventId as its id.
+#  define INTERNAL_TRACE_EVENT_ADD(phase, category, name, flags, ...)        \
+    do {                                                                     \
+      INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category);                      \
+      if (*INTERNAL_TRACE_EVENT_UID(catstatic)) {                            \
+        webrtc::trace_event_internal::AddTraceEvent(                         \
+            phase, INTERNAL_TRACE_EVENT_UID(catstatic), name,                \
+            webrtc::trace_event_internal::kNoEventId, flags, ##__VA_ARGS__); \
+      }                                                                      \
+    } while (0)
+
+// Implementation detail: internal macro to create static category and add begin
+// event if the category is enabled. Also adds the end event when the scope
+// ends (via TraceEndOnScopeClose, declared in the caller's scope).
+#  define INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name, ...)                \
+    INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category);                         \
+    webrtc::trace_event_internal::TraceEndOnScopeClose                        \
+        INTERNAL_TRACE_EVENT_UID(profileScope);                               \
+    if (*INTERNAL_TRACE_EVENT_UID(catstatic)) {                               \
+      webrtc::trace_event_internal::AddTraceEvent(                            \
+          TRACE_EVENT_PHASE_BEGIN, INTERNAL_TRACE_EVENT_UID(catstatic), name, \
+          webrtc::trace_event_internal::kNoEventId, TRACE_EVENT_FLAG_NONE,    \
+          ##__VA_ARGS__);                                                     \
+      INTERNAL_TRACE_EVENT_UID(profileScope)                                  \
+          .Initialize(INTERNAL_TRACE_EVENT_UID(catstatic), name);             \
+    }
+
+// Implementation detail: like INTERNAL_TRACE_EVENT_ADD, but wraps `id` in a
+// TraceID (may set the MANGLE_ID flag) and ORs in TRACE_EVENT_FLAG_HAS_ID.
+#  define INTERNAL_TRACE_EVENT_ADD_WITH_ID(phase, category, name, id, flags, \
+                                           ...)                              \
+    do {                                                                     \
+      INTERNAL_TRACE_EVENT_GET_CATEGORY_INFO(category);                      \
+      if (*INTERNAL_TRACE_EVENT_UID(catstatic)) {                            \
+        unsigned char trace_event_flags = flags | TRACE_EVENT_FLAG_HAS_ID;   \
+        webrtc::trace_event_internal::TraceID trace_event_trace_id(          \
+            id, &trace_event_flags);                                         \
+        webrtc::trace_event_internal::AddTraceEvent(                         \
+            phase, INTERNAL_TRACE_EVENT_UID(catstatic), name,                \
+            trace_event_trace_id.data(), trace_event_flags, ##__VA_ARGS__);  \
+      }                                                                      \
+    } while (0)
+
+#  ifdef MOZ_GECKO_PROFILER  // forward to uprofiler_simple_event_marker
+#    define MOZ_INTERNAL_UPROFILER_SIMPLE_EVENT(phase, category_enabled, name, \
+                                                id, num_args, arg_names,       \
+                                                arg_types, arg_values, flags)  \
+      uprofiler_simple_event_marker(name, phase, num_args, arg_names,          \
+                                    arg_types, arg_values);
+#  else  // no profiler: the macro expands to nothing
+#    define MOZ_INTERNAL_UPROFILER_SIMPLE_EVENT(phase, category_enabled, name, \
+                                                id, num_args, arg_names,       \
+                                                arg_types, arg_values, flags)
+#  endif  // MOZ_GECKO_PROFILER
+
+// Notes regarding the following definitions:
+// New values can be added and propagated to third party libraries, but existing
+// definitions must never be changed, because third party libraries may use old
+// definitions.
+
+// Phase: a single char giving the nature of an event entry, e.g. begin or end.
+#  define TRACE_EVENT_PHASE_BEGIN ('B')
+#  define TRACE_EVENT_PHASE_END ('E')
+#  define TRACE_EVENT_PHASE_INSTANT ('I')
+#  define TRACE_EVENT_PHASE_ASYNC_BEGIN ('S')
+#  define TRACE_EVENT_PHASE_ASYNC_STEP ('T')
+#  define TRACE_EVENT_PHASE_ASYNC_END ('F')
+#  define TRACE_EVENT_PHASE_FLOW_BEGIN ('s')
+#  define TRACE_EVENT_PHASE_FLOW_STEP ('t')
+#  define TRACE_EVENT_PHASE_FLOW_END ('f')
+#  define TRACE_EVENT_PHASE_METADATA ('M')
+#  define TRACE_EVENT_PHASE_COUNTER ('C')
+
+// Bit flags (unsigned char) changing TRACE_EVENT_API_ADD_TRACE_EVENT behavior.
+#  define TRACE_EVENT_FLAG_NONE (static_cast<unsigned char>(0))
+#  define TRACE_EVENT_FLAG_COPY (static_cast<unsigned char>(1 << 0))
+#  define TRACE_EVENT_FLAG_HAS_ID (static_cast<unsigned char>(1 << 1))
+#  define TRACE_EVENT_FLAG_MANGLE_ID (static_cast<unsigned char>(1 << 2))
+
+namespace webrtc {
+namespace trace_event_internal {
+
+// Specify these values when the corresponding argument of AddTraceEvent is not
+// used.
+const int kZeroNumArgs = 0;
+const unsigned long long kNoEventId = 0;  // id passed for events without an id
+
+// TraceID encapsulates an ID that can either be an integer or pointer. Pointer
+// and ForceMangle IDs set TRACE_EVENT_FLAG_MANGLE_ID in *flags so they can be
+// mangled per process; plain integer IDs leave *flags unchanged.
+class TraceID {
+ public:
+  class ForceMangle {  // integer ID wrapper that requests mangling
+   public:
+    explicit ForceMangle(unsigned long long id) : data_(id) {}
+    explicit ForceMangle(unsigned long id) : data_(id) {}
+    explicit ForceMangle(unsigned int id) : data_(id) {}
+    explicit ForceMangle(unsigned short id) : data_(id) {}
+    explicit ForceMangle(unsigned char id) : data_(id) {}
+    explicit ForceMangle(long long id)
+        : data_(static_cast<unsigned long long>(id)) {}
+    explicit ForceMangle(long id)
+        : data_(static_cast<unsigned long long>(id)) {}
+    explicit ForceMangle(int id) : data_(static_cast<unsigned long long>(id)) {}
+    explicit ForceMangle(short id)
+        : data_(static_cast<unsigned long long>(id)) {}
+    explicit ForceMangle(signed char id)
+        : data_(static_cast<unsigned long long>(id)) {}
+
+    unsigned long long data() const { return data_; }
+
+   private:
+    unsigned long long data_;
+  };
+
+  explicit TraceID(const void* id, unsigned char* flags)
+      : data_(
+            static_cast<unsigned long long>(reinterpret_cast<uintptr_t>(id))) {
+    *flags |= TRACE_EVENT_FLAG_MANGLE_ID;
+  }
+  explicit TraceID(ForceMangle id, unsigned char* flags) : data_(id.data()) {
+    *flags |= TRACE_EVENT_FLAG_MANGLE_ID;
+  }
+  explicit TraceID(unsigned long long id, unsigned char* flags) : data_(id) {
+    (void)flags;  // integer IDs never modify *flags
+  }
+  explicit TraceID(unsigned long id, unsigned char* flags) : data_(id) {
+    (void)flags;
+  }
+  explicit TraceID(unsigned int id, unsigned char* flags) : data_(id) {
+    (void)flags;
+  }
+  explicit TraceID(unsigned short id, unsigned char* flags) : data_(id) {
+    (void)flags;
+  }
+  explicit TraceID(unsigned char id, unsigned char* flags) : data_(id) {
+    (void)flags;
+  }
+  explicit TraceID(long long id, unsigned char* flags)
+      : data_(static_cast<unsigned long long>(id)) {
+    (void)flags;
+  }
+  explicit TraceID(long id, unsigned char* flags)
+      : data_(static_cast<unsigned long long>(id)) {
+    (void)flags;
+  }
+  explicit TraceID(int id, unsigned char* flags)
+      : data_(static_cast<unsigned long long>(id)) {
+    (void)flags;
+  }
+  explicit TraceID(short id, unsigned char* flags)
+      : data_(static_cast<unsigned long long>(id)) {
+    (void)flags;
+  }
+  explicit TraceID(signed char id, unsigned char* flags)
+      : data_(static_cast<unsigned long long>(id)) {
+    (void)flags;
+  }
+
+  unsigned long long data() const { return data_; }
+
+ private:
+  unsigned long long data_;
+};
+
+// Simple union to store various types as unsigned long long.
+union TraceValueUnion {
+  bool as_bool;
+  unsigned long long as_uint;
+  long long as_int;
+  double as_double;
+  const void* as_pointer;
+  const char* as_string;
+};
+
+// Simple container for const char* that should be copied instead of retained.
+class TraceStringWithCopy {
+ public:
+  explicit TraceStringWithCopy(const char* str) : str_(str) {}
+  operator const char*() const { return str_; }
+
+ private:
+  const char* str_;
+};
+
+// Define SetTraceValue for each allowed type. It stores the type and
+// value in the return arguments (no structures, so the API stays portable to
+// third_party libraries). The union is zeroed first so every byte is defined.
+#  define INTERNAL_DECLARE_SET_TRACE_VALUE(actual_type, union_member,      \
+                                           value_type_id)                  \
+    static inline void SetTraceValue(actual_type arg, unsigned char* type, \
+                                     unsigned long long* value) {          \
+      TraceValueUnion type_value = TraceValueUnion();                      \
+      type_value.union_member = arg;                                       \
+      *type = value_type_id;                                               \
+      *value = type_value.as_uint;                                         \
+    }
+// Simpler form for int types that can be safely cast.
+#  define INTERNAL_DECLARE_SET_TRACE_VALUE_INT(actual_type, value_type_id) \
+    static inline void SetTraceValue(actual_type arg, unsigned char* type, \
+                                     unsigned long long* value) {          \
+      *type = value_type_id;                                               \
+      *value = static_cast<unsigned long long>(arg);                       \
+    }
+
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned long long, TRACE_VALUE_TYPE_UINT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned long, TRACE_VALUE_TYPE_UINT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned int, TRACE_VALUE_TYPE_UINT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned short, TRACE_VALUE_TYPE_UINT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(unsigned char, TRACE_VALUE_TYPE_UINT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(long long, TRACE_VALUE_TYPE_INT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(long, TRACE_VALUE_TYPE_INT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(int, TRACE_VALUE_TYPE_INT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(short, TRACE_VALUE_TYPE_INT)
+INTERNAL_DECLARE_SET_TRACE_VALUE_INT(signed char, TRACE_VALUE_TYPE_INT)
+INTERNAL_DECLARE_SET_TRACE_VALUE(bool, as_bool, TRACE_VALUE_TYPE_BOOL)
+INTERNAL_DECLARE_SET_TRACE_VALUE(double, as_double, TRACE_VALUE_TYPE_DOUBLE)
+INTERNAL_DECLARE_SET_TRACE_VALUE(const void*, as_pointer,
+                                 TRACE_VALUE_TYPE_POINTER)
+INTERNAL_DECLARE_SET_TRACE_VALUE(const char*, as_string,
+                                 TRACE_VALUE_TYPE_STRING)
+INTERNAL_DECLARE_SET_TRACE_VALUE(const TraceStringWithCopy&, as_string,
+                                 TRACE_VALUE_TYPE_COPY_STRING)
+
+#  undef INTERNAL_DECLARE_SET_TRACE_VALUE
+#  undef INTERNAL_DECLARE_SET_TRACE_VALUE_INT
+
+// std::string version of SetTraceValue so that trace arguments can be strings.
+static inline void SetTraceValue(const std::string& arg, unsigned char* type,
+                                 unsigned long long* value) {
+  TraceValueUnion type_value = TraceValueUnion();  // Zero all bytes first.
+  type_value.as_string = arg.c_str();
+  *type = TRACE_VALUE_TYPE_COPY_STRING;
+  *value = type_value.as_uint;
+}
+
+// These AddTraceEvent template functions are defined here instead of in the
+// macro, because the arg_values could be temporary objects, such as
+// std::string. In order to store pointers to the internal c_str and pass
+// through to the tracing API, the arg_values must live throughout
+// these procedures.
+
+static inline void AddTraceEvent(char phase,
+                                 const unsigned char* category_enabled,
+                                 const char* name, unsigned long long id,
+                                 unsigned char flags) {
+  TRACE_EVENT_API_ADD_TRACE_EVENT(phase, category_enabled, name, id,
+                                  kZeroNumArgs, nullptr, nullptr, nullptr,
+                                  flags);
+}
+
+template <class ARG1_TYPE>
+static inline void AddTraceEvent(char phase,
+                                 const unsigned char* category_enabled,
+                                 const char* name, unsigned long long id,
+                                 unsigned char flags, const char* arg1_name,
+                                 const ARG1_TYPE& arg1_val) {
+  // Single-argument form: encode arg1_val, then pass one-element "arrays".
+  unsigned char type_code;
+  unsigned long long raw_value;
+  SetTraceValue(arg1_val, &type_code, &raw_value);
+  TRACE_EVENT_API_ADD_TRACE_EVENT(phase, category_enabled, name, id, 1,
+                                  &arg1_name, &type_code, &raw_value, flags);
+}
+
+template <class ARG1_TYPE, class ARG2_TYPE>
+static inline void AddTraceEvent(char phase,
+                                 const unsigned char* category_enabled,
+                                 const char* name, unsigned long long id,
+                                 unsigned char flags, const char* arg1_name,
+                                 const ARG1_TYPE& arg1_val,
+                                 const char* arg2_name,
+                                 const ARG2_TYPE& arg2_val) {
+  // Two-argument form: slot 0 describes arg1, slot 1 describes arg2.
+  const char* labels[2] = {arg1_name, arg2_name};
+  unsigned char type_codes[2];
+  unsigned long long raw_values[2];
+  SetTraceValue(arg1_val, type_codes, raw_values);
+  SetTraceValue(arg2_val, type_codes + 1, raw_values + 1);
+  TRACE_EVENT_API_ADD_TRACE_EVENT(phase, category_enabled, name, id, 2, labels,
+                                  type_codes, raw_values, flags);
+}
+
+// Used by TRACE_EVENTx macro. Do not use directly.
+class TraceEndOnScopeClose {
+ public:
+  // Note: members of data_ intentionally left uninitialized. See Initialize.
+  TraceEndOnScopeClose() : p_data_(nullptr) {}
+  ~TraceEndOnScopeClose() {
+    if (p_data_) AddEventIfEnabled();
+  }
+
+  void Initialize(const unsigned char* category_enabled, const char* name) {
+    data_.category_enabled = category_enabled;
+    data_.name = name;
+    p_data_ = &data_;
+  }
+
+ private:
+  // Add the end event if the category is still enabled.
+  void AddEventIfEnabled() {
+    // Only called when p_data_ is non-null.
+    if (*p_data_->category_enabled) {
+      TRACE_EVENT_API_ADD_TRACE_EVENT(TRACE_EVENT_PHASE_END,
+                                      p_data_->category_enabled, p_data_->name,
+                                      kNoEventId, kZeroNumArgs, nullptr,
+                                      nullptr, nullptr, TRACE_EVENT_FLAG_NONE);
+    }
+  }
+
+  // This Data struct workaround is to avoid initializing all the members
+  // in Data during construction of this object, since this object is always
+  // constructed, even when tracing is disabled. If the members of Data were
+  // members of this class instead, compiler warnings occur about potential
+  // uninitialized accesses.
+  struct Data {
+    const unsigned char* category_enabled;
+    const char* name;
+  };
+  Data* p_data_;
+  Data data_;
+};
+
+}  // namespace trace_event_internal
+}  // namespace webrtc
+#else
+
+////////////////////////////////////////////////////////////////////////////////
+// This section defines no-op alternatives to the tracing macros when
+// RTC_DISABLE_TRACE_EVENTS is defined.
+
+#  define RTC_NOOP() \
+    do {             \
+    } while (0)
+
+#  define TRACE_STR_COPY(str) RTC_NOOP()
+
+#  define TRACE_DISABLED_BY_DEFAULT(name) "disabled-by-default-" name
+
+#  define TRACE_ID_MANGLE(id) 0
+
+#  define TRACE_EVENT0(category, name) RTC_NOOP()
+#  define TRACE_EVENT1(category, name, arg1_name, arg1_val) RTC_NOOP()
+#  define TRACE_EVENT2(category, name, arg1_name, arg1_val, arg2_name, \
+                       arg2_val)                                       \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_INSTANT0(category, name) RTC_NOOP()
+#  define TRACE_EVENT_INSTANT1(category, name, arg1_name, arg1_val) RTC_NOOP()
+
+#  define TRACE_EVENT_INSTANT2(category, name, arg1_name, arg1_val, arg2_name, \
+                               arg2_val)                                       \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_COPY_INSTANT0(category, name) RTC_NOOP()
+#  define TRACE_EVENT_COPY_INSTANT1(category, name, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_INSTANT2(category, name, arg1_name, arg1_val, \
+                                    arg2_name, arg2_val)                 \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_BEGIN0(category, name) RTC_NOOP()
+#  define TRACE_EVENT_BEGIN1(category, name, arg1_name, arg1_val) RTC_NOOP()
+#  define TRACE_EVENT_BEGIN2(category, name, arg1_name, arg1_val, arg2_name, \
+                             arg2_val)                                       \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_BEGIN0(category, name) RTC_NOOP()
+#  define TRACE_EVENT_COPY_BEGIN1(category, name, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_BEGIN2(category, name, arg1_name, arg1_val, \
+                                  arg2_name, arg2_val)                 \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_END0(category, name) RTC_NOOP()
+#  define TRACE_EVENT_END1(category, name, arg1_name, arg1_val) RTC_NOOP()
+#  define TRACE_EVENT_END2(category, name, arg1_name, arg1_val, arg2_name, \
+                           arg2_val)                                       \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_END0(category, name) RTC_NOOP()
+#  define TRACE_EVENT_COPY_END1(category, name, arg1_name, arg1_val) RTC_NOOP()
+#  define TRACE_EVENT_COPY_END2(category, name, arg1_name, arg1_val, \
+                                arg2_name, arg2_val)                 \
+    RTC_NOOP()
+
+#  define TRACE_COUNTER1(category, name, value) RTC_NOOP()
+#  define TRACE_COPY_COUNTER1(category, name, value) RTC_NOOP()
+
+#  define TRACE_COUNTER2(category, name, value1_name, value1_val, value2_name, \
+                         value2_val)                                           \
+    RTC_NOOP()
+#  define TRACE_COPY_COUNTER2(category, name, value1_name, value1_val, \
+                              value2_name, value2_val)                 \
+    RTC_NOOP()
+
+#  define TRACE_COUNTER_ID1(category, name, id, value) RTC_NOOP()
+#  define TRACE_COPY_COUNTER_ID1(category, name, id, value) RTC_NOOP()
+
+#  define TRACE_COUNTER_ID2(category, name, id, value1_name, value1_val, \
+                            value2_name, value2_val)                     \
+    RTC_NOOP()
+#  define TRACE_COPY_COUNTER_ID2(category, name, id, value1_name, value1_val, \
+                                 value2_name, value2_val)                     \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_ASYNC_BEGIN0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_ASYNC_BEGIN1(category, name, id, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_ASYNC_BEGIN2(category, name, id, arg1_name, arg1_val, \
+                                   arg2_name, arg2_val)                     \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_BEGIN0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_BEGIN1(category, name, id, arg1_name, \
+                                        arg1_val)                      \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_BEGIN2(category, name, id, arg1_name, \
+                                        arg1_val, arg2_name, arg2_val) \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_ASYNC_STEP0(category, name, id, step) RTC_NOOP()
+#  define TRACE_EVENT_ASYNC_STEP1(category, name, id, step, arg1_name, \
+                                  arg1_val)                            \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_STEP0(category, name, id, step) RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_STEP1(category, name, id, step, arg1_name, \
+                                       arg1_val)                            \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_ASYNC_END0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_ASYNC_END1(category, name, id, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_ASYNC_END2(category, name, id, arg1_name, arg1_val, \
+                                 arg2_name, arg2_val)                     \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_END0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_END1(category, name, id, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_ASYNC_END2(category, name, id, arg1_name, arg1_val, \
+                                      arg2_name, arg2_val)                     \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_FLOW_BEGIN0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_FLOW_BEGIN1(category, name, id, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_FLOW_BEGIN2(category, name, id, arg1_name, arg1_val, \
+                                  arg2_name, arg2_val)                     \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_BEGIN0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_BEGIN1(category, name, id, arg1_name, \
+                                       arg1_val)                      \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_BEGIN2(category, name, id, arg1_name, \
+                                       arg1_val, arg2_name, arg2_val) \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_FLOW_STEP0(category, name, id, step) RTC_NOOP()
+#  define TRACE_EVENT_FLOW_STEP1(category, name, id, step, arg1_name, \
+                                 arg1_val)                            \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_STEP0(category, name, id, step) RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_STEP1(category, name, id, step, arg1_name, \
+                                      arg1_val)                            \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_FLOW_END0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_FLOW_END1(category, name, id, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_FLOW_END2(category, name, id, arg1_name, arg1_val, \
+                                arg2_name, arg2_val)                     \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_END0(category, name, id) RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_END1(category, name, id, arg1_name, arg1_val) \
+    RTC_NOOP()
+#  define TRACE_EVENT_COPY_FLOW_END2(category, name, id, arg1_name, arg1_val, \
+                                     arg2_name, arg2_val)                     \
+    RTC_NOOP()
+
+#  define TRACE_EVENT_API_GET_CATEGORY_ENABLED ""
+
+#  define TRACE_EVENT_API_ADD_TRACE_EVENT RTC_NOOP()
+
+#endif  // RTC_TRACE_EVENTS_ENABLED
+
+#endif  // GECKO_TRACE_EVENT_H_
diff --git a/tools/profiler/public/MicroGeckoProfiler.h b/tools/profiler/public/MicroGeckoProfiler.h
new file mode 100644
index 0000000000..7b735e1eec
--- /dev/null
+++ b/tools/profiler/public/MicroGeckoProfiler.h
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This contains things related to the Gecko profiler, for use in third_party
+// code. It is very minimal and is designed to be used by patching over
+// upstream code.
+// Only use the C ABI and guard C++ code with #ifdefs. Don't pull anything in
+// from Gecko: it must be possible to include this header in any C++ codebase.
+
+#ifndef MICRO_GECKO_PROFILER
+#define MICRO_GECKO_PROFILER
+
+#include <mozilla/Types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef _WIN32
+#  include <libloaderapi.h>
+#else
+#  include <dlfcn.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern MOZ_EXPORT void uprofiler_register_thread(const char* aName,
+                                                 void* aGuessStackTop);
+// `(void)`, not `()`: before C23 an empty list declares no prototype in C.
+extern MOZ_EXPORT void uprofiler_unregister_thread(void);
+
+extern MOZ_EXPORT void uprofiler_simple_event_marker(
+    const char* name, char phase, int num_args, const char** arg_names,
+    const unsigned char* arg_types, const unsigned long long* arg_values);
+#ifdef __cplusplus
+}
+
+struct AutoRegisterProfiler {
+  AutoRegisterProfiler(const char* name, char* stacktop) {
+    if (getenv("MOZ_UPROFILER_LOG_THREAD_CREATION")) {
+      printf("### UProfiler: new thread: '%s'\n", name);
+    }
+    uprofiler_register_thread(name, stacktop);
+  }
+  ~AutoRegisterProfiler() { uprofiler_unregister_thread(); }
+};
+#endif  // __cplusplus
+
+void uprofiler_simple_event_marker(const char* name, char phase, int num_args,
+                                   const char** arg_names,
+                                   const unsigned char* arg_types,
+                                   const unsigned long long* arg_values);
+
+struct UprofilerFuncPtrs {  // Filled in by UPROFILER_GET_FUNCTIONS().
+  void (*register_thread)(const char* aName, void* aGuessStackTop);
+  void (*unregister_thread)(void);  // (void): a real prototype in C as well.
+  void (*simple_event_marker)(const char* name, char phase, int num_args,
+                              const char** arg_names,
+                              const unsigned char* arg_types,
+                              const unsigned long long* arg_values);
+};
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+
+static void register_thread_noop(const char* aName, void* aGuessStackTop) {
+  /* no-op */
+}
+static void unregister_thread_noop(void) { /* no-op; (void) is C-safe */
+}
+static void simple_event_marker_noop(const char* name, char phase, int num_args,
+                                     const char** arg_names,
+                                     const unsigned char* arg_types,
+                                     const unsigned long long* arg_values) {
+  /* no-op */
+}
+
+#pragma GCC diagnostic pop
+
+#if defined(_WIN32)
+#  define UPROFILER_OPENLIB() GetModuleHandle(NULL)
+#else
+#  define UPROFILER_OPENLIB() dlopen(NULL, RTLD_NOW)
+#endif
+
+#if defined(_WIN32)
+#  define UPROFILER_GET_SYM(handle, sym) GetProcAddress(handle, sym)
+#else
+#  define UPROFILER_GET_SYM(handle, sym) dlsym(handle, sym)
+#endif
+
+#if defined(_WIN32)
+#  define UPROFILER_PRINT_ERROR(func) fprintf(stderr, "%s error\n", #func);
+#else
+#  define UPROFILER_PRINT_ERROR(func) \
+    fprintf(stderr, "%s error: %s\n", #func, dlerror());
+#endif
+
+// Fills `uprofiler` (a UprofilerFuncPtrs that must be in scope) by looking up
+// the uprofiler_* symbols; any symbol not found is replaced by its no-op.
+#define UPROFILER_GET_FUNCTIONS()                                 \
+  void* handle = UPROFILER_OPENLIB();                             \
+  if (!handle) {                                                  \
+    UPROFILER_PRINT_ERROR(UPROFILER_OPENLIB);                     \
+    uprofiler.register_thread = register_thread_noop;             \
+    uprofiler.unregister_thread = unregister_thread_noop;         \
+    uprofiler.simple_event_marker = simple_event_marker_noop;     \
+  }                                                               \
+  uprofiler.register_thread =                                     \
+      UPROFILER_GET_SYM(handle, "uprofiler_register_thread");     \
+  if (!uprofiler.register_thread) {                               \
+    UPROFILER_PRINT_ERROR(uprofiler_register_thread);             \
+    uprofiler.register_thread = register_thread_noop;             \
+  }                                                               \
+  uprofiler.unregister_thread =                                   \
+      UPROFILER_GET_SYM(handle, "uprofiler_unregister_thread");   \
+  if (!uprofiler.unregister_thread) {                             \
+    UPROFILER_PRINT_ERROR(uprofiler_unregister_thread);           \
+    uprofiler.unregister_thread = unregister_thread_noop;         \
+  }                                                               \
+  uprofiler.simple_event_marker =                                 \
+      UPROFILER_GET_SYM(handle, "uprofiler_simple_event_marker"); \
+  if (!uprofiler.simple_event_marker) {                           \
+    UPROFILER_PRINT_ERROR(uprofiler_simple_event_marker);         \
+    uprofiler.simple_event_marker = simple_event_marker_noop;     \
+  }
+
+#endif  // MICRO_GECKO_PROFILER
diff --git a/tools/profiler/public/ProfileAdditionalInformation.h b/tools/profiler/public/ProfileAdditionalInformation.h
new file mode 100644
index 0000000000..c4cc8697b0
--- /dev/null
+++ b/tools/profiler/public/ProfileAdditionalInformation.h
@@ -0,0 +1,90 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The Gecko Profiler is an always-on profiler that takes fast and low overhead
+// samples of the program execution using only userspace functionality for
+// portability. The goal of this module is to provide performance data in a
+// generic cross-platform way without requiring custom tools or kernel support.
+//
+// Samples are collected to form a timeline with optional timeline events
+// (markers) used for filtering. The samples include both native stacks and
+// platform-independent "label stack" frames.
+
+#ifndef ProfileAdditionalInformation_h
+#define ProfileAdditionalInformation_h
+
+#ifdef MOZ_GECKO_PROFILER
+#  include "shared-libraries.h"
+#endif
+#include "js/Value.h"
+#include "nsString.h"
+
+namespace IPC {
+class MessageReader;
+class MessageWriter;
+template <typename T>
+struct ParamTraits;
+}  // namespace IPC
+
+namespace mozilla {
+// This structure contains additional information gathered while generating the
+// profile json and iterating the buffer.
+struct ProfileGenerationAdditionalInformation {
+#ifdef MOZ_GECKO_PROFILER
+  ProfileGenerationAdditionalInformation() = default;
+  explicit ProfileGenerationAdditionalInformation(
+      const SharedLibraryInfo&& aSharedLibraries)
+      : mSharedLibraries(aSharedLibraries) {}
+
+  size_t SizeOf() const { return mSharedLibraries.SizeOf(); }
+
+  void Append(ProfileGenerationAdditionalInformation&& aOther) {
+    mSharedLibraries.AddAllSharedLibraries(aOther.mSharedLibraries);
+  }
+
+  void FinishGathering() { mSharedLibraries.DeduplicateEntries(); }
+
+  void ToJSValue(JSContext* aCx, JS::MutableHandle<JS::Value> aRetVal) const;
+
+  SharedLibraryInfo mSharedLibraries;
+#endif  // MOZ_GECKO_PROFILER
+};
+
+struct ProfileAndAdditionalInformation {
+  ProfileAndAdditionalInformation() = default;
+  explicit ProfileAndAdditionalInformation(const nsCString&& aProfile)
+      : mProfile(aProfile) {}
+
+  ProfileAndAdditionalInformation(
+      const nsCString&& aProfile,
+      const ProfileGenerationAdditionalInformation&& aAdditionalInformation)
+      : mProfile(aProfile),
+        mAdditionalInformation(Some(aAdditionalInformation)) {}
+
+  size_t SizeOf() const {
+    size_t size = mProfile.Length();
+#ifdef MOZ_GECKO_PROFILER
+    if (mAdditionalInformation.isSome()) {
+      size += mAdditionalInformation->SizeOf();
+    }
+#endif
+    return size;
+  }
+
+  nsCString mProfile;
+  Maybe<ProfileGenerationAdditionalInformation> mAdditionalInformation;
+};
+}  // namespace mozilla
+
+namespace IPC {
+template <>
+struct ParamTraits<mozilla::ProfileGenerationAdditionalInformation> {
+  typedef mozilla::ProfileGenerationAdditionalInformation paramType;
+
+  static void Write(MessageWriter* aWriter, const paramType& aParam);
+  static bool Read(MessageReader* aReader, paramType* aResult);
+};
+}  // namespace IPC
+
+#endif  // ProfileAdditionalInformation_h
diff --git a/tools/profiler/public/ProfileBufferEntrySerializationGeckoExtensions.h b/tools/profiler/public/ProfileBufferEntrySerializationGeckoExtensions.h
new file mode 100644
index 0000000000..1578bd2ddc
--- /dev/null
+++ b/tools/profiler/public/ProfileBufferEntrySerializationGeckoExtensions.h
@@ -0,0 +1,160 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntrySerializationGeckoExtensions_h
+#define ProfileBufferEntrySerializationGeckoExtensions_h
+
+#include "mozilla/ProfileBufferEntrySerialization.h"
+
+#include "js/AllocPolicy.h"
+#include "js/Utility.h"
+#include "nsString.h"
+
+namespace mozilla {
+
+// ----------------------------------------------------------------------------
+// ns[C]String
+
+// nsString or nsCString contents are serialized as the character count (encoded
+// as ULEB128) and all the characters in the string. The terminal '\0' is
+// omitted.
+// Make sure you write and read with the same character type!
+//
+// Usage: `nsCString s = ...; aEW.WriteObject(s);`
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<nsTString<CHAR>> {
+  static Length Bytes(const nsTString<CHAR>& aS) {
+    const auto length = aS.Length();
+    return ProfileBufferEntryWriter::ULEB128Size(length) +
+           static_cast<Length>(length * sizeof(CHAR));
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW, const nsTString<CHAR>& aS) {
+    const auto length = aS.Length();
+    aEW.WriteULEB128(length);
+    // Copy the bytes from the string's buffer.
+    aEW.WriteBytes(aS.Data(), length * sizeof(CHAR));
+  }
+};
+
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<nsTString<CHAR>> {
+  static void ReadInto(ProfileBufferEntryReader& aER, nsTString<CHAR>& aS) {
+    aS = Read(aER);
+  }
+
+  static nsTString<CHAR> Read(ProfileBufferEntryReader& aER) {
+    const Length length = aER.ReadULEB128<Length>();
+    nsTString<CHAR> s;
+    // BulkWrite is the most efficient way to copy bytes into the target string.
+    auto writerOrErr = s.BulkWrite(length, 0, true);
+    MOZ_RELEASE_ASSERT(!writerOrErr.isErr());
+
+    auto writer = writerOrErr.unwrap();
+
+    aER.ReadBytes(writer.Elements(), length * sizeof(CHAR));
+    writer.Finish(length, true);
+    return s;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// nsAuto[C]String
+
+// nsAuto[C]String contents are serialized as the character count (encoded as
+// ULEB128) and all the characters in the string. The terminal '\0' is omitted.
+// Make sure you write and read with the same character type!
+//
+// Usage: `nsAutoCString s = ...; aEW.WriteObject(s);`
+template <typename CHAR, size_t N>
+struct ProfileBufferEntryWriter::Serializer<nsTAutoStringN<CHAR, N>> {
+  static Length Bytes(const nsTAutoStringN<CHAR, N>& aS) {
+    const auto length = aS.Length();
+    return ProfileBufferEntryWriter::ULEB128Size(length) +
+           static_cast<Length>(length * sizeof(CHAR));
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const nsTAutoStringN<CHAR, N>& aS) {
+    const auto length = aS.Length();
+    aEW.WriteULEB128(length);
+    // Copy the bytes from the string's buffer.
+    aEW.WriteBytes(aS.BeginReading(), length * sizeof(CHAR));
+  }
+};
+
+template <typename CHAR, size_t N>
+struct ProfileBufferEntryReader::Deserializer<nsTAutoStringN<CHAR, N>> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       nsTAutoStringN<CHAR, N>& aS) {
+    aS = Read(aER);
+  }
+
+  static nsTAutoStringN<CHAR, N> Read(ProfileBufferEntryReader& aER) {
+    const auto length = aER.ReadULEB128<Length>();
+    nsTAutoStringN<CHAR, N> s;
+    // BulkWrite is the most efficient way to copy bytes into the target string.
+    auto writerOrErr = s.BulkWrite(length, 0, true);
+    MOZ_RELEASE_ASSERT(!writerOrErr.isErr());
+
+    auto writer = writerOrErr.unwrap();
+    aER.ReadBytes(writer.Elements(), length * sizeof(CHAR));
+    writer.Finish(length, true);
+    return s;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// JS::UniqueChars
+
+// JS::UniqueChars contents are serialized as the number of bytes (encoded as
+// ULEB128) and all the characters in the string. The terminal '\0' is omitted.
+// Note: A nullptr pointer will be serialized like an empty string, so when
+// deserializing it will result in an allocated buffer only containing a
+// single null terminator.
+//
+// Usage: `JS::UniqueChars s = ...; aEW.WriteObject(s);`
+template <>
+struct ProfileBufferEntryWriter::Serializer<JS::UniqueChars> {
+  static Length Bytes(const JS::UniqueChars& aS) {
+    if (!aS) {
+      return ProfileBufferEntryWriter::ULEB128Size<Length>(0);
+    }
+    const auto len = static_cast<Length>(strlen(aS.get()));
+    return ProfileBufferEntryWriter::ULEB128Size(len) + len;
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW, const JS::UniqueChars& aS) {
+    if (!aS) {
+      aEW.WriteULEB128<Length>(0);
+      return;
+    }
+    const auto len = static_cast<Length>(strlen(aS.get()));
+    aEW.WriteULEB128(len);
+    aEW.WriteBytes(aS.get(), len);
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<JS::UniqueChars> {
+  static void ReadInto(ProfileBufferEntryReader& aER, JS::UniqueChars& aS) {
+    aS = Read(aER);
+  }
+
+  static JS::UniqueChars Read(ProfileBufferEntryReader& aER) {
+    const auto len = aER.ReadULEB128<Length>();
+    // Use the same allocation policy as JS_smprintf.
+    char* buffer = js::SystemAllocPolicy{}.pod_malloc<char>(len + 1);
+    MOZ_RELEASE_ASSERT(buffer, "OOM deserializing JS::UniqueChars");
+    aER.ReadBytes(buffer, len);
+    buffer[len] = '\0';
+    return JS::UniqueChars(buffer);
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferEntrySerializationGeckoExtensions_h
diff --git a/tools/profiler/public/ProfileJSONWriter.h b/tools/profiler/public/ProfileJSONWriter.h
new file mode 100644
index 0000000000..8d23d7a890
--- /dev/null
+++ b/tools/profiler/public/ProfileJSONWriter.h
@@ -0,0 +1,19 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILEJSONWRITER_H
+#define PROFILEJSONWRITER_H
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+using ChunkedJSONWriteFunc = mozilla::baseprofiler::ChunkedJSONWriteFunc;
+using JSONSchemaWriter = mozilla::baseprofiler::JSONSchemaWriter;
+using OStreamJSONWriteFunc = mozilla::baseprofiler::OStreamJSONWriteFunc;
+using SpliceableChunkedJSONWriter =
+    mozilla::baseprofiler::SpliceableChunkedJSONWriter;
+using SpliceableJSONWriter = mozilla::baseprofiler::SpliceableJSONWriter;
+using UniqueJSONStrings = mozilla::baseprofiler::UniqueJSONStrings;
+
+#endif  // PROFILEJSONWRITER_H
diff --git a/tools/profiler/public/ProfilerBindings.h b/tools/profiler/public/ProfilerBindings.h
new file mode 100644
index 0000000000..096a860130
--- /dev/null
+++ b/tools/profiler/public/ProfilerBindings.h
@@ -0,0 +1,162 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* FFI functions for Profiler Rust API to call into profiler */
+
+#ifndef ProfilerBindings_h
+#define ProfilerBindings_h
+
+#include "mozilla/BaseProfilerMarkersPrerequisites.h"
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace mozilla {
+class AutoProfilerLabel;
+class MarkerSchema;
+class MarkerTiming;
+class TimeStamp;
+enum class StackCaptureOptions;
+
+namespace baseprofiler {
+enum class ProfilingCategoryPair : uint32_t;
+class SpliceableJSONWriter;
+}  // namespace baseprofiler
+
+}  // namespace mozilla
+
+namespace JS {
+enum class ProfilingCategoryPair : uint32_t;
+}  // namespace JS
+
+// Everything in here is safe to include unconditionally, implementations must
+// take !MOZ_GECKO_PROFILER into account.
+extern "C" {
+
+// Conventions visible in the declarations below:
+// - Strings are passed as a `const char*` pointer paired with a `size_t`
+//   length. The one exception is gecko_profiler_register_thread, which takes
+//   only a pointer.
+// - "construct" and "clone" functions take a pointer to the destination
+//   object, and each constructed type has a matching "destruct" function.
+// NOTE(review): presumably the pointer/length strings need not be
+// null-terminated, and the destination pointers refer to caller-provided
+// storage that has not been constructed yet -- confirm against the
+// implementations.
+
+// Thread registration. Unregistering takes no argument.
+void gecko_profiler_register_thread(const char* aName);
+void gecko_profiler_unregister_thread();
+
+// Construct and destruct an AutoProfilerLabel at the given address.
+void gecko_profiler_construct_label(mozilla::AutoProfilerLabel* aAutoLabel,
+                                    JS::ProfilingCategoryPair aCategoryPair);
+void gecko_profiler_destruct_label(mozilla::AutoProfilerLabel* aAutoLabel);
+
+// Construct, clone and destruct the timestamp for profiler time.
+void gecko_profiler_construct_timestamp_now(mozilla::TimeStamp* aTimeStamp);
+void gecko_profiler_clone_timestamp(const mozilla::TimeStamp* aSrcTimeStamp,
+                                    mozilla::TimeStamp* aDestTimeStamp);
+void gecko_profiler_destruct_timestamp(mozilla::TimeStamp* aTimeStamp);
+
+// Addition and subtraction for timestamp.
+// The source timestamp is const; the result goes to aDestTimeStamp. The offset
+// is a double named aMicroseconds.
+void gecko_profiler_add_timestamp(const mozilla::TimeStamp* aTimeStamp,
+                                  mozilla::TimeStamp* aDestTimeStamp,
+                                  double aMicroseconds);
+void gecko_profiler_subtract_timestamp(const mozilla::TimeStamp* aTimeStamp,
+                                       mozilla::TimeStamp* aDestTimeStamp,
+                                       double aMicroseconds);
+
+// Various MarkerTiming constructors and a destructor.
+void gecko_profiler_construct_marker_timing_instant_at(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime);
+void gecko_profiler_construct_marker_timing_instant_now(
+    mozilla::MarkerTiming* aMarkerTiming);
+void gecko_profiler_construct_marker_timing_interval(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aStartTime,
+    const mozilla::TimeStamp* aEndTime);
+void gecko_profiler_construct_marker_timing_interval_until_now_from(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aStartTime);
+void gecko_profiler_construct_marker_timing_interval_start(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime);
+void gecko_profiler_construct_marker_timing_interval_end(
+    mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime);
+void gecko_profiler_destruct_marker_timing(
+    mozilla::MarkerTiming* aMarkerTiming);
+
+// MarkerSchema constructors and destructor.
+// The first constructor receives an array of aLength Location values.
+void gecko_profiler_construct_marker_schema(
+    mozilla::MarkerSchema* aMarkerSchema,
+    const mozilla::MarkerSchema::Location* aLocations, size_t aLength);
+void gecko_profiler_construct_marker_schema_with_special_front_end_location(
+    mozilla::MarkerSchema* aMarkerSchema);
+void gecko_profiler_destruct_marker_schema(
+    mozilla::MarkerSchema* aMarkerSchema);
+
+// MarkerSchema methods for adding labels.
+void gecko_profiler_marker_schema_set_chart_label(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength);
+void gecko_profiler_marker_schema_set_tooltip_label(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength);
+void gecko_profiler_marker_schema_set_table_label(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength);
+void gecko_profiler_marker_schema_set_all_labels(mozilla::MarkerSchema* aSchema,
+                                                 const char* aLabel,
+                                                 size_t aLabelLength);
+
+// MarkerSchema methods for adding key/key-label values.
+void gecko_profiler_marker_schema_add_key_format(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    mozilla::MarkerSchema::Format aFormat);
+void gecko_profiler_marker_schema_add_key_label_format(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    const char* aLabel, size_t aLabelLength,
+    mozilla::MarkerSchema::Format aFormat);
+void gecko_profiler_marker_schema_add_key_format_searchable(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    mozilla::MarkerSchema::Format aFormat,
+    mozilla::MarkerSchema::Searchable aSearchable);
+void gecko_profiler_marker_schema_add_key_label_format_searchable(
+    mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength,
+    const char* aLabel, size_t aLabelLength,
+    mozilla::MarkerSchema::Format aFormat,
+    mozilla::MarkerSchema::Searchable aSearchable);
+void gecko_profiler_marker_schema_add_static_label_value(
+    mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength,
+    const char* aValue, size_t aValueLength);
+
+// Stream MarkerSchema to SpliceableJSONWriter.
+// NOTE(review): aStreamedNamesSet is an untyped pointer at this boundary; its
+// concrete type is not visible in this header -- confirm in the implementation.
+void gecko_profiler_marker_schema_stream(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, mozilla::MarkerSchema* aMarkerSchema,
+    void* aStreamedNamesSet);
+
+// Various SpliceableJSONWriter methods to add properties.
+void gecko_profiler_json_writer_int_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, int64_t aValue);
+void gecko_profiler_json_writer_float_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, double aValue);
+void gecko_profiler_json_writer_bool_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, bool aValue);
+void gecko_profiler_json_writer_string_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength, const char* aValue, size_t aValueLength);
+void gecko_profiler_json_writer_null_property(
+    mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName,
+    size_t aNameLength);
+
+// Marker APIs.
+// All three take the marker name, category pair, timing and stack capture
+// options. The "text" variant adds a text string; the last variant adds a
+// one-byte marker tag plus a raw payload of aPayloadSize bytes.
+void gecko_profiler_add_marker_untyped(
+    const char* aName, size_t aNameLength,
+    mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair,
+    mozilla::MarkerTiming* aMarkerTiming,
+    mozilla::StackCaptureOptions aStackCaptureOptions);
+void gecko_profiler_add_marker_text(
+    const char* aName, size_t aNameLength,
+    mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair,
+    mozilla::MarkerTiming* aMarkerTiming,
+    mozilla::StackCaptureOptions aStackCaptureOptions, const char* aText,
+    size_t aTextLength);
+void gecko_profiler_add_marker(
+    const char* aName, size_t aNameLength,
+    mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair,
+    mozilla::MarkerTiming* aMarkerTiming,
+    mozilla::StackCaptureOptions aStackCaptureOptions, uint8_t aMarkerTag,
+    const uint8_t* aPayload, size_t aPayloadSize);
+
+}  // extern "C"
+
+#endif  // ProfilerBindings_h
diff --git a/tools/profiler/public/ProfilerChild.h b/tools/profiler/public/ProfilerChild.h
new file mode 100644
index 0000000000..a781784aae
--- /dev/null
+++ b/tools/profiler/public/ProfilerChild.h
@@ -0,0 +1,106 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerChild_h
+#define ProfilerChild_h
+
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/DataMutex.h"
+#include "mozilla/PProfilerChild.h"
+#include "mozilla/ProfileBufferControlledChunkManager.h"
+#include "mozilla/ProgressLogger.h"
+#include "mozilla/RefPtr.h"
+#include "ProfileAdditionalInformation.h"
+
+class nsIThread;
+struct PRThread;
+
+namespace mozilla {
+
+// The ProfilerChild actor is created in all processes except for the main
+// process. The corresponding ProfilerParent actor is created in the main
+// process, and it will notify us about profiler state changes and request
+// profiles from us.
+class ProfilerChild final : public PProfilerChild,
+                            public mozilla::ipc::IShmemAllocator {
+  // Reference counting is declared through this macro; the destructor below is
+  // private accordingly.
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ProfilerChild, final)
+
+  ProfilerChild();
+
+  // Collects and returns a profile.
+  // This method can be used to grab a profile just before PProfiler is torn
+  // down. The collected profile should then be sent through a different
+  // message channel that is guaranteed to stay open long enough.
+  ProfileAndAdditionalInformation GrabShutdownProfile();
+
+  void Destroy();
+
+  // This should be called regularly from outside of the profiler lock.
+  static void ProcessPendingUpdate();
+
+  static bool IsLockedOnCurrentThread();
+
+ private:
+  virtual ~ProfilerChild();
+
+  // Overrides of the PProfilerChild message handlers. Most receive a resolver
+  // callback (`aResolve`); RecvDestroyReleasedChunksAtOrBefore and
+  // RecvClearAllPages do not.
+  mozilla::ipc::IPCResult RecvStart(const ProfilerInitParams& params,
+                                    StartResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvEnsureStarted(
+      const ProfilerInitParams& params,
+      EnsureStartedResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvStop(StopResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvPause(PauseResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvResume(ResumeResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvPauseSampling(
+      PauseSamplingResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvResumeSampling(
+      ResumeSamplingResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvWaitOnePeriodicSampling(
+      WaitOnePeriodicSamplingResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvAwaitNextChunkManagerUpdate(
+      AwaitNextChunkManagerUpdateResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvDestroyReleasedChunksAtOrBefore(
+      const TimeStamp& aTimeStamp) override;
+  mozilla::ipc::IPCResult RecvGatherProfile(
+      GatherProfileResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvGetGatherProfileProgress(
+      GetGatherProfileProgressResolver&& aResolve) override;
+  mozilla::ipc::IPCResult RecvClearAllPages() override;
+
+  void ActorDestroy(ActorDestroyReason aActorDestroyReason) override;
+
+  // Implements the IShmemAllocator interface by forwarding to PProfilerChild.
+  FORWARD_SHMEM_ALLOCATOR_TO(PProfilerChild)
+
+  // Helpers around the chunk manager and its pending update.
+  void SetupChunkManager();
+  void ResetChunkManager();
+  void ResolveChunkUpdate(
+      PProfilerChild::AwaitNextChunkManagerUpdateResolver& aResolve);
+  void ProcessChunkManagerUpdate(
+      ProfileBufferControlledChunkManager::Update&& aUpdate);
+
+  // NOTE(review): the parameter name suggests the pointee arrives already
+  // AddRef'ed and that this function takes over that reference -- confirm in
+  // the implementation.
+  static void GatherProfileThreadFunction(void* already_AddRefedParameters);
+
+  nsCOMPtr<nsIThread> mThread;
+  // NOTE(review): no in-class initializer; presumably set by the constructor
+  // -- confirm.
+  bool mDestroyed;
+
+  // Raw (non-owning in this declaration) pointer, null by default.
+  ProfileBufferControlledChunkManager* mChunkManager = nullptr;
+  AwaitNextChunkManagerUpdateResolver mAwaitNextChunkManagerUpdateResolver;
+  ProfileBufferControlledChunkManager::Update mChunkManagerUpdate;
+
+  // Pairs a ProfilerChild with an update intended for it.
+  struct ProfilerChildAndUpdate {
+    RefPtr<ProfilerChild> mProfilerChild;
+    ProfileBufferControlledChunkManager::Update mUpdate;
+  };
+  // Single process-wide (static) pending update, wrapped in a DataMutexBase
+  // that uses BaseProfilerMutex.
+  static DataMutexBase<ProfilerChildAndUpdate,
+                       baseprofiler::detail::BaseProfilerMutex>
+      sPendingChunkManagerUpdate;
+
+  RefPtr<ProgressLogger::SharedProgress> mGatherProfileProgress;
+};
+
+}  // namespace mozilla
+
+#endif  // ProfilerChild_h
diff --git a/tools/profiler/public/ProfilerCodeAddressService.h b/tools/profiler/public/ProfilerCodeAddressService.h
new file mode 100644
index 0000000000..9d75c363b3
--- /dev/null
+++ b/tools/profiler/public/ProfilerCodeAddressService.h
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerCodeAddressService_h
+#define ProfilerCodeAddressService_h
+
+#include "CodeAddressService.h"
+#include "nsTArray.h"
+
+namespace mozilla {
+
+// This SymbolTable struct, and the CompactSymbolTable struct in the
+// profiler rust module, have the exact same memory layout.
+// nsTArray and ThinVec are FFI-compatible, because the thin-vec crate is
+// being compiled with the "gecko-ffi" feature enabled.
+struct SymbolTable {
+  SymbolTable() = default;
+  // Declaring the move constructor suppresses the implicitly generated copy
+  // constructor and copy assignment, so a SymbolTable can be moved but not
+  // copied.
+  SymbolTable(SymbolTable&& aOther) = default;
+
+  // The order and types of these three arrays are part of the memory layout
+  // shared with the Rust CompactSymbolTable; do not reorder or retype them
+  // without changing the Rust side as well.
+  nsTArray<uint32_t> mAddrs;
+  nsTArray<uint32_t> mIndex;
+  nsTArray<uint8_t> mBuffer;
+};
+
+}  // namespace mozilla
+
+/**
+ * Cache and look up function symbol names.
+ *
+ * We don't template this on AllocPolicy since we need to use nsTArray in
+ * SymbolTable above, which doesn't work with AllocPolicy.  (We can't switch
+ * to Vector, as we would lose FFI compatibility with ThinVec.)
+ */
+class ProfilerCodeAddressService : public mozilla::CodeAddressService<> {
+ public:
+  // Like GetLocation, but only returns the symbol name.
+  // The name is written to aResult.
+  // NOTE(review): the meaning of the bool return value is not visible in this
+  // header -- presumably "a name was found"; confirm in the implementation.
+  bool GetFunction(const void* aPc, nsACString& aResult);
+
+ private:
+  // This member only exists in Linux and FreeBSD builds.
+#if defined(XP_LINUX) || defined(XP_FREEBSD)
+  // Map of library names (owned by mLibraryStrings) to SymbolTables filled
+  // in by profiler_get_symbol_table.
+  // The keys are raw `const char*` pointers hashed with
+  // DefaultHasher<const char*>.
+  // NOTE(review): AllocPolicy is not declared in this header; presumably it
+  // comes from the CodeAddressService<> base -- confirm.
+  mozilla::HashMap<const char*, mozilla::SymbolTable,
+                   mozilla::DefaultHasher<const char*>, AllocPolicy>
+      mSymbolTables;
+#endif
+};
+
+#endif  // ProfilerCodeAddressService_h
diff --git a/tools/profiler/public/ProfilerControl.h b/tools/profiler/public/ProfilerControl.h
new file mode 100644
index 0000000000..466d15eb69
--- /dev/null
+++ b/tools/profiler/public/ProfilerControl.h
@@ -0,0 +1,190 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// APIs that control the lifetime of the profiler: Initialization, start, pause,
+// resume, stop, and shutdown.
+
+#ifndef ProfilerControl_h
+#define ProfilerControl_h
+
+#include "mozilla/BaseProfilerRAIIMacro.h"
+
+// Everything in here is also safe to include unconditionally, and only defines
+// empty macros if MOZ_GECKO_PROFILER is unset.
+// If your file only uses particular APIs (e.g., only markers), please consider
+// including only the needed headers instead of this one, to reduce compilation
+// dependencies.
+
+// Argument type of profiler_shutdown(), which defaults to IsFastShutdown::No.
+// Defined outside the MOZ_GECKO_PROFILER guards so that both the real function
+// and its no-op stub can use it.
+enum class IsFastShutdown {
+  No,
+  Yes,
+};
+
+#ifndef MOZ_GECKO_PROFILER
+
+// This file can be #included unconditionally. However, everything within this
+// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the
+// following macros and functions, which encapsulate the most common operations
+// and thus avoid the need for many #ifdefs.
+
+#  define AUTO_PROFILER_INIT ::profiler_init_main_thread_id()
+#  define AUTO_PROFILER_INIT2
+
+// Function stubs for when MOZ_GECKO_PROFILER is not defined.
+
+static inline void profiler_init(void* stackTop) {}
+
+static inline void profiler_shutdown(
+    IsFastShutdown aIsFastShutdown = IsFastShutdown::No) {}
+
+#else  // !MOZ_GECKO_PROFILER
+
+#  include "BaseProfiler.h"
+#  include "mozilla/Attributes.h"
+#  include "mozilla/Maybe.h"
+#  include "mozilla/MozPromise.h"
+#  include "mozilla/PowerOfTwo.h"
+#  include "mozilla/Vector.h"
+
+//---------------------------------------------------------------------------
+// Start and stop the profiler
+//---------------------------------------------------------------------------
+
+static constexpr mozilla::PowerOfTwo32 PROFILER_DEFAULT_ENTRIES =
+    mozilla::baseprofiler::BASE_PROFILER_DEFAULT_ENTRIES;
+
+static constexpr mozilla::PowerOfTwo32 PROFILER_DEFAULT_STARTUP_ENTRIES =
+    mozilla::baseprofiler::BASE_PROFILER_DEFAULT_STARTUP_ENTRIES;
+
+#  define PROFILER_DEFAULT_INTERVAL BASE_PROFILER_DEFAULT_INTERVAL
+#  define PROFILER_MAX_INTERVAL BASE_PROFILER_MAX_INTERVAL
+
+#  define PROFILER_DEFAULT_ACTIVE_TAB_ID 0
+
+// Initialize the profiler. If MOZ_PROFILER_STARTUP is set the profiler will
+// also be started. This call must happen before any other profiler calls
+// (except profiler_start(), which will call profiler_init() if it hasn't
+// already run).
+void profiler_init(void* stackTop);
+void profiler_init_threadmanager();
+
+// Call this as early as possible
+#  define AUTO_PROFILER_INIT mozilla::AutoProfilerInit PROFILER_RAII
+// Call this after the nsThreadManager is Init()ed
+#  define AUTO_PROFILER_INIT2 mozilla::AutoProfilerInit2 PROFILER_RAII
+
+// Clean up the profiler module, stopping it if required. This function may
+// also save a shutdown profile if requested. No profiler calls should happen
+// after this point and all profiling stack labels should have been popped.
+void profiler_shutdown(IsFastShutdown aIsFastShutdown = IsFastShutdown::No);
+
+// Start the profiler -- initializing it first if necessary -- with the
+// selected options. Stops and restarts the profiler if it is already active.
+// After starting the profiler is "active". The samples will be recorded in a
+// circular buffer.
+//   "aCapacity" is the maximum number of 8-byte entries in the profiler's
+//               circular buffer.
+//   "aInterval" is the sampling interval, measured in milliseconds.
+//   "aFeatures" is the feature set. Features unsupported by this
+//               platform/configuration are ignored.
+//   "aFilters" is the list of thread filters. Threads that do not match any
+//              of the filters are not profiled. A filter matches a thread if
+//              (a) the thread name contains the filter as a case-insensitive
+//                  substring, or
+//              (b) the filter is of the form "pid:<n>" where n is the process
+//                  id of the process that the thread is running in.
+//   "aActiveTabID" BrowserId of the active browser screen's active tab.
+//               It's being used to determine the profiled tab. It's "0" if
+//               we failed to get the ID.
+//   "aDuration" is the duration of entries in the profiler's circular buffer.
+// Returns as soon as this process' profiler has started, the returned promise
+// gets resolved when profilers in sub-processes (if any) have started.
+RefPtr<mozilla::GenericPromise> profiler_start(
+    mozilla::PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+    const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
+    const mozilla::Maybe<double>& aDuration = mozilla::Nothing());
+
+// Stop the profiler and discard the profile without saving it. A no-op if the
+// profiler is inactive. After stopping the profiler is "inactive".
+// Returns as soon as this process' profiler has stopped, the returned promise
+// gets resolved when profilers in sub-processes (if any) have stopped.
+RefPtr<mozilla::GenericPromise> profiler_stop();
+
+// If the profiler is inactive, start it. If it's already active, restart it if
+// the requested settings differ from the current settings. Both the check and
+// the state change are performed while the profiler state is locked.
+// The only difference to profiler_start is that the current buffer contents are
+// not discarded if the profiler is already running with the requested settings.
+void profiler_ensure_started(
+    mozilla::PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+    const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID,
+    const mozilla::Maybe<double>& aDuration = mozilla::Nothing());
+
+//---------------------------------------------------------------------------
+// Control the profiler
+//---------------------------------------------------------------------------
+
+// Pause and resume the profiler. No-ops if the profiler is inactive. While
+// paused the profile will not take any samples and will not record any data
+// into its buffers. The profiler remains fully initialized in this state.
+// Timeline markers will still be stored. This feature will keep JavaScript
+// profiling enabled, thus allowing toggling the profiler without invalidating
+// the JIT.
+// Returns as soon as this process' profiler has paused/resumed, the returned
+// promise gets resolved when profilers in sub-processes (if any) have
+// paused/resumed.
+RefPtr<mozilla::GenericPromise> profiler_pause();
+RefPtr<mozilla::GenericPromise> profiler_resume();
+
+// Only pause and resume the periodic sampling loop, including stack sampling,
+// counters, and profiling overheads.
+// Returns as soon as this process' profiler has paused/resumed sampling, the
+// returned promise gets resolved when profilers in sub-processes (if any) have
+// paused/resumed sampling.
+RefPtr<mozilla::GenericPromise> profiler_pause_sampling();
+RefPtr<mozilla::GenericPromise> profiler_resume_sampling();
+
+//---------------------------------------------------------------------------
+// Get information from the profiler
+//---------------------------------------------------------------------------
+
+// Get the params used to start the profiler. Returns 0 and an empty vector
+// (via outparams) if the profiler is inactive. It's possible that the features
+// returned may be slightly different to those requested due to required
+// adjustments.
+void profiler_get_start_params(
+    int* aEntrySize, mozilla::Maybe<double>* aDuration, double* aInterval,
+    uint32_t* aFeatures,
+    mozilla::Vector<const char*, 0, mozilla::MallocAllocPolicy>* aFilters,
+    uint64_t* aActiveTabID);
+
+//---------------------------------------------------------------------------
+// RAII classes
+//---------------------------------------------------------------------------
+
+namespace mozilla {
+
+// Scope guard for the profiler's lifetime: construction calls profiler_init()
+// and destruction calls profiler_shutdown() with its default argument.
+class MOZ_RAII AutoProfilerInit {
+ public:
+  // The address of this guard object is what gets passed as profiler_init()'s
+  // `stackTop` argument.
+  explicit AutoProfilerInit() { profiler_init(this); }
+
+  ~AutoProfilerInit() { profiler_shutdown(); }
+};
+
+// Second-stage initialization helper: construction calls
+// profiler_init_threadmanager(). There is no user-provided destructor, so
+// nothing is undone when the object goes out of scope.
+class MOZ_RAII AutoProfilerInit2 {
+ public:
+  explicit AutoProfilerInit2() { profiler_init_threadmanager(); }
+};
+
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // ProfilerControl_h
diff --git a/tools/profiler/public/ProfilerCounts.h b/tools/profiler/public/ProfilerCounts.h
new file mode 100644
index 0000000000..86f6cbfe4f
--- /dev/null
+++ b/tools/profiler/public/ProfilerCounts.h
@@ -0,0 +1,296 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerCounts_h
+#define ProfilerCounts_h
+
+#ifndef MOZ_GECKO_PROFILER
+
+// No-op versions of every counter macro, used when MOZ_GECKO_PROFILER is not
+// defined. Names and parameter lists mirror the real definitions in the #else
+// branch so that code using the macros builds in both configurations.
+#  define PROFILER_DEFINE_COUNT_TOTAL(label, category, description)
+#  define PROFILER_DEFINE_COUNT(label, category, description)
+#  define PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description)
+#  define AUTO_PROFILER_COUNT_TOTAL(label, count)
+#  define AUTO_PROFILER_COUNT(label, count)
+#  define AUTO_PROFILER_STATIC_COUNT(label, count)
+#  define AUTO_PROFILER_FORCE_ALLOCATION(label)
+// Historical stub name with no counterpart in profiler-enabled builds; kept so
+// that any existing user of it keeps compiling.
+#  define AUTO_PROFILER_TOTAL(label, count)
+
+#else
+
+#  include "mozilla/Atomics.h"
+
+class BaseProfilerCount;
+void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+// Relaxed-ordering atomic integer types that back the profiler counters.
+using ProfilerAtomicSigned =
+    mozilla::Atomic<int64_t, mozilla::MemoryOrdering::Relaxed>;
+using ProfilerAtomicUnsigned =
+    mozilla::Atomic<uint64_t, mozilla::MemoryOrdering::Relaxed>;
+
+// Counter support
+// There are two types of counters:
+// 1) a simple counter which can be added to or subtracted from.  This could
+// track the number of objects of a type, the number of calls to something
+// (reflow, JIT, etc).
+// 2) a combined counter which has the above, plus a number-of-calls counter
+// that is incremented by 1 for each call to modify the count.  This provides
+// an optional source for a 'heatmap' of access.  This can be used (for
+// example) to track the amount of memory allocated, and provide a heatmap of
+// memory operations (allocs/frees).
+//
+// Counters are sampled by the profiler once per sample-period.  At this time,
+// all counters are global to the process.  In the future, there might be more
+// versions with per-thread or other discriminators.
+//
+// Typical usage:
+// There are two ways to use counters: With heap-created counter objects,
+// or using macros.  Note: the macros use statics, and will be slightly
+// faster/smaller, and you need to care about creating them before using
+// them.  They're similar to the use-pattern for the other AUTO_PROFILER*
+// macros, but they do need the PROFILER_DEFINE* to be used to instantiate
+// the statics.
+//
+// PROFILER_DEFINE_COUNT(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() { ... AUTO_PROFILER_COUNT(mything, number_of_bytes_used); ... }
+//
+// or (to also get a heatmap)
+//
+// PROFILER_DEFINE_COUNT_TOTAL(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() {
+//   ...
+//   AUTO_PROFILER_COUNT_TOTAL(mything, number_of_bytes_generated);
+//   ...
+// }
+//
+// To use without statics/macros:
+//
+// UniquePtr<ProfilerCounter> myCounter;
+// ...
+// myCounter =
+//   MakeUnique<ProfilerCounter>("mything", "JIT", "Some JIT byte count");
+// ...
+// void foo() { ... myCounter->Add(number_of_bytes_generated); ... }
+
+// Describes one counter to the profiler: a label, category and description,
+// plus pointers to the atomic value(s) that hold the actual data. The atomics
+// are not owned by this object.
+class BaseProfilerCount {
+ public:
+  // aCounter points at the running count and is dereferenced unconditionally
+  // by Sample() and Clear(). aNumber points at the number-of-calls value and
+  // may be null. None of the pointers are copied or freed by this class.
+  BaseProfilerCount(const char* aLabel, ProfilerAtomicSigned* aCounter,
+                    ProfilerAtomicUnsigned* aNumber, const char* aCategory,
+                    const char* aDescription)
+      : mLabel(aLabel),
+        mCategory(aCategory),
+        mDescription(aDescription),
+        mCounter(aCounter),
+        mNumber(aNumber) {
+    // Preprocessor macros are not scoped: COUNTER_CANARY stays defined for the
+    // rest of the translation unit, which Sample() below relies on.
+#  define COUNTER_CANARY 0xDEADBEEF
+#  ifdef DEBUG
+    mCanary = COUNTER_CANARY;
+    mPrevNumber = 0;
+#  endif
+    // Can't call profiler_* here since this may be non-xul-library
+  }
+
+  virtual ~BaseProfilerCount() {
+    // In DEBUG builds, clear the canary so that the assertion in Sample()
+    // fails if it is called on this object after destruction.
+#  ifdef DEBUG
+    mCanary = 0;
+#  endif
+  }
+
+  // Snapshot returned by Sample().
+  struct CountSample {
+    int64_t count;
+    uint64_t number;
+    // This field indicates if the sample has already been consumed by a call
+    // to the Sample() method. This allows the profiler to discard duplicate
+    // samples if the counter sampling rate is lower than the profiler sampling
+    // rate. This can happen for example with some power meters that sample up
+    // to every 10ms.
+    // It should always be true when calling Sample() for the first time.
+    bool isSampleNew;
+  };
+  // Reads the current values. `number` is 0 when no number-of-calls atomic was
+  // provided. This base implementation always reports the sample as new;
+  // the method is virtual so subclasses can behave differently.
+  virtual CountSample Sample() {
+    MOZ_ASSERT(mCanary == COUNTER_CANARY);
+
+    CountSample result;
+    result.count = *mCounter;
+    result.number = mNumber ? *mNumber : 0;
+#  ifdef DEBUG
+    // The number of calls is expected to never decrease between samples.
+    MOZ_ASSERT(result.number >= mPrevNumber);
+    mPrevNumber = result.number;
+#  endif
+    result.isSampleNew = true;
+    return result;
+  }
+
+  // Resets the running count to zero.
+  void Clear() {
+    *mCounter = 0;
+    // We don't reset *mNumber or mPrevNumber.  We encode numbers as
+    // positive deltas, and currently we only care about the deltas (for
+    // e.g. heatmaps).  If we ever need to clear mNumber as well, we can add an
+    // alternative method (Reset()) to do so.
+  }
+
+  // We don't define ++ and Add() here, since the static defines directly
+  // increment the atomic counters, and the subclasses implement ++ and
+  // Add() directly.
+
+  // These typically are static strings (for example if you use the macros
+  // below)
+  const char* mLabel;
+  const char* mCategory;
+  const char* mDescription;
+  // We're ok with these being un-ordered in race conditions.  These are
+  // pointers because we want to be able to use statics and increment them
+  // directly.  Otherwise we could just have them inline, and not need the
+  // constructor args.
+  // These can be static globals (using the macros below), though they
+  // don't have to be - their lifetime must be longer than the use of them
+  // by the profiler (see profiler_add/remove_sampled_counter()).  If you're
+  // using a lot of these, they probably should be allocated at runtime (see
+  // class ProfilerCounter below).
+  ProfilerAtomicSigned* mCounter;
+  ProfilerAtomicUnsigned* mNumber;  // may be null
+
+#  ifdef DEBUG
+  uint32_t mCanary;      // COUNTER_CANARY while alive, 0 after destruction
+  uint64_t mPrevNumber;  // value of number from the last Sample()
+#  endif
+};
+
+// Counter meant for dynamic allocation. It owns its atomic value, registers
+// itself with the profiler on construction and unregisters on destruction.
+// Bump it with ++obj or obj->Add(n).
+class ProfilerCounter final : public BaseProfilerCount {
+ public:
+  ProfilerCounter(const char* aLabel, const char* aCategory,
+                  const char* aDescription)
+      : BaseProfilerCount(aLabel, /* aCounter */ &mCounter,
+                          /* aNumber */ nullptr, aCategory, aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+
+  ~ProfilerCounter() override { profiler_remove_sampled_counter(this); }
+
+  // Increments the count by one.
+  BaseProfilerCount& operator++() {
+    mCounter += 1;
+    return *this;
+  }
+
+  // Adds aNumber (which may be negative) to the count.
+  void Add(int64_t aNumber) { mCounter += aNumber; }
+
+  // The value itself; the base class holds a pointer to it under the same name.
+  ProfilerAtomicSigned mCounter;
+};
+
+// Like ProfilerCounter, but additionally counts how many times ++/Add() were
+// called, which provides the data for a heatmap.
+class ProfilerCounterTotal final : public BaseProfilerCount {
+ public:
+  ProfilerCounterTotal(const char* aLabel, const char* aCategory,
+                       const char* aDescription)
+      : BaseProfilerCount(aLabel, /* aCounter */ &mCounter,
+                          /* aNumber */ &mNumber, aCategory, aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+
+  ~ProfilerCounterTotal() override { profiler_remove_sampled_counter(this); }
+
+  // Increments the count by one (and the number of calls by one).
+  BaseProfilerCount& operator++() {
+    mCounter += 1;
+    mNumber++;
+    return *this;
+  }
+
+  // Adds aNumber to the count and records one more call.
+  void Add(int64_t aNumber) {
+    mCounter += aNumber;
+    mNumber++;
+  }
+
+  // The values themselves; the base class holds pointers to them under the
+  // same names.
+  ProfilerAtomicSigned mCounter;
+  ProfilerAtomicUnsigned mNumber;
+};
+
+// Defines a counter that is sampled on each profiler tick, with a running
+// count (signed), and number-of-instances. Note that because these are two
+// independent Atomics, there is a possibility that count will not include
+// the last call, but number of uses will.  I think this is not worth
+// worrying about.
+#  define PROFILER_DEFINE_COUNT_TOTAL(label, category, description) \
+    ProfilerAtomicSigned profiler_count_##label(0);                 \
+    ProfilerAtomicUnsigned profiler_number_##label(0);              \
+    const char profiler_category_##label[] = category;              \
+    const char profiler_description_##label[] = description;        \
+    mozilla::UniquePtr<BaseProfilerCount> AutoCount_##label;
+
+// This counts, but doesn't keep track of the number of calls to
+// AUTO_PROFILER_COUNT()
+#  define PROFILER_DEFINE_COUNT(label, category, description) \
+    ProfilerAtomicSigned profiler_count_##label(0);           \
+    const char profiler_category_##label[] = category;        \
+    const char profiler_description_##label[] = description;  \
+    mozilla::UniquePtr<BaseProfilerCount> AutoCount_##label;
+
+// This will create a static initializer if used, but avoids a possible
+// allocation.
+#  define PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description)  \
+    ProfilerAtomicSigned profiler_count_##label(0);                         \
+    ProfilerAtomicUnsigned profiler_number_##label(0);                      \
+    BaseProfilerCount AutoCount_##label(#label, &profiler_count_##label,    \
+                                        &profiler_number_##label, category, \
+                                        description);
+
+// If we didn't care about static initializers, we could avoid the need for
+// a ptr to the BaseProfilerCount object.
+
+// XXX It would be better to do this without the if() and without the
+// theoretical race to set the UniquePtr (i.e. possible leak).
+#  define AUTO_PROFILER_COUNT_TOTAL(label, count)                           \
+    do {                                                                    \
+      profiler_number_##label++; /* do this first*/                         \
+      profiler_count_##label += count;                                      \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore that we could call this twice in theory, and that we leak \
+         * them                                                             \
+         */                                                                 \
+        AutoCount_##label.reset(new BaseProfilerCount(                      \
+            #label, &profiler_count_##label, &profiler_number_##label,      \
+            profiler_category_##label, profiler_description_##label));      \
+        profiler_add_sampled_counter(AutoCount_##label.get());              \
+      }                                                                     \
+    } while (0)
+
+// Adds `count` to the counter declared with PROFILER_DEFINE_COUNT(label, ...).
+// On first use it lazily allocates the BaseProfilerCount wrapper and registers
+// it with the profiler.
+// PROFILER_DEFINE_COUNT declares profiler_count_##label but no
+// profiler_number_##label, so the wrapper gets the running count as its
+// counter and a null "number" pointer; BaseProfilerCount::Sample() then
+// reports 0 for the number of calls.
+#  define AUTO_PROFILER_COUNT(label, count)                                 \
+    do {                                                                    \
+      profiler_count_##label += count; /* do this first*/                   \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore that we could call this twice in theory, and that we leak \
+         * them                                                             \
+         */                                                                 \
+        AutoCount_##label.reset(new BaseProfilerCount(                      \
+            #label, &profiler_count_##label, nullptr,                       \
+            profiler_category_##label, profiler_description_##label));      \
+        profiler_add_sampled_counter(AutoCount_##label.get());              \
+      }                                                                     \
+    } while (0)
+
+#  define AUTO_PROFILER_STATIC_COUNT(label, count)  \
+    do {                                            \
+      profiler_number_##label++; /* do this first*/ \
+      profiler_count_##label += count;              \
+    } while (0)
+
+// if we need to force the allocation
+// Creates the BaseProfilerCount wrapper for `label` if it does not exist yet,
+// without touching the count or the number of calls. Because it passes
+// &profiler_number_##label, it requires the PROFILER_DEFINE_COUNT_TOTAL
+// variant of the definitions.
+// NOTE(review): unlike AUTO_PROFILER_COUNT_TOTAL, this does not call
+// profiler_add_sampled_counter(). Once AutoCount_##label is set here, the
+// other macros skip their registration branch too. Confirm whether
+// registration is meant to happen elsewhere.
+#  define AUTO_PROFILER_FORCE_ALLOCATION(label)                             \
+    do {                                                                    \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore that we could call this twice in theory, and that we leak \
+         * them                                                             \
+         */                                                                 \
+        AutoCount_##label.reset(new BaseProfilerCount(                      \
+            #label, &profiler_count_##label, &profiler_number_##label,      \
+            profiler_category_##label, profiler_description_##label));      \
+      }                                                                     \
+    } while (0)
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // ProfilerCounts_h
diff --git a/tools/profiler/public/ProfilerLabels.h b/tools/profiler/public/ProfilerLabels.h
new file mode 100644
index 0000000000..f05e357451
--- /dev/null
+++ b/tools/profiler/public/ProfilerLabels.h
@@ -0,0 +1,268 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains all definitions related to profiler labels.
+// It is safe to include unconditionally, and only defines empty macros if
+// MOZ_GECKO_PROFILER is not set.
+
+#ifndef ProfilerLabels_h
+#define ProfilerLabels_h
+
+#include "mozilla/ProfilerThreadState.h"
+
+#include "js/ProfilingCategory.h"
+#include "js/ProfilingStack.h"
+#include "js/RootingAPI.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/BaseProfilerRAIIMacro.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfilerThreadRegistration.h"
+#include "mozilla/ThreadLocal.h"
+#include "nsString.h"
+
+#include <stdint.h>
+
+struct JSContext;
+
+// Insert an RAII object in this scope to enter a label stack frame. Any
+// samples collected in this scope will contain this label in their stack.
+// The label argument must be a static C string. It is usually of the
+// form "ClassName::FunctionName". (Ideally we'd use the compiler to provide
+// that for us, but __func__ gives us the function name without the class
+// name.) If the label applies to only part of a function, you can qualify it
+// like this: "ClassName::FunctionName:PartName".
+//
+// Use AUTO_PROFILER_LABEL_DYNAMIC_* if you want to add additional / dynamic
+// information to the label stack frame.
+#define AUTO_PROFILER_LABEL(label, categoryPair) \
+  mozilla::AutoProfilerLabel PROFILER_RAII(      \
+      label, nullptr, JS::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_PROFILER_LABEL, but that adds the RELEVANT_FOR_JS flag.
+#define AUTO_PROFILER_LABEL_RELEVANT_FOR_JS(label, categoryPair) \
+  mozilla::AutoProfilerLabel PROFILER_RAII(                      \
+      label, nullptr, JS::ProfilingCategoryPair::categoryPair,   \
+      uint32_t(js::ProfilingStackFrame::Flags::RELEVANT_FOR_JS))
+
+// Similar to AUTO_PROFILER_LABEL, but with only one argument: the category
+// pair. The label string is taken from the category pair. This is convenient
+// for labels like AUTO_PROFILER_LABEL_CATEGORY_PAIR(GRAPHICS_LayerBuilding)
+// which would otherwise just repeat the string.
+#define AUTO_PROFILER_LABEL_CATEGORY_PAIR(categoryPair)     \
+  mozilla::AutoProfilerLabel PROFILER_RAII(                 \
+      "", nullptr, JS::ProfilingCategoryPair::categoryPair, \
+      uint32_t(                                             \
+          js::ProfilingStackFrame::Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR))
+
+// Similar to AUTO_PROFILER_LABEL_CATEGORY_PAIR but adding the RELEVANT_FOR_JS
+// flag.
+#define AUTO_PROFILER_LABEL_CATEGORY_PAIR_RELEVANT_FOR_JS(categoryPair)        \
+  mozilla::AutoProfilerLabel PROFILER_RAII(                                    \
+      "", nullptr, JS::ProfilingCategoryPair::categoryPair,                    \
+      uint32_t(                                                                \
+          js::ProfilingStackFrame::Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR) | \
+          uint32_t(js::ProfilingStackFrame::Flags::RELEVANT_FOR_JS))
+
+// Similar to AUTO_PROFILER_LABEL, but with an additional string. The inserted
+// RAII object stores the cStr pointer in a field; it does not copy the string.
+//
+// WARNING: This means that the string you pass to this macro needs to live at
+// least until the end of the current scope. Be careful using this macro with
+// ns[C]String; the other AUTO_PROFILER_LABEL_DYNAMIC_* macros below are
+// preferred because they avoid this problem.
+//
+// If the profiler samples the current thread and walks the label stack while
+// this RAII object is on the stack, it will copy the supplied string into the
+// profile buffer. So there's one string copy operation, and it happens at
+// sample time.
+//
+// Compare this to the plain AUTO_PROFILER_LABEL macro, which only accepts
+// literal strings: When the label stack frames generated by
+// AUTO_PROFILER_LABEL are sampled, no string copy needs to be made because the
+// profile buffer can just store the raw pointers to the literal strings.
+// Consequently, AUTO_PROFILER_LABEL frames take up considerably less space in
+// the profile buffer than AUTO_PROFILER_LABEL_DYNAMIC_* frames.
+#define AUTO_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr) \
+  mozilla::AutoProfilerLabel PROFILER_RAII(                         \
+      label, cStr, JS::ProfilingCategoryPair::categoryPair)
+
+// Like AUTO_PROFILER_LABEL_DYNAMIC_CSTR, but with the NONSENSITIVE flag to
+// note that it does not contain sensitive information (so we can include it
+// in, for example, the BackgroundHangMonitor)
+#define AUTO_PROFILER_LABEL_DYNAMIC_CSTR_NONSENSITIVE(label, categoryPair, \
+                                                      cStr)                \
+  mozilla::AutoProfilerLabel PROFILER_RAII(                                \
+      label, cStr, JS::ProfilingCategoryPair::categoryPair,                \
+      uint32_t(js::ProfilingStackFrame::Flags::NONSENSITIVE))
+
+// Similar to AUTO_PROFILER_LABEL_DYNAMIC_CSTR, but takes an nsACString.
+//
+// Note: The use of the Maybe<>s ensures the scopes for the dynamic string and
+// the AutoProfilerLabel are appropriate, while also not incurring the runtime
+// cost of the string assignment unless the profiler is active. Therefore,
+// unlike AUTO_PROFILER_LABEL and AUTO_PROFILER_LABEL_DYNAMIC_CSTR, this macro
+// doesn't push/pop a label when the profiler is inactive.
+#define AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING(label, categoryPair, nsCStr) \
+  mozilla::Maybe<nsAutoCString> autoCStr;                                  \
+  mozilla::Maybe<mozilla::AutoProfilerLabel> raiiObjectNsCString;          \
+  if (profiler_is_active()) {                                              \
+    autoCStr.emplace(nsCStr);                                              \
+    raiiObjectNsCString.emplace(label, autoCStr->get(),                    \
+                                JS::ProfilingCategoryPair::categoryPair);  \
+  }
+
+#define AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING_RELEVANT_FOR_JS(           \
+    label, categoryPair, nsCStr)                                         \
+  mozilla::Maybe<nsAutoCString> autoCStr;                                \
+  mozilla::Maybe<mozilla::AutoProfilerLabel> raiiObjectNsCString;        \
+  if (profiler_is_active()) {                                            \
+    autoCStr.emplace(nsCStr);                                            \
+    raiiObjectNsCString.emplace(                                         \
+        label, autoCStr->get(), JS::ProfilingCategoryPair::categoryPair, \
+        uint32_t(js::ProfilingStackFrame::Flags::RELEVANT_FOR_JS));      \
+  }
+
+// Match the conditions for MOZ_ENABLE_BACKGROUND_HANG_MONITOR
+#if defined(NIGHTLY_BUILD) && !defined(MOZ_DEBUG) && !defined(MOZ_TSAN) && \
+    !defined(MOZ_ASAN)
+#  define SHOULD_CREATE_ALL_NONSENSITIVE_LABEL_FRAMES true
+#else
+#  define SHOULD_CREATE_ALL_NONSENSITIVE_LABEL_FRAMES profiler_is_active()
+#endif
+
+// See note above AUTO_PROFILER_LABEL_DYNAMIC_CSTR_NONSENSITIVE
+#define AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING_NONSENSITIVE(              \
+    label, categoryPair, nsCStr)                                         \
+  mozilla::Maybe<nsAutoCString> autoCStr;                                \
+  mozilla::Maybe<mozilla::AutoProfilerLabel> raiiObjectNsCString;        \
+  if (SHOULD_CREATE_ALL_NONSENSITIVE_LABEL_FRAMES) {                     \
+    autoCStr.emplace(nsCStr);                                            \
+    raiiObjectNsCString.emplace(                                         \
+        label, autoCStr->get(), JS::ProfilingCategoryPair::categoryPair, \
+        uint32_t(js::ProfilingStackFrame::Flags::NONSENSITIVE));         \
+  }
+
+// Similar to AUTO_PROFILER_LABEL_DYNAMIC_CSTR, but takes an nsString that is
+// lossily converted to an ASCII string.
+//
+// Note: The use of the Maybe<>s ensures the scopes for the converted dynamic
+// string and the AutoProfilerLabel are appropriate, while also not incurring
+// the runtime cost of the string conversion unless the profiler is active.
+// Therefore, unlike AUTO_PROFILER_LABEL and AUTO_PROFILER_LABEL_DYNAMIC_CSTR,
+// this macro doesn't push/pop a label when the profiler is inactive.
+#define AUTO_PROFILER_LABEL_DYNAMIC_LOSSY_NSSTRING(label, categoryPair, nsStr) \
+  mozilla::Maybe<NS_LossyConvertUTF16toASCII> asciiStr;                        \
+  mozilla::Maybe<mozilla::AutoProfilerLabel> raiiObjectLossyNsString;          \
+  if (profiler_is_active()) {                                                  \
+    asciiStr.emplace(nsStr);                                                   \
+    raiiObjectLossyNsString.emplace(label, asciiStr->get(),                    \
+                                    JS::ProfilingCategoryPair::categoryPair);  \
+  }
+
+// Similar to AUTO_PROFILER_LABEL, but accepting a JSContext* parameter, and a
+// no-op if the profiler is disabled.
+// Used to annotate functions for which overhead in the range of nanoseconds is
+// noticeable. It avoids overhead from the TLS lookup because it can get the
+// ProfilingStack from the JS context, and avoids almost all overhead in the
+// case where the profiler is disabled.
+#define AUTO_PROFILER_LABEL_FAST(label, categoryPair, ctx) \
+  mozilla::AutoProfilerLabel PROFILER_RAII(                \
+      ctx, label, nullptr, JS::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_PROFILER_LABEL_FAST, but also takes an extra string and an
+// additional set of flags. The flags parameter should carry values from the
+// js::ProfilingStackFrame::Flags enum.
+#define AUTO_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, categoryPair, \
+                                         ctx, flags)                         \
+  mozilla::AutoProfilerLabel PROFILER_RAII(                                  \
+      ctx, label, dynamicString, JS::ProfilingCategoryPair::categoryPair,    \
+      flags)
+
+namespace mozilla {
+
+#ifndef MOZ_GECKO_PROFILER
+
+class MOZ_RAII AutoProfilerLabel {
+ public:
+  // No-op form for the AUTO_PROFILER_LABEL* macros that pass no JSContext.
+  AutoProfilerLabel(const char* aLabel, const char* aDynamicString,
+                    JS::ProfilingCategoryPair aCategoryPair,
+                    uint32_t aFlags = 0) {}
+
+  // No-op form for the AUTO_PROFILER_LABEL*_FAST macros (JSContext first).
+  AutoProfilerLabel(JSContext* aJSContext, const char* aLabel,
+                    const char* aDynamicString,
+                    JS::ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {}
+
+  ~AutoProfilerLabel() {}
+};
+
+#else  // !MOZ_GECKO_PROFILER
+
+// Non-owning handle on a ProfilingStack. Instances are stack-allocated and so
+// exist within a thread; their lifetime is therefore bounded by that thread's,
+// which ensures the held reference can't be used after the ProfilingStack is
+// destroyed.
+class MOZ_RAII AutoProfilerLabel {
+ public:
+  // Variant used by AUTO_PROFILER_LABEL and AUTO_PROFILER_LABEL_DYNAMIC_*.
+  AutoProfilerLabel(const char* aLabel, const char* aDynamicString,
+                    JS::ProfilingCategoryPair aCategoryPair,
+                    uint32_t aFlags = 0) {
+    // Look up this thread's ProfilingStack through TLS.
+    ProfilingStack* tlsStack =
+        profiler::ThreadRegistration::WithOnThreadRefOr(
+            [](profiler::ThreadRegistration::OnThreadRef aThread) {
+              return &aThread.UnlockedConstReaderAndAtomicRWRef()
+                          .ProfilingStackRef();
+            },
+            nullptr);
+    Push(tlsStack, aLabel, aDynamicString, aCategoryPair, aFlags);
+  }
+
+  // Variant used by AUTO_PROFILER_LABEL_FAST: the ProfilingStack comes from the
+  // JSContext, and nothing happens if the profiler is inactive.
+  AutoProfilerLabel(JSContext* aJSContext, const char* aLabel,
+                    const char* aDynamicString,
+                    JS::ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {
+    ProfilingStack* contextStack =
+        js::GetContextProfilingStackIfEnabled(aJSContext);
+    Push(contextStack, aLabel, aDynamicString, aCategoryPair, aFlags);
+  }
+
+  void Push(ProfilingStack* aProfilingStack, const char* aLabel,
+            const char* aDynamicString, JS::ProfilingCategoryPair aCategoryPair,
+            uint32_t aFlags = 0) {
+    // This function runs both on and off the main thread.
+    mProfilingStack = aProfilingStack;
+    if (!mProfilingStack) {
+      return;
+    }
+    mProfilingStack->pushLabelFrame(aLabel, aDynamicString, this, aCategoryPair,
+                                    aFlags);
+  }
+
+  ~AutoProfilerLabel() {
+    // This function runs both on and off the main thread.
+    if (!mProfilingStack) {
+      return;
+    }
+    mProfilingStack->pop();
+  }
+
+ private:
+  // The ProfilingStack pointer is cached by the ctor so that the dtor doesn't
+  // have to redo the TLS lookup.
+  ProfilingStack* mProfilingStack;
+};
+
+#endif  // !MOZ_GECKO_PROFILER
+
+}  // namespace mozilla
+
+#endif  // ProfilerLabels_h
diff --git a/tools/profiler/public/ProfilerMarkerTypes.h b/tools/profiler/public/ProfilerMarkerTypes.h
new file mode 100644
index 0000000000..0868c70e30
--- /dev/null
+++ b/tools/profiler/public/ProfilerMarkerTypes.h
@@ -0,0 +1,41 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerMarkerTypes_h
+#define ProfilerMarkerTypes_h
+
+// This header contains common marker type definitions that rely on xpcom.
+//
+// It #include's "mozilla/BaseProfilerMarkerTypes.h" and "ProfilerMarkers.h",
+// see these files for more marker types, how to define other marker types, and
+// how to add markers to the profiler buffers.
+
+// !!!                       /!\ WORK IN PROGRESS /!\                       !!!
+// This file contains draft marker definitions, but most are not used yet.
+// Further work is needed to complete these definitions, and use them to convert
+// existing PROFILER_ADD_MARKER calls. See meta bug 1661394.
+
+#include "mozilla/BaseProfilerMarkerTypes.h"
+#include "mozilla/ProfilerMarkers.h"
+#include "js/ProfilingFrameIterator.h"
+#include "js/Utility.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/ServoTraversalStatistics.h"
+
+namespace geckoprofiler::markers {
+
+// Import some common markers from mozilla::baseprofiler::markers.
+using MediaSampleMarker = mozilla::baseprofiler::markers::MediaSampleMarker;
+using VideoFallingBehindMarker =
+    mozilla::baseprofiler::markers::VideoFallingBehindMarker;
+using ContentBuildMarker = mozilla::baseprofiler::markers::ContentBuildMarker;
+using MediaEngineMarker = mozilla::baseprofiler::markers::MediaEngineMarker;
+using MediaEngineTextMarker =
+    mozilla::baseprofiler::markers::MediaEngineTextMarker;
+
+}  // namespace geckoprofiler::markers
+
+#endif  // ProfilerMarkerTypes_h
diff --git a/tools/profiler/public/ProfilerMarkers.h b/tools/profiler/public/ProfilerMarkers.h
new file mode 100644
index 0000000000..ca53c3f189
--- /dev/null
+++ b/tools/profiler/public/ProfilerMarkers.h
@@ -0,0 +1,355 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Markers are useful to delimit something important happening such as the first
+// paint. Unlike labels, which are only recorded in the profile buffer if a
+// sample is collected while the label is on the label stack, markers will
+// always be recorded in the profile buffer.
+//
+// This header contains definitions necessary to add markers to the Gecko
+// Profiler buffer.
+//
+// It #include's "mozilla/BaseProfilerMarkers.h", see that header for base
+// definitions necessary to create marker types.
+//
+// If common marker types are needed, #include "ProfilerMarkerTypes.h" instead.
+//
+// But if you want to create your own marker type locally, you can #include this
+// header only; look at ProfilerMarkerTypes.h for examples of how to define
+// types.
+//
+// To then record markers:
+// - Use `baseprofiler::AddMarker(...)` from mozglue or other libraries that are
+//   outside of xul, especially if they may happen outside of xpcom's lifetime
+//   (typically startup, shutdown, or tests).
+// - Otherwise #include "ProfilerMarkers.h" instead, and use
+//   `profiler_add_marker(...)`.
+// See these functions for more details.
+
+#ifndef ProfilerMarkers_h
+#define ProfilerMarkers_h
+
+#include "mozilla/BaseProfilerMarkers.h"
+#include "mozilla/ProfilerMarkersDetail.h"
+#include "mozilla/ProfilerLabels.h"
+#include "nsJSUtils.h"  // for nsJSUtils::GetCurrentlyRunningCodeInnerWindowID
+
+class nsIDocShell;
+
+namespace geckoprofiler::markers::detail {
+// Please do not use anything from the detail namespace outside the profiler.
+
+#ifdef MOZ_GECKO_PROFILER
+mozilla::Maybe<uint64_t> profiler_get_inner_window_id_from_docshell(
+    nsIDocShell* aDocshell);
+#else
+inline mozilla::Maybe<uint64_t> profiler_get_inner_window_id_from_docshell(
+    nsIDocShell* aDocshell) {
+  return mozilla::Nothing();
+}
+#endif  // MOZ_GECKO_PROFILER
+
+}  // namespace geckoprofiler::markers::detail
+
+// Last-resort helper that derives a MarkerInnerWindowId from a DocShell.
+// Prefer an already-computed inner window ID, a `window`, or a `Document`
+// whenever the call site has one; this lookup is neither the recommended nor
+// the encouraged way to obtain the ID. Yields MarkerInnerWindowId::NoId() when
+// the DocShell lookup produces no ID.
+inline mozilla::MarkerInnerWindowId MarkerInnerWindowIdFromDocShell(
+    nsIDocShell* aDocshell) {
+  mozilla::Maybe<uint64_t> innerWindowId = geckoprofiler::markers::detail::
+      profiler_get_inner_window_id_from_docshell(aDocshell);
+  if (innerWindowId.isSome()) {
+    return mozilla::MarkerInnerWindowId(*innerWindowId);
+  }
+  return mozilla::MarkerInnerWindowId::NoId();
+}
+
+// Last-resort helper that derives a MarkerInnerWindowId from a JS Context.
+// Prefer an already-computed inner window ID, a `window`, or a `Document`
+// whenever the call site has one; this lookup is neither the recommended nor
+// the encouraged way to obtain the ID.
+inline mozilla::MarkerInnerWindowId MarkerInnerWindowIdFromJSContext(
+    JSContext* aContext) {
+  const auto innerWindowId =
+      nsJSUtils::GetCurrentlyRunningCodeInnerWindowID(aContext);
+  return mozilla::MarkerInnerWindowId(innerWindowId);
+}
+
+// Bring category names from Base Profiler into the geckoprofiler::category
+// namespace, for consistency with other Gecko Profiler identifiers.
+namespace geckoprofiler::category {
+using namespace ::mozilla::baseprofiler::category;
+}
+
+#ifdef MOZ_GECKO_PROFILER
+// Forward-declaration. TODO: Move to more common header, see bug 1681416.
+bool profiler_capture_backtrace_into(
+    mozilla::ProfileChunkedBuffer& aChunkedBuffer,
+    mozilla::StackCaptureOptions aCaptureOptions);
+
+// Records a marker into the given buffer. Most code should go through
+// `AddMarker()` and the related macros instead; see further below for details
+// about them and their parameters. Calling this directly may be useful when
+// markers must be recorded in a local buffer outside the main profiler buffer.
+template <typename MarkerType, typename... PayloadArguments>
+mozilla::ProfileBufferBlockIndex AddMarkerToBuffer(
+    mozilla::ProfileChunkedBuffer& aBuffer,
+    const mozilla::ProfilerString8View& aName,
+    const mozilla::MarkerCategory& aCategory, mozilla::MarkerOptions&& aOptions,
+    MarkerType aMarkerType, const PayloadArguments&... aPayloadArguments) {
+  AUTO_PROFILER_LABEL("AddMarkerToBuffer", PROFILER);
+  mozilla::Unused << aMarkerType;  // Only the empty object type is useful.
+  const bool captureStacks =
+      profiler_active_without_feature(ProfilerFeature::NoMarkerStacks);
+  return mozilla::base_profiler_markers_detail::AddMarkerToBuffer<MarkerType>(
+      aBuffer, aName, aCategory, std::move(aOptions),
+      captureStacks ? ::profiler_capture_backtrace_into : nullptr,
+      aPayloadArguments...);
+}
+
+// Add a marker (without payload) to a given buffer.
+inline mozilla::ProfileBufferBlockIndex AddMarkerToBuffer(
+    mozilla::ProfileChunkedBuffer& aBuffer,
+    const mozilla::ProfilerString8View& aName,
+    const mozilla::MarkerCategory& aCategory,
+    mozilla::MarkerOptions&& aOptions = {}) {
+  return AddMarkerToBuffer(aBuffer, aName, aCategory, std::move(aOptions),
+                           mozilla::baseprofiler::markers::NoPayload{});
+}
+#endif
+
+[[nodiscard]] inline bool profiler_thread_is_being_profiled_for_markers() {
+  return profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers);
+}
+
+[[nodiscard]] inline bool profiler_thread_is_being_profiled_for_markers(
+    const ProfilerThreadId& aThreadId) {
+  return profiler_thread_is_being_profiled(aThreadId,
+                                           ThreadProfilingFeatures::Markers);
+}
+
+// Records a marker in the Gecko Profiler buffer.
+// - aName: Main name of this marker.
+// - aCategory: Category for this marker.
+// - aOptions: Optional settings (such as timing, inner window id,
+//   backtrace...), see `MarkerOptions` for details.
+// - aMarkerType: Empty object that specifies the type of marker.
+// - aPayloadArguments: Arguments expected by this marker type's
+//   `StreamJSONMarkerData` function.
+template <typename MarkerType, typename... PayloadArguments>
+mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View& aName,
+    const mozilla::MarkerCategory& aCategory, mozilla::MarkerOptions&& aOptions,
+    MarkerType aMarkerType, const PayloadArguments&... aPayloadArguments) {
+#ifdef MOZ_GECKO_PROFILER
+  if (!profiler_thread_is_being_profiled_for_markers(
+          aOptions.ThreadId().ThreadId())) {
+    return {};
+  }
+  AUTO_PROFILER_LABEL("profiler_add_marker", PROFILER);
+  return ::AddMarkerToBuffer(profiler_get_core_buffer(), aName, aCategory,
+                             std::move(aOptions), aMarkerType,
+                             aPayloadArguments...);
+#else
+  return {};
+#endif
+}
+
+// Add a marker (without payload) to the Gecko Profiler buffer.
+inline mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View& aName,
+    const mozilla::MarkerCategory& aCategory,
+    mozilla::MarkerOptions&& aOptions = {}) {
+  return profiler_add_marker(aName, aCategory, std::move(aOptions),
+                             mozilla::baseprofiler::markers::NoPayload{});
+}
+
+// Same as `profiler_add_marker()` (without payload). This macro is safe to use
+// even if MOZ_GECKO_PROFILER is not #defined.
+#define PROFILER_MARKER_UNTYPED(markerName, categoryName, ...)                 \
+  do {                                                                         \
+    AUTO_PROFILER_STATS(PROFILER_MARKER_UNTYPED);                              \
+    ::profiler_add_marker(markerName, ::geckoprofiler::category::categoryName, \
+                          ##__VA_ARGS__);                                      \
+  } while (false)
+
+// Same as `profiler_add_marker()` (with payload). This macro is safe to use
+// even if MOZ_GECKO_PROFILER is not #defined.
+#define PROFILER_MARKER(markerName, categoryName, options, MarkerType, ...)    \
+  do {                                                                         \
+    AUTO_PROFILER_STATS(PROFILER_MARKER_with_##MarkerType);                    \
+    ::profiler_add_marker(markerName, ::geckoprofiler::category::categoryName, \
+                          options, ::geckoprofiler::markers::MarkerType{},     \
+                          ##__VA_ARGS__);                                      \
+  } while (false)
+
+namespace geckoprofiler::markers {
+// Most common marker types. Others are in ProfilerMarkerTypes.h.
+using TextMarker = ::mozilla::baseprofiler::markers::TextMarker;
+using Tracing = mozilla::baseprofiler::markers::Tracing;
+}  // namespace geckoprofiler::markers
+
+// Add a text marker. This macro is safe to use even if MOZ_GECKO_PROFILER is
+// not #defined.
+#define PROFILER_MARKER_TEXT(markerName, categoryName, options, text)          \
+  do {                                                                         \
+    AUTO_PROFILER_STATS(PROFILER_MARKER_TEXT);                                 \
+    ::profiler_add_marker(markerName, ::geckoprofiler::category::categoryName, \
+                          options, ::geckoprofiler::markers::TextMarker{},     \
+                          text);                                               \
+  } while (false)
+
+// RAII object that adds a PROFILER_MARKER_TEXT on destruction, timed from
+// construction (or from a start/instant time already set in the options).
+class MOZ_RAII AutoProfilerTextMarker {
+ public:
+  AutoProfilerTextMarker(const char* aMarkerName,
+                         const mozilla::MarkerCategory& aCategory,
+                         mozilla::MarkerOptions&& aOptions,
+                         const nsACString& aText)
+      : mMarkerName(aMarkerName),
+        mCategory(aCategory),
+        mOptions(std::move(aOptions)),
+        mText(aText) {
+    MOZ_ASSERT(mOptions.Timing().EndTime().IsNull(),
+               "AutoProfilerTextMarker options shouldn't have an end time");
+    if (profiler_is_active_and_unpaused() &&
+        mOptions.Timing().StartTime().IsNull()) {
+      mOptions.Set(mozilla::MarkerTiming::InstantNow());
+    }
+  }
+
+  ~AutoProfilerTextMarker() {
+    if (!profiler_is_active_and_unpaused()) {
+      return;
+    }
+    AUTO_PROFILER_LABEL("TextMarker", PROFILER);
+    mOptions.TimingRef().SetIntervalEnd();
+    AUTO_PROFILER_STATS(AUTO_PROFILER_MARKER_TEXT);
+    profiler_add_marker(
+        mozilla::ProfilerString8View::WrapNullTerminatedString(mMarkerName),
+        mCategory, std::move(mOptions), geckoprofiler::markers::TextMarker{},
+        mText);
+  }
+
+ protected:
+  const char* mMarkerName;
+  mozilla::MarkerCategory mCategory;
+  mozilla::MarkerOptions mOptions;
+  nsCString mText;
+};
+
+// Creates an AutoProfilerTextMarker RAII object; `categoryName` is looked up in
+// ::geckoprofiler::category, like PROFILER_MARKER_TEXT does. This macro is safe
+// to use even if MOZ_GECKO_PROFILER is not #defined.
+#define AUTO_PROFILER_MARKER_TEXT(markerName, categoryName, options, text) \
+  AutoProfilerTextMarker PROFILER_RAII(                                    \
+      markerName, ::geckoprofiler::category::categoryName, options, text)
+
+class MOZ_RAII AutoProfilerTracing {
+ public:
+  AutoProfilerTracing(const char* aCategoryString, const char* aMarkerName,
+                      mozilla::MarkerCategory aCategoryPair,
+                      const mozilla::Maybe<uint64_t>& aInnerWindowID)
+      : mCategoryString(aCategoryString),
+        mMarkerName(aMarkerName),
+        mCategoryPair(aCategoryPair),
+        mInnerWindowID(aInnerWindowID) {
+    using mozilla::ProfilerString8View;
+    profiler_add_marker(
+        ProfilerString8View::WrapNullTerminatedString(mMarkerName),
+        mCategoryPair,
+        {mozilla::MarkerTiming::IntervalStart(),
+         mozilla::MarkerInnerWindowId(mInnerWindowID)},
+        geckoprofiler::markers::Tracing{},
+        ProfilerString8View::WrapNullTerminatedString(mCategoryString));
+  }
+
+  AutoProfilerTracing(
+      const char* aCategoryString, const char* aMarkerName,
+      mozilla::MarkerCategory aCategoryPair,
+      mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aBacktrace,
+      const mozilla::Maybe<uint64_t>& aInnerWindowID)
+      : mCategoryString(aCategoryString),
+        mMarkerName(aMarkerName),
+        mCategoryPair(aCategoryPair),
+        mInnerWindowID(aInnerWindowID) {
+    using mozilla::ProfilerString8View;
+    profiler_add_marker(
+        ProfilerString8View::WrapNullTerminatedString(mMarkerName),
+        mCategoryPair,
+        {mozilla::MarkerTiming::IntervalStart(),
+         mozilla::MarkerInnerWindowId(mInnerWindowID),
+         mozilla::MarkerStack::TakeBacktrace(std::move(aBacktrace))},
+        geckoprofiler::markers::Tracing{},
+        ProfilerString8View::WrapNullTerminatedString(mCategoryString));
+  }
+
+  ~AutoProfilerTracing() {
+    using mozilla::ProfilerString8View;
+    profiler_add_marker(
+        ProfilerString8View::WrapNullTerminatedString(mMarkerName),
+        mCategoryPair,
+        {mozilla::MarkerTiming::IntervalEnd(),
+         mozilla::MarkerInnerWindowId(mInnerWindowID)},
+        geckoprofiler::markers::Tracing{},
+        ProfilerString8View::WrapNullTerminatedString(mCategoryString));
+  }
+
+ protected:
+  const char* mCategoryString;
+  const char* mMarkerName;
+  const mozilla::MarkerCategory mCategoryPair;
+  const mozilla::Maybe<uint64_t> mInnerWindowID;
+};
+
+// Adds a START/END pair of tracing markers.
+#define AUTO_PROFILER_TRACING_MARKER(categoryString, markerName, categoryPair) \
+  AutoProfilerTracing PROFILER_RAII(categoryString, markerName,                \
+                                    geckoprofiler::category::categoryPair,     \
+                                    mozilla::Nothing())
+#define AUTO_PROFILER_TRACING_MARKER_INNERWINDOWID(                        \
+    categoryString, markerName, categoryPair, innerWindowId)               \
+  AutoProfilerTracing PROFILER_RAII(categoryString, markerName,            \
+                                    geckoprofiler::category::categoryPair, \
+                                    mozilla::Some(innerWindowId))
+#define AUTO_PROFILER_TRACING_MARKER_DOCSHELL(categoryString, markerName, \
+                                              categoryPair, docShell)     \
+  AutoProfilerTracing PROFILER_RAII(                                      \
+      categoryString, markerName, geckoprofiler::category::categoryPair,  \
+      geckoprofiler::markers::detail::                                    \
+          profiler_get_inner_window_id_from_docshell(docShell))
+
+#ifdef MOZ_GECKO_PROFILER
+extern template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer(
+    mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&,
+    const mozilla::MarkerCategory&, mozilla::MarkerOptions&&,
+    mozilla::baseprofiler::markers::NoPayload);
+
+extern template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer(
+    mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&,
+    const mozilla::MarkerCategory&, mozilla::MarkerOptions&&,
+    mozilla::baseprofiler::markers::TextMarker, const std::string&);
+
+extern template mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&,
+    mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker,
+    const std::string&);
+
+extern template mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&,
+    mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker,
+    const nsCString&);
+
+extern template mozilla::ProfileBufferBlockIndex profiler_add_marker(
+    const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&,
+    mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::Tracing,
+    const mozilla::ProfilerString8View&);
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // ProfilerMarkers_h
diff --git a/tools/profiler/public/ProfilerMarkersDetail.h b/tools/profiler/public/ProfilerMarkersDetail.h
new file mode 100644
index 0000000000..2308a14bb2
--- /dev/null
+++ b/tools/profiler/public/ProfilerMarkersDetail.h
@@ -0,0 +1,31 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerMarkersDetail_h
+#define ProfilerMarkersDetail_h
+
+#ifndef ProfilerMarkers_h
+#  error "This header should only be #included by ProfilerMarkers.h"
+#endif
+
+#include "mozilla/ProfilerMarkersPrerequisites.h"
+
+#ifdef MOZ_GECKO_PROFILER
+
+//                        ~~ HERE BE DRAGONS ~~
+//
+// Everything below is an internal implementation detail; you shouldn't need to
+// look at it unless you are working on the profiler code.
+
+// Header that specializes the (de)serializers for xpcom types.
+#  include "mozilla/ProfileBufferEntrySerializationGeckoExtensions.h"
+
+// Implemented in platform.cpp
+mozilla::ProfileChunkedBuffer& profiler_get_core_buffer();
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // ProfilerMarkersDetail_h
diff --git a/tools/profiler/public/ProfilerMarkersPrerequisites.h b/tools/profiler/public/ProfilerMarkersPrerequisites.h
new file mode 100644
index 0000000000..0f10f7efe2
--- /dev/null
+++ b/tools/profiler/public/ProfilerMarkersPrerequisites.h
@@ -0,0 +1,31 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains basic definitions required to create marker types, and
+// to add markers to the profiler buffers.
+//
+// In most cases, #include "mozilla/ProfilerMarkers.h" instead, or
+// #include "mozilla/ProfilerMarkerTypes.h" for common marker types.
+
+#ifndef ProfilerMarkersPrerequisites_h
+#define ProfilerMarkersPrerequisites_h
+
+#include "mozilla/BaseProfilerMarkersPrerequisites.h"
+#include "mozilla/ProfilerThreadState.h"
+
+#ifdef MOZ_GECKO_PROFILER
+
+namespace geckoprofiler::markers {
+
+// Default marker payload type, with no extra information, not even a marker
+// type or payload. This is intended for label-only markers.
+using NoPayload = ::mozilla::baseprofiler::markers::NoPayload;
+
+}  // namespace geckoprofiler::markers
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // ProfilerMarkersPrerequisites_h
diff --git a/tools/profiler/public/ProfilerParent.h b/tools/profiler/public/ProfilerParent.h
new file mode 100644
index 0000000000..8bd5c71721
--- /dev/null
+++ b/tools/profiler/public/ProfilerParent.h
@@ -0,0 +1,119 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerParent_h
+#define ProfilerParent_h
+
+#include "mozilla/PProfilerParent.h"
+#include "mozilla/RefPtr.h"
+
+class nsIProfilerStartParams;
+
+namespace mozilla {
+
+class ProfileBufferGlobalController;
+class ProfilerParentTracker;
+
+// This is the main process side of the PProfiler protocol.
+// ProfilerParent instances only exist on the main thread of the main process.
+// The other side (ProfilerChild) lives on a background thread in the other
+// process.
+// The creation of PProfiler actors is initiated from the main process, after
+// the other process has been launched.
+// ProfilerParent instances are destroyed once the message channel closes,
+// which can be triggered by either process, depending on which one shuts down
+// first.
+// All ProfilerParent instances are registered with a manager class called
+// ProfilerParentTracker, which has the list of living ProfilerParent instances
+// and handles shutdown.
+class ProfilerParent final : public PProfilerParent {
+ public:
+  NS_INLINE_DECL_REFCOUNTING(ProfilerParent, final)
+
+  static mozilla::ipc::Endpoint<PProfilerChild> CreateForProcess(
+      base::ProcessId aOtherPid);
+
+#ifdef MOZ_GECKO_PROFILER
+  using SingleProcessProfilePromise =
+      MozPromise<IPCProfileAndAdditionalInformation, ResponseRejectReason,
+                 true>;
+
+  struct SingleProcessProfilePromiseAndChildPid {
+    RefPtr<SingleProcessProfilePromise> profilePromise;
+    base::ProcessId childPid;
+  };
+
+  using SingleProcessProgressPromise =
+      MozPromise<GatherProfileProgress, ResponseRejectReason, true>;
+
+  // The following static methods can be called on any thread, but they are
+  // no-ops on anything other than the main thread.
+  // If called on the main thread, the call will be broadcast to all
+  // registered processes (all processes for which we have a ProfilerParent
+  // object).
+  // At the moment, the main process always calls these methods on the main
+  // thread, and that's the only process in which we need to forward these
+  // calls to other processes. The other processes will call these methods on
+  // the ProfilerChild background thread, but those processes don't need to
+  // forward these calls any further.
+
+  // Returns the profiles to expect, as promises and child pids.
+  static nsTArray<SingleProcessProfilePromiseAndChildPid> GatherProfiles();
+
+  // Send a request to get the GatherProfiles() progress update from one child
+  // process, and return a promise to be resolved with that progress.
+  // The promise RefPtr may be null if the child process is unknown.
+  // Progress may be invalid if the request arrived after the child process
+  // had already responded to the main GatherProfile() IPC, or if something
+  // went very wrong in that process.
+  static RefPtr<SingleProcessProgressPromise> RequestGatherProfileProgress(
+      base::ProcessId aChildPid);
+
+  // This will start the profiler in all child processes. The returned promise
+  // will be resolved when all children have completed their operation
+  // (successfully or not).
+  [[nodiscard]] static RefPtr<GenericPromise> ProfilerStarted(
+      nsIProfilerStartParams* aParams);
+  static void ProfilerWillStopIfStarted();
+  [[nodiscard]] static RefPtr<GenericPromise> ProfilerStopped();
+  [[nodiscard]] static RefPtr<GenericPromise> ProfilerPaused();
+  [[nodiscard]] static RefPtr<GenericPromise> ProfilerResumed();
+  [[nodiscard]] static RefPtr<GenericPromise> ProfilerPausedSampling();
+  [[nodiscard]] static RefPtr<GenericPromise> ProfilerResumedSampling();
+  static void ClearAllPages();
+
+  [[nodiscard]] static RefPtr<GenericPromise> WaitOnePeriodicSampling();
+
+  // Create a "Final" update that the Child can return to its Parent.
+  static ProfileBufferChunkManagerUpdate MakeFinalUpdate();
+
+  // True if the ProfilerParent holds a lock on this thread.
+  static bool IsLockedOnCurrentThread();
+
+ private:
+  friend class ProfileBufferGlobalController;
+  friend class ProfilerParentTracker;
+
+  explicit ProfilerParent(base::ProcessId aChildPid);
+
+  void Init();
+  void ActorDestroy(ActorDestroyReason aActorDestroyReason) override;
+
+  void RequestChunkManagerUpdate();
+
+  base::ProcessId mChildPid;
+  nsTArray<MozPromiseHolder<SingleProcessProfilePromise>>
+      mPendingRequestedProfiles;
+  bool mDestroyed;
+#endif  // MOZ_GECKO_PROFILER
+
+ private:
+  virtual ~ProfilerParent();
+};
+
+}  // namespace mozilla
+
+#endif  // ProfilerParent_h
diff --git a/tools/profiler/public/ProfilerRunnable.h b/tools/profiler/public/ProfilerRunnable.h
new file mode 100644
index 0000000000..b3b4e64043
--- /dev/null
+++ b/tools/profiler/public/ProfilerRunnable.h
@@ -0,0 +1,68 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerRunnable_h
+#define ProfilerRunnable_h
+
+#include "GeckoProfiler.h"
+#include "nsIThreadPool.h"
+
+#if !defined(MOZ_GECKO_PROFILER) || !defined(MOZ_COLLECTING_RUNNABLE_TELEMETRY)
+#  define AUTO_PROFILE_FOLLOWING_RUNNABLE(runnable)
+#else
+#  define AUTO_PROFILE_FOLLOWING_RUNNABLE(runnable)                  \
+    mozilla::Maybe<mozilla::AutoProfileRunnable> raiiRunnableMarker; \
+    if (profiler_thread_is_being_profiled_for_markers()) {           \
+      raiiRunnableMarker.emplace(runnable);                          \
+    }
+
+namespace mozilla {
+
+// RAII helper that adds a "Runnable" text marker spanning its own lifetime and
+// carrying the runnable's name. No marker is added if the name is empty.
+class MOZ_RAII AutoProfileRunnable {
+ public:
+  explicit AutoProfileRunnable(Runnable* aRunnable)
+      : mStartTime(TimeStamp::Now()) {
+    aRunnable->GetName(mName);
+  }
+  explicit AutoProfileRunnable(nsIRunnable* aRunnable)
+      : mStartTime(TimeStamp::Now()) {
+    // nsThreadPool::Run has its own call to AUTO_PROFILE_FOLLOWING_RUNNABLE;
+    // leave mName empty for thread pools to avoid nesting runnable markers.
+    if (nsCOMPtr<nsIThreadPool> threadPool = do_QueryInterface(aRunnable)) {
+      return;
+    }
+    if (nsCOMPtr<nsINamed> named = do_QueryInterface(aRunnable)) {
+      named->GetName(mName);
+    }
+  }
+  // The name is only copied, so it is taken by reference-to-const; this also
+  // accepts const and temporary strings.
+  explicit AutoProfileRunnable(const nsACString& aName)
+      : mStartTime(TimeStamp::Now()), mName(aName) {}
+
+  ~AutoProfileRunnable() {
+    if (mName.IsEmpty()) {
+      return;  // Unnamed runnable, or a thread pool (see constructor).
+    }
+    AUTO_PROFILER_LABEL("AutoProfileRunnable", PROFILER);
+    AUTO_PROFILER_STATS(AUTO_PROFILE_RUNNABLE);
+    profiler_add_marker("Runnable", ::mozilla::baseprofiler::category::OTHER,
+                        MarkerTiming::IntervalUntilNowFrom(mStartTime),
+                        geckoprofiler::markers::TextMarker{}, mName);
+  }
+
+ protected:
+  TimeStamp mStartTime;  // Construction time, i.e., start of marker interval.
+  nsAutoCString mName;   // Empty means "do not record a marker".
+};
+
+}  // namespace mozilla
+
+#endif
+
+#endif  // ProfilerRunnable_h
diff --git a/tools/profiler/public/ProfilerRustBindings.h b/tools/profiler/public/ProfilerRustBindings.h
new file mode 100644
index 0000000000..bf290838a1
--- /dev/null
+++ b/tools/profiler/public/ProfilerRustBindings.h
@@ -0,0 +1,12 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#ifndef ProfilerRustBindings_h
+#define ProfilerRustBindings_h
+
+#include "mozilla/profiler_ffi_generated.h"
+
+// Add any non-generated support code here
+
+#endif  // ProfilerRustBindings_h
diff --git a/tools/profiler/public/ProfilerState.h b/tools/profiler/public/ProfilerState.h
new file mode 100644
index 0000000000..7a9f3f5c73
--- /dev/null
+++ b/tools/profiler/public/ProfilerState.h
@@ -0,0 +1,399 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains most functions that give information about the Profiler:
+// Whether it is active or not, paused, and the selected features.
+// It is safe to include unconditionally, but uses of structs and functions must
+// be guarded by `#ifdef MOZ_GECKO_PROFILER`.
+
+#ifndef ProfilerState_h
+#define ProfilerState_h
+
+#include <mozilla/DefineEnum.h>
+#include <mozilla/EnumSet.h>
+#include "mozilla/ProfilerUtils.h"
+
+#include <functional>
+
+//---------------------------------------------------------------------------
+// Profiler features
+//---------------------------------------------------------------------------
+
+#if defined(__APPLE__) && defined(__aarch64__)
+#  define POWER_HELP "Sample per process power use"
+#elif defined(__APPLE__) && defined(__x86_64__)
+#  define POWER_HELP \
+    "Record the power used by the entire system with each sample."
+#elif defined(__linux__) && defined(__x86_64__)
+#  define POWER_HELP                                                \
+    "Record the power used by the entire system with each sample. " \
+    "Only available with Intel CPUs and requires setting "          \
+    "the sysctl kernel.perf_event_paranoid to 0."
+
+#elif defined(_MSC_VER)
+#  define POWER_HELP                                                       \
+    "Record the value of every energy meter available on the system with " \
+    "each sample. Only available on Windows 11 with Intel CPUs."
+#else
+#  define POWER_HELP "Not supported on this platform."
+#endif
+
+// Higher-order macro containing all the feature info in one place. Define
+// |MACRO| appropriately to extract the relevant parts. Note that the number
+// values are used internally only and so can be changed without consequence.
+// Any changes to this list should also be applied to the feature list in
+// toolkit/components/extensions/schemas/geckoProfiler.json.
+// *** Synchronize with lists in BaseProfilerState.h and geckoProfiler.json ***
+#define PROFILER_FOR_EACH_FEATURE(MACRO)                                   \
+  MACRO(0, "java", Java, "Profile Java code, Android only")                \
+                                                                           \
+  MACRO(1, "js", JS,                                                       \
+        "Get the JS engine to expose the JS stack to the profiler")        \
+                                                                           \
+  MACRO(2, "mainthreadio", MainThreadIO, "Add main thread file I/O")       \
+                                                                           \
+  MACRO(3, "fileio", FileIO,                                               \
+        "Add file I/O from all profiled threads, implies mainthreadio")    \
+                                                                           \
+  MACRO(4, "fileioall", FileIOAll,                                         \
+        "Add file I/O from all threads, implies fileio")                   \
+                                                                           \
+  MACRO(5, "nomarkerstacks", NoMarkerStacks,                               \
+        "Markers do not capture stacks, to reduce overhead")               \
+                                                                           \
+  MACRO(6, "screenshots", Screenshots,                                     \
+        "Take a snapshot of the window on every composition")              \
+                                                                           \
+  MACRO(7, "seqstyle", SequentialStyle,                                    \
+        "Disable parallel traversal in styling")                           \
+                                                                           \
+  MACRO(8, "stackwalk", StackWalk,                                         \
+        "Walk the C++ stack, not available on all platforms")              \
+                                                                           \
+  MACRO(9, "jsallocations", JSAllocations,                                 \
+        "Have the JavaScript engine track allocations")                    \
+                                                                           \
+  MACRO(10, "nostacksampling", NoStackSampling,                            \
+        "Disable all stack sampling: Cancels \"js\", \"stackwalk\" and "   \
+        "labels")                                                          \
+                                                                           \
+  MACRO(11, "nativeallocations", NativeAllocations,                        \
+        "Collect the stacks from a smaller subset of all native "          \
+        "allocations, biasing towards collecting larger allocations")      \
+                                                                           \
+  MACRO(12, "ipcmessages", IPCMessages,                                    \
+        "Have the IPC layer track cross-process messages")                 \
+                                                                           \
+  MACRO(13, "audiocallbacktracing", AudioCallbackTracing,                  \
+        "Audio callback tracing")                                          \
+                                                                           \
+  MACRO(14, "cpu", CPUUtilization, "CPU utilization")                      \
+                                                                           \
+  MACRO(15, "notimerresolutionchange", NoTimerResolutionChange,            \
+        "Do not adjust the timer resolution for sampling, so that other "  \
+        "Firefox timers do not get affected")                              \
+                                                                           \
+  MACRO(16, "cpuallthreads", CPUAllThreads,                                \
+        "Sample the CPU utilization of all registered threads")            \
+                                                                           \
+  MACRO(17, "samplingallthreads", SamplingAllThreads,                      \
+        "Sample the stacks of all registered threads")                     \
+                                                                           \
+  MACRO(18, "markersallthreads", MarkersAllThreads,                        \
+        "Record markers from all registered threads")                      \
+                                                                           \
+  MACRO(19, "unregisteredthreads", UnregisteredThreads,                    \
+        "Discover and profile unregistered threads -- beware: expensive!") \
+                                                                           \
+  MACRO(20, "processcpu", ProcessCPU,                                      \
+        "Sample the CPU utilization of each process")                      \
+                                                                           \
+  MACRO(21, "power", Power, POWER_HELP)
+// *** Synchronize with lists in BaseProfilerState.h and geckoProfiler.json ***
+
+struct ProfilerFeature {
+  // Per feature: a one-bit mask `Name`, plus Has/Set/Clear helpers for it.
+#define PROFILER_FEATURE_MEMBERS(n_, str_, Name_, desc_)               \
+  static constexpr uint32_t Name_ = 1u << (n_);                        \
+  [[nodiscard]] static constexpr bool Has##Name_(uint32_t aFeatures) { \
+    return (aFeatures & Name_) != 0u;                                  \
+  }                                                                    \
+  static constexpr void Set##Name_(uint32_t& aFeatures) {              \
+    aFeatures = aFeatures | Name_;                                     \
+  }                                                                    \
+  static constexpr void Clear##Name_(uint32_t& aFeatures) {            \
+    aFeatures = aFeatures & ~Name_;                                    \
+  }
+
+  PROFILER_FOR_EACH_FEATURE(PROFILER_FEATURE_MEMBERS)
+
+#undef PROFILER_FEATURE_MEMBERS
+};
+
+// clang-format off
+MOZ_DEFINE_ENUM_CLASS(ProfilingState,(
+  // A callback will be invoked ...
+  AlreadyActive,     // if the profiler is active when the callback is added.
+  RemovingCallback,  // when the callback is removed.
+  Started,           // after the profiler has started.
+  Pausing,           // before the profiler is paused.
+  Resumed,           // after the profiler has resumed.
+  GeneratingProfile, // before a profile is created.
+  Stopping,          // before the profiler stops (unless restarting afterward).
+  ShuttingDown       // before the profiler is shut down.
+));
+// clang-format on
+
+[[nodiscard]] inline static const char* ProfilingStateToString(
+    ProfilingState aProfilingState) {
+  using State = ProfilingState;
+  switch (aProfilingState) {
+    case State::AlreadyActive:
+      return "Profiler already active";
+    case State::RemovingCallback:
+      return "Callback being removed";
+    case State::Started:
+      return "Profiler started";
+    case State::Pausing:
+      return "Profiler pausing";
+    case State::Resumed:
+      return "Profiler resumed";
+    case State::GeneratingProfile:
+      return "Generating profile";
+    case State::Stopping:
+      return "Profiler stopping";
+    case State::ShuttingDown:
+      return "Profiler shutting down";
+  }  // Every enumerator returns above; only invalid values get past here.
+  MOZ_ASSERT_UNREACHABLE("Unexpected ProfilingState enum value");
+  return "?";
+}
+
+using ProfilingStateSet = mozilla::EnumSet<ProfilingState>;
+
+[[nodiscard]] constexpr ProfilingStateSet AllProfilingStates() {
+  using Raw = std::underlying_type_t<ProfilingState>;
+  const Raw last = static_cast<Raw>(kHighestProfilingState);
+  ProfilingStateSet allStates;
+  for (Raw raw = 0; raw <= last; ++raw) {
+    allStates += static_cast<ProfilingState>(raw);
+  }
+  return allStates;  // Holds states 0 through kHighestProfilingState.
+}
+
+// Type of callbacks to be invoked at certain state changes.
+// It must NOT call profiler_add/remove_state_change_callback().
+using ProfilingStateChangeCallback = std::function<void(ProfilingState)>;
+
+#ifndef MOZ_GECKO_PROFILER
+
+[[nodiscard]] inline bool profiler_is_active() { return false; }
+[[nodiscard]] inline bool profiler_is_active_and_unpaused() { return false; }
+[[nodiscard]] inline bool profiler_feature_active(uint32_t aFeature) {
+  return false;
+}
+[[nodiscard]] inline bool profiler_is_locked_on_current_thread() {
+  return false;
+}
+inline void profiler_add_state_change_callback(
+    ProfilingStateSet aProfilingStateSet,
+    ProfilingStateChangeCallback&& aCallback, uintptr_t aUniqueIdentifier = 0) {
+}
+inline void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) {
+}
+
+#else  // !MOZ_GECKO_PROFILER
+
+#  include "mozilla/Atomics.h"
+#  include "mozilla/Maybe.h"
+
+#  include <stdint.h>
+
+namespace mozilla::profiler::detail {
+
+// RacyFeatures is only defined in this header file so that its methods can
+// be inlined into profiler_is_active(). Please do not use anything from the
+// detail namespace outside the profiler.
+
+// Within the profiler's code, the preferred way to check profiler activeness
+// and features is via ActivePS(). However, that requires locking gPSMutex.
+// There are some hot operations where absolute precision isn't required, so we
+// duplicate the activeness/feature state in a lock-free manner in this class.
+class RacyFeatures {
+ public:
+  // Replaces the whole word, so earlier pause bits are not carried over.
+  static void SetActive(uint32_t aFeatures) {
+    sActiveAndFeatures = Active | aFeatures;
+  }
+
+  static void SetInactive() { sActiveAndFeatures = 0; }
+
+  static void SetPaused() { sActiveAndFeatures |= Paused; }
+  static void SetUnpaused() { sActiveAndFeatures &= ~Paused; }
+
+  static void SetSamplingPaused() { sActiveAndFeatures |= SamplingPaused; }
+  static void SetSamplingUnpaused() { sActiveAndFeatures &= ~SamplingPaused; }
+
+  [[nodiscard]] static mozilla::Maybe<uint32_t> FeaturesIfActive() {
+    if (uint32_t af = sActiveAndFeatures; af & Active) {
+      // Active, remove the Active and both pause bits to get all features.
+      return Some(af & ~(Active | Paused | SamplingPaused));
+    }
+    return Nothing();
+  }
+
+  [[nodiscard]] static mozilla::Maybe<uint32_t> FeaturesIfActiveAndUnpaused() {
+    if (uint32_t af = sActiveAndFeatures; (af & (Active | Paused)) == Active) {
+      // Active but not fully paused, remove the Active and sampling-paused bits
+      // to get all features.
+      return Some(af & ~(Active | SamplingPaused));
+    }
+    return Nothing();
+  }
+
+  // This implementation must be kept in sync with `gecko_profiler::is_active`
+  // in the Profiler Rust API.
+  [[nodiscard]] static bool IsActive() {
+    return uint32_t(sActiveAndFeatures) & Active;
+  }
+
+  [[nodiscard]] static bool IsActiveWithFeature(uint32_t aFeature) {
+    uint32_t af = sActiveAndFeatures;  // copy it first
+    return (af & Active) && (af & aFeature);
+  }
+
+  [[nodiscard]] static bool IsActiveWithoutFeature(uint32_t aFeature) {
+    uint32_t af = sActiveAndFeatures;  // copy it first
+    return (af & Active) && !(af & aFeature);
+  }
+
+  // True if profiler is active, and not fully paused.
+  // Note that periodic sampling *could* be paused!
+  // This implementation must be kept in sync with
+  // `gecko_profiler::can_accept_markers` in the Profiler Rust API.
+  [[nodiscard]] static bool IsActiveAndUnpaused() {
+    uint32_t af = sActiveAndFeatures;  // copy it first
+    return (af & Active) && !(af & Paused);
+  }
+
+  // True if profiler is active, and sampling is not paused (through generic
+  // `SetPaused()` or specific `SetSamplingPaused()`).
+  [[nodiscard]] static bool IsActiveAndSamplingUnpaused() {
+    uint32_t af = sActiveAndFeatures;  // copy it first
+    return (af & Active) && !(af & (Paused | SamplingPaused));
+  }
+
+ private:
+  static constexpr uint32_t Active = 1u << 31;
+  static constexpr uint32_t Paused = 1u << 30;
+  static constexpr uint32_t SamplingPaused = 1u << 29;
+
+// Ensure no feature bit overlaps with Active, Paused, or SamplingPaused.
+#  define NO_OVERLAP(n_, str_, Name_, desc_)                                \
+    static_assert(                                                          \
+        (ProfilerFeature::Name_ & (Active | Paused | SamplingPaused)) == 0, \
+        "bad feature value");
+
+  PROFILER_FOR_EACH_FEATURE(NO_OVERLAP);
+
+#  undef NO_OVERLAP
+
+  // We combine the active bit with the feature bits so they can be read or
+  // written in a single atomic operation. Accesses to this atomic are not
+  // recorded by web replay as they may occur at non-deterministic points.
+  static mozilla::Atomic<uint32_t, mozilla::MemoryOrdering::Relaxed>
+      sActiveAndFeatures;
+};
+
+}  // namespace mozilla::profiler::detail
+
+//---------------------------------------------------------------------------
+// Get information from the profiler
+//---------------------------------------------------------------------------
+
+// Is the profiler active? Beware that the answer may be stale as soon as it is
+// returned, e.g., the profiler might be active now but another thread could
+// call profiler_stop() right afterward. A common and reasonable usage pattern
+// is:
+//
+//   if (profiler_is_active()) {
+//     ExpensiveData expensiveData = CreateExpensiveData();
+//     PROFILER_OPERATION(expensiveData);
+//   }
+//
+// where PROFILER_OPERATION is a no-op if the profiler is inactive. Here the
+// profiler_is_active() check is only an optimization: It avoids calling
+// CreateExpensiveData() needlessly in most cases, but the expensive data will
+// still be created and then go unused if another thread stops the profiler
+// between the CreateExpensiveData() and PROFILER_OPERATION calls.
+[[nodiscard]] inline bool profiler_is_active() {
+  using mozilla::profiler::detail::RacyFeatures;
+  return RacyFeatures::IsActive();
+}
+
+// Same as profiler_is_active(), but also checks if the profiler is not paused.
+[[nodiscard]] inline bool profiler_is_active_and_unpaused() {
+  return mozilla::profiler::detail::RacyFeatures::IsActiveAndUnpaused();
+}
+
+// Is the profiler active and paused? Returns false if the profiler is inactive.
+[[nodiscard]] bool profiler_is_paused();
+
+// Is the profiler active and sampling is paused? Returns false if the profiler
+// is inactive.
+[[nodiscard]] bool profiler_is_sampling_paused();
+
+// Get all the features supported by the profiler that are accepted by
+// profiler_start(). The result is the same whether the profiler is active or
+// not.
+[[nodiscard]] uint32_t profiler_get_available_features();
+
+// Returns the full feature set if the profiler is active. Like the result of
+// profiler_is_active(), this can become out-of-date immediately.
+[[nodiscard]] inline mozilla::Maybe<uint32_t> profiler_features_if_active() {
+  using mozilla::profiler::detail::RacyFeatures;
+  return RacyFeatures::FeaturesIfActive();
+}
+
+// Returns the full feature set if the profiler is active and unpaused. Like the
+// result of profiler_is_active(), this can become out-of-date immediately.
+[[nodiscard]] inline mozilla::Maybe<uint32_t>
+profiler_features_if_active_and_unpaused() {
+  using mozilla::profiler::detail::RacyFeatures;
+  return RacyFeatures::FeaturesIfActiveAndUnpaused();
+}
+
+// Check if a profiler feature (specified via the ProfilerFeature type) is
+// active. Returns false if the profiler is inactive. Note: the return value
+// can become immediately out-of-date, much like the return value of
+// profiler_is_active().
+[[nodiscard]] bool profiler_feature_active(uint32_t aFeature);
+
+// Check if the profiler is active without a feature (specified via the
+// ProfilerFeature type). Note: the return value can become immediately
+// out-of-date, much like the return value of profiler_is_active().
+[[nodiscard]] bool profiler_active_without_feature(uint32_t aFeature);
+
+// Returns true if any of the profiler mutexes are currently locked *on the
+// current thread*. This may be used by re-entrant code that may call profiler
+// functions while the same or a different profiler mutex is locked, which could
+// deadlock.
+[[nodiscard]] bool profiler_is_locked_on_current_thread();
+
+// Install a callback to be invoked at any of the given profiling state changes.
+// An optional non-zero identifier may be given, to allow later removal of the
+// callback; the caller is responsible for making sure it's really unique (e.g.,
+// by using a pointer to an object it owns).
+void profiler_add_state_change_callback(
+    ProfilingStateSet aProfilingStateSet,
+    ProfilingStateChangeCallback&& aCallback, uintptr_t aUniqueIdentifier = 0);
+
+// Remove the callback with the given non-zero identifier.
+void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier);
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // ProfilerState_h
diff --git a/tools/profiler/public/ProfilerThreadPlatformData.h b/tools/profiler/public/ProfilerThreadPlatformData.h
new file mode 100644
index 0000000000..c243a8ee02
--- /dev/null
+++ b/tools/profiler/public/ProfilerThreadPlatformData.h
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerThreadPlatformData_h
+#define ProfilerThreadPlatformData_h
+
+#include "mozilla/ProfilerUtils.h"
+
+#if defined(__APPLE__)
+#  include <mach/mach_types.h>
+#elif defined(__linux__) || defined(__ANDROID__) || defined(__FreeBSD__)
+#  include "mozilla/Maybe.h"
+#  include <time.h>
+#endif
+
+namespace mozilla::profiler {
+
+class PlatformData {
+#if (defined(_MSC_VER) || defined(__MINGW32__)) && defined(MOZ_GECKO_PROFILER)
+ public:
+  explicit PlatformData(ProfilerThreadId aThreadId);
+  ~PlatformData();
+
+  // Faking win32's HANDLE, because #including "windows.h" here causes trouble
+  // (e.g., it #defines `Yield` as nothing!)
+  // This type is static_check'ed against HANDLE in platform-win32.cpp.
+  using WindowsHandle = void*;
+  WindowsHandle ProfiledThread() const { return mProfiledThread; }
+
+ private:
+  WindowsHandle mProfiledThread;
+#elif defined(__APPLE__) && defined(MOZ_GECKO_PROFILER)
+ public:
+  explicit PlatformData(ProfilerThreadId aThreadId);
+  ~PlatformData();
+  thread_act_t ProfiledThread() const { return mProfiledThread; }
+
+ private:
+  // Note: mProfiledThread uses Mach primitives instead of pthread's, because
+  // the latter doesn't provide the required thread manipulation primitives.
+  // For details, consult the "Mac OS X Internals" book, Section 7.3.
+  thread_act_t mProfiledThread;
+#elif (defined(__linux__) || defined(__ANDROID__) || defined(__FreeBSD__)) && \
+    defined(MOZ_GECKO_PROFILER)
+ public:
+  explicit PlatformData(ProfilerThreadId aThreadId);
+  ~PlatformData();
+  // Clock Id for this profiled thread. `Nothing` if `pthread_getcpuclockid`
+  // failed (e.g., if the system doesn't support per-thread clocks).
+  Maybe<clockid_t> GetClockId() const { return mClockId; }
+
+ private:
+  Maybe<clockid_t> mClockId;
+#else
+ public:
+  explicit PlatformData(ProfilerThreadId aThreadId) {}
+#endif
+};
+
+/**
+ * Get the number of nanoseconds of CPU time used since thread start,
+ * presumably through the `aResult` out-parameter (implementation not here).
+ * @return true on success; without MOZ_GECKO_PROFILER, always false.
+ */
+#if defined(MOZ_GECKO_PROFILER)
+bool GetCpuTimeSinceThreadStartInNs(uint64_t* aResult,
+                                    const PlatformData& aPlatformData);
+#else
+static inline bool GetCpuTimeSinceThreadStartInNs(
+    uint64_t* aResult, const PlatformData& aPlatformData) {
+  return false;
+}
+#endif
+
+}  // namespace mozilla::profiler
+
+#endif  // ProfilerThreadPlatformData_h
+
+}  // namespace mozilla::profiler
+
+#endif  // ProfilerThreadPlatformData_h
diff --git a/tools/profiler/public/ProfilerThreadRegistration.h b/tools/profiler/public/ProfilerThreadRegistration.h
new file mode 100644
index 0000000000..3fb931987d
--- /dev/null
+++ b/tools/profiler/public/ProfilerThreadRegistration.h
@@ -0,0 +1,367 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerThreadRegistration_h
+#define ProfilerThreadRegistration_h
+
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ProfilerThreadRegistrationData.h"
+#include "mozilla/ThreadLocal.h"
+
+namespace mozilla::profiler {
+
+class ThreadRegistry;
+
+// To use as RAII object, or through RegisterThread/UnregisterThread.
+// Automatically registers itself with TLS and Profiler.
+// It can be safely nested, but nested instances are just ignored.
+// See Get.../With... functions for how to access the data.
+class ThreadRegistration {
+ private:
+  using DataMutex = baseprofiler::detail::BaseProfilerMutex;
+  using DataLock = baseprofiler::detail::BaseProfilerAutoLock;
+
+ public:
+  // Constructor to use as RAII auto-registration object.
+  // It stores itself in the TLS (its effective owner), and gives its pointer to
+  // the Profiler.
+  ThreadRegistration(const char* aName, const void* aStackTop);
+
+  // Destruction reverses construction: Remove pointer from the Profiler (except
+  // for the main thread, because it should be done by the profiler itself) and
+  // from the TLS.
+  ~ThreadRegistration();
+
+  // Manual construction&destruction, if RAII is not possible or too expensive
+  // in stack space.
+  // RegisterThread() *must* be paired with exactly one UnregisterThread() on
+  // the same thread. (Extra UnregisterThread() calls are handled safely, but
+  // they may cause profiling of this thread to stop earlier than expected.)
+  static ProfilingStack* RegisterThread(const char* aName,
+                                        const void* aStackTop);
+  static void UnregisterThread();
+
+  [[nodiscard]] static bool IsRegistered() { return GetFromTLS() != nullptr; }
+
+  // Prevent copies&moves.
+  ThreadRegistration(const ThreadRegistration&) = delete;
+  ThreadRegistration& operator=(const ThreadRegistration&) = delete;
+
+  // Aliases to data accessors (removing the ThreadRegistration prefix).
+
+  using UnlockedConstReader = ThreadRegistrationUnlockedConstReader;
+  using UnlockedConstReaderAndAtomicRW =
+      ThreadRegistrationUnlockedConstReaderAndAtomicRW;
+  using UnlockedRWForLockedProfiler =
+      ThreadRegistrationUnlockedRWForLockedProfiler;
+  using UnlockedReaderAndAtomicRWOnThread =
+      ThreadRegistrationUnlockedReaderAndAtomicRWOnThread;
+  using LockedRWFromAnyThread = ThreadRegistrationLockedRWFromAnyThread;
+  using LockedRWOnThread = ThreadRegistrationLockedRWOnThread;
+
+  // On-thread access from the TLS, providing the following data accessors:
+  // UnlockedConstReader, UnlockedConstReaderAndAtomicRW,
+  // UnlockedRWForLockedProfiler, UnlockedReaderAndAtomicRWOnThread, and
+  // LockedRWOnThread.
+  // (See ThreadRegistry class for OFF-thread access.)
+
+  // Reference-like handle to the ThreadRegistration for the current thread.
+  // Each accessor exposes the embedded data through one access-level type.
+  class OnThreadRef {
+   public:
+    // UnlockedConstReader, read-only; no lock is taken.
+
+    [[nodiscard]] const UnlockedConstReader& UnlockedConstReaderCRef() const {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedConstReader(F&& aF) const {
+      return std::forward<F>(aF)(UnlockedConstReaderCRef());
+    }
+
+    // UnlockedConstReaderAndAtomicRW, read-only; no lock is taken.
+
+    [[nodiscard]] const UnlockedConstReaderAndAtomicRW&
+    UnlockedConstReaderAndAtomicRWCRef() const {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedConstReaderAndAtomicRW(F&& aF) const {
+      return std::forward<F>(aF)(UnlockedConstReaderAndAtomicRWCRef());
+    }
+
+    // UnlockedConstReaderAndAtomicRW, mutable; no lock is taken.
+
+    [[nodiscard]] UnlockedConstReaderAndAtomicRW&
+    UnlockedConstReaderAndAtomicRWRef() {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedConstReaderAndAtomicRW(F&& aF) {
+      return std::forward<F>(aF)(UnlockedConstReaderAndAtomicRWRef());
+    }
+
+    // UnlockedRWForLockedProfiler, read-only; no lock is taken here.
+
+    [[nodiscard]] const UnlockedRWForLockedProfiler&
+    UnlockedRWForLockedProfilerCRef() const {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedRWForLockedProfiler(F&& aF) const {
+      return std::forward<F>(aF)(UnlockedRWForLockedProfilerCRef());
+    }
+
+    // UnlockedRWForLockedProfiler, mutable; no lock is taken here.
+
+    [[nodiscard]] UnlockedRWForLockedProfiler&
+    UnlockedRWForLockedProfilerRef() {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedRWForLockedProfiler(F&& aF) {
+      return std::forward<F>(aF)(UnlockedRWForLockedProfilerRef());
+    }
+
+    // UnlockedReaderAndAtomicRWOnThread, read-only; no lock is taken.
+
+    [[nodiscard]] const UnlockedReaderAndAtomicRWOnThread&
+    UnlockedReaderAndAtomicRWOnThreadCRef() const {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedReaderAndAtomicRWOnThread(F&& aF) const {
+      return std::forward<F>(aF)(UnlockedReaderAndAtomicRWOnThreadCRef());
+    }
+
+    // UnlockedReaderAndAtomicRWOnThread, mutable; no lock is taken.
+
+    [[nodiscard]] UnlockedReaderAndAtomicRWOnThread&
+    UnlockedReaderAndAtomicRWOnThreadRef() {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedReaderAndAtomicRWOnThread(F&& aF) {
+      return std::forward<F>(aF)(UnlockedReaderAndAtomicRWOnThreadRef());
+    }
+
+    // LockedRWOnThread, read-only; bundled with a DataLock on the data mutex.
+
+    // Locking order: Profiler, ThreadRegistry, ThreadRegistration.
+    class ConstRWOnThreadWithLock {
+     public:
+      [[nodiscard]] const LockedRWOnThread& DataCRef() const {
+        return mLockedRWOnThread;
+      }
+      [[nodiscard]] const LockedRWOnThread* operator->() const {
+        return &mLockedRWOnThread;
+      }
+
+     private:
+      friend class OnThreadRef;
+      ConstRWOnThreadWithLock(const LockedRWOnThread& aLockedRWOnThread,
+                              DataMutex& aDataMutex)
+          : mLockedRWOnThread(aLockedRWOnThread), mDataLock(aDataMutex) {}
+
+      const LockedRWOnThread& mLockedRWOnThread;
+      DataLock mDataLock;
+    };
+
+    [[nodiscard]] ConstRWOnThreadWithLock ConstLockedRWOnThread() const {
+      return ConstRWOnThreadWithLock{mThreadRegistration->mData,
+                                     mThreadRegistration->mDataMutex};
+    }
+
+    template <typename F>
+    auto WithConstLockedRWOnThread(F&& aF) const {
+      ConstRWOnThreadWithLock lockedData = ConstLockedRWOnThread();
+      return std::forward<F>(aF)(lockedData.DataCRef());
+    }
+
+    // LockedRWOnThread, mutable; bundled with a DataLock on the data mutex.
+
+    // Locking order: Profiler, ThreadRegistry, ThreadRegistration.
+    class RWOnThreadWithLock {
+     public:
+      [[nodiscard]] const LockedRWOnThread& DataCRef() const {
+        return mLockedRWOnThread;
+      }
+      [[nodiscard]] LockedRWOnThread& DataRef() { return mLockedRWOnThread; }
+      [[nodiscard]] const LockedRWOnThread* operator->() const {
+        return &mLockedRWOnThread;
+      }
+      [[nodiscard]] LockedRWOnThread* operator->() {
+        return &mLockedRWOnThread;
+      }
+
+     private:
+      friend class OnThreadRef;
+      RWOnThreadWithLock(LockedRWOnThread& aLockedRWOnThread,
+                         DataMutex& aDataMutex)
+          : mLockedRWOnThread(aLockedRWOnThread), mDataLock(aDataMutex) {}
+
+      LockedRWOnThread& mLockedRWOnThread;
+      DataLock mDataLock;
+    };
+
+    [[nodiscard]] RWOnThreadWithLock GetLockedRWOnThread() {
+      return RWOnThreadWithLock{mThreadRegistration->mData,
+                                mThreadRegistration->mDataMutex};
+    }
+
+    template <typename F>
+    auto WithLockedRWOnThread(F&& aF) {
+      RWOnThreadWithLock lockedData = GetLockedRWOnThread();
+      return std::forward<F>(aF)(lockedData.DataRef());
+    }
+
+    // This is needed to allow OnThreadPtr::operator-> to return a temporary
+    // OnThreadRef object, for which `->` must work; here it provides a pointer
+    // to itself, so that the next follow-up `->` will work as member accessor.
+    OnThreadRef* operator->() && { return this; }
+
+   private:
+    // Only ThreadRegistration should construct an OnThreadRef.
+    friend class ThreadRegistration;
+    explicit OnThreadRef(ThreadRegistration& aThreadRegistration)
+        : mThreadRegistration(&aThreadRegistration) {}
+
+    // Allow ThreadRegistry to read mThreadRegistration.
+    friend class ThreadRegistry;
+
+    // Guaranteed to be non-null by construction from a reference.
+    ThreadRegistration* mThreadRegistration;
+  };
+
+  // Pointer-like handle to the ThreadRegistration for the current thread;
+  // tests false (and must not be dereferenced) if none was registered.
+  class OnThreadPtr {
+   public:
+    [[nodiscard]] explicit operator bool() const {
+      return mThreadRegistration != nullptr;
+    }
+
+    // Dereferencing yields a temporary OnThreadRef object, which has all the
+    // allowed data accessors. The held pointer is asserted to be non-null.
+    [[nodiscard]] OnThreadRef operator*() const {
+      MOZ_ASSERT(mThreadRegistration);
+      return OnThreadRef(*mThreadRegistration);
+    }
+
+    // Same temporary OnThreadRef as operator*; OnThreadRef also overloads
+    // operator->, so `ptr->Accessor()` ends up calling into the OnThreadRef.
+    [[nodiscard]] OnThreadRef operator->() const { return **this; }
+
+   private:
+    friend class ThreadRegistration;
+    explicit OnThreadPtr(ThreadRegistration* aThreadRegistration)
+        : mThreadRegistration(aThreadRegistration) {}
+
+    // May be null (see operator bool).
+    ThreadRegistration* mThreadRegistration;
+  };
+
+  [[nodiscard]] static OnThreadPtr GetOnThreadPtr() {
+    return OnThreadPtr{GetFromTLS()};  // Wraps null if GetFromTLS() finds none.
+  }
+
+  // Call `aF(OnThreadRef)` if this thread has a ThreadRegistration in its TLS;
+  // otherwise do nothing.
+  template <typename F>
+  static void WithOnThreadRef(F&& aF) {
+    ThreadRegistration* const registration = GetFromTLS();
+    if (!registration) {
+      return;
+    }
+
+    std::forward<F>(aF)(OnThreadRef{*registration});
+  }
+
+  // Return `aF(OnThreadRef)` if this thread has a ThreadRegistration in its
+  // TLS; otherwise return `aFallbackReturn`, converted to aF's return type.
+  template <typename F, typename FallbackReturn>
+  [[nodiscard]] static auto WithOnThreadRefOr(F&& aF,
+                                              FallbackReturn&& aFallbackReturn)
+      -> decltype(std::forward<F>(aF)(std::declval<OnThreadRef>())) {
+    ThreadRegistration* const registration = GetFromTLS();
+    if (!registration) {
+      // No usable TLS, or nothing stored in it.
+      return std::forward<FallbackReturn>(aFallbackReturn);
+    }
+
+    return std::forward<F>(aF)(OnThreadRef{*registration});
+  }
+
+  // False if GetFromTLS() finds no registration; otherwise whether its data
+  // mutex reports being locked on the current thread.
+  [[nodiscard]] static bool IsDataMutexLockedOnCurrentThread() {
+    const ThreadRegistration* const registration = GetFromTLS();
+    return registration && registration->mDataMutex.IsLockedOnCurrentThread();
+  }
+
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    const DataLock dataLock(mDataMutex);  // In scope while mData is measured.
+    return mData.SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    // aMallocSizeOf can only be used on heap-allocated objects. Stack
+    // allocations and static objects are not counted.
+    const size_t selfSize = mIsOnHeap ? aMallocSizeOf(this) : 0;
+    return selfSize + SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+ private:
+  friend class ThreadRegistry;
+
+  // Most-derived data object, embedded inside ThreadRegistration.
+  // References to its base classes are handed out, to limit access as needed.
+  class EmbeddedData final : public LockedRWOnThread {
+   private:
+    // Only ThreadRegistration can construct (its embedded) `mData`.
+    friend class ThreadRegistration;
+    EmbeddedData(const char* aName, const void* aStackTop)
+        : LockedRWOnThread(aName, aStackTop) {}
+  };
+  EmbeddedData mData;
+
+  // Used when writing on self thread, and for any access from any thread.
+  // Locking order: Profiler, ThreadRegistry, ThreadRegistration.
+  mutable DataMutex mDataMutex;
+
+  // In case of nested (non-RAII) registrations. Only accessed on thread.
+  int mOtherRegistrations = 0;
+
+  // Set to true if allocated by `RegisterThread()`. Otherwise we assume that it
+  // is on the stack.
+  bool mIsOnHeap = false;
+
+  // Only accessed by ThreadRegistry on this thread.
+  bool mIsRegistryLockedSharedOnThisThread = false;
+
+  static MOZ_THREAD_LOCAL(ThreadRegistration*) tlsThreadRegistration;
+
+  [[nodiscard]] static decltype(tlsThreadRegistration)* GetTLS() {
+    static const bool sInitSucceeded = tlsThreadRegistration.init();
+    return sInitSucceeded ? &tlsThreadRegistration : nullptr;
+  }
+
+  [[nodiscard]] static ThreadRegistration* GetFromTLS() {
+    auto* const tlsSlot = GetTLS();  // Null if the TLS could not be set up.
+    return !tlsSlot ? nullptr : tlsSlot->get();
+  }
+};
+
+}  // namespace mozilla::profiler
+
+#endif  // ProfilerThreadRegistration_h
diff --git a/tools/profiler/public/ProfilerThreadRegistrationData.h b/tools/profiler/public/ProfilerThreadRegistrationData.h
new file mode 100644
index 0000000000..7c14290e4c
--- /dev/null
+++ b/tools/profiler/public/ProfilerThreadRegistrationData.h
@@ -0,0 +1,537 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains classes that hold data related to thread profiling:
+// Data members are stored `protected` in `ThreadRegistrationData`.
+// Non-virtual sub-classes of `ThreadRegistrationData` provide layers of
+// public accessors to subsets of the data. Each level builds on the previous
+// one and adds further access to more data, but always with the appropriate
+// guards where necessary.
+// These classes have protected constructors, so only some trusted classes
+// `ThreadRegistration` and `ThreadRegistry` will be able to construct them, and
+// then give limited access depending on who asks (the owning thread or another
+// one), and how much data they actually need.
+//
+// The hierarchy is, from base to most derived:
+// - ThreadRegistrationData
+// - ThreadRegistrationUnlockedConstReader
+// - ThreadRegistrationUnlockedConstReaderAndAtomicRW
+// - ThreadRegistrationUnlockedRWForLockedProfiler
+// - ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
+// - ThreadRegistrationLockedRWFromAnyThread
+// - ThreadRegistrationLockedRWOnThread
+// - ThreadRegistration::EmbeddedData (actual data member in ThreadRegistration)
+//
+// Tech detail: These classes need to be a single hierarchy so that
+// `ThreadRegistration` can contain the most-derived class, and from there can
+// publish references to base classes without relying on Undefined Behavior.
+// (It's not allowed to have some object and give a reference to a sub-class,
+// unless that object was *really* constructed as that sub-class at least, even
+// if that sub-class only adds member functions!)
+// And where appropriate, these references will come along with the required
+// lock.
+
+#ifndef ProfilerThreadRegistrationData_h
+#define ProfilerThreadRegistrationData_h
+
+#include "js/ProfilingFrameIterator.h"
+#include "js/ProfilingStack.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/ProfilerThreadPlatformData.h"
+#include "mozilla/ProfilerThreadRegistrationInfo.h"
+#include "nsCOMPtr.h"
+#include "nsIThread.h"
+
+class ProfiledThreadData;
+class PSAutoLock;
+struct JSContext;
+
+// Enum listing which profiling features are active for a single thread.
+enum class ThreadProfilingFeatures : uint32_t {
+  // The thread is not being profiled at all (either the profiler is not
+  // running, or this thread is not examined during profiling.)
+  NotProfiled = 0u,
+
+  // Single features, one distinct bit each. May be `Combine()`d.
+  CPUUtilization = 1u << 0,
+  Sampling = 1u << 1,
+  Markers = 1u << 2,
+
+  // All possible features. Usually used as a mask to see if any feature is
+  // active at a given time.
+  Any = CPUUtilization | Sampling | Markers
+};
+
+// Binary OR of one or more ThreadProfilingFeatures, to mix all arguments.
+template <typename... Ts>
+[[nodiscard]] constexpr ThreadProfilingFeatures Combine(
+    ThreadProfilingFeatures a1, Ts... as) {
+  static_assert((true && ... &&
+                 std::is_same_v<std::remove_cv_t<std::remove_reference_t<Ts>>,
+                                ThreadProfilingFeatures>));
+  return static_cast<ThreadProfilingFeatures>(
+      (static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(a1) | ... |
+       static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(as)));
+}
+
+// Binary AND of one or more ThreadProfilingFeatures, to find features common to
+// all arguments.
+template <typename... Ts>
+[[nodiscard]] constexpr ThreadProfilingFeatures Intersect(
+    ThreadProfilingFeatures a1, Ts... as) {
+  static_assert((true && ... &&
+                 std::is_same_v<std::remove_cv_t<std::remove_reference_t<Ts>>,
+                                ThreadProfilingFeatures>));
+  return static_cast<ThreadProfilingFeatures>(
+      (static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(a1) & ... &
+       static_cast<std::underlying_type_t<ThreadProfilingFeatures>>(as)));
+}
+
+// Are there features in common between the two given sets?
+// Mostly useful to test if any of a set of features is present in another set.
+// Returns false if either set is `NotProfiled`, i.e., has no feature bits.
+[[nodiscard]] constexpr bool DoFeaturesIntersect(ThreadProfilingFeatures a1,
+                                                 ThreadProfilingFeatures a2) {
+  return Intersect(a1, a2) != ThreadProfilingFeatures::NotProfiled;
+}
+
+namespace mozilla::profiler {
+
+// All data members related to thread profiling are stored here.
+// See derived classes below, which give limited unlocked/locked read/write
+// access in different situations, and will be available through
+// ThreadRegistration and ThreadRegistry.
+class ThreadRegistrationData {
+ public:
+  // No public accessors here. See derived classes for accessors, and
+  // Get.../With... functions for who can use these accessors.
+
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    // Always 0: data that is not fully owned here is not included.
+    return 0;
+  }
+
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    return SizeOfExcludingThis(aMallocSizeOf) + aMallocSizeOf(this);
+  }
+
+  static constexpr size_t MAX_JS_FRAMES = 1024;
+  using JsFrame = JS::ProfilingFrameIterator::Frame;
+  using JsFrameBuffer = JsFrame[MAX_JS_FRAMES];
+
+  // `protected` to allow derived classes to read all data members.
+ protected:
+  ThreadRegistrationData(const char* aName, const void* aStackTop);
+
+#ifdef DEBUG
+  // Debug-only destructor, solely asserting that profiling state was cleared.
+  ~ThreadRegistrationData() {
+    MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) ==
+               !!mProfiledThreadData);
+    MOZ_ASSERT(!mProfiledThreadData,
+               "mProfiledThreadData pointer should have been reset before "
+               "~ThreadRegistrationData");
+  }
+#endif  // DEBUG
+
+  // Permanent thread information.
+  // Set at construction, read from anywhere, moved-from at destruction.
+  ThreadRegistrationInfo mInfo;
+
+  // Contains profiler labels and JS frames.
+  // Deep-written on thread only, deep-read from thread and suspended thread.
+  ProfilingStack mProfilingStack;
+
+  // In practice, only read from thread and suspended thread.
+  PlatformData mPlatformData;
+
+  // Only read from thread and suspended thread.
+  const void* const mStackTop;
+
+  // Written from thread, read from thread and suspended thread.
+  nsCOMPtr<nsIThread> mThread;
+
+  // If this is a JS thread, this is its JSContext, which is required for any
+  // JS sampling.
+  // Written from thread, read from thread and suspended thread.
+  JSContext* mJSContext = nullptr;
+
+  // If mJSContext is not null AND the thread is being profiled, this points at
+  // the start of a JsFrameBuffer to be used for on-thread synchronous sampling.
+  JsFrame* mJsFrameBuffer = nullptr;
+
+  // The profiler needs to start and stop JS sampling of JS threads at various
+  // times. However, the JS engine can only do the required actions on the
+  // JS thread itself ("on-thread"), not from another thread ("off-thread").
+  // Therefore, we have the following two-step process.
+  //
+  // - The profiler requests (on-thread or off-thread) that the JS sampling be
+  //   started/stopped, by changing mJSSampling to the appropriate REQUESTED
+  //   state.
+  //
+  // - The relevant JS thread polls (on-thread) for changes to mJSSampling.
+  //   When it sees a REQUESTED state, it performs the appropriate actions to
+  //   actually start/stop JS sampling, and changes mJSSampling out of the
+  //   REQUESTED state.
+  //
+  // The state machine is as follows.
+  //
+  //             INACTIVE --> ACTIVE_REQUESTED
+  //                  ^       ^ |
+  //                  |     _/  |
+  //                  |   _/    |
+  //                  |  /      |
+  //                  | v       v
+  //   INACTIVE_REQUESTED <-- ACTIVE
+  //
+  // The polling is done in the following two ways.
+  //
+  // - Via the interrupt callback mechanism; the JS thread must call
+  //   profiler_js_interrupt_callback() from its own interrupt callback.
+  //   This is how sampling must be started/stopped for threads where the
+  //   request was made off-thread.
+  //
+  // - When {Start,Stop}JSSampling() is called on-thread, we can immediately
+  //   follow it with a PollJSSampling() call to avoid the delay between the
+  //   two steps. Likewise, setJSContext() calls PollJSSampling().
+  //
+  // One non-obvious thing about all this: these JS sampling requests are made
+  // on all threads, even non-JS threads. mJSContext also needs to be set (via
+  // setJSContext(), which can only happen for JS threads) for any JS sampling
+  // to actually happen.
+  //
+  enum {
+    INACTIVE = 0,
+    ACTIVE_REQUESTED = 1,
+    ACTIVE = 2,
+    INACTIVE_REQUESTED = 3,
+  } mJSSampling = INACTIVE;
+
+  uint32_t mJSFlags = 0;
+
+  // Flags to conveniently track various JS instrumentations.
+  enum class JSInstrumentationFlags {
+    StackSampling = 0x1,
+    Allocations = 0x2,
+  };
+
+  [[nodiscard]] bool JSAllocationsEnabled() const {
+    return (mJSFlags & uint32_t(JSInstrumentationFlags::Allocations)) != 0;
+  }
+
+  // The following members may be modified from another thread.
+  // They need to be atomic, because LockData() does not prevent reads from
+  // the owning thread.
+
+  // mSleep tracks whether the thread is sleeping, and if so, whether it has
+  // been previously observed. This is used for an optimization: in some
+  // cases, when a thread is asleep, we duplicate the previous sample, which
+  // is cheaper than taking a new sample.
+  //
+  // mSleep is atomic because it is accessed from multiple threads.
+  //
+  // - It is written only by this thread, via SetSleeping() and SetAwake().
+  //
+  // - It is read by SamplerThread::Run().
+  //
+  // There are two cases where racing between threads can cause an issue.
+  //
+  // - If CanDuplicateLastSampleDueToSleep() returns false but that result is
+  //   invalidated before being acted upon, we will take a full sample
+  //   unnecessarily. This is additional work but won't cause any correctness
+  //   issues. (In actual fact, this case is impossible. In order to go from
+  //   CanDuplicateLastSampleDueToSleep() returning false to it returning true
+  //   requires an intermediate call to it in order for mSleep to go from
+  //   SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.)
+  //
+  // - If CanDuplicateLastSampleDueToSleep() returns true but that result is
+  //   invalidated before being acted upon -- i.e. the thread wakes up before
+  //   DuplicateLastSample() is called -- we will duplicate the previous
+  //   sample. This is inaccurate, but only slightly... we will effectively
+  //   treat the thread as having slept a tiny bit longer than it really did.
+  //
+  // This latter inaccuracy could be avoided by moving the
+  // CanDuplicateLastSampleDueToSleep() check within the thread-freezing code,
+  // e.g. the section where Tick() is called. But that would reduce the
+  // effectiveness of the optimization because more code would have to be run
+  // before we can tell that duplication is allowed.
+  //
+  static const int AWAKE = 0;
+  static const int SLEEPING_NOT_OBSERVED = 1;
+  static const int SLEEPING_OBSERVED = 2;
+  // Read&written from thread and suspended thread.
+  Atomic<int> mSleep{AWAKE};
+  Atomic<uint64_t> mThreadCpuTimeInNsAtLastSleep{0};
+
+#ifdef NIGHTLY_BUILD
+  // The first wake is the thread creation.
+  Atomic<uint64_t, MemoryOrdering::Relaxed> mWakeCount{1};
+  mutable baseprofiler::detail::BaseProfilerMutex mRecordWakeCountMutex;
+  mutable uint64_t mAlreadyRecordedWakeCount = 0;
+  mutable uint64_t mAlreadyRecordedCpuTimeInMs = 0;
+#endif
+
+  // Is this thread currently being profiled, and with which features?
+  // Written from profiler, read from any thread.
+  // Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together.)
+  Atomic<ThreadProfilingFeatures, MemoryOrdering::Relaxed> mProfilingFeatures{
+      ThreadProfilingFeatures::NotProfiled};
+
+  // If the profiler is active and this thread is selected for profiling, this
+  // points at the relevant ProfiledThreadData.
+  // Fully controlled by the profiler.
+  // Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together).
+  ProfiledThreadData* mProfiledThreadData = nullptr;
+};
+
+// Unlocked read-only accessors to const data; may be used from any thread.
+class ThreadRegistrationUnlockedConstReader : public ThreadRegistrationData {
+ public:
+  [[nodiscard]] const ThreadRegistrationInfo& Info() const { return mInfo; }
+
+  [[nodiscard]] const PlatformData& PlatformDataCRef() const {
+    return mPlatformData;
+  }
+
+  [[nodiscard]] const void* StackTop() const { return mStackTop; }
+
+ protected:
+  ThreadRegistrationUnlockedConstReader(const char* aName,
+                                        const void* aStackTop)
+      : ThreadRegistrationData(aName, aStackTop) {}
+};
+
+// Accessing atomic data (and the profiling stack) from any thread.
+class ThreadRegistrationUnlockedConstReaderAndAtomicRW
+    : public ThreadRegistrationUnlockedConstReader {
+ public:
+  [[nodiscard]] const ProfilingStack& ProfilingStackCRef() const {
+    return mProfilingStack;
+  }
+  [[nodiscard]] ProfilingStack& ProfilingStackRef() { return mProfilingStack; }
+
+  // Similar to `profiler_is_active()`, this atomic flag may become out-of-date.
+  // It should only be used as an indication to know whether this thread is
+  // probably being profiled (with some specific features), to avoid doing
+  // expensive operations otherwise. Edge cases:
+  // - This thread could get `NotProfiled`, but the profiler has just started,
+  //   so some very early data may be missing. No real impact on profiling.
+  // - This thread could see profiled features, but the profiler has just
+  //   stopped, so some work will be done and then discarded when finally
+  //   attempting to write to the buffer. No impact on profiling.
+  // - This thread could see profiled features, but the profiler will quickly
+  //   stop and restart, so this thread will write information relevant to the
+  //   previous profiling session. Very rare, and little impact on profiling.
+  [[nodiscard]] ThreadProfilingFeatures ProfilingFeatures() const {
+    return mProfilingFeatures;
+  }
+
+  // Call this whenever the current thread sleeps. Calling it twice in a row
+  // without an intervening SetAwake() call is an error.
+  void SetSleeping() {
+    MOZ_ASSERT(mSleep == AWAKE);
+    mSleep = SLEEPING_NOT_OBSERVED;
+  }
+
+  // Call this whenever the current thread wakes. Calling it twice in a row
+  // without an intervening SetSleeping() call is an error.
+  void SetAwake() {
+    MOZ_ASSERT(mSleep != AWAKE);
+    mSleep = AWAKE;
+#ifdef NIGHTLY_BUILD
+    ++mWakeCount;
+#endif
+  }
+
+  // Returns the CPU time used by the thread since the previous call to this
+  // method, or since the thread was started if this is the first call.
+  uint64_t GetNewCpuTimeInNs() {
+    uint64_t cpuTimeNowNs;
+    if (!GetCpuTimeSinceThreadStartInNs(&cpuTimeNowNs, PlatformDataCRef())) {
+      cpuTimeNowNs = 0;
+    }
+    const uint64_t cpuTimeBeforeNs = mThreadCpuTimeInNsAtLastSleep;
+    mThreadCpuTimeInNsAtLastSleep = cpuTimeNowNs;
+    // Report 0 rather than wrap around if the new reading is not larger.
+    return MOZ_LIKELY(cpuTimeNowNs > cpuTimeBeforeNs)
+               ? cpuTimeNowNs - cpuTimeBeforeNs : 0;
+  }
+
+#ifdef NIGHTLY_BUILD
+  void RecordWakeCount() const;
+#endif
+
+  // This is called on every profiler restart. Put things that should happen
+  // at that time here.
+  void ReinitializeOnResume() {
+    // Demote an already-observed sleep back to "not observed", so that an
+    // initial sample is taken again from threads that were sleeping before the
+    // profiler stopped and restarted. Otherwise sleeping threads would not
+    // have any samples to copy forward while sleeping.
+    (void)mSleep.compareExchange(SLEEPING_OBSERVED, SLEEPING_NOT_OBSERVED);
+  }
+
+  // This returns true for the second and subsequent calls in each sleep
+  // cycle, so that the sampler can skip its full sampling and reuse the first
+  // asleep sample instead.
+  [[nodiscard]] bool CanDuplicateLastSampleDueToSleep() {
+    // An awake thread always needs a fresh sample. For a sleeping thread, the
+    // exchange succeeds only while the sleep was not yet observed (marking it
+    // as observed), and that first observation still needs a full sample.
+    const bool awakeOrFirstObservation =
+        mSleep == AWAKE ||
+        mSleep.compareExchange(SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED);
+    return !awakeOrFirstObservation;
+  }
+
+  [[nodiscard]] bool IsSleeping() const { return mSleep != AWAKE; }
+
+ protected:
+  ThreadRegistrationUnlockedConstReaderAndAtomicRW(const char* aName,
+                                                   const void* aStackTop)
+      : ThreadRegistrationUnlockedConstReader(aName, aStackTop) {}
+};
+
+// Adds accessors that require a `const PSAutoLock&` proof-of-lock argument.
+class ThreadRegistrationUnlockedRWForLockedProfiler
+    : public ThreadRegistrationUnlockedConstReaderAndAtomicRW {
+ public:
+  // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
+  // Only add functions that take a `const PSAutoLock&` proof-of-lock.
+  // (Because no other lock is taken by these accessors.)
+
+  [[nodiscard]] const ProfiledThreadData* GetProfiledThreadData(
+      const PSAutoLock&) const {
+    return mProfiledThreadData;
+  }
+
+  [[nodiscard]] ProfiledThreadData* GetProfiledThreadData(const PSAutoLock&) {
+    return mProfiledThreadData;
+  }
+
+ protected:
+  ThreadRegistrationUnlockedRWForLockedProfiler(const char* aName,
+                                                const void* aStackTop)
+      : ThreadRegistrationUnlockedConstReaderAndAtomicRW(aName, aStackTop) {}
+};
+
+// Reading data: unlocked when on the thread itself, or locked otherwise.
+// This data MUST only be written on the thread, with the lock (i.e., in
+// LockedRWOnThread through RWOnThreadWithLock.)
+class ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
+    : public ThreadRegistrationUnlockedRWForLockedProfiler {
+ public:
+  // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
+  // Non-atomic members read here MUST be written from LockedRWOnThread (to
+  // guarantee that they are only modified on this thread.)
+
+  [[nodiscard]] JSContext* GetJSContext() const { return mJSContext; }
+
+ protected:
+  ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(const char* aName,
+                                                      const void* aStackTop)
+      : ThreadRegistrationUnlockedRWForLockedProfiler(aName, aStackTop) {}
+};
+
+// Accessing locked data from the thread, or from any thread through the locked
+// profiler:
+
+// Like above, and profiler can also read&write mutex-protected members.
+class ThreadRegistrationLockedRWFromAnyThread
+    : public ThreadRegistrationUnlockedReaderAndAtomicRWOnThread {
+ public:
+  void SetProfilingFeaturesAndData(ThreadProfilingFeatures aProfilingFeatures,
+                                   ProfiledThreadData* aProfiledThreadData,
+                                   const PSAutoLock&);
+  void ClearProfilingFeaturesAndData(const PSAutoLock&);
+
+  // Not null when JSContext is not null AND this thread is being profiled.
+  // Points at the start of JsFrameBuffer.
+  [[nodiscard]] JsFrame* GetJsFrameBuffer() const { return mJsFrameBuffer; }
+
+  [[nodiscard]] const nsCOMPtr<nsIEventTarget> GetEventTarget() const {
+    return mThread;
+  }
+
+  void ResetMainThread(nsIThread* aThread) { mThread = aThread; }
+
+  // aDelay is the time the event that is currently running on the thread was
+  // queued before starting to run (if a PrioritizedEventQueue
+  // (i.e. MainThread), this will be 0 for any event at a lower priority
+  // than Input).
+  // aRunning is the time the event has been running. If no event is running
+  // these will both be TimeDuration() (i.e. 0). Both are out params, and are
+  // always set. Their initial value is discarded.
+  void GetRunningEventDelay(const TimeStamp& aNow, TimeDuration& aDelay,
+                            TimeDuration& aRunning) {
+    bool eventIsRunning = false;
+    if (mThread) {  // mThread can be null right at the start of a process.
+      TimeStamp eventStart;
+      mThread->GetRunningEventDelay(&aDelay, &eventStart);
+      eventIsRunning = !eventStart.IsNull();
+      if (eventIsRunning) {
+        // Note: aNow will be the timestamp from when we started to suspend
+        // and sample the thread; it is also the timestamp associated with the
+        // sample.
+        aRunning = aNow - eventStart;
+      }
+    }
+    if (!eventIsRunning) {
+      // No thread yet, or no event currently running: zero both out-params.
+      aDelay = TimeDuration();
+      aRunning = TimeDuration();
+    }
+  }
+
+  // Request that this thread start JS sampling. JS sampling won't actually
+  // start until a subsequent PollJSSampling() call occurs *and* mJSContext has
+  // been set.
+  void StartJSSampling(uint32_t aJSFlags) {
+    // This function runs on-thread or off-thread.
+
+    MOZ_RELEASE_ASSERT(mJSSampling == INACTIVE ||
+                       mJSSampling == INACTIVE_REQUESTED);
+    mJSSampling = ACTIVE_REQUESTED;
+    mJSFlags = aJSFlags;
+  }
+
+  // Request that this thread stop JS sampling. JS sampling won't actually
+  // stop until a subsequent PollJSSampling() call occurs.
+  void StopJSSampling() {
+    // This function runs on-thread or off-thread.
+
+    MOZ_RELEASE_ASSERT(mJSSampling == ACTIVE ||
+                       mJSSampling == ACTIVE_REQUESTED);
+    mJSSampling = INACTIVE_REQUESTED;
+  }
+
+ protected:
+  ThreadRegistrationLockedRWFromAnyThread(const char* aName,
+                                          const void* aStackTop)
+      : ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(aName, aStackTop) {}
+};
+
+// Accessing data, locked, from the thread.
+// If any non-atomic data is readable from UnlockedReaderAndAtomicRWOnThread,
+// it must be written from here, and not in base classes: Since this data is
+// only written on the thread, it can be read from the same thread without
+// lock; but writing must be locked so that other threads can safely read it,
+// typically from LockedRWFromAnyThread.
+class ThreadRegistrationLockedRWOnThread
+    : public ThreadRegistrationLockedRWFromAnyThread {
+ public:
+  void SetJSContext(JSContext* aJSContext);
+  void ClearJSContext();
+
+  // Poll to see if JS sampling should be started/stopped.
+  void PollJSSampling();
+
+ public:
+  ThreadRegistrationLockedRWOnThread(const char* aName, const void* aStackTop)
+      : ThreadRegistrationLockedRWFromAnyThread(aName, aStackTop) {}
+};
+
+}  // namespace mozilla::profiler
+
+#endif  // ProfilerThreadRegistrationData_h
diff --git a/tools/profiler/public/ProfilerThreadRegistrationInfo.h b/tools/profiler/public/ProfilerThreadRegistrationInfo.h
new file mode 100644
index 0000000000..e116c3059e
--- /dev/null
+++ b/tools/profiler/public/ProfilerThreadRegistrationInfo.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerThreadRegistrationInfo_h
+#define ProfilerThreadRegistrationInfo_h
+
+#include "mozilla/BaseAndGeckoProfilerDetail.h"
+#include "mozilla/ProfilerUtils.h"
+#include "mozilla/TimeStamp.h"
+
+#include <string>
+
+namespace mozilla::profiler {
+
+// This class contains immutable information about a thread which needs to be
+// stored across restarts of the profiler and which can be useful even after the
+// thread has stopped running.
+class ThreadRegistrationInfo {
+ public:
+  // Construct on the thread.
+  explicit ThreadRegistrationInfo(const char* aName) : mName(aName) {}
+
+  // Construct for a foreign thread (e.g., Java).
+  ThreadRegistrationInfo(const char* aName, ProfilerThreadId aThreadId,
+                         bool aIsMainThread, const TimeStamp& aRegisterTime)
+      : mName(aName),
+        mRegisterTime(aRegisterTime),
+        mThreadId(aThreadId),
+        mIsMainThread(aIsMainThread) {}
+
+  // Only allow move construction, for extraction when the thread ends.
+  ThreadRegistrationInfo(ThreadRegistrationInfo&&) = default;
+
+  // Other copies/moves disallowed.
+  ThreadRegistrationInfo(const ThreadRegistrationInfo&) = delete;
+  ThreadRegistrationInfo& operator=(const ThreadRegistrationInfo&) = delete;
+  ThreadRegistrationInfo& operator=(ThreadRegistrationInfo&&) = delete;
+
+  [[nodiscard]] const char* Name() const { return mName.c_str(); }
+  [[nodiscard]] const TimeStamp& RegisterTime() const { return mRegisterTime; }
+  [[nodiscard]] ProfilerThreadId ThreadId() const { return mThreadId; }
+  [[nodiscard]] bool IsMainThread() const { return mIsMainThread; }
+
+ private:
+  // Default value for mRegisterTime: the time reported by
+  // baseprofiler::detail::GetThreadRegistrationTime() when it is non-null,
+  // otherwise the current time.
+  static TimeStamp ExistingRegisterTimeOrNow() {
+    const TimeStamp existingTime =
+        baseprofiler::detail::GetThreadRegistrationTime();
+    return !existingTime ? TimeStamp::Now() : existingTime;
+  }
+
+  const std::string mName;
+  const TimeStamp mRegisterTime = ExistingRegisterTimeOrNow();
+  const ProfilerThreadId mThreadId = profiler_current_thread_id();
+  const bool mIsMainThread = profiler_is_main_thread();
+};
+
+}  // namespace mozilla::profiler
+
+#endif  // ProfilerThreadRegistrationInfo_h
diff --git a/tools/profiler/public/ProfilerThreadRegistry.h b/tools/profiler/public/ProfilerThreadRegistry.h
new file mode 100644
index 0000000000..4d0fd3ef68
--- /dev/null
+++ b/tools/profiler/public/ProfilerThreadRegistry.h
@@ -0,0 +1,321 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerThreadRegistry_h
+#define ProfilerThreadRegistry_h
+
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ProfilerThreadRegistration.h"
+#include "mozilla/Vector.h"
+
+namespace mozilla::profiler {
+
+class ThreadRegistry {
+ private:
+  using RegistryMutex = baseprofiler::detail::BaseProfilerSharedMutex;
+  using RegistryLockExclusive =
+      baseprofiler::detail::BaseProfilerAutoLockExclusive;
+  using RegistryLockShared = baseprofiler::detail::BaseProfilerAutoLockShared;
+
+ public:
+  // Aliases to data accessors (removing the ThreadRegistration prefix).
+
+  using UnlockedConstReader = ThreadRegistrationUnlockedConstReader;
+  using UnlockedConstReaderAndAtomicRW =
+      ThreadRegistrationUnlockedConstReaderAndAtomicRW;
+  using UnlockedRWForLockedProfiler =
+      ThreadRegistrationUnlockedRWForLockedProfiler;
+  using UnlockedReaderAndAtomicRWOnThread =
+      ThreadRegistrationUnlockedReaderAndAtomicRWOnThread;
+  using LockedRWFromAnyThread = ThreadRegistrationLockedRWFromAnyThread;
+  using LockedRWOnThread = ThreadRegistrationLockedRWOnThread;
+
+  // Off-thread access through the registry, providing the following data
+  // accessors: UnlockedConstReader, UnlockedConstReaderAndAtomicRW,
+  // UnlockedRWForLockedProfiler, and LockedRWFromAnyThread.
+  // (See ThreadRegistration class for ON-thread access.)
+
+  // Reference-like class pointing at a ThreadRegistration.
+  // It should only exist while sRegistryMutex is locked.
+  class OffThreadRef {
+   public:
+    // const UnlockedConstReader
+
+    [[nodiscard]] const UnlockedConstReader& UnlockedConstReaderCRef() const {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedConstReader(F&& aF) const {
+      return std::forward<F>(aF)(UnlockedConstReaderCRef());
+    }
+
+    // const UnlockedConstReaderAndAtomicRW
+
+    [[nodiscard]] const UnlockedConstReaderAndAtomicRW&
+    UnlockedConstReaderAndAtomicRWCRef() const {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedConstReaderAndAtomicRW(F&& aF) const {
+      return std::forward<F>(aF)(UnlockedConstReaderAndAtomicRWCRef());
+    }
+
+    // UnlockedConstReaderAndAtomicRW
+
+    [[nodiscard]] UnlockedConstReaderAndAtomicRW&
+    UnlockedConstReaderAndAtomicRWRef() {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedConstReaderAndAtomicRW(F&& aF) {
+      return std::forward<F>(aF)(UnlockedConstReaderAndAtomicRWRef());
+    }
+
+    // const UnlockedRWForLockedProfiler
+
+    [[nodiscard]] const UnlockedRWForLockedProfiler&
+    UnlockedRWForLockedProfilerCRef() const {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedRWForLockedProfiler(F&& aF) const {
+      return std::forward<F>(aF)(UnlockedRWForLockedProfilerCRef());
+    }
+
+    // UnlockedRWForLockedProfiler
+
+    [[nodiscard]] UnlockedRWForLockedProfiler&
+    UnlockedRWForLockedProfilerRef() {
+      return mThreadRegistration->mData;
+    }
+
+    template <typename F>
+    auto WithUnlockedRWForLockedProfiler(F&& aF) {
+      return std::forward<F>(aF)(UnlockedRWForLockedProfilerRef());
+    }
+
+    // const LockedRWFromAnyThread through ConstRWFromAnyThreadWithLock
+
+    class ConstRWFromAnyThreadWithLock {
+     public:
+      [[nodiscard]] const LockedRWFromAnyThread& DataCRef() const {
+        return mLockedRWFromAnyThread;
+      }
+      [[nodiscard]] const LockedRWFromAnyThread* operator->() const {
+        return &mLockedRWFromAnyThread;
+      }
+
+      ConstRWFromAnyThreadWithLock(
+          const LockedRWFromAnyThread& aLockedRWFromAnyThread,
+          ThreadRegistration::DataMutex& aDataMutex)
+          : mLockedRWFromAnyThread(aLockedRWFromAnyThread),
+            mDataLock(aDataMutex) {}
+
+     private:
+      const LockedRWFromAnyThread& mLockedRWFromAnyThread;
+      ThreadRegistration::DataLock mDataLock;
+    };
+
+    [[nodiscard]] ConstRWFromAnyThreadWithLock ConstLockedRWFromAnyThread()
+        const {
+      return ConstRWFromAnyThreadWithLock{mThreadRegistration->mData,
+                                          mThreadRegistration->mDataMutex};
+    }
+
+    template <typename F>
+    auto WithConstLockedRWFromAnyThread(F&& aF) const {
+      ConstRWFromAnyThreadWithLock lockedData = ConstLockedRWFromAnyThread();
+      return std::forward<F>(aF)(lockedData.DataCRef());
+    }
+
+    // LockedRWFromAnyThread through RWFromAnyThreadWithLock
+
+    class RWFromAnyThreadWithLock {
+     public:
+      [[nodiscard]] const LockedRWFromAnyThread& DataCRef() const {
+        return mLockedRWFromAnyThread;
+      }
+      [[nodiscard]] LockedRWFromAnyThread& DataRef() {
+        return mLockedRWFromAnyThread;
+      }
+      [[nodiscard]] const LockedRWFromAnyThread* operator->() const {
+        return &mLockedRWFromAnyThread;
+      }
+      [[nodiscard]] LockedRWFromAnyThread* operator->() {
+        return &mLockedRWFromAnyThread;
+      }
+
+      // In some situations, it may be useful to do some on-thread operations if
+      // we are indeed on this thread now. The lock is still held here; caller
+      // should not use this pointer longer than this RWFromAnyThreadWithLock.
+      [[nodiscard]] LockedRWOnThread* GetLockedRWOnThread() {
+        if (!(mLockedRWFromAnyThread.Info().ThreadId() ==
+              profiler_current_thread_id())) {
+          // We are not on the referenced thread: no on-thread access.
+          return nullptr;
+        }
+        // mLockedRWFromAnyThread references a subclass of the
+        // ThreadRegistration's mData, so downcasting it to another hierarchy
+        // level of the same object is safe.
+        LockedRWOnThread& onThreadData =
+            static_cast<LockedRWOnThread&>(mLockedRWFromAnyThread);
+        return &onThreadData;
+      }
+
+     private:
+      friend class OffThreadRef;
+      RWFromAnyThreadWithLock(LockedRWFromAnyThread& aLockedRWFromAnyThread,
+                              ThreadRegistration::DataMutex& aDataMutex)
+          : mLockedRWFromAnyThread(aLockedRWFromAnyThread),
+            mDataLock(aDataMutex) {}
+
+      LockedRWFromAnyThread& mLockedRWFromAnyThread;
+      ThreadRegistration::DataLock mDataLock;
+    };
+
+    [[nodiscard]] RWFromAnyThreadWithLock GetLockedRWFromAnyThread() {
+      return RWFromAnyThreadWithLock{mThreadRegistration->mData,
+                                     mThreadRegistration->mDataMutex};
+    }
+
+    template <typename F>
+    auto WithLockedRWFromAnyThread(F&& aF) {
+      RWFromAnyThreadWithLock lockedData = GetLockedRWFromAnyThread();
+      return std::forward<F>(aF)(lockedData.DataRef());
+    }
+
+   private:
+    // Only ThreadRegistry should construct an OffThreadRef.
+    friend class ThreadRegistry;
+    explicit OffThreadRef(ThreadRegistration& aThreadRegistration)
+        : mThreadRegistration(&aThreadRegistration) {}
+
+    // If we have an ON-thread ref, it's safe to convert to an OFF-thread ref.
+    explicit OffThreadRef(ThreadRegistration::OnThreadRef aOnThreadRef)
+        : mThreadRegistration(aOnThreadRef.mThreadRegistration) {}
+
+    [[nodiscard]] bool IsPointingAt(
+        ThreadRegistration& aThreadRegistration) const {
+      return mThreadRegistration == &aThreadRegistration;
+    }
+
+    // Guaranteed to be non-null by construction.
+    ThreadRegistration* mThreadRegistration;
+  };
+
+  // Lock the registry non-exclusively and allow iteration. E.g.:
+  // `for (OffThreadRef thread : LockedRegistry{}) { ... }`
+  // Do *not* export copies/references, as they could become dangling.
+  // Locking order: Profiler, ThreadRegistry, ThreadRegistration.
+  class LockedRegistry {
+   public:
+    LockedRegistry()
+        : mRegistryLock([]() -> RegistryMutex& {
+            MOZ_ASSERT(!IsRegistryMutexLockedOnCurrentThread(),
+                       "Recursive locking detected");
+            // In DEBUG builds, *before* we attempt to lock sRegistryMutex, we
+            // want to check that the ThreadRegistration mutex is *not* locked
+            // on this thread, to avoid inversion deadlocks.
+            MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread());
+            return sRegistryMutex;
+          }()) {
+      ThreadRegistration::WithOnThreadRef(
+          [](ThreadRegistration::OnThreadRef aOnThreadRef) {
+            aOnThreadRef.mThreadRegistration
+                ->mIsRegistryLockedSharedOnThisThread = true;
+          });
+    }
+
+    ~LockedRegistry() {
+      ThreadRegistration::WithOnThreadRef(
+          [](ThreadRegistration::OnThreadRef aOnThreadRef) {
+            aOnThreadRef.mThreadRegistration
+                ->mIsRegistryLockedSharedOnThisThread = false;
+          });
+    }
+
+    [[nodiscard]] const OffThreadRef* begin() const {
+      return sRegistryContainer.begin();
+    }
+    [[nodiscard]] OffThreadRef* begin() { return sRegistryContainer.begin(); }
+    [[nodiscard]] const OffThreadRef* end() const {
+      return sRegistryContainer.end();
+    }
+    [[nodiscard]] OffThreadRef* end() { return sRegistryContainer.end(); }
+
+   private:
+    RegistryLockShared mRegistryLock;
+  };
+
+  // Call `F(OffThreadRef)` for the given aThreadId.
+  template <typename F>
+  static void WithOffThreadRef(ProfilerThreadId aThreadId, F&& aF) {
+    for (OffThreadRef thread : LockedRegistry{}) {
+      if (thread.UnlockedConstReaderCRef().Info().ThreadId() == aThreadId) {
+        std::forward<F>(aF)(thread);
+        break;
+      }
+    }
+  }
+
+  // Call `F(OffThreadRef)` for the first registered thread whose id equals
+  // aThreadId and return its result. If no registered thread matches, return
+  // aFallbackReturn instead (converted to the return type of `F`).
+  // The temporary LockedRegistry lives for the whole loop, so `F` runs while
+  // the registry is locked; `F` must not export the OffThreadRef.
+  template <typename F, typename FallbackReturn>
+  [[nodiscard]] static auto WithOffThreadRefOr(ProfilerThreadId aThreadId,
+                                               F&& aF,
+                                               FallbackReturn&& aFallbackReturn)
+      -> decltype(std::forward<F>(aF)(std::declval<OffThreadRef>())) {
+    for (OffThreadRef thread : LockedRegistry{}) {
+      if (thread.UnlockedConstReaderCRef().Info().ThreadId() == aThreadId) {
+        return std::forward<F>(aF)(thread);
+      }
+    }
+    return std::forward<FallbackReturn>(aFallbackReturn);
+  }
+
+  static size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) {
+    LockedRegistry lockedRegistry;
+    // "Ex" because we don't count static objects, but we count whatever they
+    // allocated on the heap.
+    size_t bytes = sRegistryContainer.sizeOfExcludingThis(aMallocSizeOf);
+    for (const OffThreadRef& offThreadRef : lockedRegistry) {
+      bytes +=
+          offThreadRef.mThreadRegistration->SizeOfExcludingThis(aMallocSizeOf);
+    }
+    return bytes;
+  }
+
+  static size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) {
+    return SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  // Returns true if the current thread holds sRegistryMutex, either
+  // exclusively (as reported by the mutex itself), or in shared mode as
+  // recorded by the mIsRegistryLockedSharedOnThisThread flag that
+  // LockedRegistry sets in its constructor and clears in its destructor.
+  // NOTE(review): the shared-mode check yields `false` whenever
+  // WithOnThreadRefOr uses its fallback -- presumably when the current thread
+  // is not registered -- so a shared lock taken there would go undetected.
+  [[nodiscard]] static bool IsRegistryMutexLockedOnCurrentThread() {
+    return sRegistryMutex.IsLockedExclusiveOnCurrentThread() ||
+           ThreadRegistration::WithOnThreadRefOr(
+               [](ThreadRegistration::OnThreadRef aOnThreadRef) {
+                 return aOnThreadRef.mThreadRegistration
+                     ->mIsRegistryLockedSharedOnThisThread;
+               },
+               false);
+  }
+
+ private:
+  using RegistryContainer = Vector<OffThreadRef>;
+
+  static RegistryContainer sRegistryContainer;
+
+  // Mutex protecting the registry.
+  // Locking order: Profiler, ThreadRegistry, ThreadRegistration.
+  static RegistryMutex sRegistryMutex;
+
+  // Only allow ThreadRegistration to (un)register itself.
+  friend class ThreadRegistration;
+  static void Register(ThreadRegistration::OnThreadRef aOnThreadRef);
+  static void Unregister(ThreadRegistration::OnThreadRef aOnThreadRef);
+};
+
+}  // namespace mozilla::profiler
+
+#endif  // ProfilerThreadRegistry_h
diff --git a/tools/profiler/public/ProfilerThreadSleep.h b/tools/profiler/public/ProfilerThreadSleep.h
new file mode 100644
index 0000000000..730176d39f
--- /dev/null
+++ b/tools/profiler/public/ProfilerThreadSleep.h
@@ -0,0 +1,58 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// APIs that inform the profiler when a thread is effectively asleep so that we
+// can avoid sampling it more than once.
+
+#ifndef ProfilerThreadSleep_h
+#define ProfilerThreadSleep_h
+
+#ifndef MOZ_GECKO_PROFILER
+
+// This file can be #included unconditionally. However, everything within this
+// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the
+// following macros and functions, which encapsulate the most common operations
+// and thus avoid the need for many #ifdefs.
+
+#  define AUTO_PROFILER_THREAD_SLEEP
+
+static inline void profiler_thread_sleep() {}
+
+static inline void profiler_thread_wake() {}
+
+#else  // !MOZ_GECKO_PROFILER
+
+#  include "mozilla/Attributes.h"
+#  include "mozilla/BaseProfilerRAIIMacro.h"
+
+// These functions tell the profiler that a thread went to sleep so that we can
+// avoid sampling it more than once while it's sleeping. Calling
+// profiler_thread_sleep() twice without an intervening profiler_thread_wake()
+// is an error. Both functions operate the same whether the profiler is
+// active or inactive.
+void profiler_thread_sleep();
+void profiler_thread_wake();
+
+// Mark a thread as asleep within a scope.
+// (See also AUTO_PROFILER_THREAD_WAKE in ProfilerThreadState.h)
+#  define AUTO_PROFILER_THREAD_SLEEP \
+    mozilla::AutoProfilerThreadSleep PROFILER_RAII
+
+namespace mozilla {
+
+// (See also AutoProfilerThreadWake in ProfilerThreadState.h)
+class MOZ_RAII AutoProfilerThreadSleep {
+ public:
+  explicit AutoProfilerThreadSleep() { profiler_thread_sleep(); }
+
+  ~AutoProfilerThreadSleep() { profiler_thread_wake(); }
+};
+
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // ProfilerThreadSleep_h
diff --git a/tools/profiler/public/ProfilerThreadState.h b/tools/profiler/public/ProfilerThreadState.h
new file mode 100644
index 0000000000..6ac48e41dd
--- /dev/null
+++ b/tools/profiler/public/ProfilerThreadState.h
@@ -0,0 +1,128 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains functions that give information about the Profiler state
+// with regards to the current thread.
+
+#ifndef ProfilerThreadState_h
+#define ProfilerThreadState_h
+
+#include "mozilla/ProfilerState.h"
+#include "mozilla/ProfilerThreadRegistration.h"
+#include "mozilla/ProfilerThreadRegistry.h"
+#include "mozilla/ProfilerThreadSleep.h"
+
+// During profiling, if the current thread is registered, return true
+// (regardless of whether it is actively being profiled).
+// (Same caveats and recommended usage as profiler_is_active().)
+[[nodiscard]] inline bool profiler_is_active_and_thread_is_registered() {
+  return profiler_is_active() &&
+         mozilla::profiler::ThreadRegistration::IsRegistered();
+}
+
+// Is the profiler active and unpaused, and is the current thread being
+// profiled for any of the given features? (Same caveats and recommended usage
+// as profiler_is_active().)
+[[nodiscard]] inline bool profiler_thread_is_being_profiled(
+    ThreadProfilingFeatures aThreadProfilingFeatures) {
+  return profiler_is_active_and_unpaused() &&
+         mozilla::profiler::ThreadRegistration::WithOnThreadRefOr(
+             [aThreadProfilingFeatures](
+                 mozilla::profiler::ThreadRegistration::OnThreadRef aTR) {
+               return DoFeaturesIntersect(
+                   aTR.UnlockedConstReaderAndAtomicRWCRef().ProfilingFeatures(),
+                   aThreadProfilingFeatures);
+             },
+             false);
+}
+
+// Is the profiler active and unpaused, and is the given thread being profiled?
+// (Same caveats and recommended usage as profiler_is_active().)
+// Safe to use with the current thread id, or unspecified ProfilerThreadId (same
+// as current thread id).
+[[nodiscard]] inline bool profiler_thread_is_being_profiled(
+    const ProfilerThreadId& aThreadId,
+    ThreadProfilingFeatures aThreadProfilingFeatures) {
+  if (!profiler_is_active_and_unpaused()) {
+    return false;
+  }
+
+  if (!aThreadId.IsSpecified() || aThreadId == profiler_current_thread_id()) {
+    // For the current thread id, use the ThreadRegistration directly, it is
+    // more efficient.
+    return mozilla::profiler::ThreadRegistration::WithOnThreadRefOr(
+        [aThreadProfilingFeatures](
+            mozilla::profiler::ThreadRegistration::OnThreadRef aTR) {
+          return DoFeaturesIntersect(
+              aTR.UnlockedConstReaderAndAtomicRWCRef().ProfilingFeatures(),
+              aThreadProfilingFeatures);
+        },
+        false);
+  }
+
+  // For other threads, go through the ThreadRegistry.
+  return mozilla::profiler::ThreadRegistry::WithOffThreadRefOr(
+      aThreadId,
+      [aThreadProfilingFeatures](
+          mozilla::profiler::ThreadRegistry::OffThreadRef aTR) {
+        return DoFeaturesIntersect(
+            aTR.UnlockedConstReaderAndAtomicRWCRef().ProfilingFeatures(),
+            aThreadProfilingFeatures);
+      },
+      false);
+}
+
+// Is the current thread registered and sleeping?
+[[nodiscard]] inline bool profiler_thread_is_sleeping() {
+  return profiler_is_active() &&
+         mozilla::profiler::ThreadRegistration::WithOnThreadRefOr(
+             [](mozilla::profiler::ThreadRegistration::OnThreadRef aTR) {
+               return aTR.UnlockedConstReaderAndAtomicRWCRef().IsSleeping();
+             },
+             false);
+}
+
+#ifndef MOZ_GECKO_PROFILER
+
+#  define AUTO_PROFILER_THREAD_WAKE
+
+#else  // !MOZ_GECKO_PROFILER
+
+// Mark a thread as awake within a scope.
+// (See also AUTO_PROFILER_THREAD_SLEEP in mozilla/ProfilerThreadSleep.h)
+#  define AUTO_PROFILER_THREAD_WAKE \
+    mozilla::AutoProfilerThreadWake PROFILER_RAII
+
+namespace mozilla {
+
+// Temporarily wake up the profiling of a thread while servicing events such as
+// Asynchronous Procedure Calls (APCs).
+// (See also AutoProfilerThreadSleep in ProfilerThreadSleep.h)
+class MOZ_RAII AutoProfilerThreadWake {
+ public:
+  explicit AutoProfilerThreadWake()
+      : mIssuedWake(profiler_thread_is_sleeping()) {
+    if (mIssuedWake) {
+      profiler_thread_wake();
+    }
+  }
+
+  ~AutoProfilerThreadWake() {
+    if (mIssuedWake) {
+      MOZ_ASSERT(!profiler_thread_is_sleeping());
+      profiler_thread_sleep();
+    }
+  }
+
+ private:
+  bool mIssuedWake;
+};
+
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // ProfilerThreadState_h
diff --git a/tools/profiler/public/ProfilerUtils.h b/tools/profiler/public/ProfilerUtils.h
new file mode 100644
index 0000000000..3969761e18
--- /dev/null
+++ b/tools/profiler/public/ProfilerUtils.h
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfilerUtils_h
+#define ProfilerUtils_h
+
+// This header contains most process- and thread-related functions.
+// It is safe to include unconditionally.
+
+#include "mozilla/BaseProfilerUtils.h"
+
+using ProfilerProcessId = mozilla::baseprofiler::BaseProfilerProcessId;
+using ProfilerThreadId = mozilla::baseprofiler::BaseProfilerThreadId;
+
+// Get the current process's ID.
+[[nodiscard]] ProfilerProcessId profiler_current_process_id();
+
+// Get the current thread's ID.
+[[nodiscard]] ProfilerThreadId profiler_current_thread_id();
+
+// Must be called at least once from the main thread, before any other main-
+// thread id function.
+void profiler_init_main_thread_id();
+
+[[nodiscard]] ProfilerThreadId profiler_main_thread_id();
+
+[[nodiscard]] bool profiler_is_main_thread();
+
+#endif  // ProfilerUtils_h
diff --git a/tools/profiler/public/shared-libraries.h b/tools/profiler/public/shared-libraries.h
new file mode 100644
index 0000000000..dfd3599e71
--- /dev/null
+++ b/tools/profiler/public/shared-libraries.h
@@ -0,0 +1,213 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef SHARED_LIBRARIES_H_
+#define SHARED_LIBRARIES_H_
+
+#ifndef MOZ_GECKO_PROFILER
+#  error This header does not have a useful implementation on your platform!
+#endif
+
+#include "nsNativeCharsetUtils.h"
+#include "nsString.h"
+#include <nsID.h>
+
+#include <algorithm>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+namespace IPC {
+class MessageReader;
+class MessageWriter;
+template <typename T>
+struct ParamTraits;
+}  // namespace IPC
+
+class SharedLibrary {
+ public:
+  SharedLibrary(uintptr_t aStart, uintptr_t aEnd, uintptr_t aOffset,
+                const nsCString& aBreakpadId, const nsCString& aCodeId,
+                const nsString& aModuleName, const nsString& aModulePath,
+                const nsString& aDebugName, const nsString& aDebugPath,
+                const nsCString& aVersion, const char* aArch)
+      : mStart(aStart),
+        mEnd(aEnd),
+        mOffset(aOffset),
+        mBreakpadId(aBreakpadId),
+        mCodeId(aCodeId),
+        mModuleName(aModuleName),
+        mModulePath(aModulePath),
+        mDebugName(aDebugName),
+        mDebugPath(aDebugPath),
+        mVersion(aVersion),
+        mArch(aArch) {}
+
+  bool operator==(const SharedLibrary& other) const {
+    return (mStart == other.mStart) && (mEnd == other.mEnd) &&
+           (mOffset == other.mOffset) && (mModuleName == other.mModuleName) &&
+           (mModulePath == other.mModulePath) &&
+           (mDebugName == other.mDebugName) &&
+           (mDebugPath == other.mDebugPath) &&
+           (mBreakpadId == other.mBreakpadId) && (mCodeId == other.mCodeId) &&
+           (mVersion == other.mVersion) && (mArch == other.mArch);
+  }
+
+  uintptr_t GetStart() const { return mStart; }
+  uintptr_t GetEnd() const { return mEnd; }
+  uintptr_t GetOffset() const { return mOffset; }
+  const nsCString& GetBreakpadId() const { return mBreakpadId; }
+  const nsCString& GetCodeId() const { return mCodeId; }
+  const nsString& GetModuleName() const { return mModuleName; }
+  const nsString& GetModulePath() const { return mModulePath; }
+  // Returns mDebugPath converted through NS_CopyUnicodeToNative (presumably
+  // into the platform's native charset) as a newly-built std::string.
+  // The return type is a plain (non-const) value so that callers can move
+  // from the result instead of copying it.
+  // NOTE(review): any status reported by NS_CopyUnicodeToNative is ignored
+  // here; on failure the returned string is whatever was written to the
+  // buffer -- confirm this best-effort behavior is intended.
+  std::string GetNativeDebugPath() const {
+    nsAutoCString debugPathStr;
+
+    NS_CopyUnicodeToNative(mDebugPath, debugPathStr);
+
+    return debugPathStr.get();
+  }
+  const nsString& GetDebugName() const { return mDebugName; }
+  const nsString& GetDebugPath() const { return mDebugPath; }
+  const nsCString& GetVersion() const { return mVersion; }
+  const std::string& GetArch() const { return mArch; }
+  // Approximate memory footprint of this entry, in bytes: the object itself
+  // plus the current length of every string member.
+  // The nsString lengths are doubled, presumably because each of their
+  // characters is 2 bytes wide (UTF-16) -- TODO confirm.
+  // Only the strings' current lengths are counted, not their capacities.
+  size_t SizeOf() const {
+    return sizeof *this + mBreakpadId.Length() + mCodeId.Length() +
+           mModuleName.Length() * 2 + mModulePath.Length() * 2 +
+           mDebugName.Length() * 2 + mDebugPath.Length() * 2 +
+           mVersion.Length() + mArch.size();
+  }
+
+  SharedLibrary() : mStart{0}, mEnd{0}, mOffset{0} {}
+
+ private:
+  uintptr_t mStart;
+  uintptr_t mEnd;
+  uintptr_t mOffset;
+  nsCString mBreakpadId;
+  // A string carrying an identifier for a binary.
+  //
+  // All platforms have different formats:
+  // - Windows: The code ID for a Windows PE file.
+  //  It's the PE timestamp and PE image size.
+  // - macOS: The code ID for a macOS / iOS binary (mach-O).
+  //  It's the mach-O UUID without dashes and without the trailing 0 for the
+  //  breakpad ID.
+  // - Linux/Android: The code ID for a Linux ELF file.
+  //  It's the complete build ID, as hex string.
+  nsCString mCodeId;
+  nsString mModuleName;
+  nsString mModulePath;
+  nsString mDebugName;
+  nsString mDebugPath;
+  nsCString mVersion;
+  std::string mArch;
+
+  friend struct IPC::ParamTraits<SharedLibrary>;
+};
+
+static bool CompareAddresses(const SharedLibrary& first,
+                             const SharedLibrary& second) {
+  return first.GetStart() < second.GetStart();
+}
+
+class SharedLibraryInfo {
+ public:
+  static SharedLibraryInfo GetInfoForSelf();
+#ifdef XP_WIN
+  static SharedLibraryInfo GetInfoFromPath(const wchar_t* aPath);
+#endif
+
+  static void Initialize();
+
+  // Appends `entry` at the end of the list (no sorting or deduplication is
+  // done here; see SortByAddress() and DeduplicateEntries()).
+  void AddSharedLibrary(SharedLibrary entry) {
+    // `entry` is already our own by-value copy, so move it into the vector
+    // instead of copying all of its strings a second time.
+    mEntries.push_back(std::move(entry));
+  }
+
+  void AddAllSharedLibraries(const SharedLibraryInfo& sharedLibraryInfo) {
+    mEntries.insert(mEntries.end(), sharedLibraryInfo.mEntries.begin(),
+                    sharedLibraryInfo.mEntries.end());
+  }
+
+  const SharedLibrary& GetEntry(size_t i) const { return mEntries[i]; }
+
+  SharedLibrary& GetMutableEntry(size_t i) { return mEntries[i]; }
+
+  // Removes items in the range [first, last)
+  // i.e. element at the "last" index is not removed
+  void RemoveEntries(size_t first, size_t last) {
+    mEntries.erase(mEntries.begin() + first, mEntries.begin() + last);
+  }
+
+  bool Contains(const SharedLibrary& searchItem) const {
+    return (mEntries.end() !=
+            std::find(mEntries.begin(), mEntries.end(), searchItem));
+  }
+
+  size_t GetSize() const { return mEntries.size(); }
+
+  void SortByAddress() {
+    std::sort(mEntries.begin(), mEntries.end(), CompareAddresses);
+  }
+
+  // Remove duplicate entries from the vector.
+  //
+  // We purposefully don't use the operator== implementation of SharedLibrary
+  // because it compares all the fields including mStart, mEnd and mOffset which
+  // are not the same across different processes.
+  void DeduplicateEntries() {
+    // Identity of a library for deduplication purposes: only its module name
+    // and breakpad id.
+    const auto identityOf = [](const SharedLibrary& aLib) {
+      return std::tie(aLib.GetModuleName(), aLib.GetBreakpadId());
+    };
+    // Sort by identity first, because std::unique can only remove
+    // *consecutive* duplicate elements.
+    std::sort(mEntries.begin(), mEntries.end(),
+              [&](const SharedLibrary& a, const SharedLibrary& b) {
+                return identityOf(a) < identityOf(b);
+              });
+    // Entries with equal identities are now adjacent: keep the first of each
+    // run and erase the rest.
+    const auto firstDuplicate =
+        std::unique(mEntries.begin(), mEntries.end(),
+                    [&](const SharedLibrary& a, const SharedLibrary& b) {
+                      return identityOf(a) == identityOf(b);
+                    });
+    mEntries.erase(firstDuplicate, mEntries.end());
+  }
+
+  void Clear() { mEntries.clear(); }
+
+  size_t SizeOf() const {
+    size_t size = 0;
+
+    for (const auto& item : mEntries) {
+      size += item.SizeOf();
+    }
+
+    return size;
+  }
+
+ private:
+  std::vector<SharedLibrary> mEntries;
+
+  friend struct IPC::ParamTraits<SharedLibraryInfo>;
+};
+
+namespace IPC {
+template <>
+struct ParamTraits<SharedLibrary> {
+  typedef SharedLibrary paramType;
+
+  static void Write(MessageWriter* aWriter, const paramType& aParam);
+  static bool Read(MessageReader* aReader, paramType* aResult);
+};
+
+template <>
+struct ParamTraits<SharedLibraryInfo> {
+  typedef SharedLibraryInfo paramType;
+
+  static void Write(MessageWriter* aWriter, const paramType& aParam);
+  static bool Read(MessageReader* aReader, paramType* aResult);
+};
+}  // namespace IPC
+
+#endif
diff --git a/tools/profiler/rust-api/Cargo.toml b/tools/profiler/rust-api/Cargo.toml
new file mode 100644
index 0000000000..93800051e4
--- /dev/null
+++ b/tools/profiler/rust-api/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "gecko-profiler"
+version = "0.1.0"
+authors = ["The Mozilla Project Developers"]
+edition = "2018"
+license = "MPL-2.0"
+
+[dependencies]
+profiler-macros = { path = "./macros" }
+lazy_static = "1"
+serde = { version = "1.0", features = ["derive"] }
+bincode = "1"
+mozbuild = "0.1"
+
+[build-dependencies]
+lazy_static = "1"
+bindgen = {version = "0.64", default-features = false}
+mozbuild = "0.1"
+
+[features]
+# This feature is being set by Gecko. If it's not set, all public functions and
+# structs will be no-op.
+enabled = []
diff --git a/tools/profiler/rust-api/README.md b/tools/profiler/rust-api/README.md
new file mode 100644
index 0000000000..60926a85c7
--- /dev/null
+++ b/tools/profiler/rust-api/README.md
@@ -0,0 +1,5 @@
+# Gecko Profiler API for Rust
+
+This crate is the collection of all the API endpoints for Gecko Profiler. Please use this crate instead of using raw FFI calls.
+
+See the module documentation for more information about the specific API endpoints.
diff --git a/tools/profiler/rust-api/build.rs b/tools/profiler/rust-api/build.rs
new file mode 100644
index 0000000000..2dd70ed55c
--- /dev/null
+++ b/tools/profiler/rust-api/build.rs
@@ -0,0 +1,118 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+//! Build script for the Gecko Profiler bindings.
+//!
+//! This file is executed by cargo when this crate is built. It generates the
+//! `$OUT_DIR/bindings.rs` file which is then included by `src/gecko_bindings/mod.rs`.
+
+#[macro_use]
+extern crate lazy_static;
+
+use bindgen::{Builder, CargoCallbacks, CodegenConfig};
+use std::env;
+use std::fs;
+use std::path::PathBuf;
+
+const BINDINGS_FILE: &str = "bindings.rs";
+
+lazy_static! {
+    /// Directory (`$OUT_DIR/gecko`) that receives the generated bindings.
+    static ref OUTDIR_PATH: PathBuf = PathBuf::from(env::var_os("OUT_DIR").unwrap()).join("gecko");
+
+    /// Extra clang arguments, read as whitespace-separated tokens from the
+    /// `extra-bindgen-flags` file under `mozbuild::TOPOBJDIR`.
+    static ref BINDGEN_FLAGS: Vec<String> = {
+        let path = mozbuild::TOPOBJDIR.join("tools/profiler/rust-api/extra-bindgen-flags");
+        println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
+        let contents = fs::read_to_string(path).expect("Failed to read extra-bindgen-flags file");
+        contents.split_whitespace().map(String::from).collect()
+    };
+
+    /// Include directories searched for headers and passed to clang via `-I`.
+    static ref SEARCH_PATHS: Vec<PathBuf> = vec![
+        mozbuild::TOPOBJDIR.join("dist/include"),
+        mozbuild::TOPOBJDIR.join("dist/include/nspr"),
+    ];
+}
+
+/// Looks up the header `name` in the include search paths.
+/// Returns the first `SEARCH_PATHS` entry joined with `name` that is a file,
+/// or `None` when no search path contains it.
+fn search_include(name: &str) -> Option<PathBuf> {
+    SEARCH_PATHS
+        .iter()
+        .map(|dir| dir.join(name))
+        .find(|candidate| candidate.is_file())
+}
+
+/// Resolves `name` through `search_include`, registers the resolved path with
+/// cargo's `rerun-if-changed`, and returns it as a `String`.
+/// Panics if the header is not found or its path is not valid UTF-8.
+fn add_include(name: &str) -> String {
+    let header = search_include(name).unwrap_or_else(|| panic!("Include not found: {}", name));
+    let header_path = header.to_str().unwrap().to_owned();
+    println!("cargo:rerun-if-changed={}", header_path);
+    header_path
+}
+
+/// Runs bindgen over `GeckoProfiler.h` and `ProfilerBindings.h` and writes the
+/// generated Rust bindings to `OUTDIR_PATH/BINDINGS_FILE`.
+/// Panics if a header cannot be found, generation fails, or the write fails.
+fn generate_bindings() {
+    let base = Builder::default()
+        .enable_cxx_namespaces()
+        .with_codegen_config(CodegenConfig::TYPES | CodegenConfig::VARS | CodegenConfig::FUNCTIONS)
+        .disable_untagged_union()
+        .size_t_is_usize(true);
+
+    // clang arguments, in order: every search path as `-I`, the forced
+    // `mozilla-config.h` include, then the build-specific extra flags.
+    let with_search_paths = SEARCH_PATHS.iter().fold(base, |b, dir| {
+        b.clang_arg("-I").clang_arg(dir.to_str().unwrap())
+    });
+    let with_config = with_search_paths
+        .clang_arg("-include")
+        .clang_arg(add_include("mozilla-config.h"));
+    let builder = BINDGEN_FLAGS
+        .iter()
+        .fold(with_config, |b, flag| b.clang_arg(flag));
+
+    let bindings = builder
+        .header(add_include("GeckoProfiler.h"))
+        .header(add_include("ProfilerBindings.h"))
+        .allowlist_function("gecko_profiler_.*")
+        .allowlist_var("mozilla::profiler::detail::RacyFeatures::sActiveAndFeatures")
+        .allowlist_type("mozilla::profiler::detail::RacyFeatures")
+        .rustified_enum("mozilla::StackCaptureOptions")
+        .rustified_enum("mozilla::MarkerSchema_Location")
+        .rustified_enum("mozilla::MarkerSchema_Format")
+        .rustified_enum("mozilla::MarkerSchema_Searchable")
+        // MarkerSchema has std::string fields; making std::string opaque
+        // lets some platforms build that otherwise fail.
+        .opaque_type("std::string")
+        // std::vector must be opaque as well: otherwise bindgen can't find
+        // its size properly, MarkerSchema's total size shrinks, and that
+        // causes a heap buffer overflow.
+        .opaque_type("std::vector")
+        .raw_line("pub use self::root::*;")
+        // Have cargo rebuild the crate whenever an included header changes.
+        .parse_callbacks(Box::new(CargoCallbacks))
+        // Run bindgen; a failure here aborts the build script with a panic.
+        .generate()
+        .expect("Unable to generate bindings");
+
+    // Written where `src/gecko_bindings/mod.rs` includes it from.
+    bindings
+        .write_to_file(OUTDIR_PATH.join(BINDINGS_FILE))
+        .expect("Couldn't write bindings!");
+}
+
+fn main() {
+    println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:out_dir={}", env::var("OUT_DIR").unwrap());
+    // Create `$OUT_DIR/gecko` up front; the bindings are written into it.
+    fs::create_dir_all(&*OUTDIR_PATH).unwrap();
+    generate_bindings();
+}
diff --git a/tools/profiler/rust-api/cbindgen.toml b/tools/profiler/rust-api/cbindgen.toml
new file mode 100644
index 0000000000..3f0df0f34f
--- /dev/null
+++ b/tools/profiler/rust-api/cbindgen.toml
@@ -0,0 +1,15 @@
+header = """/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
+autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */
+#ifndef ProfilerRustBindings_h
+#error "Don't include this file directly, instead include ProfilerRustBindings.h"
+#endif
+"""
+include_version = true
+braces = "SameLine"
+line_length = 100
+tab_width = 2
+language = "C++"
+# Put FFI calls in the `mozilla::profiler::ffi` namespace.
+namespaces = ["mozilla", "profiler", "ffi"]
diff --git a/tools/profiler/rust-api/extra-bindgen-flags.in b/tools/profiler/rust-api/extra-bindgen-flags.in
new file mode 100644
index 0000000000..b0275a031b
--- /dev/null
+++ b/tools/profiler/rust-api/extra-bindgen-flags.in
@@ -0,0 +1 @@
+@BINDGEN_SYSTEM_FLAGS@ @NSPR_CFLAGS@
diff --git a/tools/profiler/rust-api/macros/Cargo.toml b/tools/profiler/rust-api/macros/Cargo.toml
new file mode 100644
index 0000000000..b8bd9910dc
--- /dev/null
+++ b/tools/profiler/rust-api/macros/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "profiler-macros"
+version = "0.1.0"
+authors = ["The Mozilla Project Developers"]
+edition = "2018"
+license = "MPL-2.0"
+
+[lib]
+proc-macro = true
+
+[dependencies]
+syn = "1"
+quote = "1.0"
diff --git a/tools/profiler/rust-api/macros/src/lib.rs b/tools/profiler/rust-api/macros/src/lib.rs
new file mode 100644
index 0000000000..48617b758e
--- /dev/null
+++ b/tools/profiler/rust-api/macros/src/lib.rs
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#![deny(warnings)]
+
+//! A procedural macro that is syntactic sugar for the `gecko_profiler_label!`
+//! macro. You can use this macro on top of functions to automatically prepend
+//! the label frame to the function body.
+//!
+//! Example usage:
+//! ```rust
+//! #[gecko_profiler_fn_label(DOM)]
+//! fn foo(bar: u32) -> u32 {
+//!     bar
+//! }
+//!
+//! #[gecko_profiler_fn_label(JavaScript, IonMonkey)]
+//! pub fn bar(baz: i8) -> i8 {
+//!     baz
+//! }
+//! ```
+//!
+//! See the documentation of `gecko_profiler_label!` macro to learn more about
+//! its parameters.
+
+extern crate proc_macro;
+
+use proc_macro::TokenStream;
+use quote::quote;
+use syn::{parse_macro_input, AttributeArgs, ItemFn};
+
+/// Attribute macro that inserts a `gecko_profiler_label!` invocation at the
+/// top of the annotated function's body. Takes a category and, optionally, a
+/// subcategory; any other argument count panics during expansion.
+#[proc_macro_attribute]
+pub fn gecko_profiler_fn_label(attrs: TokenStream, input: TokenStream) -> TokenStream {
+    let attr_args = parse_macro_input!(attrs as AttributeArgs);
+    let ItemFn {
+        attrs,
+        vis,
+        sig,
+        block,
+    } = parse_macro_input!(input as ItemFn);
+
+    // One argument is a bare category; two are a category plus subcategory.
+    // The subcategory, when present, is emitted with its leading comma.
+    let (category_name, subcategory_if_provided) = match attr_args.as_slice() {
+        [category] => (category, quote!()),
+        [category, subcategory] => (category, quote!(, #subcategory)),
+        _ => panic!("Expected one or two arguments as ProfilingCategory or ProfilingCategoryPair but {} arguments provided!", attr_args.len()),
+    };
+
+    // Re-emit the function unchanged except for the label macro call that is
+    // placed ahead of the original statements.
+    let stmts = &block.stmts;
+    let new_fn = quote! {
+        #(#attrs)* #vis #sig {
+          gecko_profiler_label!(#category_name#subcategory_if_provided);
+          #(#stmts)*
+        }
+    };
+
+    new_fn.into()
+}
diff --git a/tools/profiler/rust-api/src/gecko_bindings/glue.rs b/tools/profiler/rust-api/src/gecko_bindings/glue.rs
new file mode 100644
index 0000000000..531f727a00
--- /dev/null
+++ b/tools/profiler/rust-api/src/gecko_bindings/glue.rs
@@ -0,0 +1,53 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use crate::gecko_bindings::{bindings, structs::mozilla};
+use crate::json_writer::JSONWriter;
+use crate::marker::deserializer_tags_state::{
+    get_marker_type_functions_read_guard, MarkerTypeFunctions,
+};
+use std::ops::DerefMut;
+use std::os::raw::{c_char, c_void};
+
+/// FFI entry point: streams the marker registered under `deserializer_tag`
+/// into `json_writer`, writing its "type" property and then its payload.
+///
+/// `payload`/`payload_size` are passed unchecked to the marker type's
+/// `transmute_and_stream_fn`; the marker type table stays read-locked meanwhile.
+#[no_mangle]
+pub unsafe extern "C" fn gecko_profiler_serialize_marker_for_tag(
+    deserializer_tag: u8,
+    payload: *const u8,
+    payload_size: usize,
+    json_writer: &mut mozilla::baseprofiler::SpliceableJSONWriter,
+) {
+    let guard = get_marker_type_functions_read_guard();
+    let functions: &MarkerTypeFunctions = guard.get(deserializer_tag);
+    let mut writer = JSONWriter::new(json_writer);
+
+    // The type name goes first, followed by the deserialized payload.
+    writer.string_property("type", (functions.marker_type_name_fn)());
+    (functions.transmute_and_stream_fn)(payload, payload_size, &mut writer);
+}
+
+/// FFI entry point: streams the schema of every registered Rust marker type
+/// into `json_writer`, forwarding `streamed_names_set` to the C++ side as-is.
+#[no_mangle]
+pub unsafe extern "C" fn gecko_profiler_stream_marker_schemas(
+    json_writer: &mut mozilla::baseprofiler::SpliceableJSONWriter,
+    streamed_names_set: *mut c_void,
+) {
+    let guard = get_marker_type_functions_read_guard();
+    for functions in guard.iter() {
+        let name = (functions.marker_type_name_fn)();
+        let mut schema = (functions.marker_type_display_fn)();
+        bindings::gecko_profiler_marker_schema_stream(
+            json_writer,
+            name.as_ptr() as *const c_char,
+            name.len(),
+            schema.pin.deref_mut().as_mut_ptr(),
+            streamed_names_set,
+        )
+    }
+}
diff --git a/tools/profiler/rust-api/src/gecko_bindings/mod.rs b/tools/profiler/rust-api/src/gecko_bindings/mod.rs
new file mode 100644
index 0000000000..f1ec667bb2
--- /dev/null
+++ b/tools/profiler/rust-api/src/gecko_bindings/mod.rs
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+//! Gecko's C++ bindings for the profiler.
+
+#[allow(
+    dead_code,
+    non_camel_case_types,
+    non_snake_case,
+    non_upper_case_globals,
+    missing_docs
+)]
+pub mod structs {
+    include!(concat!(env!("OUT_DIR"), "/gecko/bindings.rs")); // generated by build.rs
+}
+
+pub use self::structs as bindings;
+
+mod glue;
+pub mod profiling_categories;
diff --git a/tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs b/tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs
new file mode 100644
index 0000000000..0f24aa9c35
--- /dev/null
+++ b/tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs
@@ -0,0 +1,32 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! This file contains the generated ProfilingCategory and ProfilingCategoryPair enums.
+//!
+//! The contents of this module are generated by
+//! `mozglue/baseprofiler/generate_profiling_categories.py`, from
+//! `mozglue/baseprofiler/core/profiling_categories.yaml`.
+
+include!(mozbuild::objdir_path!(
+    "tools/profiler/rust-api/src/gecko_bindings/profiling_categories.rs"
+));
+
+/// Builds a `ProfilingCategoryPair` from either a category alone or a
+/// category plus a sub category. With a single argument the variant is
+/// constructed with `None`; with two, with `Some($category::$subcategory)`.
+/// See `profiling_categories.yaml` or the generated `profiling_categories.rs`
+/// for the available names. `gecko_profiler_label!` accepts the same syntax.
+///
+/// Example usages:
+///  - `gecko_profiler_category!(DOM)`
+///  - `gecko_profiler_category!(JavaScript, Parsing)`
+#[macro_export]
+macro_rules! gecko_profiler_category {
+    ($category:ident) => {
+        $crate::ProfilingCategoryPair::$category(None)
+    };
+    ($category:ident, $subcategory:ident) => {
+        $crate::ProfilingCategoryPair::$category(Some($crate::$category::$subcategory))
+    };
+}
diff --git a/tools/profiler/rust-api/src/json_writer.rs b/tools/profiler/rust-api/src/json_writer.rs
new file mode 100644
index 0000000000..8ab6f2ed99
--- /dev/null
+++ b/tools/profiler/rust-api/src/json_writer.rs
@@ -0,0 +1,86 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Gecko JSON writer support for marker API.
+
+use crate::gecko_bindings::{bindings, structs::mozilla};
+use std::os::raw::c_char;
+
+/// Wrapper around a mutable borrow of the C++ SpliceableJSONWriter object. Its
+/// methods add properties of various types to the JSON being written.
+#[derive(Debug)]
+pub struct JSONWriter<'a>(&'a mut mozilla::baseprofiler::SpliceableJSONWriter);
+
+impl<'a> JSONWriter<'a> {
+    /// Constructor for the JSONWriter object. Stores the given mutable
+    /// reference to a C++ SpliceableJSONWriter; every property method
+    /// forwards to the C++ bindings through it.
+    pub(crate) fn new(json_writer: &'a mut mozilla::baseprofiler::SpliceableJSONWriter) -> Self {
+        Self(json_writer)
+    }
+
+    /// Splits `text` into the pointer + byte length pair that the bindings
+    /// take for string arguments. The pointer borrows from `text`; nothing
+    /// is copied and no NUL terminator is added.
+    fn raw_parts(text: &str) -> (*const c_char, usize) {
+        (text.as_ptr() as *const c_char, text.len())
+    }
+
+    /// Adds an int property to the JSON.
+    /// Prints: "<name>": <value>
+    pub fn int_property(&mut self, name: &str, value: i64) {
+        let (ptr, len) = Self::raw_parts(name);
+        // `ptr`/`len` describe `name`, which stays borrowed across the call.
+        unsafe {
+            bindings::gecko_profiler_json_writer_int_property(self.0, ptr, len, value);
+        }
+    }
+
+    /// Adds a float property to the JSON.
+    /// Prints: "<name>": <value>
+    pub fn float_property(&mut self, name: &str, value: f64) {
+        let (ptr, len) = Self::raw_parts(name);
+        // `ptr`/`len` describe `name`, which stays borrowed across the call.
+        unsafe {
+            bindings::gecko_profiler_json_writer_float_property(self.0, ptr, len, value);
+        }
+    }
+
+    /// Adds a bool property to the JSON.
+    /// Prints: "<name>": <value>
+    pub fn bool_property(&mut self, name: &str, value: bool) {
+        let (ptr, len) = Self::raw_parts(name);
+        // `ptr`/`len` describe `name`, which stays borrowed across the call.
+        unsafe {
+            bindings::gecko_profiler_json_writer_bool_property(self.0, ptr, len, value);
+        }
+    }
+
+    /// Adds a string property to the JSON.
+    /// Prints: "<name>": "<value>"
+    pub fn string_property(&mut self, name: &str, value: &str) {
+        let (name_ptr, name_len) = Self::raw_parts(name);
+        let (value_ptr, value_len) = Self::raw_parts(value);
+        // Both pairs describe strings that stay borrowed across the call.
+        unsafe {
+            bindings::gecko_profiler_json_writer_string_property(
+                self.0,
+                name_ptr,
+                name_len,
+                value_ptr,
+                value_len,
+            );
+        }
+    }
+
+    /// Adds a null property to the JSON.
+    /// Prints: "<name>": null
+    pub fn null_property(&mut self, name: &str) {
+        let (ptr, len) = Self::raw_parts(name);
+        // `ptr`/`len` describe `name`, which stays borrowed across the call.
+        unsafe {
+            bindings::gecko_profiler_json_writer_null_property(self.0, ptr, len);
+        }
+    }
+}
diff --git a/tools/profiler/rust-api/src/label.rs b/tools/profiler/rust-api/src/label.rs
new file mode 100644
index 0000000000..10970c90ad
--- /dev/null
+++ b/tools/profiler/rust-api/src/label.rs
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Gecko profiler label support.
+//!
+//! Use the `gecko_profiler_label!` macro directly instead of using `AutoProfilerLabel`.
+//! See the `gecko_profiler_label!` macro documentation on how to use it.
+
+#[cfg(feature = "enabled")]
+use crate::gecko_bindings::{
+    bindings, profiling_categories::ProfilingCategoryPair, structs::mozilla,
+};
+
+/// RAII object that constructs and destroys a C++ AutoProfilerLabel object
+/// pointed to by the specified reference.
+/// Use `gecko_profiler_label!` macro directly instead of this, if possible.
+#[cfg(feature = "enabled")]
+pub struct AutoProfilerLabel<'a>(&'a mut mozilla::AutoProfilerLabel);
+
+#[cfg(feature = "enabled")]
+impl<'a> AutoProfilerLabel<'a> {
+    /// Constructs the C++ AutoProfilerLabel in place inside `label`, using
+    /// `category_pair` converted to its C++ enum value, and wraps it.
+    ///
+    /// # Safety
+    /// The caller must ensure that `label` is allocated on the stack.
+    #[inline]
+    pub unsafe fn new(
+        label: &mut std::mem::MaybeUninit<mozilla::AutoProfilerLabel>,
+        category_pair: ProfilingCategoryPair,
+    ) -> AutoProfilerLabel {
+        let raw_label = label.as_mut_ptr();
+        let cpp_category = category_pair.to_cpp_enum_value();
+        bindings::gecko_profiler_construct_label(raw_label, cpp_category);
+        AutoProfilerLabel(&mut *raw_label)
+    }
+}
+
+#[cfg(feature = "enabled")]
+impl<'a> Drop for AutoProfilerLabel<'a> {
+    #[inline]
+    fn drop(&mut self) {
+        unsafe {
+            bindings::gecko_profiler_destruct_label(self.0); // tears down the wrapped C++ label
+        }
+    }
+}
+
+/// Place a Gecko profiler label on the stack.
+///
+/// The first `category` argument must be the name of a variant of `ProfilingCategoryPair`
+/// and the second optional `subcategory` argument must be one of the sub variants of
+/// `ProfilingCategoryPair`. All options can be seen either in the
+/// profiling_categories.yaml file or generated profiling_categories.rs file.
+///
+/// Example usage:
+/// ```rust
+/// gecko_profiler_label!(Layout);
+/// gecko_profiler_label!(JavaScript, Parsing);
+/// ```
+/// You can wrap this macro with a block to only label a specific part of a function.
+#[cfg(feature = "enabled")]
+#[macro_export]
+macro_rules! gecko_profiler_label {
+    ($category:ident) => {
+        $crate::gecko_profiler_label!($crate::ProfilingCategoryPair::$category(None))
+    };
+    ($category:ident, $subcategory:ident) => {
+        $crate::gecko_profiler_label!($crate::ProfilingCategoryPair::$category(Some(
+            $crate::$category::$subcategory
+        )))
+    };
+    // Arms above recurse via `$crate::` so path-qualified invocations resolve too.
+    ($category_path:expr) => {
+        let mut _profiler_label = ::std::mem::MaybeUninit::<
+            $crate::gecko_bindings::structs::mozilla::AutoProfilerLabel,
+        >::uninit();
+        let _profiler_label = if $crate::is_active() {
+            unsafe {
+                Some($crate::AutoProfilerLabel::new(
+                    &mut _profiler_label,
+                    $category_path,
+                ))
+            }
+        } else {
+            None
+        };
+    };
+}
+
+/// No-op when the `enabled` feature is off (MOZ_GECKO_PROFILER is not defined).
+#[cfg(not(feature = "enabled"))]
+#[macro_export]
+macro_rules! gecko_profiler_label {
+    ($category:ident, $subcategory:ident) => {};
+    ($category_path:expr) => {}; // also matches a lone category identifier
+}
+
+#[cfg(test)]
+mod tests {
+    use profiler_macros::gecko_profiler_fn_label;
+
+    #[test]
+    fn test_gecko_profiler_label() {
+        gecko_profiler_label!(Layout);
+        gecko_profiler_label!(JavaScript, Parsing);
+    }
+
+    #[gecko_profiler_fn_label(DOM)]
+    fn foo(bar: u32) -> u32 {
+        bar
+    }
+
+    #[gecko_profiler_fn_label(JavaScript, IonMonkey)]
+    pub(self) fn bar(baz: i8) -> i8 {
+        baz
+    }
+
+    struct A;
+
+    impl A {
+        #[gecko_profiler_fn_label(Idle)]
+        pub fn test(&self) -> i8 {
+            1
+        }
+    }
+
+    #[test]
+    fn test_gecko_profiler_fn_label() {
+        let _: u32 = foo(100000);
+        let _: i8 = bar(127);
+
+        let a = A;
+        let _ = a.test(); // `test` takes no argument besides `&self`
+    }
+}
diff --git a/tools/profiler/rust-api/src/lib.rs b/tools/profiler/rust-api/src/lib.rs
new file mode 100644
index 0000000000..3c857ae8ac
--- /dev/null
+++ b/tools/profiler/rust-api/src/lib.rs
@@ -0,0 +1,29 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Profiler Rust API
+
+#[macro_use]
+extern crate lazy_static;
+
+pub mod gecko_bindings;
+mod json_writer;
+mod label;
+mod marker;
+mod profiler_state;
+mod thread;
+mod time;
+
+pub use gecko_bindings::profiling_categories::*;
+pub use json_writer::*;
+pub use label::*;
+pub use marker::options::*;
+pub use marker::schema::MarkerSchema;
+pub use marker::*;
+pub use profiler_macros::gecko_profiler_fn_label;
+pub use profiler_state::*;
+pub use thread::*;
+pub use time::*;
+
+pub use serde::{Deserialize, Serialize};
diff --git a/tools/profiler/rust-api/src/marker/deserializer_tags_state.rs b/tools/profiler/rust-api/src/marker/deserializer_tags_state.rs
new file mode 100644
index 0000000000..890cc3f263
--- /dev/null
+++ b/tools/profiler/rust-api/src/marker/deserializer_tags_state.rs
@@ -0,0 +1,116 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::json_writer::JSONWriter;
+use crate::marker::schema::MarkerSchema;
+use crate::marker::{transmute_and_stream, ProfilerMarker};
+use std::collections::HashMap;
+use std::sync::{RwLock, RwLockReadGuard};
+
+lazy_static! {
+    static ref DESERIALIZER_TAGS_STATE: RwLock<DeserializerTagsState> =
+        RwLock::new(DeserializerTagsState::new()); // starts out with no marker types
+}
+
+/// A state that keeps track of marker types and their deserializer tags.
+/// Entries are added during marker insertion and read during marker serialization.
+pub struct DeserializerTagsState {
+    /// The C++ side accepts only u8 values, but we only know usize values as the
+    /// unique marker type values. So, we need to keep track of each
+    /// "marker tag -> deserializer tag" conversion to directly get the
+    /// deserializer tags of the already added marker types.
+    pub marker_tag_to_deserializer_tag: HashMap<usize, u8>,
+    /// Vector of marker type functions.
+    /// 1-based, i.e.: [0] -> tag 1. Elements are pushed to the end of the vector
+    /// whenever a new marker type is used in a Firefox session; the content is
+    /// kept between profiler runs in that session. On the C++ side, we have the
+    /// same algorithm (although it's a sized array). See `sMarkerTypeFunctions1Based`.
+    pub marker_type_functions_1_based: Vec<MarkerTypeFunctions>,
+}
+
+/// Functions that are stored per marker type, so we can serialize the marker
+/// schema and stream the marker payload for a specific type.
+pub struct MarkerTypeFunctions {
+    /// A function that returns the name of the marker type.
+    pub marker_type_name_fn: fn() -> &'static str,
+    /// A function that returns a `MarkerSchema`, which contains all the
+    /// information needed to stream the display schema associated with a
+    /// marker type.
+    pub marker_type_display_fn: fn() -> MarkerSchema,
+    /// A function that reads a serialized payload from bytes and streams it
+    /// as JSON object properties.
+    pub transmute_and_stream_fn:
+        unsafe fn(payload: *const u8, payload_size: usize, json_writer: &mut JSONWriter),
+}
+
+impl DeserializerTagsState {
+    fn new() -> Self {
+        Self {
+            marker_tag_to_deserializer_tag: HashMap::new(), // no tags handed out yet
+            marker_type_functions_1_based: Vec::new(), // no marker types registered yet
+        }
+    }
+}
+
+/// Get or insert the deserializer tag for each marker type. The tag storage
+/// is limited to 255 marker types. This is the same with the C++ side. It's
+/// unlikely to reach to this limit, but if that's the case, C++ side needs
+/// to change the uint8_t type for the deserializer tag as well.
+/// Panics when no free tag is left instead of wrapping to an invalid tag.
+pub fn get_or_insert_deserializer_tag<T>() -> u8
+where
+    T: ProfilerMarker,
+{
+    let unique_marker_tag = &T::marker_type_name as *const _ as usize;
+    let mut state = DESERIALIZER_TAGS_STATE.write().unwrap();
+
+    if let Some(&known_tag) = state.marker_tag_to_deserializer_tag.get(&unique_marker_tag) {
+        return known_tag;
+    }
+
+    // Checked in release builds too: `len() as u8 + 1` would otherwise wrap
+    // once the table is full and silently alias an existing tag.
+    let registered = state.marker_type_functions_1_based.len();
+    assert!(
+        registered < u8::MAX as usize,
+        "Too many rust marker payload types! Please consider increasing the profiler \
+         buffer tag size."
+    );
+    let deserializer_tag = registered as u8 + 1;
+
+    state
+        .marker_tag_to_deserializer_tag
+        .insert(unique_marker_tag, deserializer_tag);
+    state.marker_type_functions_1_based.push(MarkerTypeFunctions {
+        marker_type_name_fn: T::marker_type_name,
+        marker_type_display_fn: T::marker_type_display,
+        transmute_and_stream_fn: transmute_and_stream::<T>,
+    });
+    deserializer_tag
+}
+
+/// Read guard over the tags state, used by the marker FFI functions to get marker type functions.
+pub struct MarkerTypeFunctionsReadGuard {
+    guard: RwLockReadGuard<'static, DeserializerTagsState>,
+}
+
+impl MarkerTypeFunctionsReadGuard {
+    pub fn iter<'a>(&'a self) -> impl Iterator<Item = &'a MarkerTypeFunctions> {
+        self.guard.marker_type_functions_1_based.iter()
+    }
+
+    pub fn get<'a>(&'a self, deserializer_tag: u8) -> &'a MarkerTypeFunctions {
+        (deserializer_tag as usize)
+            .checked_sub(1) // tags are 1-based; tag 0 has no slot and fails like any unknown tag
+            .and_then(|index| self.guard.marker_type_functions_1_based.get(index))
+            .expect("Failed to find the marker type functions for given deserializer tag")
+    }
+}
+
+/// Read-locks DESERIALIZER_TAGS_STATE and returns the guard; panics if the lock is poisoned.
+pub fn get_marker_type_functions_read_guard() -> MarkerTypeFunctionsReadGuard {
+    MarkerTypeFunctionsReadGuard {
+        guard: DESERIALIZER_TAGS_STATE.read().unwrap(),
+    }
+}
diff --git a/tools/profiler/rust-api/src/marker/mod.rs b/tools/profiler/rust-api/src/marker/mod.rs
new file mode 100644
index 0000000000..984a475089
--- /dev/null
+++ b/tools/profiler/rust-api/src/marker/mod.rs
@@ -0,0 +1,284 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! ## Gecko profiler marker support
+//!
+//! This marker API has a few different functions that you can use to mark a part of your code.
+//! There are three main marker functions to use from Rust: [`add_untyped_marker`],
+//! [`add_text_marker`] and [`add_marker`]. They are similar to what we have on
+//! the C++ side. Please take a look at the marker documentation in the Firefox
+//! source docs to learn more about them:
+//! https://firefox-source-docs.mozilla.org/tools/profiler/markers-guide.html
+//!
+//! ### Simple marker without any additional data
+//!
+//! The simplest way to add a marker without any additional information is the
+//! [`add_untyped_marker`] API. You can use it to mark a part of the code with
+//! only a name. E.g.:
+//!
+//! ```
+//! gecko_profiler::add_untyped_marker(
+//!     // Name of the marker as a string.
+//!     "Marker Name",
+//!     // Category with an optional sub-category.
+//!     gecko_profiler_category!(Graphics, DisplayListBuilding),
+//!     // MarkerOptions that keeps options like marker timing and marker stack.
+//!     Default::default(),
+//! );
+//! ```
+//!
+//! Please see the [`gecko_profiler_category!`], [`MarkerOptions`], [`MarkerTiming`]
+//! and [`MarkerStack`] to learn more about these.
+//!
+//! You can also give explicit [`MarkerOptions`] value like these:
+//!
+//! ```
+//! // With both timing and stack fields:
+//! MarkerOptions { timing: MarkerTiming::instant_now(), stack: MarkerStack::Full }
+//! // Or with some fields as default:
+//! MarkerOptions { timing: MarkerTiming::instant_now(), ..Default::default() }
+//! ```
+//!
+//! ### Marker with only an additional text for more information:
+//!
+//! The next and slightly more advanced API is [`add_text_marker`].
+//! This is used to add a marker name + a string value for extra information.
+//! E.g.:
+//!
+//! ```
+//! let info = "info about this marker";
+//! ...
+//! gecko_profiler::add_text_marker(
+//!     // Name of the marker as a string.
+//!     "Marker Name",
+//!     // Category with an optional sub-category.
+//!     gecko_profiler_category!(DOM),
+//!     // MarkerOptions that keeps options like marker timing and marker stack.
+//!     MarkerOptions {
+//!         timing: MarkerTiming::instant_now(),
+//!         ..Default::default()
+//!     },
+//!     // Additional information as a string.
+//!     info,
+//! );
+//! ```
+//!
+//! ### Marker with a more complex payload and different visualization in the profiler front-end.
+//!
+//! [`add_marker`] is the most advanced API that you can use to add different types
+//! of values as data to your marker and customize the visualization of that marker
+//! in the profiler front-end (profiler.firefox.com).
+//!
+//! To be able to add a marker, first you need to create your marker payload
+//! struct in your codebase and implement the [`ProfilerMarker`] trait like this:
+//!
+//! ```
+//! #[derive(Serialize, Deserialize, Debug)]
+//! pub struct TestMarker {
+//!     a: u32,
+//!     b: String,
+//! }
+//!
+//! // Please see the documentation of [`ProfilerMarker`].
+//! impl gecko_profiler::ProfilerMarker for TestMarker {
+//!     fn marker_type_name() -> &'static str {
+//!         "marker type from rust"
+//!     }
+//!     fn marker_type_display() -> gecko_profiler::MarkerSchema {
+//!         use gecko_profiler::marker::schema::*;
+//!         let mut schema = MarkerSchema::new(&[Location::MarkerChart]);
+//!         schema.set_chart_label("Name: {marker.name}");
+//!         schema.set_tooltip_label("{marker.data.a}");
+//!         schema.add_key_label_format("a", "A Value", Format::Integer);
+//!         schema.add_key_label_format("b", "B Value", Format::String);
+//!         schema
+//!     }
+//!     fn stream_json_marker_data(&self, json_writer: &mut gecko_profiler::JSONWriter) {
+//!         json_writer.int_property("a", self.a.into());
+//!         json_writer.string_property("b", &self.b);
+//!     }
+//! }
+//! ```
+//!
+//! Once you've created this payload and implemented the [`ProfilerMarker`], you
+//! can now add this marker in the code that you would like to measure. E.g.:
+//!
+//! ```
+//! gecko_profiler::add_marker(
+//!     // Name of the marker as a string.
+//!     "Marker Name",
+//!     // Category with an optional sub-category.
+//!     gecko_profiler_category!(Graphics, DisplayListBuilding),
+//!     // MarkerOptions that keeps options like marker timing and marker stack.
+//!     Default::default(),
+//!     // Marker payload.
+//!     TestMarker {a: 12, b: "hello".to_owned()},
+//! );
+//! ```
+
+pub(crate) mod deserializer_tags_state;
+pub mod options;
+pub mod schema;
+
+pub use options::*;
+pub use schema::MarkerSchema;
+
+use crate::gecko_bindings::{bindings, profiling_categories::ProfilingCategoryPair};
+use crate::json_writer::JSONWriter;
+use crate::marker::deserializer_tags_state::get_or_insert_deserializer_tag;
+use crate::marker::options::MarkerOptions;
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
+use std::os::raw::c_char;
+
+/// Marker API to add a new simple marker without any payload.
+/// Please see the module documentation on how to add a marker with this API.
+pub fn add_untyped_marker(name: &str, category: ProfilingCategoryPair, mut options: MarkerOptions) {
+    // Only cross the FFI boundary when the profiler can currently accept markers.
+    if crate::profiler_state::can_accept_markers() {
+        // `name` is handed over as pointer + byte length.
+        let name_ptr = name.as_ptr() as *const c_char;
+        unsafe {
+            bindings::gecko_profiler_add_marker_untyped(
+                name_ptr,
+                name.len(),
+                category.to_cpp_enum_value(),
+                options.timing.0.as_mut_ptr(),
+                options.stack,
+            )
+        }
+    }
+}
+
+/// Marker API to add a new marker with additional text for details.
+/// Please see the module documentation on how to add a marker with this API.
+pub fn add_text_marker(
+    name: &str,
+    category: ProfilingCategoryPair,
+    mut options: MarkerOptions,
+    text: &str,
+) {
+    // Nothing to do unless the profiler can accept markers; strings go over as ptr + len.
+    if crate::profiler_state::can_accept_markers() {
+        let (name_ptr, name_len) = (name.as_ptr() as *const c_char, name.len());
+        let (text_ptr, text_len) = (text.as_ptr() as *const c_char, text.len());
+        unsafe {
+            bindings::gecko_profiler_add_marker_text(
+                name_ptr,
+                name_len,
+                category.to_cpp_enum_value(),
+                options.timing.0.as_mut_ptr(),
+                options.stack,
+                text_ptr,
+                text_len,
+            )
+        }
+    }
+}
+
+/// Trait that every profiler marker payload struct needs to implement.
+/// This will tell the profiler back-end how to serialize it as JSON and
+/// the front-end how to display the marker.
+/// Please also see the documentation here:
+/// <https://firefox-source-docs.mozilla.org/tools/profiler/markers-guide.html#how-to-define-new-marker-types>
+///
+/// - `marker_type_name`: Returns a static string as the marker type name. This
+///   should be unique and it is used to keep track of the type of markers in the
+///   profiler storage, and to identify them uniquely on the profiler front-end.
+/// - `marker_type_display`: Where and how to display the marker and its data.
+///   Returns a `MarkerSchema` object which will be forwarded to the profiler
+///   front-end.
+/// - `stream_json_marker_data`: Data specific to this marker type should be
+///   serialized to JSON for the profiler front-end. All the common marker data
+///   like marker name, category, timing will be serialized automatically. But
+///   marker specific data should be serialized here.
+pub trait ProfilerMarker: Serialize + DeserializeOwned {
+    /// A static method that returns the name of the marker type.
+    fn marker_type_name() -> &'static str;
+    /// A static method that returns a `MarkerSchema`, which contains all the
+    /// information needed to stream the display schema associated with a
+    /// marker type.
+    fn marker_type_display() -> schema::MarkerSchema;
+    /// A method that streams the marker payload data as JSON object properties.
+    /// Please see the [`JSONWriter`] struct to see its methods.
+    fn stream_json_marker_data(&self, json_writer: &mut JSONWriter);
+}
+
+/// Deserializes the bincode-encoded marker payload into a `T` and streams it to the JSON.
+/// Safety: `payload` must be valid for reads of `payload_size` bytes; panics if those
+/// bytes do not deserialize as `T`.
+unsafe fn transmute_and_stream<T: ProfilerMarker>(
+    payload: *const u8,
+    payload_size: usize,
+    json_writer: &mut JSONWriter,
+) {
+    let encoded: &[u8] = std::slice::from_raw_parts(payload, payload_size);
+    let marker: T = bincode::deserialize(encoded).unwrap();
+    marker.stream_json_marker_data(json_writer);
+}
+
+/// Main marker API to add a new marker to profiler buffer.
+/// Please see the module documentation on how to add a marker with this API.
+pub fn add_marker<T>(
+    name: &str,
+    category: ProfilingCategoryPair,
+    mut options: MarkerOptions,
+    payload: T,
+) where
+    T: ProfilerMarker,
+{
+    // Serializing is only worth it when the profiler can currently accept markers.
+    if crate::profiler_state::can_accept_markers() {
+        // The payload crosses the FFI boundary as bincode bytes.
+        let payload_bytes: Vec<u8> = bincode::serialize(&payload).unwrap();
+        // The tag identifies `T`'s entry among the registered marker type functions.
+        let marker_tag = get_or_insert_deserializer_tag::<T>();
+
+        // `payload_bytes` stays alive until the end of this block, i.e. past the call.
+        unsafe {
+            bindings::gecko_profiler_add_marker(
+                name.as_ptr() as *const c_char,
+                name.len(),
+                category.to_cpp_enum_value(),
+                options.timing.0.as_mut_ptr(),
+                options.stack,
+                marker_tag,
+                payload_bytes.as_ptr(),
+                payload_bytes.len(),
+            )
+        }
+    }
+}
+
+/// Tracing marker type for Rust code; the wrapped string is streamed as its "category".
+/// This must be kept in sync with the `mozilla::baseprofiler::markers::Tracing`
+/// C++ counterpart.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Tracing(pub String);
+
+impl ProfilerMarker for Tracing {
+    fn marker_type_name() -> &'static str {
+        "tracing"
+    }
+    // Streams the wrapped string as "category"; an empty string emits no property at all.
+    fn stream_json_marker_data(&self, json_writer: &mut JSONWriter) {
+        if !self.0.is_empty() {
+            json_writer.string_property("category", &self.0);
+        }
+    }
+
+    // Tracing marker is a bit special because the C++ side defines the same
+    // schema. This function only gets called when no Tracing marker has been
+    // generated from the C++ side; when C++ Tracing markers exist (which is the
+    // case most of the time), it is not called.
+    fn marker_type_display() -> schema::MarkerSchema {
+        use crate::marker::schema::*;
+        let mut schema = MarkerSchema::new(&[
+            Location::MarkerChart,
+            Location::MarkerTable,
+            Location::TimelineOverview,
+        ]);
+        schema.add_key_label_format("category", "Type", Format::String);
+        schema
+    }
+}
diff --git a/tools/profiler/rust-api/src/marker/options.rs b/tools/profiler/rust-api/src/marker/options.rs
new file mode 100644
index 0000000000..a5d4e11094
--- /dev/null
+++ b/tools/profiler/rust-api/src/marker/options.rs
@@ -0,0 +1,138 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Different options for the marker API.
+//! See [`MarkerOptions`] and its fields.
+
+use crate::gecko_bindings::{bindings, structs::mozilla};
+use crate::ProfilerTime;
+use std::mem::MaybeUninit;
+
+/// Marker option that contains marker timing information.
+/// This struct encapsulates the logic for correctly storing a marker based on
+/// how it was constructed. Use the associated functions (`instant_now`,
+/// `interval`, ...) to create a `MarkerTiming`. It is a transient object that is
+/// used to enforce the constraints on the combinations of the data.
+///
+/// Implementation details: This is a RAII object that constructs and destroys a
+/// C++ MarkerTiming object in the storage it wraps. The marker timing is stored
+/// inline (not boxed), and it's safe to move around because it's a
+/// trivially-copyable object that only contains a few numbers.
+#[derive(Debug)]
+pub struct MarkerTiming(pub(crate) MaybeUninit<mozilla::MarkerTiming>);
+
+impl MarkerTiming {
+    /// Instant marker timing at a specific time.
+    pub fn instant_at(time: ProfilerTime) -> MarkerTiming {
+        let mut marker_timing = MaybeUninit::<mozilla::MarkerTiming>::uninit();
+        unsafe {
+            bindings::gecko_profiler_construct_marker_timing_instant_at(
+                marker_timing.as_mut_ptr(),
+                &time.0,
+            );
+        }
+        MarkerTiming(marker_timing)
+    }
+
+    /// Instant marker timing at the current time.
+    pub fn instant_now() -> MarkerTiming {
+        let mut marker_timing = MaybeUninit::<mozilla::MarkerTiming>::uninit();
+        unsafe {
+            bindings::gecko_profiler_construct_marker_timing_instant_now(
+                marker_timing.as_mut_ptr(),
+            );
+        }
+        MarkerTiming(marker_timing)
+    }
+
+    /// Interval marker timing with start and end times.
+    pub fn interval(start_time: ProfilerTime, end_time: ProfilerTime) -> MarkerTiming {
+        let mut marker_timing = MaybeUninit::<mozilla::MarkerTiming>::uninit();
+        unsafe {
+            bindings::gecko_profiler_construct_marker_timing_interval(
+                marker_timing.as_mut_ptr(),
+                &start_time.0,
+                &end_time.0,
+            );
+        }
+        MarkerTiming(marker_timing)
+    }
+
+    /// Interval marker with a start time and end time as "now".
+    pub fn interval_until_now_from(start_time: ProfilerTime) -> MarkerTiming {
+        let mut marker_timing = MaybeUninit::<mozilla::MarkerTiming>::uninit();
+        unsafe {
+            bindings::gecko_profiler_construct_marker_timing_interval_until_now_from(
+                marker_timing.as_mut_ptr(),
+                &start_time.0,
+            );
+        }
+        MarkerTiming(marker_timing)
+    }
+
+    /// Interval start marker with only start time. This is a partial marker and
+    /// it requires another marker with `interval_end` to be complete.
+    pub fn interval_start(time: ProfilerTime) -> MarkerTiming {
+        let mut marker_timing = MaybeUninit::<mozilla::MarkerTiming>::uninit();
+        unsafe {
+            bindings::gecko_profiler_construct_marker_timing_interval_start(
+                marker_timing.as_mut_ptr(),
+                &time.0,
+            );
+        }
+        MarkerTiming(marker_timing)
+    }
+
+    /// Interval end marker with only end time. This is a partial marker and
+    /// it requires another marker with `interval_start` to be complete.
+    pub fn interval_end(time: ProfilerTime) -> MarkerTiming {
+        let mut marker_timing = MaybeUninit::<mozilla::MarkerTiming>::uninit();
+        unsafe {
+            bindings::gecko_profiler_construct_marker_timing_interval_end(
+                marker_timing.as_mut_ptr(),
+                &time.0,
+            );
+        }
+        MarkerTiming(marker_timing)
+    }
+}
+
+impl Default for MarkerTiming {
+    fn default() -> Self {
+        Self::instant_now() // the default timing is an instant marker created right now
+    }
+}
+
+impl Drop for MarkerTiming {
+    fn drop(&mut self) {
+        // Destroy the C++ MarkerTiming that one of the constructors built in this storage.
+        let timing_ptr = self.0.as_mut_ptr();
+        unsafe { bindings::gecko_profiler_destruct_marker_timing(timing_ptr) };
+    }
+}
+
+/// Marker option that contains marker stack information.
+pub type MarkerStack = mozilla::StackCaptureOptions;
+
+impl Default for MarkerStack {
+    fn default() -> Self {
+        Self::NoStack // markers carry no stack unless the caller picks another option
+    }
+}
+
+/// This struct combines each of the possible marker options above.
+/// Use Default::default() for the options that you don't want to provide or the
+/// options you want to leave as default. Example usage:
+///
+/// ```rust
+///  MarkerOptions {
+///     timing: MarkerTiming::instant_now(),
+///     ..Default::default()
+///  }
+/// ```
+#[derive(Debug, Default)]
+pub struct MarkerOptions {
+    pub timing: MarkerTiming, // defaults to `MarkerTiming::instant_now()`
+    pub stack: MarkerStack, // defaults to `MarkerStack::NoStack`
+}
diff --git a/tools/profiler/rust-api/src/marker/schema.rs b/tools/profiler/rust-api/src/marker/schema.rs
new file mode 100644
index 0000000000..9368582f11
--- /dev/null
+++ b/tools/profiler/rust-api/src/marker/schema.rs
@@ -0,0 +1,233 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! [`MarkerSchema`] and other enums that will be used by `MarkerSchema`.
+
+use crate::gecko_bindings::{bindings, structs::mozilla};
+use std::mem::MaybeUninit;
+use std::ops::DerefMut;
+use std::os::raw::c_char;
+use std::pin::Pin;
+
+/// Marker locations to be displayed in the profiler front-end; see `MarkerSchema::new`.
+pub type Location = mozilla::MarkerSchema_Location;
+
+/// Formats of marker properties for profiler front-end; see the `add_key_*` methods.
+pub type Format = mozilla::MarkerSchema_Format;
+
+/// Whether it's searchable or not in the profiler front-end; see `add_key_*_searchable`.
+pub type Searchable = mozilla::MarkerSchema_Searchable;
+
+/// This object collects all the information necessary to stream the JSON schema
+/// that informs the front-end how to display a type of markers.
+/// It will be created and populated in `marker_type_display()` functions in each
+/// marker type definition, see the `set_*` / `add_*` methods.
+///
+/// It's a RAII object that constructs a C++ MarkerSchema object inside its
+/// pinned heap allocation and destroys it again on drop.
+pub struct MarkerSchema {
+    pub(crate) pin: Pin<Box<MaybeUninit<mozilla::MarkerSchema>>>,
+}
+
+impl MarkerSchema {
+    /// Initialize a marker schema with the given `Location`s.
+    pub fn new(locations: &[Location]) -> Self {
+        let mut marker_schema = Box::pin(std::mem::MaybeUninit::<mozilla::MarkerSchema>::uninit());
+
+        unsafe {
+            bindings::gecko_profiler_construct_marker_schema(
+                marker_schema.deref_mut().as_mut_ptr(),
+                locations.as_ptr(),
+                locations.len(),
+            );
+        }
+        MarkerSchema { pin: marker_schema }
+    }
+
+    /// Marker schema for types that have special frontend handling.
+    /// Nothing else should be set in this case.
+    pub fn new_with_special_frontend_location() -> Self {
+        let mut marker_schema = Box::pin(std::mem::MaybeUninit::<mozilla::MarkerSchema>::uninit());
+        unsafe {
+            bindings::gecko_profiler_construct_marker_schema_with_special_front_end_location(
+                marker_schema.deref_mut().as_mut_ptr(),
+            );
+        }
+        MarkerSchema { pin: marker_schema }
+    }
+
+    /// Optional label in the marker chart.
+    /// If not provided, the marker "name" will be used. The given string
+    /// can contain element keys in braces to include data elements streamed by
+    /// `stream_json_marker_data()`. E.g.: "This is {marker.data.text}"
+    pub fn set_chart_label(&mut self, label: &str) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_set_chart_label(
+                self.pin.deref_mut().as_mut_ptr(),
+                label.as_ptr() as *const c_char,
+                label.len(),
+            );
+        }
+        self
+    }
+
+    /// Optional label in the marker chart tooltip.
+    /// If not provided, the marker "name" will be used. The given string
+    /// can contain element keys in braces to include data elements streamed by
+    /// `stream_json_marker_data()`. E.g.: "This is {marker.data.text}"
+    pub fn set_tooltip_label(&mut self, label: &str) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_set_tooltip_label(
+                self.pin.deref_mut().as_mut_ptr(),
+                label.as_ptr() as *const c_char,
+                label.len(),
+            );
+        }
+        self
+    }
+
+    /// Optional label in the marker table.
+    /// If not provided, the marker "name" will be used. The given string
+    /// can contain element keys in braces to include data elements streamed by
+    /// `stream_json_marker_data()`. E.g.: "This is {marker.data.text}"
+    pub fn set_table_label(&mut self, label: &str) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_set_table_label(
+                self.pin.deref_mut().as_mut_ptr(),
+                label.as_ptr() as *const c_char,
+                label.len(),
+            );
+        }
+        self
+    }
+
+    /// Set all marker chart / marker tooltip / marker table labels with the same text.
+    /// Same as the individual methods, the given string can contain element keys
+    /// in braces to include data elements streamed by `stream_json_marker_data()`.
+    /// E.g.: "This is {marker.data.text}"
+    pub fn set_all_labels(&mut self, label: &str) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_set_all_labels(
+                self.pin.deref_mut().as_mut_ptr(),
+                label.as_ptr() as *const c_char,
+                label.len(),
+            );
+        }
+        self
+    }
+
+    // Each data element that is streamed by `stream_json_marker_data()` can be
+    // displayed as indicated by using one of the `add_...` functions below.
+    // Each `add...` will add a line in the full marker description. Parameters:
+    // - `key`: Element property name as streamed by `stream_json_marker_data()`.
+    // - `label`: Label for the row; variants without it do not pass a label.
+    // - `format`: How to format the data element value, see `Format` above.
+    // - `searchable`: Indicates if the value is used in searches; variants
+    //   without it do not pass a searchable flag.
+
+    /// Add a key / format row for the marker data element.
+    /// - `key`: Element property name as streamed by `stream_json_marker_data()`.
+    /// - `format`: How to format the data element value, see `Format` above.
+    pub fn add_key_format(&mut self, key: &str, format: Format) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_add_key_format(
+                self.pin.deref_mut().as_mut_ptr(),
+                key.as_ptr() as *const c_char,
+                key.len(),
+                format,
+            );
+        }
+        self
+    }
+
+    /// Add a key / label / format row for the marker data element.
+    /// - `key`: Element property name as streamed by `stream_json_marker_data()`.
+    /// - `label`: Label to show for this row.
+    /// - `format`: How to format the data element value, see `Format` above.
+    pub fn add_key_label_format(&mut self, key: &str, label: &str, format: Format) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_add_key_label_format(
+                self.pin.deref_mut().as_mut_ptr(),
+                key.as_ptr() as *const c_char,
+                key.len(),
+                label.as_ptr() as *const c_char,
+                label.len(),
+                format,
+            );
+        }
+        self
+    }
+
+    /// Add a key / format / searchable row for the marker data element.
+    /// - `key`: Element property name as streamed by `stream_json_marker_data()`.
+    /// - `format` / `searchable`: Value format and search use, see `Format` / `Searchable` above.
+    pub fn add_key_format_searchable(
+        &mut self,
+        key: &str,
+        format: Format,
+        searchable: Searchable,
+    ) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_add_key_format_searchable(
+                self.pin.deref_mut().as_mut_ptr(),
+                key.as_ptr() as *const c_char,
+                key.len(),
+                format,
+                searchable,
+            );
+        }
+        self
+    }
+
+    /// Add a key / label / format / searchable row for the marker data element.
+    /// - `key`: Element property name as streamed by `stream_json_marker_data()`.
+    /// - `label`: Label to show for this row.
+    /// - `format`: How to format the data element value, see `Format` above.
+    /// - `searchable`: Indicates if the value is used in searches, see
+    ///   `Searchable` above.
+    pub fn add_key_label_format_searchable(
+        &mut self,
+        key: &str,
+        label: &str,
+        format: Format,
+        searchable: Searchable,
+    ) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_add_key_label_format_searchable(
+                self.pin.deref_mut().as_mut_ptr(),
+                key.as_ptr() as *const c_char,
+                key.len(),
+                label.as_ptr() as *const c_char,
+                label.len(),
+                format,
+                searchable,
+            );
+        }
+        self
+    }
+
+    /// Add a label / value static row.
+    /// - `label`: Static label of the row.
+    /// - `value`: Static value to display.
+    pub fn add_static_label_value(&mut self, label: &str, value: &str) -> &mut Self {
+        unsafe {
+            bindings::gecko_profiler_marker_schema_add_static_label_value(
+                self.pin.deref_mut().as_mut_ptr(),
+                label.as_ptr() as *const c_char,
+                label.len(),
+                value.as_ptr() as *const c_char,
+                value.len(),
+            );
+        }
+        self
+    }
+}
+
+impl Drop for MarkerSchema {
+    fn drop(&mut self) {
+        // Destroy the C++ MarkerSchema that the constructors built inside the pinned box.
+        let schema_ptr = self.pin.deref_mut().as_mut_ptr();
+        unsafe { bindings::gecko_profiler_destruct_marker_schema(schema_ptr) };
+    }
+}
diff --git a/tools/profiler/rust-api/src/profiler_state.rs b/tools/profiler/rust-api/src/profiler_state.rs
new file mode 100644
index 0000000000..0d5359684d
--- /dev/null
+++ b/tools/profiler/rust-api/src/profiler_state.rs
@@ -0,0 +1,78 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Gecko profiler state.
+
+/// Whether the Gecko profiler is currently active.
+/// A typical use of this API:
+/// ```rust
+/// if gecko_profiler::is_active() {
+///   // do something.
+/// }
+/// ```
+///
+/// This implementation must be kept in sync with
+/// `mozilla::profiler::detail::RacyFeatures::IsActive`.
+#[cfg(feature = "enabled")]
+#[inline]
+pub fn is_active() -> bool {
+    use crate::gecko_bindings::structs::mozilla::profiler::detail::RacyFeatures_Active;
+
+    // Active means the `Active` bit is set in the word read from the C++ side.
+    (get_active_and_features() & RacyFeatures_Active) != 0
+}
+
+/// Always false when MOZ_GECKO_PROFILER is not defined (the `enabled` feature is off).
+#[cfg(not(feature = "enabled"))]
+#[inline]
+pub fn is_active() -> bool {
+    false
+}
+
+/// Whether the Gecko Profiler can accept markers.
+/// Similar to `is_active`, but with some extra checks that determine if the
+/// profiler would currently store markers. So this should be used before
+/// doing some potentially-expensive work that's used in a marker. E.g.:
+///
+/// ```rust
+/// if gecko_profiler::can_accept_markers() {
+///   // Do something expensive and add the marker with that data.
+/// }
+/// ```
+///
+/// This implementation must be kept in sync with
+/// `mozilla::profiler::detail::RacyFeatures::IsActiveAndUnpaused`.
+#[cfg(feature = "enabled")]
+#[inline]
+pub fn can_accept_markers() -> bool {
+    use crate::gecko_bindings::structs::mozilla::profiler::detail;
+    // Read the shared word once so both bits are tested on the same snapshot.
+    let state = get_active_and_features();
+    let active = (state & detail::RacyFeatures_Active) != 0;
+    active && (state & detail::RacyFeatures_Paused) == 0
+}
+
+/// Always false when MOZ_GECKO_PROFILER is not defined (the `enabled` feature is off).
+#[cfg(not(feature = "enabled"))]
+#[inline]
+pub fn can_accept_markers() -> bool {
+    false
+}
+
+/// Returns the value of atomic `RacyFeatures::sActiveAndFeatures` from the C++ side.
+#[cfg(feature = "enabled")]
+#[inline]
+fn get_active_and_features() -> u32 {
+    use crate::gecko_bindings::structs::mozilla::profiler::detail;
+    use std::mem;
+    use std::sync::atomic::{AtomicU32, Ordering};
+
+    // This reads the C++ atomic directly instead of calling an FFI function to
+    // return the value, because calling an FFI function is much more expensive
+    // than this load. That makes it worth it for performance, but it's crucial
+    // to keep the implementation of this and the callers in sync with the C++
+    // counterparts. The static is reinterpreted as an `AtomicU32` for the load.
+    unsafe { mem::transmute::<_, &AtomicU32>(&detail::RacyFeatures_sActiveAndFeatures) }
+        .load(Ordering::Relaxed)
+}
diff --git a/tools/profiler/rust-api/src/thread.rs b/tools/profiler/rust-api/src/thread.rs
new file mode 100644
index 0000000000..353469a4bb
--- /dev/null
+++ b/tools/profiler/rust-api/src/thread.rs
@@ -0,0 +1,23 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Profiler API for thread registration and unregistration.
+use crate::gecko_bindings::bindings;
+use std::ffi::CString;
+
+/// Register a thread with the Gecko Profiler.
+pub fn register_thread(thread_name: &str) {
+    // `unwrap` panics if `thread_name` contains an interior NUL byte.
+    let c_name = CString::new(thread_name).unwrap();
+    // gecko_profiler_register_thread copies the passed name, so `c_name` only
+    // has to outlive the call itself.
+    unsafe { bindings::gecko_profiler_register_thread(c_name.as_ptr()) };
+}
+
+/// Unregister a thread with the Gecko Profiler.
+/// Counterpart of `register_thread`.
+pub fn unregister_thread() {
+    // Plain FFI call: no arguments are passed and no result is read back.
+    unsafe { bindings::gecko_profiler_unregister_thread() };
+}
diff --git a/tools/profiler/rust-api/src/time.rs b/tools/profiler/rust-api/src/time.rs
new file mode 100644
index 0000000000..56315690c9
--- /dev/null
+++ b/tools/profiler/rust-api/src/time.rs
@@ -0,0 +1,71 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Gecko profiler time.
+
+use crate::gecko_bindings::{bindings, structs::mozilla};
+use std::mem::MaybeUninit;
+
+/// Profiler time for the marker API.
+/// Pass it to the `MarkerTiming` constructors to describe when a marker happened.
+/// E.g.:
+///
+/// ```
+/// let start = ProfilerTime::now();
+/// // ...some code...
+/// gecko_profiler::add_untyped_marker(
+///     "marker name",
+///     category,
+///     MarkerOptions {
+///         timing: MarkerTiming::interval_until_now_from(start),
+///         ..Default::default()
+///     },
+/// );
+/// ```
+#[derive(Debug)]
+pub struct ProfilerTime(pub(crate) mozilla::TimeStamp);
+
+impl ProfilerTime {
+    /// Returns the timestamp written by `gecko_profiler_construct_timestamp_now`.
+    pub fn now() -> ProfilerTime {
+        let mut timestamp = MaybeUninit::<mozilla::TimeStamp>::uninit();
+        unsafe { bindings::gecko_profiler_construct_timestamp_now(timestamp.as_mut_ptr()) };
+        // Assumes the call above fully initialized `timestamp`.
+        ProfilerTime(unsafe { timestamp.assume_init() })
+    }
+
+    /// Consumes `self`; returns the timestamp written by `gecko_profiler_add_timestamp`.
+    pub fn add_microseconds(self, microseconds: f64) -> Self {
+        let mut sum = MaybeUninit::<mozilla::TimeStamp>::uninit();
+        let sum_ptr = sum.as_mut_ptr();
+        unsafe { bindings::gecko_profiler_add_timestamp(&self.0, sum_ptr, microseconds) };
+        ProfilerTime(unsafe { sum.assume_init() })
+    }
+
+    /// Consumes `self`; returns the timestamp written by `gecko_profiler_subtract_timestamp`.
+    pub fn subtract_microseconds(self, microseconds: f64) -> Self {
+        let mut diff = MaybeUninit::<mozilla::TimeStamp>::uninit();
+        let diff_ptr = diff.as_mut_ptr();
+        unsafe { bindings::gecko_profiler_subtract_timestamp(&self.0, diff_ptr, microseconds) };
+        ProfilerTime(unsafe { diff.assume_init() })
+    }
+}
+
+impl Clone for ProfilerTime {
+    fn clone(&self) -> Self {
+        // The copy is produced by `gecko_profiler_clone_timestamp` into fresh storage.
+        let mut copy = MaybeUninit::<mozilla::TimeStamp>::uninit();
+        unsafe { bindings::gecko_profiler_clone_timestamp(&self.0, copy.as_mut_ptr()) };
+        // Assumes the call above fully initialized `copy`.
+        ProfilerTime(unsafe { copy.assume_init() })
+    }
+}
+
+impl Drop for ProfilerTime {
+    fn drop(&mut self) {
+        // Hand the wrapped TimeStamp to `gecko_profiler_destruct_timestamp` in place.
+        let timestamp = &mut self.0;
+        unsafe { bindings::gecko_profiler_destruct_timestamp(timestamp) };
+    }
+}
diff --git a/tools/profiler/rust-helper/Cargo.toml b/tools/profiler/rust-helper/Cargo.toml
new file mode 100644
index 0000000000..6d3d168ed4
--- /dev/null
+++ b/tools/profiler/rust-helper/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "profiler_helper"
+version = "0.1.0"
+authors = ["Markus Stange <mstange@themasta.com>"]
+license = "MPL-2.0"
+
+[dependencies]
+memmap2 = "0.5"
+rustc-demangle = "0.1"
+uuid = "1.0"
+
+[dependencies.object]
+version = "0.30"
+optional = true
+default-features = false
+features = ["std", "read_core", "elf"]
+
+[dependencies.thin-vec]
+version = "0.2.1"
+features = ["gecko-ffi"]
+
+[features]
+parse_elf = ["object"]
diff --git a/tools/profiler/rust-helper/src/compact_symbol_table.rs b/tools/profiler/rust-helper/src/compact_symbol_table.rs
new file mode 100644
index 0000000000..12c4ca081b
--- /dev/null
+++ b/tools/profiler/rust-helper/src/compact_symbol_table.rs
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::collections::HashMap;
+use thin_vec::ThinVec;
+
+/// Symbol table stored as three flat vectors.
+///
+/// The field layout described below is the one produced by
+/// `CompactSymbolTable::from_map`.
+#[repr(C)]
+pub struct CompactSymbolTable {
+    /// Symbol addresses, sorted in ascending order.
+    pub addr: ThinVec<u32>,
+    /// Start offset in `buffer` of each symbol name, in the same order as
+    /// `addr`, followed by one final entry holding the total length of
+    /// `buffer`.
+    pub index: ThinVec<u32>,
+    /// Bytes of all symbol names, concatenated without separators.
+    pub buffer: ThinVec<u8>,
+}
+
+impl CompactSymbolTable {
+    /// Creates a table whose three vectors are all empty.
+    pub fn new() -> Self {
+        Self {
+            addr: ThinVec::new(),
+            index: ThinVec::new(),
+            buffer: ThinVec::new(),
+        }
+    }
+
+    /// Builds a table from an address -> name map.
+    ///
+    /// Entries are emitted in ascending address order. `index` receives one
+    /// start offset per name plus a trailing entry equal to the final buffer
+    /// length, so it ends up one element longer than `addr`.
+    pub fn from_map(map: HashMap<u32, &str>) -> Self {
+        // Map keys are unique, so an unstable sort yields the same order.
+        let mut sorted: Vec<(u32, &str)> = map.into_iter().collect();
+        sorted.sort_unstable_by_key(|entry| entry.0);
+
+        let mut table = Self::new();
+        for (addr, name) in sorted {
+            let name_start = table.buffer.len() as u32;
+            table.addr.push(addr);
+            table.index.push(name_start);
+            table.add_name(name);
+        }
+        let buffer_end = table.buffer.len() as u32;
+        table.index.push(buffer_end);
+        table
+    }
+
+    /// Appends the raw bytes of `name` to the name buffer.
+    fn add_name(&mut self, name: &str) {
+        self.buffer.extend_from_slice(name.as_bytes());
+    }
+}
diff --git a/tools/profiler/rust-helper/src/elf.rs b/tools/profiler/rust-helper/src/elf.rs
new file mode 100644
index 0000000000..4930884f05
--- /dev/null
+++ b/tools/profiler/rust-helper/src/elf.rs
@@ -0,0 +1,101 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use compact_symbol_table::CompactSymbolTable;
+use object::read::{NativeFile, Object};
+use object::{ObjectSection, ObjectSymbol, SectionKind, SymbolKind};
+use std::cmp;
+use std::collections::HashMap;
+use uuid::Uuid;
+
+const UUID_SIZE: usize = 16;
+const PAGE_SIZE: usize = 4096;
+
+/// Collects the text symbols of `object_file` into an address -> name map.
+///
+/// Dynamic symbols are visited before regular symbols, so when two symbols
+/// share the same (truncated to `u32`) address the one visited last wins.
+/// Symbols whose name cannot be read are skipped.
+fn get_symbol_map<'a: 'b, 'b, T>(object_file: &'b T) -> HashMap<u32, &'a str>
+where
+    T: Object<'a, 'b>,
+{
+    let all_symbols = object_file.dynamic_symbols().chain(object_file.symbols());
+    let mut symbol_map = HashMap::new();
+    for symbol in all_symbols {
+        if symbol.kind() != SymbolKind::Text {
+            continue;
+        }
+        if let Ok(name) = symbol.name() {
+            symbol_map.insert(symbol.address() as u32, name);
+        }
+    }
+    symbol_map
+}
+
+/// Parses `buffer` as a native object file and returns its compact symbol
+/// table.
+///
+/// Returns `None` when the buffer cannot be parsed, when no ELF id can be
+/// computed, or when `breakpad_id` is given and differs from the id derived
+/// from the file (upper-case simple UUID followed by a literal `0`).
+pub fn get_compact_symbol_table(
+    buffer: &[u8],
+    breakpad_id: Option<&str>,
+) -> Option<CompactSymbolTable> {
+    let elf_file = NativeFile::parse(buffer).ok()?;
+    let elf_id = get_elf_id(&elf_file)?;
+    if let Some(expected_id) = breakpad_id {
+        let actual_id = format!("{:X}0", elf_id.as_simple());
+        if expected_id != actual_id {
+            return None;
+        }
+    }
+    let symbols = get_symbol_map(&elf_file);
+    Some(CompactSymbolTable::from_map(symbols))
+}
+
+/// Turns a raw identifier into a `Uuid`, zero-padding or truncating it to
+/// `UUID_SIZE` bytes.
+fn create_elf_id(identifier: &[u8], little_endian: bool) -> Uuid {
+    // Start from all zeroes so that short identifiers are padded.
+    let mut bytes = [0u8; UUID_SIZE];
+    let copied = cmp::min(identifier.len(), UUID_SIZE);
+    bytes[..copied].copy_from_slice(&identifier[..copied]);
+
+    if little_endian {
+        // The ELF file targets a little endian architecture. Convert to
+        // network byte order (big endian) to match the Breakpad processor's
+        // expectations. For big endian object files, this is not needed.
+        bytes[..4].reverse(); // uuid field 1
+        bytes[4..6].reverse(); // uuid field 2
+        bytes[6..8].reverse(); // uuid field 3
+    }
+
+    Uuid::from_bytes(bytes)
+}
+
+/// Tries to obtain the object identifier of an ELF object.
+///
+/// ELF, unlike Mach-O, has no unique object ID in its header. Compilers and
+/// linkers usually emit one through `SHT_NOTE` sections or `PT_NOTE` program
+/// header elements; when such a note exists, the file's `build_id()` method
+/// returns it and it is used directly.
+///
+/// Otherwise the first page of the `.text` section (program code) is hashed,
+/// which matches what the Breakpad processor does.
+///
+/// Returns `None` when neither source is available.
+pub fn get_elf_id(elf_file: &NativeFile) -> Option<Uuid> {
+    if let Ok(Some(identifier)) = elf_file.build_id() {
+        return Some(create_elf_id(identifier, elf_file.is_little_endian()));
+    }
+
+    // No build ID: fall back to XOR-folding the first page of ".text" into a
+    // UUID-sized buffer, one byte at a time.
+    let section_data = find_text_section(elf_file)?;
+    let mut hash = [0u8; UUID_SIZE];
+    for (offset, byte) in section_data.iter().take(PAGE_SIZE).enumerate() {
+        hash[offset % UUID_SIZE] ^= *byte;
+    }
+    Some(create_elf_id(&hash, elf_file.is_little_endian()))
+}
+
+/// Returns the data of the section named `.text`, provided that section is
+/// of kind `SectionKind::Text` and its data can be read.
+fn find_text_section<'elf>(elf_file: &'elf NativeFile) -> Option<&'elf [u8]> {
+    elf_file
+        .section_by_name(".text")
+        .filter(|section| section.kind() == SectionKind::Text)
+        .and_then(|section| section.data().ok())
+}
diff --git a/tools/profiler/rust-helper/src/lib.rs b/tools/profiler/rust-helper/src/lib.rs
new file mode 100644
index 0000000000..22f8e04a2e
--- /dev/null
+++ b/tools/profiler/rust-helper/src/lib.rs
@@ -0,0 +1,107 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate memmap2;
+extern crate rustc_demangle;
+extern crate thin_vec;
+extern crate uuid;
+
+#[cfg(feature = "parse_elf")]
+extern crate object;
+
+mod compact_symbol_table;
+
+#[cfg(feature = "parse_elf")]
+mod elf;
+
+#[cfg(feature = "parse_elf")]
+use memmap2::MmapOptions;
+#[cfg(feature = "parse_elf")]
+use std::fs::File;
+
+use compact_symbol_table::CompactSymbolTable;
+use rustc_demangle::try_demangle;
+use std::ffi::CStr;
+use std::mem;
+use std::os::raw::c_char;
+use std::ptr;
+
+/// Memory-maps the file at `debug_path` and extracts its compact symbol
+/// table, returning `None` if the file cannot be opened, mapped or parsed,
+/// or if `breakpad_id` does not match.
+#[cfg(feature = "parse_elf")]
+pub fn get_compact_symbol_table_from_file(
+    debug_path: &str,
+    breakpad_id: Option<&str>,
+) -> Option<CompactSymbolTable> {
+    let file = File::open(debug_path).ok()?;
+    // Creating the mapping is `unsafe` in memmap2's API.
+    let mapping = unsafe { MmapOptions::new().map(&file) }.ok()?;
+    elf::get_compact_symbol_table(&mapping, breakpad_id)
+}
+
+/// Stub used when the `parse_elf` feature is disabled: no file is read and
+/// `None` is always returned.
+#[cfg(not(feature = "parse_elf"))]
+pub fn get_compact_symbol_table_from_file(
+    _debug_path: &str,
+    _breakpad_id: Option<&str>,
+) -> Option<CompactSymbolTable> {
+    None
+}
+
+/// C entry point that fills `symbol_table` with the symbols of the file at
+/// `debug_path`.
+///
+/// `debug_path` must point to a NUL-terminated string; invalid UTF-8 in it
+/// is replaced lossily. `breakpad_id` may be null, meaning "do not check the
+/// id"; when non-null it must be NUL-terminated valid UTF-8.
+///
+/// Returns `true` and overwrites `symbol_table` on success. Returns `false`
+/// and leaves `symbol_table` untouched when `debug_path` is null, when
+/// `breakpad_id` is not valid UTF-8, or when no table could be produced.
+#[no_mangle]
+pub unsafe extern "C" fn profiler_get_symbol_table(
+    debug_path: *const c_char,
+    breakpad_id: *const c_char,
+    symbol_table: &mut CompactSymbolTable,
+) -> bool {
+    // `CStr::from_ptr` requires a non-null pointer, so reject null up front
+    // rather than invoking undefined behavior.
+    if debug_path.is_null() {
+        return false;
+    }
+    let debug_path = CStr::from_ptr(debug_path).to_string_lossy();
+    let breakpad_id = if breakpad_id.is_null() {
+        None
+    } else {
+        match CStr::from_ptr(breakpad_id).to_str() {
+            Ok(id) => Some(id),
+            Err(_) => return false,
+        }
+    };
+
+    match get_compact_symbol_table_from_file(&debug_path, breakpad_id) {
+        Some(mut table) => {
+            // Swap so that the caller's previous table is dropped here.
+            std::mem::swap(symbol_table, &mut table);
+            true
+        }
+        None => false,
+    }
+}
+
+/// C entry point that demangles the Rust symbol `mangled` into `buffer`.
+///
+/// Both pointers must be non-null (asserted). On success the demangled name,
+/// formatted with `{:#}`, is written as a NUL-terminated string truncated to
+/// at most `buffer_len - 1` bytes, and `true` is returned. Returns `false`
+/// when `buffer_len` is zero, when `mangled` is not valid UTF-8, when it is
+/// not a Rust symbol, or when the demangled text is not pure ASCII.
+#[no_mangle]
+pub unsafe extern "C" fn profiler_demangle_rust(
+    mangled: *const c_char,
+    buffer: *mut c_char,
+    buffer_len: usize,
+) -> bool {
+    assert!(!mangled.is_null());
+    assert!(!buffer.is_null());
+
+    if buffer_len == 0 {
+        return false;
+    }
+
+    let out: *mut u8 = mem::transmute(buffer);
+    let symbol = match CStr::from_ptr(mangled).to_str() {
+        Ok(s) => s,
+        Err(_) => return false,
+    };
+    let demangled = match try_demangle(symbol) {
+        Ok(name) => name,
+        Err(_) => return false,
+    };
+
+    let mut text = format!("{:#}", demangled);
+    // ASCII-only text can be truncated at any byte offset.
+    if !text.is_ascii() {
+        return false;
+    }
+    // Leave room for the terminating NUL.
+    text.truncate(buffer_len - 1);
+
+    let bytes = text.as_bytes();
+    ptr::copy(bytes.as_ptr(), out, bytes.len());
+    ptr::write(out.add(bytes.len()), 0);
+    true
+}
diff --git a/tools/profiler/tests/browser/browser.ini b/tools/profiler/tests/browser/browser.ini
new file mode 100644
index 0000000000..654446e36e
--- /dev/null
+++ b/tools/profiler/tests/browser/browser.ini
@@ -0,0 +1,102 @@
+[DEFAULT]
+skip-if = tsan # Bug 1804081 - TSan times out on pretty much all of these tests
+support-files =
+  ../shared-head.js
+  head.js
+
+[browser_test_feature_ipcmessages.js]
+support-files = simple.html
+
+[browser_test_feature_jsallocations.js]
+support-files = do_work_500ms.html
+
+[browser_test_feature_nostacksampling.js]
+support-files = do_work_500ms.html
+
+[browser_test_markers_parent_process.js]
+skip-if =
+  os == "win" && os_version == "6.1" # Skip on Azure - frequent failure
+
+[browser_test_markers_preferencereads.js]
+support-files = single_frame.html
+
+[browser_test_markers_gc_cc.js]
+
+[browser_test_profile_capture_by_pid.js]
+skip-if = os == "win" && os_version == "6.1" # No thread names on win7, needed for these tests
+https_first_disabled = true
+support-files = single_frame.html
+
+[browser_test_profile_fission.js]
+support-files = single_frame.html
+
+[browser_test_profile_single_frame_page_info.js]
+https_first_disabled = true
+support-files = single_frame.html
+
+[browser_test_profile_slow_capture.js]
+https_first_disabled = true
+support-files = single_frame.html
+skip-if = !debug
+
+[browser_test_profile_multi_frame_page_info.js]
+https_first_disabled = true
+support-files =
+  multi_frame.html
+  single_frame.html
+
+[browser_test_marker_network_simple.js]
+https_first_disabled = true
+support-files = simple.html
+
+[browser_test_marker_network_private_browsing.js]
+support-files = simple.html
+
+[browser_test_marker_network_cancel.js]
+https_first_disabled = true
+support-files = simple.html
+
+[browser_test_marker_network_sts.js]
+support-files = simple.html
+
+[browser_test_marker_network_redirect.js]
+https_first_disabled = true
+support-files =
+  redirect.sjs
+  simple.html
+  page_with_resources.html
+  firefox-logo-nightly.svg
+skip-if =
+  os == "win" && os_version == "6.1" # Skip on Azure - frequent failure
+
+[browser_test_marker_network_serviceworker_cache_first.js]
+support-files =
+  serviceworkers/serviceworker-utils.js
+  serviceworkers/serviceworker_register.html
+  serviceworkers/serviceworker_page.html
+  serviceworkers/firefox-logo-nightly.svg
+  serviceworkers/serviceworker_cache_first.js
+
+[browser_test_marker_network_serviceworker_no_fetch_handler.js]
+support-files =
+  serviceworkers/serviceworker-utils.js
+  serviceworkers/serviceworker_register.html
+  serviceworkers/serviceworker_page.html
+  serviceworkers/firefox-logo-nightly.svg
+  serviceworkers/serviceworker_no_fetch_handler.js
+
+[browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js]
+support-files =
+  serviceworkers/serviceworker-utils.js
+  serviceworkers/serviceworker_register.html
+  serviceworkers/serviceworker_page.html
+  serviceworkers/firefox-logo-nightly.svg
+  serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js
+
+[browser_test_marker_network_serviceworker_synthetized_response.js]
+support-files =
+  serviceworkers/serviceworker-utils.js
+  serviceworkers/serviceworker_register.html
+  serviceworkers/serviceworker_simple.html
+  serviceworkers/firefox-logo-nightly.svg
+  serviceworkers/serviceworker_synthetized_response.js
diff --git a/tools/profiler/tests/browser/browser_test_feature_ipcmessages.js b/tools/profiler/tests/browser/browser_test_feature_ipcmessages.js
new file mode 100644
index 0000000000..f5fb2921a1
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_feature_ipcmessages.js
@@ -0,0 +1,100 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+requestLongerTimeout(10);
+
+/**
+ * Resolves once the document of the currently selected browser reports a
+ * readyState of "complete".
+ */
+async function waitForLoad() {
+  return SpecialPowers.spawn(gBrowser.selectedBrowser, [], () => {
+    return new Promise(resolve => {
+      // Already loaded: nothing to wait for.
+      if (content.document.readyState === "complete") {
+        resolve();
+        return;
+      }
+      content.document.addEventListener("readystatechange", () => {
+        if (content.document.readyState === "complete") {
+          resolve();
+        }
+      });
+    });
+  });
+}
+
+/**
+ * Test the IPCMessages feature.
+ *
+ * The test runs two profiling sessions, each opening the same page in a new
+ * tab: the first with the "ipcmessages" feature, where "IPC" payloads are
+ * expected in both the parent and content threads, and the second without
+ * it, where none are expected.
+ */
+add_task(async function test_profile_feature_ipcmessges() {
+  const url = BASE_URL + "simple.html";
+
+  info("Open a tab while profiling IPC messages.");
+  await startProfiler({ features: ["js", "ipcmessages"] });
+  info("Started the profiler sucessfully! Now, let's open a tab.");
+
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    info("We opened a tab!");
+    // The content process id is passed to the helper that fetches the threads.
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+    info("Now let's wait until it's fully loaded.");
+    await waitForLoad();
+
+    info(
+      "Check that some IPC profile markers were generated when " +
+        "the feature is enabled."
+    );
+    {
+      const { parentThread, contentThread } =
+        await waitSamplingAndStopProfilerAndGetThreads(contentPid);
+
+      Assert.greater(
+        getPayloadsOfType(parentThread, "IPC").length,
+        0,
+        "IPC profile markers were recorded for the parent process' main " +
+          "thread when the IPCMessages feature was turned on."
+      );
+
+      Assert.greater(
+        getPayloadsOfType(contentThread, "IPC").length,
+        0,
+        "IPC profile markers were recorded for the content process' main " +
+          "thread when the IPCMessages feature was turned on."
+      );
+    }
+  });
+
+  // Second session: same page, but without the "ipcmessages" feature.
+  info("Now open a tab without profiling IPC messages.");
+  await startProfiler({ features: ["js"] });
+
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+    await waitForLoad();
+
+    info(
+      "Check that no IPC profile markers were recorded when the " +
+        "feature is turned off."
+    );
+    {
+      const { parentThread, contentThread } =
+        await waitSamplingAndStopProfilerAndGetThreads(contentPid);
+      Assert.equal(
+        getPayloadsOfType(parentThread, "IPC").length,
+        0,
+        "No IPC profile markers were recorded for the parent process' main " +
+          "thread when the IPCMessages feature was turned off."
+      );
+
+      Assert.equal(
+        getPayloadsOfType(contentThread, "IPC").length,
+        0,
+        "No IPC profile markers were recorded for the content process' main " +
+          "thread when the IPCMessages feature was turned off."
+      );
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_feature_jsallocations.js b/tools/profiler/tests/browser/browser_test_feature_jsallocations.js
new file mode 100644
index 0000000000..60d072bed9
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_feature_jsallocations.js
@@ -0,0 +1,74 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+requestLongerTimeout(10);
+
+/**
+ * Test the JS Allocations feature. This is done as a browser test to ensure that
+ * we realistically try out how the JS allocations are running. This ensures that
+ * we are collecting allocations for the content process and the parent process.
+ *
+ * The same tab is used for two profiling sessions: first with the
+ * "jsallocations" feature (allocations expected), then without it (none
+ * expected).
+ */
+add_task(async function test_profile_feature_jsallocations() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  await startProfiler({ features: ["js", "jsallocations"] });
+
+  const url = BASE_URL + "do_work_500ms.html";
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    // The content process id is passed to the helper that fetches the threads.
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    // Wait 500ms so that the tab finishes executing.
+    await wait(500);
+
+    // Check that we can get some allocations when the feature is turned on.
+    {
+      const { parentThread, contentThread } =
+        await waitSamplingAndStopProfilerAndGetThreads(contentPid);
+      Assert.greater(
+        getPayloadsOfType(parentThread, "JS allocation").length,
+        0,
+        "Allocations were recorded for the parent process' main thread when the " +
+          "JS Allocation feature was turned on."
+      );
+      Assert.greater(
+        getPayloadsOfType(contentThread, "JS allocation").length,
+        0,
+        "Allocations were recorded for the content process' main thread when the " +
+          "JS Allocation feature was turned on."
+      );
+    }
+
+    // Start a second session with only the "js" feature, i.e. without
+    // "jsallocations".
+    await startProfiler({ features: ["js"] });
+    // Now reload the tab with a clean run.
+    gBrowser.reload();
+    await wait(500);
+
+    // Check that no allocations were recorded, and allocation tracking was correctly
+    // turned off.
+    {
+      const { parentThread, contentThread } =
+        await waitSamplingAndStopProfilerAndGetThreads(contentPid);
+      Assert.equal(
+        getPayloadsOfType(parentThread, "JS allocation").length,
+        0,
+        "No allocations were recorded for the parent processes' main thread when " +
+          "JS allocation was not turned on."
+      );
+
+      Assert.equal(
+        getPayloadsOfType(contentThread, "JS allocation").length,
+        0,
+        "No allocations were recorded for the content processes' main thread when " +
+          "JS allocation was not turned on."
+      );
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_feature_nostacksampling.js b/tools/profiler/tests/browser/browser_test_feature_nostacksampling.js
new file mode 100644
index 0000000000..323a87e191
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_feature_nostacksampling.js
@@ -0,0 +1,72 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test the No Stack Sampling feature.
+ *
+ * The same tab is profiled twice: first with the "nostacksampling" feature,
+ * where both threads must contain zero samples, then without it, where both
+ * threads must contain at least one sample.
+ */
+add_task(async function test_profile_feature_nostacksampling() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  await startProfiler({ features: ["js", "nostacksampling"] });
+
+  const url = BASE_URL + "do_work_500ms.html";
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    // Wait 500ms so that the tab finishes executing.
+    await wait(500);
+
+    // Check that we can get no stacks when the feature is turned on.
+    {
+      const { parentThread, contentThread } =
+        await stopProfilerNowAndGetThreads(contentPid);
+      Assert.equal(
+        parentThread.samples.data.length,
+        0,
+        "No stack samples were recorded from the parent process' main " +
+          "thread when the No Stack Sampling feature was turned on."
+      );
+      Assert.equal(
+        contentThread.samples.data.length,
+        0,
+        "No stack samples were recorded from the content process' main " +
+          "thread when the No Stack Sampling feature was turned on."
+      );
+    }
+
+    // Start a second session with only the "js" feature, i.e. without
+    // "nostacksampling".
+    await startProfiler({ features: ["js"] });
+
+    // Now reload the tab with a clean run.
+    gBrowser.reload();
+    await wait(500);
+
+    // Check that stack samples were recorded.
+    {
+      const { parentThread, contentThread } =
+        await waitSamplingAndStopProfilerAndGetThreads(contentPid);
+      Assert.greater(
+        parentThread.samples.data.length,
+        0,
+        "Stack samples were recorded from the parent process' main " +
+          "thread when the No Stack Sampling feature was not turned on."
+      );
+
+      Assert.greater(
+        contentThread.samples.data.length,
+        0,
+        "Stack samples were recorded from the content process' main " +
+          "thread when the No Stack Sampling feature was not turned on."
+      );
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_cancel.js b/tools/profiler/tests/browser/browser_test_marker_network_cancel.js
new file mode 100644
index 0000000000..0a850487af
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_cancel.js
@@ -0,0 +1,71 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers with the cancel status.
+ *
+ * A load is started in a fresh tab and stopped as soon as the document load
+ * begins; the parent thread's second network marker must then describe a
+ * cancelled GET request for that URL.
+ */
+add_task(async function test_network_markers_early_cancel() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  // NOTE(review): the helper is defined outside this file; it presumably
+  // returns the startProfiler() promise, so wait for it like the other tests
+  // wait for startProfiler(). Awaiting is harmless if it is synchronous.
+  await startProfilerForMarkerTests();
+
+  const url = BASE_URL + "simple.html?cacheBust=" + Math.random();
+  const options = {
+    gBrowser,
+    url: "about:blank",
+    waitForLoad: false,
+  };
+
+  const tab = await BrowserTestUtils.openNewForegroundTab(options);
+  // Register the "load and stop" watcher before triggering the load.
+  const loadPromise = BrowserTestUtils.waitForDocLoadAndStopIt(url, tab);
+  BrowserTestUtils.loadURIString(tab.linkedBrowser, url);
+  const contentPid = await SpecialPowers.spawn(
+    tab.linkedBrowser,
+    [],
+    () => Services.appinfo.processID
+  );
+  await loadPromise;
+  const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+    contentPid
+  );
+  BrowserTestUtils.removeTab(tab);
+
+  const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread);
+  const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+
+  info("parent process: " + JSON.stringify(parentNetworkMarkers, null, 2));
+  info("content process: " + JSON.stringify(contentNetworkMarkers, null, 2));
+
+  Assert.equal(
+    parentNetworkMarkers.length,
+    2,
+    `We should get a pair of network markers in the parent thread.`
+  );
+
+  // We don't test the markers in the content process, because depending on some
+  // timing we can have 0 or 1 (and maybe even 2 (?)).
+
+  const parentStopMarker = parentNetworkMarkers[1];
+
+  const expectedProperties = {
+    name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`),
+    data: Expect.objectContainsOnly({
+      type: "Network",
+      status: "STATUS_CANCEL",
+      URI: url,
+      requestMethod: "GET",
+      contentType: null,
+      startTime: Expect.number(),
+      endTime: Expect.number(),
+      id: Expect.number(),
+      pri: Expect.number(),
+      cache: "Unresolved",
+    }),
+  };
+
+  Assert.objectContains(parentStopMarker, expectedProperties);
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_private_browsing.js b/tools/profiler/tests/browser/browser_test_marker_network_private_browsing.js
new file mode 100644
index 0000000000..f898ebda29
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_private_browsing.js
@@ -0,0 +1,91 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly for a page loaded in a
+ * private browsing window: both the parent and content threads must carry a
+ * pair of markers whose stop marker has `isPrivateBrowsing: true`.
+ */
+add_task(async function test_network_markers() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  // NOTE(review): the helper is defined outside this file; it presumably
+  // returns the startProfiler() promise, so wait for it like the other tests
+  // wait for startProfiler(). Awaiting is harmless if it is synchronous.
+  await startProfilerForMarkerTests();
+
+  const win = await BrowserTestUtils.openNewBrowserWindow({
+    private: true,
+    fission: true,
+  });
+  try {
+    const url = BASE_URL_HTTPS + "simple.html?cacheBust=" + Math.random();
+    const contentBrowser = win.gBrowser.selectedBrowser;
+    BrowserTestUtils.loadURIString(contentBrowser, url);
+    await BrowserTestUtils.browserLoaded(contentBrowser, false, url);
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread);
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    info(JSON.stringify(parentNetworkMarkers, null, 2));
+    info(JSON.stringify(contentNetworkMarkers, null, 2));
+
+    Assert.equal(
+      parentNetworkMarkers.length,
+      2,
+      `We should get a pair of network markers in the parent thread.`
+    );
+    Assert.equal(
+      contentNetworkMarkers.length,
+      2,
+      `We should get a pair of network markers in the content thread.`
+    );
+
+    const parentStopMarker = parentNetworkMarkers[1];
+    const contentStopMarker = contentNetworkMarkers[1];
+
+    const expectedProperties = {
+      name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`),
+      data: Expect.objectContains({
+        status: "STATUS_STOP",
+        URI: url,
+        requestMethod: "GET",
+        contentType: "text/html",
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        domainLookupStart: Expect.number(),
+        domainLookupEnd: Expect.number(),
+        connectStart: Expect.number(),
+        tcpConnectEnd: Expect.number(),
+        connectEnd: Expect.number(),
+        requestStart: Expect.number(),
+        responseStart: Expect.number(),
+        responseEnd: Expect.number(),
+        id: Expect.number(),
+        count: Expect.number(),
+        pri: Expect.number(),
+        isPrivateBrowsing: true,
+      }),
+    };
+
+    Assert.objectContains(parentStopMarker, expectedProperties);
+    // The cache information is missing from the content marker, it's only part
+    // of the parent marker. See Bug 1544821.
+    Assert.objectContains(parentStopMarker.data, {
+      // Because the request races with the cache, these 2 values are valid:
+      // "Missed" when the cache answered before we get a result from the network.
+      // "Unresolved" when we got a response from the network before the cache subsystem.
+      cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+    });
+    Assert.objectContains(contentStopMarker, expectedProperties);
+  } finally {
+    // Always close the private window, even when an assertion failed.
+    await BrowserTestUtils.closeWindow(win);
+  }
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_redirect.js b/tools/profiler/tests/browser/browser_test_marker_network_redirect.js
new file mode 100644
index 0000000000..28478c2b3b
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_redirect.js
@@ -0,0 +1,341 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly.
+ * In this file we'll test the redirect cases.
+ */
+add_task(async function test_network_markers_service_worker_setup() {
+  // Turn off the disk and memory caches: this makes the results more
+  // predictable, especially in verify mode.
+  const cachePrefs = [
+    ["browser.cache.disk.enable", false],
+    ["browser.cache.memory.enable", false],
+  ];
+  await SpecialPowers.pushPrefEnv({ set: cachePrefs });
+});
+
+add_task(async function test_network_markers_redirect_simple() {
+  // In this test, we request an HTML page that gets redirected. This is a
+  // top-level navigation.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const targetFileNameWithCacheBust = "simple.html";
+  const url =
+    BASE_URL +
+    "redirect.sjs?" +
+    encodeURIComponent(targetFileNameWithCacheBust);
+  const targetUrl = BASE_URL + targetFileNameWithCacheBust;
+
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread);
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    info(JSON.stringify(parentNetworkMarkers, null, 2));
+    info(JSON.stringify(contentNetworkMarkers, null, 2));
+
+    Assert.equal(
+      parentNetworkMarkers.length,
+      4,
+      `We should get 2 pairs of network markers in the parent thread.`
+    );
+
+    /* It looks like, for a redirection of the top level navigation, the
+     * content thread sees the markers for the second request only.
+     * See Bug 1692879. */
+    Assert.equal(
+      contentNetworkMarkers.length,
+      2,
+      `We should get one pair of network markers in the content thread.`
+    );
+
+    const parentRedirectMarker = parentNetworkMarkers[1];
+    const parentStopMarker = parentNetworkMarkers[3];
+    // There's no content redirect marker for the reason outlined above.
+    const contentStopMarker = contentNetworkMarkers[1];
+
+    Assert.objectContains(parentRedirectMarker, {
+      name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`),
+      data: Expect.objectContainsOnly({
+        type: "Network",
+        status: "STATUS_REDIRECT",
+        URI: url,
+        RedirectURI: targetUrl,
+        requestMethod: "GET",
+        contentType: null,
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        domainLookupStart: Expect.number(),
+        domainLookupEnd: Expect.number(),
+        connectStart: Expect.number(),
+        tcpConnectEnd: Expect.number(),
+        connectEnd: Expect.number(),
+        requestStart: Expect.number(),
+        responseStart: Expect.number(),
+        responseEnd: Expect.number(),
+        id: Expect.number(),
+        redirectId: parentStopMarker.data.id,
+        pri: Expect.number(),
+        cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+        redirectType: "Permanent",
+        isHttpToHttpsRedirect: false,
+      }),
+    });
+
+    const expectedProperties = {
+      name: Expect.stringMatches(
+        `Load \\d+:.*${escapeStringRegexp(targetUrl)}`
+      ),
+    };
+    const expectedDataProperties = {
+      type: "Network",
+      status: "STATUS_STOP",
+      URI: targetUrl,
+      requestMethod: "GET",
+      contentType: "text/html",
+      startTime: Expect.number(),
+      endTime: Expect.number(),
+      domainLookupStart: Expect.number(),
+      domainLookupEnd: Expect.number(),
+      connectStart: Expect.number(),
+      tcpConnectEnd: Expect.number(),
+      connectEnd: Expect.number(),
+      requestStart: Expect.number(),
+      responseStart: Expect.number(),
+      responseEnd: Expect.number(),
+      id: Expect.number(),
+      count: Expect.number(),
+      pri: Expect.number(),
+    };
+
+    Assert.objectContains(parentStopMarker, expectedProperties);
+    Assert.objectContains(contentStopMarker, expectedProperties);
+
+    // The cache information is missing from the content marker, it's only part
+    // of the parent marker. See Bug 1544821.
+    Assert.objectContainsOnly(parentStopMarker.data, {
+      ...expectedDataProperties,
+      // Because the request races with the cache, these 2 values are valid:
+      // "Missed" when the cache answered before we get a result from the network.
+      // "Unresolved" when we got a response from the network before the cache subsystem.
+      cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+    });
+    Assert.objectContainsOnly(contentStopMarker.data, expectedDataProperties);
+  });
+});
+
+add_task(async function test_network_markers_redirect_resources() {
+  // In this test we request an HTML file that itself contains resources that
+  // are redirected.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = BASE_URL + "page_with_resources.html?cacheBust=" + Math.random();
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread);
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    info(JSON.stringify(parentNetworkMarkers, null, 2));
+    info(JSON.stringify(contentNetworkMarkers, null, 2));
+
+    Assert.equal(
+      parentNetworkMarkers.length,
+      8,
+      `We should get 4 pairs of network markers in the parent thread.`
+      // 1 - The main page
+      // 2 - The SVG
+      // 3 - The redirected request for the second SVG request.
+      // 4 - The SVG, again
+    );
+
+    /* In this second test, the top level navigation request isn't redirected.
+     * Contrary to Bug 1692879 we get all network markers for redirected
+     * resources. */
+    Assert.equal(
+      contentNetworkMarkers.length,
+      8,
+      `We should get 4 pairs of network markers in the content thread.`
+    );
+
+    // The same resource firefox-logo-nightly.svg is requested twice, but the
+    // second time it is redirected.
+    // We're not interested in the main page, as we test that in other files.
+    // In this page we're only interested in the marker for requested resources.
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that both arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+
+    const parentFirstStopMarker = parentPairs[1][1];
+    const parentRedirectMarker = parentPairs[2][1];
+    const parentSecondStopMarker = parentPairs[3][1];
+    const contentFirstStopMarker = contentPairs[1][1];
+    const contentRedirectMarker = contentPairs[2][1];
+    const contentSecondStopMarker = contentPairs[3][1];
+
+    const expectedCommonDataProperties = {
+      type: "Network",
+      requestMethod: "GET",
+      startTime: Expect.number(),
+      endTime: Expect.number(),
+      id: Expect.number(),
+      pri: Expect.number(),
+      innerWindowID: Expect.number(),
+    };
+
+    // These properties are present when a connection is fully opened. This is
+    // most often the case, unless we're in verify mode, because in that case
+    // we run the same tests several times in the same Firefox and they might be
+    // cached, or in chaos mode Firefox may make all requests sequentially on
+    // the same connection.
+    // In these cases, these properties won't always be present.
+    const expectedConnectionProperties = {
+      domainLookupStart: Expect.number(),
+      domainLookupEnd: Expect.number(),
+      connectStart: Expect.number(),
+      tcpConnectEnd: Expect.number(),
+      connectEnd: Expect.number(),
+      requestStart: Expect.number(),
+      responseStart: Expect.number(),
+      responseEnd: Expect.number(),
+    };
+
+    const expectedPropertiesForStopMarker = {
+      name: Expect.stringMatches(/Load \d+:.*\/firefox-logo-nightly\.svg/),
+    };
+
+    const expectedDataPropertiesForStopMarker = {
+      ...expectedCommonDataProperties,
+      ...expectedConnectionProperties,
+      status: "STATUS_STOP",
+      URI: Expect.stringContains("/firefox-logo-nightly.svg"),
+      contentType: "image/svg+xml",
+      count: Expect.number(),
+    };
+
+    const expectedPropertiesForRedirectMarker = {
+      name: Expect.stringMatches(
+        /Load \d+:.*\/redirect.sjs\?firefox-logo-nightly\.svg/
+      ),
+    };
+
+    const expectedDataPropertiesForRedirectMarker = {
+      ...expectedCommonDataProperties,
+      ...expectedConnectionProperties,
+      status: "STATUS_REDIRECT",
+      URI: Expect.stringContains("/redirect.sjs?firefox-logo-nightly.svg"),
+      RedirectURI: Expect.stringContains("/firefox-logo-nightly.svg"),
+      contentType: null,
+      redirectType: "Permanent",
+      isHttpToHttpsRedirect: false,
+    };
+
+    Assert.objectContains(
+      parentFirstStopMarker,
+      expectedPropertiesForStopMarker
+    );
+    Assert.objectContainsOnly(parentFirstStopMarker.data, {
+      ...expectedDataPropertiesForStopMarker,
+      // The cache information is missing from the content marker, it's only part
+      // of the parent marker. See Bug 1544821.
+      // Also, because the request races with the cache, these 2 values are valid:
+      // "Missed" when the cache answered before we get a result from the network.
+      // "Unresolved" when we got a response from the network before the cache subsystem.
+      cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+    });
+
+    Assert.objectContains(
+      contentFirstStopMarker,
+      expectedPropertiesForStopMarker
+    );
+    Assert.objectContainsOnly(
+      contentFirstStopMarker.data,
+      expectedDataPropertiesForStopMarker
+    );
+
+    Assert.objectContains(
+      parentRedirectMarker,
+      expectedPropertiesForRedirectMarker
+    );
+    Assert.objectContainsOnly(parentRedirectMarker.data, {
+      ...expectedDataPropertiesForRedirectMarker,
+      redirectId: parentSecondStopMarker.data.id,
+      // See above for the full explanation about the cache property.
+      cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+    });
+
+    Assert.objectContains(
+      contentRedirectMarker,
+      expectedPropertiesForRedirectMarker
+    );
+    Assert.objectContainsOnly(contentRedirectMarker.data, {
+      ...expectedDataPropertiesForRedirectMarker,
+      redirectId: contentSecondStopMarker.data.id,
+    });
+
+    Assert.objectContains(
+      parentSecondStopMarker,
+      expectedPropertiesForStopMarker
+    );
+    Assert.objectContainsOnly(parentSecondStopMarker.data, {
+      ...expectedDataPropertiesForStopMarker,
+      // NOTE(review): "count" is already set by the spread above; redundant.
+      count: Expect.number(),
+      // See above for the full explanation about the cache property.
+      cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+    });
+
+    Assert.objectContains(
+      contentSecondStopMarker,
+      expectedPropertiesForStopMarker
+    );
+    Assert.objectContainsOnly(
+      contentSecondStopMarker.data,
+      expectedDataPropertiesForStopMarker
+    );
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_cache_first.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_cache_first.js
new file mode 100644
index 0000000000..c1ad49b262
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_cache_first.js
@@ -0,0 +1,378 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly.
+ * In this file we'll test a caching service worker. This service worker will
+ * fetch and store requests at install time, and serve them when the page
+ * requests them.
+ */
+
+const serviceWorkerFileName = "serviceworker_cache_first.js";
+registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData());
+
+add_task(async function test_network_markers_service_worker_setup() {
+  // Turn off the disk and memory caches. This makes the results more
+  // predictable, and it also keeps things simpler when dealing with
+  // service workers.
+  const cachePrefs = [
+    ["browser.cache.disk.enable", false],
+    ["browser.cache.memory.enable", false],
+  ];
+  await SpecialPowers.pushPrefEnv({ set: cachePrefs });
+});
+
+add_task(async function test_network_markers_service_worker_register() {
+  // In this first step, we request an HTML page that will register a service
+  // worker. We'll wait until the service worker is fully installed before
+  // checking various things.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    await SpecialPowers.spawn(
+      contentBrowser,
+      [serviceWorkerFileName],
+      async function (serviceWorkerFileName) {
+        await content.wrappedJSObject.registerServiceWorkerAndWait(
+          serviceWorkerFileName
+        );
+      }
+    );
+
+    const { parentThread, contentThread, profile } =
+      await stopProfilerNowAndGetThreads(contentPid);
+
+    // The service worker work happens in a third "thread" or process, let's try
+    // to find it.
+    // Currently the fetches happen on the main thread for the content process,
+    // this may change in the future and we may have to adapt this function.
+    // Also please note this isn't necessarily the same content process as the
+    // one for the tab.
+    const { serviceWorkerParentThread } = findServiceWorkerThreads(profile);
+
+    // Here are a few sanity checks.
+    ok(
+      serviceWorkerParentThread,
+      "We should find a thread for the service worker."
+    );
+
+    Assert.notEqual(
+      serviceWorkerParentThread.pid,
+      parentThread.pid,
+      "We should have a different pid than the parent thread."
+    );
+    Assert.notEqual(
+      serviceWorkerParentThread.tid,
+      parentThread.tid,
+      "We should have a different tid than the parent thread."
+    );
+
+    // Let's make sure we actually have a registered service worker.
+    const workers = await SpecialPowers.registeredServiceWorkers();
+    Assert.equal(
+      workers.length,
+      1,
+      "One service worker should be properly registered."
+    );
+
+    // Logging some information about the threads makes debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+    logInformationForThread(
+      "serviceWorkerParentThread information",
+      serviceWorkerParentThread
+    );
+
+    // Now let's check the marker payloads.
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers for that check, which
+      // we're not interested in for this part of the test. They're only
+      // present in the parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    const serviceWorkerNetworkMarkers = getInflatedNetworkMarkers(
+      serviceWorkerParentThread
+    );
+
+    // Some more logs for debugging purposes.
+    info(
+      "Parent network markers: " + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers: " +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+    info(
+      "Serviceworker network markers: " +
+        JSON.stringify(serviceWorkerNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+    const serviceWorkerPairs = getPairsOfNetworkMarkers(
+      serviceWorkerNetworkMarkers
+    );
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that all three arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+    serviceWorkerPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the service worker process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    const parentStopMarkers = parentPairs.map(([_, stopMarker]) => stopMarker);
+
+    // These are the files cached by the service worker. We should see markers
+    // for both the parent thread and the service worker thread.
+    const expectedFiles = [
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    for (const expectedFile of expectedFiles) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers in both processes.`
+      );
+      const parentMarker = parentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const expectedProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+        data: Expect.objectContains({
+          status: "STATUS_STOP",
+          URI: expectedFile,
+          requestMethod: "GET",
+          contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          domainLookupStart: Expect.number(),
+          domainLookupEnd: Expect.number(),
+          connectStart: Expect.number(),
+          tcpConnectEnd: Expect.number(),
+          connectEnd: Expect.number(),
+          requestStart: Expect.number(),
+          responseStart: Expect.number(),
+          responseEnd: Expect.number(),
+          id: Expect.number(),
+          count: Expect.number(),
+          pri: Expect.number(),
+        }),
+      };
+
+      Assert.objectContains(parentMarker, expectedProperties);
+    }
+  });
+});
+
+add_task(async function test_network_markers_service_worker_use() {
+  // In this test we request the HTML page that the service worker cached in
+  // the previous task; its requests are expected to go through the worker.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_page.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    // Logging some information about the threads makes debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers for that check, which
+      // we're not interested in for this part of the test. They're only
+      // present in the parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+
+    // Here are some logs to ease debugging.
+    info(
+      "Parent network markers: " + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers: " +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+
+    // These are the files cached by the service worker. We should see markers
+    // for the parent thread and the content thread.
+    const expectedFiles = [
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that both arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    const parentEndMarkers = parentPairs.map(([_, endMarker]) => endMarker);
+    const contentStopMarkers = contentPairs.map(
+      ([_, stopMarker]) => stopMarker
+    );
+
+    Assert.equal(
+      parentEndMarkers.length,
+      expectedFiles.length * 2, // one redirect + one stop
+      "There should be twice as many end markers in the parent process as requested files."
+    );
+    Assert.equal(
+      contentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the content process as requested files."
+    );
+
+    for (const [i, expectedFile] of expectedFiles.entries()) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers in both processes.`
+      );
+      const [parentRedirectMarker, parentStopMarker] = parentEndMarkers.filter(
+        marker => marker.data.URI === expectedFile
+      );
+      const contentMarker = contentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const commonDataProperties = {
+        type: "Network",
+        URI: expectedFile,
+        requestMethod: "GET",
+        contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+      };
+
+      const expectedProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+      };
+
+      Assert.objectContains(parentRedirectMarker, expectedProperties);
+      Assert.objectContains(parentStopMarker, expectedProperties);
+      Assert.objectContains(contentMarker, expectedProperties);
+      if (i === 0) {
+        // This is the top level navigation, the HTML file.
+        Assert.objectContainsOnly(parentRedirectMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_REDIRECT",
+          contentType: null,
+          cache: "Unresolved",
+          RedirectURI: expectedFile,
+          redirectType: "Internal",
+          redirectId: parentStopMarker.data.id,
+          isHttpToHttpsRedirect: false,
+        });
+
+        Assert.objectContainsOnly(parentStopMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_STOP",
+        });
+
+        Assert.objectContainsOnly(contentMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_STOP",
+        });
+      } else {
+        Assert.objectContainsOnly(parentRedirectMarker.data, {
+          ...commonDataProperties,
+          status: "STATUS_REDIRECT",
+          contentType: null,
+          cache: "Unresolved",
+          innerWindowID: Expect.number(),
+          RedirectURI: expectedFile,
+          redirectType: "Internal",
+          redirectId: parentStopMarker.data.id,
+          isHttpToHttpsRedirect: false,
+        });
+
+        Assert.objectContainsOnly(
+          parentStopMarker.data,
+          // Note: in the future we may have more properties. We're using the
+          // "Only" flavor of the matcher so that we don't forget to update this
+          // test when this changes.
+          {
+            ...commonDataProperties,
+            innerWindowID: Expect.number(),
+            status: "STATUS_STOP",
+          }
+        );
+
+        Assert.objectContainsOnly(contentMarker.data, {
+          ...commonDataProperties,
+          innerWindowID: Expect.number(),
+          status: "STATUS_STOP",
+        });
+      }
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_fetch_handler.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_fetch_handler.js
new file mode 100644
index 0000000000..ad2cc81661
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_fetch_handler.js
@@ -0,0 +1,218 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly.
+ * In this file we'll test the case of a service worker that has no fetch
+ * handlers. In this case, a fetch is done to the network. There may be
+ * shortcuts in our code in this case, that's why it's important to test it
+ * separately.
+ */
+
+const serviceWorkerFileName = "serviceworker_no_fetch_handler.js";
+registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData());
+
+add_task(async function test_network_markers_service_worker_setup() {
+  // Turn off the disk and memory caches. This makes the results more
+  // predictable, and it also keeps things simpler when dealing with
+  // service workers.
+  const cachePrefs = [
+    ["browser.cache.disk.enable", false],
+    ["browser.cache.memory.enable", false],
+  ];
+  await SpecialPowers.pushPrefEnv({ set: cachePrefs });
+});
+
+add_task(async function test_network_markers_service_worker_register() {
+  // In this first step, we request an HTML page that will register a service
+  // worker. We wait until the service worker is fully installed, then check
+  // that it is registered. The profiler is not started in this task.
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    await SpecialPowers.spawn(
+      contentBrowser,
+      [serviceWorkerFileName],
+      async function (serviceWorkerFileName) {
+        await content.wrappedJSObject.registerServiceWorkerAndWait(
+          serviceWorkerFileName
+        );
+      }
+    );
+
+    // Let's make sure we actually have a registered service worker.
+    const workers = await SpecialPowers.registeredServiceWorkers();
+    Assert.equal(
+      workers.length,
+      1,
+      "One service worker should be properly registered."
+    );
+  });
+});
+
+add_task(async function test_network_markers_service_worker_use() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_page.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    // Logging some information about the threads makes debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers for that check, which
+      // we're not interested in for this part of the test. They're only
+      // present in the parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+
+    // Here are some logs to ease debugging.
+    info(
+      "Parent network markers:" + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers:" +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that both arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    const parentStopMarkers = parentPairs.map(([_, stopMarker]) => stopMarker);
+    const contentStopMarkers = contentPairs.map(
+      ([_, stopMarker]) => stopMarker
+    );
+
+    // These are the files requested by the page.
+    // We should see markers for the parent thread and the content thread.
+    const expectedFiles = [
+      // Please take care that the first element is the top level navigation, as
+      // this is special-cased below.
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    Assert.equal(
+      parentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the parent process as requested files."
+    );
+    Assert.equal(
+      contentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the content process as requested files."
+    );
+
+    for (const [i, expectedFile] of expectedFiles.entries()) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers in both processes.`
+      );
+      const parentMarker = parentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+      const contentMarker = contentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const commonProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+      };
+      Assert.objectContains(parentMarker, commonProperties);
+      Assert.objectContains(contentMarker, commonProperties);
+
+      // We get the full set of properties in this case, because we do an actual
+      // fetch to the network.
+      const commonDataProperties = {
+        type: "Network",
+        status: "STATUS_STOP",
+        URI: expectedFile,
+        requestMethod: "GET",
+        contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        count: Expect.number(),
+        domainLookupStart: Expect.number(),
+        domainLookupEnd: Expect.number(),
+        connectStart: Expect.number(),
+        tcpConnectEnd: Expect.number(),
+        connectEnd: Expect.number(),
+        requestStart: Expect.number(),
+        responseStart: Expect.number(),
+        responseEnd: Expect.number(),
+      };
+
+      if (i === 0) {
+        // The first marker is special cased: this is the top level navigation
+        // serviceworker_page.html,
+        // and in this case we don't have all the same properties. Especially
+        // the innerWindowID information is missing.
+        Assert.objectContainsOnly(parentMarker.data, {
+          ...commonDataProperties,
+          // Note that the parent process has the "cache" information, but not the content
+          // process. See Bug 1544821.
+          // Also because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+        });
+
+        Assert.objectContainsOnly(contentMarker.data, commonDataProperties);
+      } else {
+        // This is the other file firefox-logo-nightly.svg.
+        Assert.objectContainsOnly(parentMarker.data, {
+          ...commonDataProperties,
+          // Because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+          innerWindowID: Expect.number(),
+        });
+
+        Assert.objectContainsOnly(contentMarker.data, {
+          ...commonDataProperties,
+          innerWindowID: Expect.number(),
+        });
+      }
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js
new file mode 100644
index 0000000000..973ae61a7f
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_no_respondWith_in_fetch_handler.js
@@ -0,0 +1,294 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that the expected network markers are emitted.
+ * This file covers the case of a service worker that has a fetch handler, but
+ * never calls respondWith. In this case, some process called "reset
+ * interception" happens, and the fetch is still carried out by our code.
+ * Because this is a bit of an edge case, it's important to have a test for it.
+ */
+// File name of the service worker script registered by the tasks below.
+const serviceWorkerFileName =
+  "serviceworker_no_respondWith_in_fetch_handler.js";
+registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData());
+
+add_task(async function test_network_markers_service_worker_setup() {
+  // Turn off both the disk and the memory cache: results get more predictable
+  // and service workers become simpler to deal with.
+  const cachePrefs = [
+    ["browser.cache.disk.enable", false],
+    ["browser.cache.memory.enable", false],
+  ];
+
+  await SpecialPowers.pushPrefEnv({ set: cachePrefs });
+});
+
+add_task(async function test_network_markers_service_worker_register() {
+  // Step one: open the HTML page that registers the service worker, and wait
+  // for the worker to be fully installed before running any check.
+  const registerPageUrl =
+    `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`;
+  await BrowserTestUtils.withNewTab(registerPageUrl, async browser => {
+    // Passed to SpecialPowers.spawn below; it only uses its own argument.
+    const registerAndWait = async workerScript => {
+      await content.wrappedJSObject.registerServiceWorkerAndWait(workerScript);
+    };
+    await SpecialPowers.spawn(
+      browser,
+      [serviceWorkerFileName],
+      registerAndWait
+    );
+
+    // Exactly one service worker must be registered at this point.
+    const registered = await SpecialPowers.registeredServiceWorkers();
+    Assert.equal(
+      registered.length,
+      1,
+      "One service worker should be properly registered."
+    );
+  });
+});
+
+add_task(async function test_network_markers_service_worker_use() {
+  // Profile a load of serviceworker_page.html and check its network markers.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_page.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } =
+      await stopProfilerNowAndGetThreads(contentPid);
+
+    // Logging some information about the threads makes debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers about that, which we're not
+      // interested in for this part of the test. They're only present in the
+      // parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+
+    // Here are some logs to ease debugging.
+    info(
+      "Parent network markers:" + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers:" +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that both arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    // In this test, we should have redirect markers as well as stop markers,
+    // because this case generates internal redirects. We may want to change
+    // that in the future, or handle this specially in the frontend.
+    // Let's create various arrays to help assert.
+
+    const parentEndMarkers = parentPairs.map(([_, stopMarker]) => stopMarker);
+    const parentStopMarkers = parentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_STOP"
+    );
+    const parentRedirectMarkers = parentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_REDIRECT"
+    );
+    const contentEndMarkers = contentPairs.map(([_, stopMarker]) => stopMarker);
+    const contentStopMarkers = contentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_STOP"
+    );
+    const contentRedirectMarkers = contentEndMarkers.filter(
+      marker => marker.data.status === "STATUS_REDIRECT"
+    );
+
+    // These are the files requested by the page.
+    // We should see markers for the parent thread and the content thread.
+    const expectedFiles = [
+      // Please take care that the first element is the top level navigation, as
+      // this is special-cased below.
+      "serviceworker_page.html",
+      "firefox-logo-nightly.svg",
+    ].map(filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`);
+
+    Assert.equal(
+      parentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the parent process as requested files."
+    );
+    Assert.equal(
+      parentRedirectMarkers.length,
+      expectedFiles.length * 2, // http -> intercepted, intercepted -> http
+      "There should be twice as many redirect markers in the parent process as requested files."
+    );
+    Assert.equal(
+      contentStopMarkers.length,
+      expectedFiles.length,
+      "There should be as many stop markers in the content process as requested files."
+    );
+    // Note: there will be no redirect markers in the content process when a
+    // ServiceWorker request falls back to the network.
+    // See Bug 1793940.
+    Assert.equal(
+      contentRedirectMarkers.length,
+      0,
+      "There should be no redirect markers in the content process."
+    );
+
+    for (const [i, expectedFile] of expectedFiles.entries()) {
+      info(
+        `Checking if "${expectedFile}" is present in the network markers in both processes.`
+      );
+      const [parentRedirectMarkerIntercept, parentRedirectMarkerReset] =
+        parentRedirectMarkers.filter(
+          marker => marker.data.URI === expectedFile
+        );
+      const parentStopMarker = parentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+      const contentStopMarker = contentStopMarkers.find(
+        marker => marker.data.URI === expectedFile
+      );
+
+      const commonProperties = {
+        name: Expect.stringMatches(
+          `Load \\d+:.*${escapeStringRegexp(expectedFile)}`
+        ),
+      };
+      Assert.objectContains(parentRedirectMarkerIntercept, commonProperties);
+      Assert.objectContains(parentRedirectMarkerReset, commonProperties);
+      Assert.objectContains(parentStopMarker, commonProperties);
+      Assert.objectContains(contentStopMarker, commonProperties);
+      // Note: there's no check for the contentRedirectMarker, because there's
+      // no marker for a top level navigation redirect in the content process.
+
+      // We get the full set of properties in this case, because we do an actual
+      // fetch to the network.
+      const commonDataProperties = {
+        type: "Network",
+        status: "STATUS_STOP",
+        URI: expectedFile,
+        requestMethod: "GET",
+        contentType: Expect.stringMatches(/^(text\/html|image\/svg\+xml)$/),
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        count: Expect.number(),
+        domainLookupStart: Expect.number(),
+        domainLookupEnd: Expect.number(),
+        connectStart: Expect.number(),
+        tcpConnectEnd: Expect.number(),
+        connectEnd: Expect.number(),
+        requestStart: Expect.number(),
+        responseStart: Expect.number(),
+        responseEnd: Expect.number(),
+      };
+
+      const commonRedirectProperties = {
+        type: "Network",
+        status: "STATUS_REDIRECT",
+        URI: expectedFile,
+        RedirectURI: expectedFile,
+        requestMethod: "GET",
+        contentType: null,
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        redirectType: "Internal",
+        isHttpToHttpsRedirect: false,
+      };
+
+      if (i === 0) {
+        // The first marker is special cased: this is the top level navigation
+        // serviceworker_page.html,
+        // and in this case we don't have all the same properties. Especially
+        // the innerWindowID information is missing.
+        Assert.objectContainsOnly(parentStopMarker.data, {
+          ...commonDataProperties,
+          // Note that the parent process has the "cache" information, but not the content
+          // process. See Bug 1544821.
+          // Also, because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+        });
+        Assert.objectContainsOnly(contentStopMarker.data, commonDataProperties);
+
+        Assert.objectContainsOnly(parentRedirectMarkerIntercept.data, {
+          ...commonRedirectProperties,
+          redirectId: parentRedirectMarkerReset.data.id,
+          cache: "Unresolved",
+        });
+        Assert.objectContainsOnly(parentRedirectMarkerReset.data, {
+          ...commonRedirectProperties,
+          redirectId: parentStopMarker.data.id,
+        });
+
+        // Note: there's no check for the contentRedirectMarker, because there's
+        // no marker for a top level navigation redirect in the content process.
+      } else {
+        // This is the other file firefox-logo-nightly.svg.
+        Assert.objectContainsOnly(parentStopMarker.data, {
+          ...commonDataProperties,
+          // Because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+          innerWindowID: Expect.number(),
+        });
+        Assert.objectContains(contentStopMarker, commonProperties);
+        Assert.objectContainsOnly(contentStopMarker.data, {
+          ...commonDataProperties,
+          innerWindowID: Expect.number(),
+        });
+
+        Assert.objectContainsOnly(parentRedirectMarkerIntercept.data, {
+          ...commonRedirectProperties,
+          innerWindowID: Expect.number(),
+          redirectId: parentRedirectMarkerReset.data.id,
+          cache: "Unresolved",
+        });
+        Assert.objectContainsOnly(parentRedirectMarkerReset.data, {
+          ...commonRedirectProperties,
+          innerWindowID: Expect.number(),
+          redirectId: parentStopMarker.data.id,
+        });
+      }
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_synthetized_response.js b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_synthetized_response.js
new file mode 100644
index 0000000000..060592840a
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_serviceworker_synthetized_response.js
@@ -0,0 +1,480 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that the expected network markers are emitted.
+ * In this file we'll test a service worker that returns a synthesized response.
+ * This means the service worker will make up a response by itself.
+ */
+// File name of the service worker script registered by the tasks below.
+const serviceWorkerFileName = "serviceworker_synthetized_response.js";
+registerCleanupFunction(() => SpecialPowers.removeAllServiceWorkerData());
+
+add_task(async function test_network_markers_service_worker_setup() {
+  // Turn off both the disk and the memory cache: results get more predictable
+  // and service workers become simpler to deal with.
+  const cachePrefs = [
+    ["browser.cache.disk.enable", false],
+    ["browser.cache.memory.enable", false],
+  ];
+
+  await SpecialPowers.pushPrefEnv({ set: cachePrefs });
+});
+
+add_task(async function test_network_markers_service_worker_register() {
+  // Step one: open the HTML page that registers the service worker, and wait
+  // for the worker to be fully installed before running any check. The
+  // profiler is expected to be stopped while this happens.
+  const profilerIsActive = Services.profiler.IsActive();
+  Assert.ok(!profilerIsActive, "The profiler is not currently active");
+
+  const registerPageUrl =
+    `${BASE_URL_HTTPS}serviceworkers/serviceworker_register.html`;
+  await BrowserTestUtils.withNewTab(registerPageUrl, async browser => {
+    // Passed to SpecialPowers.spawn below; it only uses its own argument and
+    // the `content` global.
+    const registerAndWait = async workerScript => {
+      await content.wrappedJSObject.registerServiceWorkerAndWait(workerScript);
+    };
+    await SpecialPowers.spawn(
+      browser,
+      [serviceWorkerFileName],
+      registerAndWait
+    );
+
+    // Exactly one service worker must be registered at this point.
+    const registered = await SpecialPowers.registeredServiceWorkers();
+    Assert.equal(
+      registered.length,
+      1,
+      "One service worker should be properly registered."
+    );
+  });
+});
+
+add_task(async function test_network_markers_service_worker_use() {
+  // In this test, we'll first load a plain html file, then do some fetch
+  // requests in the context of the page. One request is served with a
+  // synthetized response, the other request is served with a real "fetch" done
+  // by the service worker.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = `${BASE_URL_HTTPS}serviceworkers/serviceworker_simple.html`;
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    await SpecialPowers.spawn(contentBrowser, [], async () => {
+      // This request is served directly by the service worker as a synthetized response.
+      await content
+        .fetch("firefox-generated.svg")
+        .then(res => res.arrayBuffer());
+
+      // This request is served by a fetch done inside the service worker.
+      await content
+        .fetch("firefox-logo-nightly.svg")
+        .then(res => res.arrayBuffer());
+    });
+
+    const { parentThread, contentThread, profile } =
+      await stopProfilerNowAndGetThreads(contentPid);
+
+    // The service worker work happens in a third "thread" or process, let's try
+    // to find it.
+    // Currently the fetches happen on the main thread for the content process,
+    // this may change in the future and we may have to adapt this function.
+    // Also please note this isn't necessarily the same content process as the
+    // ones for the tab.
+    const { serviceWorkerParentThread } = findServiceWorkerThreads(profile);
+
+    ok(
+      serviceWorkerParentThread,
+      "We should find a thread for the service worker."
+    );
+
+    // Logging some information about the threads makes debugging easier.
+    logInformationForThread("parentThread information", parentThread);
+    logInformationForThread("contentThread information", contentThread);
+    logInformationForThread(
+      "serviceWorkerParentThread information",
+      serviceWorkerParentThread
+    );
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread)
+      // When we load a page, Firefox will check the service worker freshness
+      // after a few seconds. So when the test lasts a long time (with some test
+      // environments) we might see spurious markers about that, which we're not
+      // interested in for this part of the test. They're only present in the
+      // parent process.
+      .filter(marker => !marker.data.URI.includes(serviceWorkerFileName));
+
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    const serviceWorkerNetworkMarkers =
+      getInflatedNetworkMarkers(serviceWorkerParentThread);
+
+    // Some more logs for debugging purposes.
+    info(
+      "Parent network markers: " + JSON.stringify(parentNetworkMarkers, null, 2)
+    );
+    info(
+      "Content network markers: " +
+        JSON.stringify(contentNetworkMarkers, null, 2)
+    );
+    info(
+      "Serviceworker network markers: " +
+        JSON.stringify(serviceWorkerNetworkMarkers, null, 2)
+    );
+
+    const parentPairs = getPairsOfNetworkMarkers(parentNetworkMarkers);
+    const contentPairs = getPairsOfNetworkMarkers(contentNetworkMarkers);
+    const serviceWorkerPairs = getPairsOfNetworkMarkers(
+      serviceWorkerNetworkMarkers
+    );
+
+    // First, make sure we properly matched all start with stop markers. This
+    // means that all three arrays should contain only arrays of 2 elements.
+    parentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the parent process.`
+      )
+    );
+
+    contentPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the content process.`
+      )
+    );
+    serviceWorkerPairs.forEach(pair =>
+      Assert.equal(
+        pair.length,
+        2,
+        `For the URL ${pair[0].data.URI} we should get 2 markers in the service worker process.`
+      )
+    );
+
+    // Let's look at all pairs and make sure we requested all expected files.
+    // In this test, we should have redirect markers as well as stop markers,
+    // because this case generates internal redirects.
+    // Let's create various arrays to help assert.
+
+    const parentStopMarkers = parentPairs.map(([_, stopMarker]) => stopMarker);
+    const contentStopMarkers = contentPairs.map(
+      ([_, stopMarker]) => stopMarker
+    );
+    // In this test we have very different results in the various threads, so
+    // we'll assert every case separately.
+    // A simple function to help constructing better assertions:
+    const fullUrl = filename => `${BASE_URL_HTTPS}serviceworkers/${filename}`;
+
+    {
+      // In the parent process, we have 8 network markers:
+      // - twice the html file -- because it's not cached by the SW, we get the
+      //   marker both for the initial request and for the request initiated from
+      //   the SW.
+      // - twice the firefox svg file -- similar situation
+      // - once the generated svg file -- this one isn't fetched by the SW but
+      //   rather forged directly, so there's no "second fetch", and thus we have
+      //   only one marker.
+      // - for each of these files, we have first an internal redirect from the
+      //   main channel to the service worker. => 3 redirect markers more.
+      Assert.equal(
+        parentStopMarkers.length,
+        8, // 3 html files, 3 firefox svg files, 2 generated svg files
+        "There should be 8 stop markers in the parent process."
+      );
+
+      // The "1" requests are the initial requests that are intercepted, coming
+      // from the web page, while the "2" requests are requests to the network,
+      // coming from the service worker. The 1 were requested before 2, 2 ends
+      // before 1.
+      // "Intercept" requests are the internal redirects from the main channel
+      // to the service worker. They happen before others.
+      const [
+        htmlFetchIntercept,
+        htmlFetch1,
+        htmlFetch2,
+        generatedSvgIntercept,
+        generatedSvgFetch,
+        firefoxSvgIntercept,
+        firefoxSvgFetch1,
+        firefoxSvgFetch2,
+      ] = parentStopMarkers;
+
+      /* ----- HTML FILE ---- */
+      Assert.objectContains(htmlFetchIntercept, {
+        name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_REDIRECT",
+          URI: fullUrl("serviceworker_simple.html"),
+          requestMethod: "GET",
+          contentType: null,
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          id: Expect.number(),
+          pri: Expect.number(),
+          redirectId: htmlFetch1.data.id,
+          redirectType: "Internal",
+          isHttpToHttpsRedirect: false,
+          RedirectURI: fullUrl("serviceworker_simple.html"),
+          cache: "Unresolved",
+        }),
+      });
+
+      Assert.objectContains(htmlFetch1, {
+        name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_STOP",
+          URI: fullUrl("serviceworker_simple.html"),
+          requestMethod: "GET",
+          contentType: "text/html",
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          id: Expect.number(),
+          pri: Expect.number(),
+        }),
+      });
+      Assert.objectContains(htmlFetch2, {
+        name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_STOP",
+          URI: fullUrl("serviceworker_simple.html"),
+          requestMethod: "GET",
+          contentType: "text/html",
+          // Because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          domainLookupStart: Expect.number(),
+          domainLookupEnd: Expect.number(),
+          connectStart: Expect.number(),
+          tcpConnectEnd: Expect.number(),
+          connectEnd: Expect.number(),
+          requestStart: Expect.number(),
+          responseStart: Expect.number(),
+          responseEnd: Expect.number(),
+          id: Expect.number(),
+          count: Expect.number(),
+          pri: Expect.number(),
+        }),
+      });
+      /* ----- /HTML FILE ---- */
+
+      /* ----- GENERATED SVG FILE ---- */
+      Assert.objectContains(generatedSvgIntercept, {
+        name: Expect.stringMatches(/Load \d+:.*firefox-generated.svg/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_REDIRECT",
+          URI: fullUrl("firefox-generated.svg"),
+          requestMethod: "GET",
+          contentType: null,
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          id: Expect.number(),
+          pri: Expect.number(),
+          redirectId: generatedSvgFetch.data.id,
+          redirectType: "Internal",
+          isHttpToHttpsRedirect: false,
+          RedirectURI: fullUrl("firefox-generated.svg"),
+          cache: "Unresolved",
+          innerWindowID: Expect.number(),
+        }),
+      });
+      Assert.objectContains(generatedSvgFetch, {
+        name: Expect.stringMatches(/Load \d+:.*firefox-generated.svg/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_STOP",
+          URI: fullUrl("firefox-generated.svg"),
+          requestMethod: "GET",
+          contentType: "image/svg+xml",
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          id: Expect.number(),
+          pri: Expect.number(),
+          innerWindowID: Expect.number(),
+        }),
+      });
+      /* ----- /GENERATED SVG FILE ---- */
+      /* ----- REQUESTED SVG FILE ---- */
+      Assert.objectContains(firefoxSvgIntercept, {
+        name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_REDIRECT",
+          URI: fullUrl("firefox-logo-nightly.svg"),
+          requestMethod: "GET",
+          contentType: null,
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          id: Expect.number(),
+          pri: Expect.number(),
+          redirectId: firefoxSvgFetch1.data.id,
+          redirectType: "Internal",
+          isHttpToHttpsRedirect: false,
+          RedirectURI: fullUrl("firefox-logo-nightly.svg"),
+          cache: "Unresolved",
+          innerWindowID: Expect.number(),
+        }),
+      });
+      Assert.objectContains(firefoxSvgFetch1, {
+        name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_STOP",
+          URI: fullUrl("firefox-logo-nightly.svg"),
+          requestMethod: "GET",
+          contentType: "image/svg+xml",
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          id: Expect.number(),
+          pri: Expect.number(),
+          innerWindowID: Expect.number(),
+        }),
+      });
+      Assert.objectContains(firefoxSvgFetch2, {
+        name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/),
+        data: Expect.objectContainsOnly({
+          type: "Network",
+          status: "STATUS_STOP",
+          URI: fullUrl("firefox-logo-nightly.svg"),
+          requestMethod: "GET",
+          contentType: "image/svg+xml",
+          // Because the request races with the cache, these 2 values are valid:
+          // "Missed" when the cache answered before we get a result from the network.
+          // "Unresolved" when we got a response from the network before the cache subsystem.
+          cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+          startTime: Expect.number(),
+          endTime: Expect.number(),
+          domainLookupStart: Expect.number(),
+          domainLookupEnd: Expect.number(),
+          connectStart: Expect.number(),
+          tcpConnectEnd: Expect.number(),
+          connectEnd: Expect.number(),
+          requestStart: Expect.number(),
+          responseStart: Expect.number(),
+          responseEnd: Expect.number(),
+          id: Expect.number(),
+          count: Expect.number(),
+          pri: Expect.number(),
+          // Note: no innerWindowID here, is that a bug?
+        }),
+      });
+      /* ----- /REQUESTED SVG FILE ---- */
+    }
+
+    // It's possible that the service worker thread IS the content thread, in
+    // that case we'll get all markers in the same thread.
+    // The "1" requests are the initial requests that are intercepted, coming
+    // from the web page, while the "2" requests are the requests coming from
+    // the service worker.
+    let htmlFetch1, generatedSvgFetch1, firefoxSvgFetch1;
+
+    // First, let's handle the case where the threads are different:
+    if (serviceWorkerParentThread !== contentThread) {
+      // In the content process (that is the process for the web page), we have
+      // 3 network markers:
+      // - 1 for the HTML page
+      // - 1 for the generated svg file
+      // - 1 for the firefox svg file
+      // Indeed, the service worker interception is invisible from the context
+      // of the web page, so we just get 3 "normal" requests. However these
+      // requests will miss all timing information, because they're hidden by
+      // the service worker interception. We may want to fix this...
+      Assert.equal(
+        contentStopMarkers.length,
+        3, // 1 for each file
+        "There should be 3 stop markers in the content process."
+      );
+
+      [htmlFetch1, generatedSvgFetch1, firefoxSvgFetch1] = contentStopMarkers;
+    } else {
+      // Else case: the service worker parent thread IS the content thread
+      // (note: this is always the case with fission). In that case all network
+      // markers tested in the above block are together in the same object.
+      Assert.equal(
+        contentStopMarkers.length,
+        5,
+        "There should be 5 stop markers in the combined process (containing both the content page and the service worker)"
+      );
+
+      // Because of how the test is done, these markers are ordered by the
+      // position of the START markers.
+      [
+        // NOTE(review): only 3 of the 5 markers are bound; no "2" request here.
+        // For the htmlFetch request, note that 2 is before 1, because that's
+        // the top level navigation. Indeed for the top level navigation
+        // everything happens first in the main process, possibly before a
+        // content process even exists, and the content process is merely
+        // notified at the end.
+        htmlFetch1,
+        generatedSvgFetch1,
+        firefoxSvgFetch1,
+      ] = contentStopMarkers;
+    }
+
+    // Let's test first the markers coming from the content page.
+    Assert.objectContains(htmlFetch1, {
+      name: Expect.stringMatches(/Load \d+:.*serviceworker_simple.html/),
+      data: Expect.objectContainsOnly({
+        type: "Network",
+        status: "STATUS_STOP",
+        URI: fullUrl("serviceworker_simple.html"),
+        requestMethod: "GET",
+        contentType: "text/html",
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+      }),
+    });
+    Assert.objectContains(generatedSvgFetch1, {
+      name: Expect.stringMatches(/Load \d+:.*firefox-generated.svg/),
+      data: Expect.objectContainsOnly({
+        type: "Network",
+        status: "STATUS_STOP",
+        URI: fullUrl("firefox-generated.svg"),
+        requestMethod: "GET",
+        contentType: "image/svg+xml",
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        innerWindowID: Expect.number(),
+      }),
+    });
+    Assert.objectContains(firefoxSvgFetch1, {
+      name: Expect.stringMatches(/Load \d+:.*firefox-logo-nightly.svg/),
+      data: Expect.objectContainsOnly({
+        type: "Network",
+        status: "STATUS_STOP",
+        URI: fullUrl("firefox-logo-nightly.svg"),
+        requestMethod: "GET",
+        contentType: "image/svg+xml",
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        pri: Expect.number(),
+        innerWindowID: Expect.number(),
+      }),
+    });
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_simple.js b/tools/profiler/tests/browser/browser_test_marker_network_simple.js
new file mode 100644
index 0000000000..15894305a7
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_simple.js
@@ -0,0 +1,81 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly
+ */
+add_task(async function test_network_markers() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = BASE_URL + "simple.html?cacheBust=" + Math.random();
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread);
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    info(JSON.stringify(parentNetworkMarkers, null, 2));
+    info(JSON.stringify(contentNetworkMarkers, null, 2));
+
+    Assert.equal(
+      parentNetworkMarkers.length,
+      2,
+      `We should get a pair of network markers in the parent thread.`
+    );
+    Assert.equal(
+      contentNetworkMarkers.length,
+      2,
+      `We should get a pair of network markers in the content thread.`
+    );
+
+    const parentStopMarker = parentNetworkMarkers[1];
+    const contentStopMarker = contentNetworkMarkers[1];
+
+    const expectedProperties = {
+      name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`),
+      data: Expect.objectContains({
+        status: "STATUS_STOP",
+        URI: url,
+        requestMethod: "GET",
+        contentType: "text/html",
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        domainLookupStart: Expect.number(),
+        domainLookupEnd: Expect.number(),
+        connectStart: Expect.number(),
+        tcpConnectEnd: Expect.number(),
+        connectEnd: Expect.number(),
+        requestStart: Expect.number(),
+        responseStart: Expect.number(),
+        responseEnd: Expect.number(),
+        id: Expect.number(),
+        count: Expect.number(),
+        pri: Expect.number(),
+      }),
+    };
+
+    Assert.objectContains(parentStopMarker, expectedProperties);
+    // The cache information is missing from the content marker, it's only part
+    // of the parent marker. See Bug 1544821.
+    Assert.objectContains(parentStopMarker.data, {
+      // Because the request races with the cache, these 2 values are valid:
+      // "Missed" when the cache answered before we get a result from the network.
+      // "Unresolved" when we got a response from the network before the cache subsystem.
+      cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+    });
+    Assert.objectContains(contentStopMarker, expectedProperties);
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_marker_network_sts.js b/tools/profiler/tests/browser/browser_test_marker_network_sts.js
new file mode 100644
index 0000000000..26f2a1c756
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_marker_network_sts.js
@@ -0,0 +1,130 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we emit network markers accordingly.
+ * In this file we'll test that we behave properly with STS redirections.
+ */
+
+add_task(async function test_network_markers_service_worker_setup() {
+  await SpecialPowers.pushPrefEnv({
+    set: [
+      // Disabling cache makes the result more predictable especially in verify mode.
+      ["browser.cache.disk.enable", false],
+      ["browser.cache.memory.enable", false],
+      // We want to test upgrading requests
+      ["dom.security.https_only_mode", true],
+    ],
+  });
+});
+
+add_task(async function test_network_markers_redirect_to_https() {
+  // In this test, we request an HTML page with http that gets redirected to https.
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  startProfilerForMarkerTests();
+
+  const url = BASE_URL + "simple.html";
+  const targetUrl = BASE_URL_HTTPS + "simple.html";
+
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    const { parentThread, contentThread } = await stopProfilerNowAndGetThreads(
+      contentPid
+    );
+
+    const parentNetworkMarkers = getInflatedNetworkMarkers(parentThread);
+    const contentNetworkMarkers = getInflatedNetworkMarkers(contentThread);
+    info(JSON.stringify(parentNetworkMarkers, null, 2));
+    info(JSON.stringify(contentNetworkMarkers, null, 2));
+
+    Assert.equal(
+      parentNetworkMarkers.length,
+      4,
+      `We should get 2 pairs of network markers in the parent thread.`
+    );
+
+    /* It looks like that for a redirection for the top level navigation, the
+     * content thread sees the markers for the second request only.
+     * See Bug 1692879. */
+    Assert.equal(
+      contentNetworkMarkers.length,
+      2,
+      `We should get one pair of network markers in the content thread.`
+    );
+
+    const parentRedirectMarker = parentNetworkMarkers[1];
+    const parentStopMarker = parentNetworkMarkers[3];
+    // There's no content redirect marker for the reason outlined above.
+    const contentStopMarker = contentNetworkMarkers[1];
+
+    Assert.objectContains(parentRedirectMarker, {
+      name: Expect.stringMatches(`Load \\d+:.*${escapeStringRegexp(url)}`),
+      data: Expect.objectContainsOnly({
+        type: "Network",
+        status: "STATUS_REDIRECT",
+        URI: url,
+        RedirectURI: targetUrl,
+        requestMethod: "GET",
+        contentType: null,
+        startTime: Expect.number(),
+        endTime: Expect.number(),
+        id: Expect.number(),
+        redirectId: parentStopMarker.data.id,
+        pri: Expect.number(),
+        cache: "Unresolved",
+        redirectType: "Permanent",
+        isHttpToHttpsRedirect: true,
+      }),
+    });
+
+    const expectedProperties = {
+      name: Expect.stringMatches(
+        `Load \\d+:.*${escapeStringRegexp(targetUrl)}`
+      ),
+    };
+    const expectedDataProperties = {
+      type: "Network",
+      status: "STATUS_STOP",
+      URI: targetUrl,
+      requestMethod: "GET",
+      contentType: "text/html",
+      startTime: Expect.number(),
+      endTime: Expect.number(),
+      domainLookupStart: Expect.number(),
+      domainLookupEnd: Expect.number(),
+      connectStart: Expect.number(),
+      tcpConnectEnd: Expect.number(),
+      connectEnd: Expect.number(),
+      requestStart: Expect.number(),
+      responseStart: Expect.number(),
+      responseEnd: Expect.number(),
+      id: Expect.number(),
+      count: Expect.number(),
+      pri: Expect.number(),
+    };
+
+    Assert.objectContains(parentStopMarker, expectedProperties);
+    Assert.objectContains(contentStopMarker, expectedProperties);
+
+    // The cache information is missing from the content marker, it's only part
+    // of the parent marker. See Bug 1544821.
+    Assert.objectContainsOnly(parentStopMarker.data, {
+      ...expectedDataProperties,
+      // Because the request races with the cache, these 2 values are valid:
+      // "Missed" when the cache answered before we get a result from the network.
+      // "Unresolved" when we got a response from the network before the cache subsystem.
+      cache: Expect.stringMatches(/^(Missed|Unresolved)$/),
+    });
+    Assert.objectContainsOnly(contentStopMarker.data, expectedDataProperties);
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_markers_gc_cc.js b/tools/profiler/tests/browser/browser_test_markers_gc_cc.js
new file mode 100644
index 0000000000..a4a94d60cc
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_markers_gc_cc.js
@@ -0,0 +1,49 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async function test_markers_gc_cc() {
+  info("Test GC&CC markers.");
+
+  info("Create a throwaway profile.");
+  await startProfiler({});
+  let tempProfileContainer = { profile: null };
+  tempProfileContainer.profile = await waitSamplingAndStopAndGetProfile();
+
+  info("Restart the profiler.");
+  await startProfiler({});
+
+  info("Throw away the previous profile, which should be garbage-collected.");
+  Assert.equal(
+    typeof tempProfileContainer.profile,
+    "object",
+    "Previously-captured profile should be an object"
+  );
+  delete tempProfileContainer.profile;
+  Assert.equal(
+    typeof tempProfileContainer.profile,
+    "undefined",
+    "Deleted profile should now be undefined"
+  );
+
+  info("Force GC&CC");
+  SpecialPowers.gc();
+  SpecialPowers.forceShrinkingGC();
+  SpecialPowers.forceCC();
+  SpecialPowers.gc();
+  SpecialPowers.forceShrinkingGC();
+  SpecialPowers.forceCC();
+
+  info("Stop the profiler and get the profile.");
+  const profile = await waitSamplingAndStopAndGetProfile();
+
+  const markers = getInflatedMarkerData(profile.threads[0]);
+  Assert.ok(
+    markers.some(({ data }) => data?.type === "GCSlice"),
+    "A GCSlice marker was recorded"
+  );
+  Assert.ok(
+    markers.some(({ data }) => data?.type === "CCSlice"),
+    "A CCSlice marker was recorded"
+  );
+});
diff --git a/tools/profiler/tests/browser/browser_test_markers_parent_process.js b/tools/profiler/tests/browser/browser_test_markers_parent_process.js
new file mode 100644
index 0000000000..28b82f8054
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_markers_parent_process.js
@@ -0,0 +1,37 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async function test_markers_parent_process() {
+  info("Test markers that are generated by the browser's parent process.");
+
+  info("Start the profiler in nostacksampling mode.");
+  await startProfiler({ features: ["nostacksampling"] });
+
+  info("Dispatch a DOMEvent");
+  window.dispatchEvent(new Event("synthetic"));
+
+  info("Stop the profiler and get the profile.");
+  const profile = await stopNowAndGetProfile();
+
+  const markers = getInflatedMarkerData(profile.threads[0]);
+  {
+    const domEventStart = markers.find(
+      ({ phase, data }) =>
+        phase === INTERVAL_START && data?.eventType === "synthetic"
+    );
+    const domEventEnd = markers.find(
+      ({ phase, data }) =>
+        phase === INTERVAL_END && data?.eventType === "synthetic"
+    );
+    ok(domEventStart, "A start DOMEvent was generated");
+    ok(domEventEnd, "An end DOMEvent was generated");
+    ok(
+      domEventEnd.data.latency > 0,
+      "DOMEvent had a a latency value generated."
+    );
+    ok(domEventEnd.data.type === "DOMEvent");
+    ok(domEventEnd.name === "DOMEvent");
+  }
+  // Add more marker tests.
+});
diff --git a/tools/profiler/tests/browser/browser_test_markers_preferencereads.js b/tools/profiler/tests/browser/browser_test_markers_preferencereads.js
new file mode 100644
index 0000000000..0ae183f874
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_markers_preferencereads.js
@@ -0,0 +1,73 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+requestLongerTimeout(10);
+// Preference that the test reads in the content process and then counts.
+const kContentPref = "font.size.variable.x-western";
+
+function countPrefReadsInThread(pref, thread) {
+  let count = 0;
+  for (let payload of getPayloadsOfType(thread, "Preference")) {
+    if (payload.prefName === pref) {
+      count++;
+    }
+  }
+  return count;
+}
+
+async function waitForPaintAfterLoad() {
+  return SpecialPowers.spawn(gBrowser.selectedBrowser, [], () => {
+    return new Promise(function (resolve) {
+      function listener() {
+        if (content.document.readyState == "complete") {
+          content.requestAnimationFrame(() => content.setTimeout(resolve, 0));
+        }
+      }
+      if (content.document.readyState != "complete") {
+        content.document.addEventListener("readystatechange", listener);
+      } else {
+        listener();
+      }
+    });
+  });
+}
+
+/**
+ * Test the Preference Read markers.
+ */
+add_task(async function test_profile_preferencereads_markers() {
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  await startProfiler({ features: ["js"] });
+
+  const url = BASE_URL + "single_frame.html";
+  await BrowserTestUtils.withNewTab(url, async contentBrowser => {
+    const contentPid = await SpecialPowers.spawn(
+      contentBrowser,
+      [],
+      () => Services.appinfo.processID
+    );
+
+    await waitForPaintAfterLoad();
+
+    // Ensure we read a pref in the content process.
+    await SpecialPowers.spawn(contentBrowser, [kContentPref], pref => {
+      Services.prefs.getIntPref(pref);
+    });
+
+    // Check that some Preference Read profile markers were generated.
+    {
+      const { contentThread } = await stopProfilerNowAndGetThreads(contentPid);
+
+      Assert.greater(
+        countPrefReadsInThread(kContentPref, contentThread),
+        0,
+        `Preference Read profile markers for ${kContentPref} were recorded.`
+      );
+    }
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_profile_capture_by_pid.js b/tools/profiler/tests/browser/browser_test_profile_capture_by_pid.js
new file mode 100644
index 0000000000..14d76dbcaf
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_profile_capture_by_pid.js
@@ -0,0 +1,199 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function ProcessHasSamplerThread(process) {
+  return process.threads.some(t => t.name == "SamplerThread");
+}
+
+async function GetPidsWithSamplerThread() {
+  let parentProc = await ChromeUtils.requestProcInfo();
+
+  let pids = parentProc.children
+    .filter(ProcessHasSamplerThread)
+    .map(proc => proc.pid);
+  if (ProcessHasSamplerThread(parentProc)) {
+    pids.unshift(parentProc.pid);
+  }
+  return pids;
+}
+
+// fnFilterWithContentId: Called with content child pid, returns filters to use.
+// E.g.: 123 => ["GeckoMain", "pid:123"], or 123 => ["pid:456"].
+async function test_with_filter(fnFilterWithContentId) {
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info("Open a tab with single_frame.html in it.");
+  const url = BASE_URL + "single_frame.html";
+  return BrowserTestUtils.withNewTab(url, async function (contentBrowser) {
+    const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    Assert.deepEqual(
+      await GetPidsWithSamplerThread(),
+      [],
+      "There should be no SamplerThreads before starting the profiler"
+    );
+
+    info("Start the profiler to test filters including 'pid:<content>'.");
+    await startProfiler({ threads: fnFilterWithContentId(contentPid) });
+
+    let pidsWithSamplerThread = null;
+    await TestUtils.waitForCondition(
+      async function () {
+        let pidsStringBefore = JSON.stringify(pidsWithSamplerThread);
+        pidsWithSamplerThread = await GetPidsWithSamplerThread();
+        return JSON.stringify(pidsWithSamplerThread) == pidsStringBefore;
+      },
+      "Wait for sampler threads to stabilize after profiler start",
+      /* interval (ms) */ 250,
+      /* maxTries */ 10
+    );
+
+    info("Capture the profile data.");
+    const profile = await waitSamplingAndStopAndGetProfile();
+
+    await TestUtils.waitForCondition(async function () {
+      return !(await GetPidsWithSamplerThread()).length;
+    }, "Wait for all sampler threads to stop after profiler stop");
+
+    return { contentPid, pidsWithSamplerThread, profile };
+  });
+}
+
+add_task(async function browser_test_profile_capture_along_with_content_pid() {
+  const { contentPid, pidsWithSamplerThread, profile } = await test_with_filter(
+    contentPid => ["GeckoMain", "pid:" + contentPid]
+  );
+
+  Assert.greater(
+    pidsWithSamplerThread.length,
+    2,
+    "There should be lots of SamplerThreads after starting the profiler"
+  );
+
+  let contentProcessIndex = profile.processes.findIndex(
+    p => p.threads[0].pid == contentPid
+  );
+  Assert.notEqual(
+    contentProcessIndex,
+    -1,
+    "The content process should be present"
+  );
+
+  // Note: Some threads may not be registered, so we can't expect that many. But
+  // 10 is much more than the default 4.
+  Assert.greater(
+    profile.processes[contentProcessIndex].threads.length,
+    10,
+    "The content process should have many threads"
+  );
+
+  Assert.equal(
+    profile.threads.length,
+    1,
+    "The parent process should have only one thread"
+  );
+  Assert.equal(
+    profile.threads[0].name,
+    "GeckoMain",
+    "The parent process should have the main thread"
+  );
+});
+
+add_task(async function browser_test_profile_capture_along_with_other_pid() {
+  const parentPid = Services.appinfo.processID;
+  const { contentPid, pidsWithSamplerThread, profile } = await test_with_filter(
+    contentPid => ["GeckoMain", "pid:" + parentPid]
+  );
+
+  Assert.greater(
+    pidsWithSamplerThread.length,
+    2,
+    "There should be lots of SamplerThreads after starting the profiler"
+  );
+
+  let contentProcessIndex = profile.processes.findIndex(
+    p => p.threads[0].pid == contentPid
+  );
+  Assert.notEqual(
+    contentProcessIndex,
+    -1,
+    "The content process should be present"
+  );
+
+  Assert.equal(
+    profile.processes[contentProcessIndex].threads.length,
+    1,
+    "The content process should have only one thread"
+  );
+
+  // Note: Some threads may not be registered, so we can't expect that many. But
+  // 10 is much more than the default 4.
+  Assert.greater(
+    profile.threads.length,
+    10,
+    "The parent process should have many threads"
+  );
+});
+
+add_task(async function browser_test_profile_capture_by_only_content_pid() {
+  const parentPid = Services.appinfo.processID;
+  const { contentPid, pidsWithSamplerThread, profile } = await test_with_filter(
+    contentPid => ["pid:" + contentPid]
+  );
+
+  // The sampler thread always runs in the parent process, see bug 1754100.
+  Assert.deepEqual(
+    pidsWithSamplerThread,
+    [parentPid, contentPid],
+    "There should only be SamplerThreads in the parent and the target child"
+  );
+
+  Assert.equal(
+    profile.processes.length,
+    1,
+    "There should only be one child process"
+  );
+  // Note: Some threads may not be registered, so we can't expect that many. But
+  // 10 is much more than the default 4.
+  Assert.greater(
+    profile.processes[0].threads.length,
+    10,
+    "The child process should have many threads"
+  );
+  Assert.equal(
+    profile.processes[0].threads[0].pid,
+    contentPid,
+    "The only child process should be our content"
+  );
+});
+
+add_task(async function browser_test_profile_capture_by_only_parent_pid() {
+  const parentPid = Services.appinfo.processID;
+  const { pidsWithSamplerThread, profile } = await test_with_filter(
+    contentPid => ["pid:" + parentPid]
+  );
+
+  Assert.deepEqual(
+    pidsWithSamplerThread,
+    [parentPid],
+    "There should only be a SamplerThread in the parent"
+  );
+
+  // Note: Some threads may not be registered, so we can't expect that many. But
+  // 10 is much more than the default 4.
+  Assert.greater(
+    profile.threads.length,
+    10,
+    "The parent process should have many threads"
+  );
+  Assert.equal(
+    profile.processes.length,
+    0,
+    "There should be no child processes"
+  );
+});
diff --git a/tools/profiler/tests/browser/browser_test_profile_fission.js b/tools/profiler/tests/browser/browser_test_profile_fission.js
new file mode 100644
index 0000000000..775fc8048e
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_profile_fission.js
@@ -0,0 +1,191 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+if (SpecialPowers.useRemoteSubframes) {
+  // Bug 1586105: when fission is enabled, these tests could time out in some
+  // extremely slow conditions.
+  // Requesting a longer timeout should make them pass.
+  requestLongerTimeout(2);
+}
+
+add_task(async function test_profile_fission_no_private_browsing() {
+  // Requesting the complete log to be able to debug Bug 1586105.
+  SimpleTest.requestCompleteLog();
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still have some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info(
+    "Start the profiler to test the page information with single frame page."
+  );
+  await startProfiler();
+
+  info("Open a private window with single_frame.html in it.");
+  const win = await BrowserTestUtils.openNewBrowserWindow({
+    fission: true,
+  });
+
+  try {
+    const url = BASE_URL_HTTPS + "single_frame.html";
+    const contentBrowser = win.gBrowser.selectedBrowser;
+    BrowserTestUtils.loadURIString(contentBrowser, url);
+    await BrowserTestUtils.browserLoaded(contentBrowser, false, url);
+
+    const parentPid = Services.appinfo.processID;
+    const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    // Getting the active Browser ID to assert the page info tabID later.
+    const activeTabID = contentBrowser.browsingContext.browserId;
+
+    info("Capture the profile data.");
+    const { profile, contentProcess, contentThread } =
+      await stopProfilerNowAndGetThreads(contentPid);
+
+    Assert.equal(
+      contentThread.isPrivateBrowsing,
+      false,
+      "The content process has the private browsing flag set to false."
+    );
+
+    Assert.equal(
+      contentThread.userContextId,
+      0,
+      "The content process has the information about the container used for this process"
+    );
+
+    info(
+      "Check if the captured page is the one with correct values we created."
+    );
+
+    let pageFound = false;
+    for (const page of contentProcess.pages) {
+      if (page.url == url) {
+        Assert.equal(page.url, url);
+        Assert.equal(typeof page.tabID, "number");
+        Assert.equal(page.tabID, activeTabID);
+        Assert.equal(typeof page.innerWindowID, "number");
+        // Top level document will have no embedder.
+        Assert.equal(page.embedderInnerWindowID, 0);
+        Assert.equal(typeof page.isPrivateBrowsing, "boolean");
+        Assert.equal(page.isPrivateBrowsing, false);
+        pageFound = true;
+        break;
+      }
+    }
+    Assert.equal(pageFound, true);
+
+    info("Check that the profiling logs exist with the expected properties.");
+    Assert.equal(typeof profile.profilingLog, "object");
+    Assert.equal(typeof profile.profilingLog[parentPid], "object");
+    const parentLog = profile.profilingLog[parentPid];
+    Assert.equal(typeof parentLog.profilingLogBegin_TSms, "number");
+    Assert.equal(typeof parentLog.profilingLogEnd_TSms, "number");
+    Assert.equal(typeof parentLog.bufferGlobalController, "object");
+    Assert.equal(
+      typeof parentLog.bufferGlobalController.controllerCreationTime_TSms,
+      "number"
+    );
+
+    Assert.equal(typeof profile.profileGatheringLog, "object");
+    Assert.equal(typeof profile.profileGatheringLog[parentPid], "object");
+    Assert.equal(
+      typeof profile.profileGatheringLog[parentPid]
+        .profileGatheringLogBegin_TSms,
+      "number"
+    );
+    Assert.equal(
+      typeof profile.profileGatheringLog[parentPid].profileGatheringLogEnd_TSms,
+      "number"
+    );
+
+    Assert.equal(typeof contentProcess.profilingLog, "object");
+    Assert.equal(typeof contentProcess.profilingLog[contentPid], "object");
+    Assert.equal(
+      typeof contentProcess.profilingLog[contentPid].profilingLogBegin_TSms,
+      "number"
+    );
+    Assert.equal(
+      typeof contentProcess.profilingLog[contentPid].profilingLogEnd_TSms,
+      "number"
+    );
+
+    Assert.equal(typeof contentProcess.profileGatheringLog, "undefined");
+  } finally {
+    await BrowserTestUtils.closeWindow(win);
+  }
+});
+
+add_task(async function test_profile_fission_private_browsing() {
+  // Requesting the complete log to be able to debug Bug 1586105.
+  SimpleTest.requestCompleteLog();
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still have some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info(
+    "Start the profiler to test the page information with single frame page."
+  );
+  await startProfiler();
+
+  info("Open a private window with single_frame.html in it.");
+  const win = await BrowserTestUtils.openNewBrowserWindow({
+    private: true,
+    fission: true,
+  });
+
+  try {
+    const url = BASE_URL_HTTPS + "single_frame.html";
+    const contentBrowser = win.gBrowser.selectedBrowser;
+    BrowserTestUtils.loadURIString(contentBrowser, url);
+    await BrowserTestUtils.browserLoaded(contentBrowser, false, url);
+
+    const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    // Getting the active Browser ID to assert the page info tabID later.
+    const activeTabID = contentBrowser.browsingContext.browserId;
+
+    info("Capture the profile data.");
+    const { contentProcess, contentThread } =
+      await stopProfilerNowAndGetThreads(contentPid);
+
+    Assert.equal(
+      contentThread.isPrivateBrowsing,
+      true,
+      "The content process has the private browsing flag set to true."
+    );
+
+    Assert.equal(
+      contentThread.userContextId,
+      0,
+      "The content process has the information about the container used for this process"
+    );
+
+    info(
+      "Check if the captured page is the one with correct values we created."
+    );
+
+    let pageFound = false;
+    for (const page of contentProcess.pages) {
+      if (page.url == url) {
+        Assert.equal(page.url, url);
+        Assert.equal(typeof page.tabID, "number");
+        Assert.equal(page.tabID, activeTabID);
+        Assert.equal(typeof page.innerWindowID, "number");
+        // Top level document will have no embedder.
+        Assert.equal(page.embedderInnerWindowID, 0);
+        Assert.equal(typeof page.isPrivateBrowsing, "boolean");
+        Assert.equal(page.isPrivateBrowsing, true);
+        pageFound = true;
+        break;
+      }
+    }
+    Assert.equal(pageFound, true);
+  } finally {
+    await BrowserTestUtils.closeWindow(win);
+  }
+});
diff --git a/tools/profiler/tests/browser/browser_test_profile_multi_frame_page_info.js b/tools/profiler/tests/browser/browser_test_profile_multi_frame_page_info.js
new file mode 100644
index 0000000000..854587678d
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_profile_multi_frame_page_info.js
@@ -0,0 +1,83 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+if (SpecialPowers.useRemoteSubframes) {
+  // Bug 1586105: when fission is enabled, these tests could time out in some
+  // extremely slow conditions.
+  // Requesting a longer timeout should make them pass.
+  requestLongerTimeout(2);
+}
+
+add_task(async function test_profile_multi_frame_page_info() {
+  // Requesting the complete log to be able to debug Bug 1586105.
+  SimpleTest.requestCompleteLog();
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still have some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info(
+    "Start the profiler to test the page information with multi frame page."
+  );
+  await startProfiler();
+
+  info("Open a tab with multi_frame.html in it.");
+  // multi_frame.html embeds single_frame.html inside an iframe.
+  const url = BASE_URL + "multi_frame.html";
+  await BrowserTestUtils.withNewTab(url, async function (contentBrowser) {
+    const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    // Getting the active Browser ID to assert the page info tabID later.
+    const win = Services.wm.getMostRecentWindow("navigator:browser");
+    const activeTabID = win.gBrowser.selectedBrowser.browsingContext.browserId;
+
+    info("Capture the profile data.");
+    const { contentProcess } = await stopProfilerNowAndGetThreads(contentPid);
+
+    info(
+      "Check if the captured pages are the ones with correct values we created."
+    );
+
+    let parentPage;
+    let foundPage = 0;
+    for (const page of contentProcess.pages) {
+      // Parent page
+      if (page.url == url) {
+        Assert.equal(page.url, url);
+        Assert.equal(typeof page.tabID, "number");
+        Assert.equal(page.tabID, activeTabID);
+        Assert.equal(typeof page.innerWindowID, "number");
+        // Top level document will have no embedder.
+        Assert.equal(page.embedderInnerWindowID, 0);
+        Assert.equal(typeof page.isPrivateBrowsing, "boolean");
+        Assert.equal(page.isPrivateBrowsing, false);
+        parentPage = page;
+        foundPage++;
+        break;
+      }
+    }
+
+    Assert.notEqual(typeof parentPage, "undefined");
+
+    for (const page of contentProcess.pages) {
+      // Child page (iframe)
+      if (page.url == BASE_URL + "single_frame.html") {
+        Assert.equal(page.url, BASE_URL + "single_frame.html");
+        Assert.equal(typeof page.tabID, "number");
+        Assert.equal(page.tabID, activeTabID);
+        Assert.equal(typeof page.innerWindowID, "number");
+        Assert.equal(typeof page.embedderInnerWindowID, "number");
+        Assert.notEqual(typeof parentPage, "undefined");
+        Assert.equal(page.embedderInnerWindowID, parentPage.innerWindowID);
+        Assert.equal(typeof page.isPrivateBrowsing, "boolean");
+        Assert.equal(page.isPrivateBrowsing, false);
+        foundPage++;
+        break;
+      }
+    }
+
+    Assert.equal(foundPage, 2);
+  });
+});
diff --git a/tools/profiler/tests/browser/browser_test_profile_single_frame_page_info.js b/tools/profiler/tests/browser/browser_test_profile_single_frame_page_info.js
new file mode 100644
index 0000000000..240213be56
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_profile_single_frame_page_info.js
@@ -0,0 +1,132 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+if (SpecialPowers.useRemoteSubframes) {
+  // Bug 1586105: with fission enabled, these tests could time out in some
+  // extremely slow conditions.
+  // Requesting a longer timeout should make them pass.
+  requestLongerTimeout(2);
+}
+
+add_task(async function test_profile_single_frame_page_info() {
+  // Ask for the complete log so that Bug 1586105 can be debugged.
+  SimpleTest.requestCompleteLog();
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still have some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info(
+    "Start the profiler to test the page information with single frame page."
+  );
+  await startProfiler();
+
+  info("Open a tab with single_frame.html in it.");
+  const pageUrl = BASE_URL + "single_frame.html";
+  await BrowserTestUtils.withNewTab(pageUrl, async browser => {
+    const childPid = await SpecialPowers.spawn(browser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    // The selected browser's ID is what the page's tabID is compared to below.
+    const chromeWin = Services.wm.getMostRecentWindow("navigator:browser");
+    const tabID = chromeWin.gBrowser.selectedBrowser.browsingContext.browserId;
+
+    info("Capture the profile data.");
+    const { contentProcess } = await stopProfilerNowAndGetThreads(childPid);
+
+    info(
+      "Check if the captured page is the one with correct values we created."
+    );
+
+    let sawPage = false;
+    for (const entry of contentProcess.pages) {
+      if (entry.url == pageUrl) {
+        Assert.equal(entry.url, pageUrl);
+        Assert.equal(typeof entry.tabID, "number");
+        Assert.equal(entry.tabID, tabID);
+        Assert.equal(typeof entry.innerWindowID, "number");
+        // A top-level document has no embedder, hence the 0.
+        Assert.equal(entry.embedderInnerWindowID, 0);
+        Assert.equal(typeof entry.isPrivateBrowsing, "boolean");
+        Assert.equal(entry.isPrivateBrowsing, false);
+        sawPage = true;
+        break;
+      }
+    }
+    Assert.equal(sawPage, true);
+  });
+});
+
+add_task(async function test_profile_private_browsing() {
+  // Ask for the complete log so that Bug 1586105 can be debugged.
+  SimpleTest.requestCompleteLog();
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still have some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info(
+    "Start the profiler to test the page information with single frame page."
+  );
+  await startProfiler();
+
+  info("Open a private window with single_frame.html in it.");
+  const privateWin = await BrowserTestUtils.openNewBrowserWindow({
+    fission: false,
+    private: true,
+  });
+
+  try {
+    const pageUrl = BASE_URL_HTTPS + "single_frame.html";
+    const browser = privateWin.gBrowser.selectedBrowser;
+    BrowserTestUtils.loadURIString(browser, pageUrl);
+    await BrowserTestUtils.browserLoaded(browser, false, pageUrl);
+
+    const childPid = await SpecialPowers.spawn(browser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    // The browser's ID is what the page's tabID is compared to below.
+    const tabID = browser.browsingContext.browserId;
+
+    info("Capture the profile data.");
+    const { contentProcess, contentThread } =
+      await stopProfilerNowAndGetThreads(childPid);
+
+    // These are only set with fission, which is disabled for this window.
+    Assert.equal(
+      contentThread.isPrivateBrowsing,
+      undefined,
+      "The content process has no private browsing flag."
+    );
+
+    Assert.equal(
+      contentThread.userContextId,
+      undefined,
+      "The content process has no information about the container used for this process."
+    );
+
+    info(
+      "Check if the captured page is the one with correct values we created."
+    );
+
+    let sawPage = false;
+    for (const entry of contentProcess.pages) {
+      if (entry.url == pageUrl) {
+        Assert.equal(entry.url, pageUrl);
+        Assert.equal(typeof entry.tabID, "number");
+        Assert.equal(entry.tabID, tabID);
+        Assert.equal(typeof entry.innerWindowID, "number");
+        // A top-level document has no embedder, hence the 0.
+        Assert.equal(entry.embedderInnerWindowID, 0);
+        Assert.equal(typeof entry.isPrivateBrowsing, "boolean");
+        Assert.equal(entry.isPrivateBrowsing, true);
+        sawPage = true;
+        break;
+      }
+    }
+    Assert.equal(sawPage, true);
+  } finally {
+    await BrowserTestUtils.closeWindow(privateWin);
+  }
+});
diff --git a/tools/profiler/tests/browser/browser_test_profile_slow_capture.js b/tools/profiler/tests/browser/browser_test_profile_slow_capture.js
new file mode 100644
index 0000000000..4a675b84d1
--- /dev/null
+++ b/tools/profiler/tests/browser/browser_test_profile_slow_capture.js
@@ -0,0 +1,104 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async function browser_test_profile_slow_capture() {
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info(
+    "Start the profiler to test the page information with single frame page."
+  );
+  await startProfiler({ threads: ["GeckoMain", "test-debug-child-slow-json"] });
+
+  info("Open a tab with single_frame.html in it.");
+  const url = BASE_URL + "single_frame.html";
+  await BrowserTestUtils.withNewTab(url, async function (contentBrowser) {
+    const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    // Getting the active Browser ID to assert the page info tabID later.
+    const win = Services.wm.getMostRecentWindow("navigator:browser");
+    const activeTabID = win.gBrowser.selectedBrowser.browsingContext.browserId;
+
+    info("Capture the profile data.");
+    const profile = await waitSamplingAndStopAndGetProfile();
+
+    let pageFound = false;
+    // Find the content process for that tab, skipping threadless processes.
+    let contentProcess = profile.processes.find(
+      p => p.threads.length > 0 && p.threads[0].pid == contentPid
+    );
+
+    if (!contentProcess) {
+      throw new Error(
+        `Could not find the content process with given pid: ${contentPid}`
+      );
+    }
+
+    info(
+      "Check if the captured page is the one with correct values we created."
+    );
+
+    for (const page of contentProcess.pages) {
+      if (page.url == url) {
+        Assert.equal(page.url, url);
+        Assert.equal(typeof page.tabID, "number");
+        Assert.equal(page.tabID, activeTabID);
+        Assert.equal(typeof page.innerWindowID, "number");
+        // Top level document will have no embedder.
+        Assert.equal(page.embedderInnerWindowID, 0);
+        pageFound = true;
+        break;
+      }
+    }
+    Assert.equal(pageFound, true);
+
+    info("Flush slow processes with a quick profile.");
+    await startProfiler();
+    for (let i = 0; i < 10; ++i) {
+      await Services.profiler.waitOnePeriodicSampling();
+    }
+    await stopNowAndGetProfile();
+  });
+});
+
+add_task(async function browser_test_profile_very_slow_capture() {
+  Assert.ok(!Services.profiler.IsActive());
+  info("Clear the previous pages just in case we still some open tabs.");
+  await Services.profiler.ClearAllPages();
+
+  info(
+    "Start the profiler to test the page information with single frame page."
+  );
+  await startProfiler({
+    threads: ["GeckoMain", "test-debug-child-very-slow-json"],
+  });
+
+  info("Open a tab with single_frame.html in it.");
+  const url = BASE_URL + "single_frame.html";
+  await BrowserTestUtils.withNewTab(url, async function (contentBrowser) {
+    const contentPid = await SpecialPowers.spawn(contentBrowser, [], () => {
+      return Services.appinfo.processID;
+    });
+
+    info("Capture the profile data.");
+    const profile = await waitSamplingAndStopAndGetProfile();
+
+    info("Check that the content process is missing.");
+    // Processes without threads are skipped: they cannot match the pid.
+    let contentProcessIndex = profile.processes.findIndex(
+      p => p.threads.length > 0 && p.threads[0].pid == contentPid
+    );
+    Assert.equal(contentProcessIndex, -1);
+
+    info("Flush slow processes with a quick profile.");
+    await startProfiler();
+    for (let i = 0; i < 10; ++i) {
+      await Services.profiler.waitOnePeriodicSampling();
+    }
+    await stopNowAndGetProfile();
+  });
+});
diff --git a/tools/profiler/tests/browser/do_work_500ms.html b/tools/profiler/tests/browser/do_work_500ms.html
new file mode 100644
index 0000000000..9713a80671
--- /dev/null
+++ b/tools/profiler/tests/browser/do_work_500ms.html
@@ -0,0 +1,41 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>Do some work for 500ms</title>
+  <script>
+    const milliseconds = 500;
+    const millisecondsPerBatch = 10;
+    const end = Date.now() + milliseconds;
+    window.total = 0;
+    let i = 0;
+
+    /**
+     * Do work for a set number of milliseconds, but only do the work in batches
+     * so the browser does not become unresponsive.
+     */
+    function doWork() {
+      const batchEnd = Date.now() + millisecondsPerBatch;
+      // Keep working until the overall deadline (`end`) has passed.
+      while (Date.now() < end) {
+        // Do some kind of work that is non-deterministic to guard against optimizations.
+        window.total += Math.random();
+        i++;
+
+        // Check if a batch is done yet.
+        if (Date.now() > batchEnd) {
+          // Defer the rest of the work to a later task (not a microtask). Keep
+          // on doing this until the total milliseconds have elapsed.
+          setTimeout(doWork, 0);
+          return;
+        }
+      }
+    }
+
+    doWork();
+  </script>
+</head>
+<body>
+  Do some work for 500ms.
+</body>
+</html>
diff --git a/tools/profiler/tests/browser/firefox-logo-nightly.svg b/tools/profiler/tests/browser/firefox-logo-nightly.svg
new file mode 100644
index 0000000000..f1af370d87
--- /dev/null
+++ b/tools/profiler/tests/browser/firefox-logo-nightly.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 953.37 984"><defs><linearGradient id="linear-gradient" x1="-14706.28" y1="9250.14" x2="-14443.04" y2="9250.14" gradientTransform="matrix(0.76, 0.03, 0.05, -1.12, 11485.47, 11148)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#0083ff"/><stop offset="0.1" stop-color="#0092f8"/><stop offset="0.31" stop-color="#00abeb"/><stop offset="0.52" stop-color="#00bee1"/><stop offset="0.75" stop-color="#00c8dc"/><stop offset="1" stop-color="#00ccda"/></linearGradient><radialGradient id="radial-gradient" cx="-7588.66" cy="8866.53" r="791.23" gradientTransform="matrix(1.23, 0, 0, -1.22, 9958.21, 11048.11)" gradientUnits="userSpaceOnUse"><stop offset="0.02" stop-color="#005fe7"/><stop offset="0.18" stop-color="#0042b4"/><stop offset="0.32" stop-color="#002989"/><stop offset="0.4" stop-color="#002079"/><stop offset="0.47" stop-color="#131d78"/><stop offset="0.66" stop-color="#3b1676"/><stop offset="0.75" stop-color="#4a1475"/></radialGradient><linearGradient id="linear-gradient-2" x1="539.64" y1="254.8" x2="348.2" y2="881.03" gradientTransform="matrix(1, 0, 0, -1, 1, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#000f43" stop-opacity="0.4"/><stop offset="0.48" stop-color="#001962" stop-opacity="0.17"/><stop offset="1" stop-color="#002079" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-3" x1="540.64" y1="254.8" x2="349.2" y2="881.03" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" xlink:href="#linear-gradient-2"/><linearGradient id="linear-gradient-4" x1="-8367.12" y1="7348.87" x2="-8482.36" y2="7357.76" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#812cc9"/><stop offset="1" stop-color="#005fe7"/></linearGradient><linearGradient id="linear-gradient-5" x1="-8449.89" y1="7496.97" x2="-8341.94" y2="7609.09" gradientTransform="matrix(1.22, 0.12, 
0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0.05" stop-color="#005fe7"/><stop offset="0.18" stop-color="#065de6"/><stop offset="0.35" stop-color="#1856e1"/><stop offset="0.56" stop-color="#354adb"/><stop offset="0.78" stop-color="#5d3ad1"/><stop offset="0.95" stop-color="#812cc9"/></linearGradient><linearGradient id="linear-gradient-6" x1="-8653.41" y1="7245.3" x2="-8422.52" y2="7244.76" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#002079"/><stop offset="0.99" stop-color="#a238ff"/></linearGradient><radialGradient id="radial-gradient-2" cx="644.11" cy="599.83" fx="785.0454815336918" fy="470.6889181532662" r="793.95" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0.2" stop-color="#00fdff"/><stop offset="0.26" stop-color="#0af1ff"/><stop offset="0.37" stop-color="#23d2ff"/><stop offset="0.52" stop-color="#4da0ff"/><stop offset="0.69" stop-color="#855bff"/><stop offset="0.77" stop-color="#a238ff"/><stop offset="0.81" stop-color="#a738fd"/><stop offset="0.86" stop-color="#b539f9"/><stop offset="0.9" stop-color="#cd39f1"/><stop offset="0.96" stop-color="#ee3ae6"/><stop offset="0.98" stop-color="#ff3be0"/></radialGradient><linearGradient id="linear-gradient-7" x1="-7458.97" y1="9093.17" x2="-7531.06" y2="8282.84" gradientTransform="matrix(1.23, 0, 0, -1.22, 9958.21, 11048.11)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#00ec00"/><stop offset="0.1" stop-color="#00e244"/><stop offset="0.22" stop-color="#00d694"/><stop offset="0.31" stop-color="#00cfc7"/><stop offset="0.35" stop-color="#00ccda"/><stop offset="0.42" stop-color="#0bc2dd" stop-opacity="0.92"/><stop offset="0.57" stop-color="#29a7e4" stop-opacity="0.72"/><stop offset="0.77" stop-color="#597df0" stop-opacity="0.4"/><stop offset="1" stop-color="#9448ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-8" 
x1="-8926.61" y1="7680.53" x2="-8790.14" y2="7680.53" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#005fe7"/><stop offset="0.46" stop-color="#0071f3" stop-opacity="0.51"/><stop offset="0.83" stop-color="#007efc" stop-opacity="0.14"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient><radialGradient id="radial-gradient-3" cx="-8914.62" cy="7721.05" r="165.97" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0.63" stop-color="#ffe302" stop-opacity="0"/><stop offset="0.67" stop-color="#ffe302" stop-opacity="0.05"/><stop offset="0.75" stop-color="#ffe302" stop-opacity="0.19"/><stop offset="0.86" stop-color="#ffe302" stop-opacity="0.4"/><stop offset="0.99" stop-color="#ffe302" stop-opacity="0.7"/></radialGradient><linearGradient id="linear-gradient-9" x1="214.02" y1="2032.47" x2="96.19" y2="2284.31" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, -250.1, 2306.29)" gradientUnits="userSpaceOnUse"><stop offset="0.19" stop-color="#4a1475" stop-opacity="0.5"/><stop offset="0.62" stop-color="#2277ac" stop-opacity="0.23"/><stop offset="0.94" stop-color="#00ccda" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-10" x1="-38.44" y1="278.18" x2="55.67" y2="171.29" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, 229.04, 745.87)" gradientUnits="userSpaceOnUse"><stop offset="0.01" stop-color="#002079" stop-opacity="0.5"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-11" x1="142.45" y1="96.25" x2="142.5" y2="149.68" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, 229.04, 745.87)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#4a1475" stop-opacity="0.9"/><stop offset="0.18" stop-color="#6720a2" stop-opacity="0.6"/><stop offset="0.38" stop-color="#812acb" stop-opacity="0.34"/><stop offset="0.57" 
stop-color="#9332e8" stop-opacity="0.15"/><stop offset="0.76" stop-color="#9e36f9" stop-opacity="0.04"/><stop offset="0.93" stop-color="#a238ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-12" x1="620.52" y1="947.88" x2="926.18" y2="264.39" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#00ec00" stop-opacity="0"/><stop offset="0.28" stop-color="#00dc6d" stop-opacity="0.5"/><stop offset="0.5" stop-color="#00d1bb" stop-opacity="0.86"/><stop offset="0.6" stop-color="#00ccda"/><stop offset="0.68" stop-color="#04c9db"/><stop offset="0.75" stop-color="#0fc1df"/><stop offset="0.83" stop-color="#23b2e6"/><stop offset="0.9" stop-color="#3e9ef0"/><stop offset="0.98" stop-color="#6184fc"/><stop offset="0.99" stop-color="#6680fe"/></linearGradient><linearGradient id="linear-gradient-13" x1="680.88" y1="554.79" x2="536.1" y2="166.04" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#0083ff"/><stop offset="0.04" stop-color="#0083ff" stop-opacity="0.92"/><stop offset="0.14" stop-color="#0083ff" stop-opacity="0.71"/><stop offset="0.26" stop-color="#0083ff" stop-opacity="0.52"/><stop offset="0.37" stop-color="#0083ff" stop-opacity="0.36"/><stop offset="0.49" stop-color="#0083ff" stop-opacity="0.23"/><stop offset="0.61" stop-color="#0083ff" stop-opacity="0.13"/><stop offset="0.73" stop-color="#0083ff" stop-opacity="0.06"/><stop offset="0.86" stop-color="#0083ff" stop-opacity="0.01"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient></defs><title>firefox-logo-nightly</title><g id="Layer_2" data-name="Layer 2"><g id="Layer_1-2" data-name="Layer 1"><g id="Layer_2-2" data-name="Layer 2"><g id="Firefox"><path d="M770.28,91.56c-23.95,27.88-35.1,90.64-10.82,154.26s61.5,49.8,84.7,114.67c30.62,85.6,16.37,200.59,16.37,200.59s36.81,106.61,62.47-6.63C979.79,341.74,770.28,143.94,770.28,91.56Z" 
style="fill:url(#linear-gradient)"/><path id="_Path_" data-name=" Path " d="M476.92,972.83c245.24,0,443.9-199.74,443.9-446s-198.66-446-443.66-446S33.5,280.51,33.5,526.8C33,773.33,231.92,972.83,476.92,972.83Z" style="fill:url(#radial-gradient)"/><path d="M810.67,803.64a246.8,246.8,0,0,1-30.12,18.18,705.31,705.31,0,0,0,38.3-63c9.46-10.47,18.13-20.65,25.19-31.65,3.44-5.41,7.31-12.08,11.42-19.82,24.92-44.9,52.4-117.56,53.18-192.2v-5.66a257.25,257.25,0,0,0-5.71-55.75c.2,1.43.38,2.86.56,4.29-.22-1.1-.41-2.21-.64-3.31.37,2,.66,4,1,6,5.09,43.22,1.47,85.37-16.68,116.45-.29.45-.58.88-.87,1.32,9.41-47.23,12.56-99.39,2.09-151.6,0,0-4.19-25.38-35.38-102.44-18-44.35-49.83-80.72-78-107.21-24.69-30.55-47.11-51-59.47-64.06C689.72,126,678.9,105.61,674.45,92.31c-3.85-1.93-53.14-49.81-57.05-51.63-21.51,33.35-89.16,137.67-57,235.15,14.58,44.17,51.47,90,90.07,115.74,1.69,1.94,23,25,33.09,77.16,10.45,53.85,5,95.86-16.54,158C641.73,681.24,577,735.12,516.3,740.63c-129.67,11.78-177.15-65.11-177.15-65.11C385.49,694,436.72,690.17,467.87,671c31.4-19.43,50.39-33.83,65.81-28.15C548.86,648.43,561,632,550.1,615a78.5,78.5,0,0,0-79.4-34.57c-31.43,5.11-60.23,30-101.41,5.89a86.29,86.29,0,0,1-7.73-5.06c-2.71-1.79,8.83,2.72,6.13.69-8-4.35-22.2-13.84-25.88-17.22-.61-.56,6.22,2.18,5.61,1.62-38.51-31.71-33.7-53.13-32.49-66.57,1-10.75,8-24.52,19.75-30.11,5.69,3.11,9.24,5.48,9.24,5.48s-2.43-5-3.74-7.58c.46-.2.9-.15,1.36-.34,4.66,2.25,15,8.1,20.41,11.67,7.07,5,9.33,9.44,9.33,9.44s1.86-1,.48-5.37c-.5-1.78-2.65-7.45-9.65-13.17h.44A81.61,81.61,0,0,1,374.42,478c2-7.18,5.53-14.68,4.75-28.09-.48-9.43-.26-11.87-1.92-15.51-1.49-3.13.83-4.35,3.42-1.1a32.5,32.5,0,0,0-2.21-7.4v-.24c3.23-11.24,68.25-40.46,73-43.88A67.2,67.2,0,0,0,470.59,361c3.62-5.76,6.34-13.85,7-26.11.36-8.84-3.76-14.73-69.51-21.62-18-1.77-28.53-14.8-34.53-26.82-1.09-2.59-2.21-4.94-3.33-7.28a57.68,57.68,0,0,1-2.56-8.43c10.75-30.87,28.81-57,55.37-76.7,1.45-1.32-5.78.34-4.34-1,1.69-1.54,12.71-6,14.79-7,2.54-1.2-10.88-6.9-22.73-5.51-12.07,1.36-14.63,2.8-21.
07,5.53,2.67-2.66,11.17-6.15,9.18-6.13-13,2-29.18,9.56-43,18.12a10.66,10.66,0,0,1,.83-4.35c-6.44,2.73-22.26,13.79-26.87,23.14a44.29,44.29,0,0,0,.27-5.4,84.17,84.17,0,0,0-13.19,13.82l-.24.22c-37.36-15-70.23-16-98.05-9.28-6.09-6.11-9.06-1.64-22.91-32.07-.94-1.83.72,1.81,0,0-2.28-5.9,1.39,7.87,0,0-23.28,18.37-53.92,39.19-68.63,53.89-.18.59,17.16-4.9,0,0-6,1.72-5.6,5.28-6.51,37.5-.22,2.44,0,5.18-.22,7.38-11.75,15-19.75,27.64-22.78,34.21-15.19,26.18-31.93,67-48.15,131.55A334.82,334.82,0,0,1,75.2,398.36C61.71,432.63,48.67,486.44,46.07,569.3A482.08,482.08,0,0,1,58.6,518.64,473,473,0,0,0,93.33,719.71c9.33,22.82,24.76,57.46,51,95.4C226.9,902,343.31,956,472.21,956,606.79,956,727.64,897.13,810.67,803.64Z" style="fill:url(#linear-gradient-2)"/><path d="M810.67,803.64a246.8,246.8,0,0,1-30.12,18.18,705.31,705.31,0,0,0,38.3-63c9.46-10.47,18.13-20.65,25.19-31.65,3.44-5.41,7.31-12.08,11.42-19.82,24.92-44.9,52.4-117.56,53.18-192.2v-5.66a257.25,257.25,0,0,0-5.71-55.75c.2,1.43.38,2.86.56,4.29-.22-1.1-.41-2.21-.64-3.31.37,2,.66,4,1,6,5.09,43.22,1.47,85.37-16.68,116.45-.29.45-.58.88-.87,1.32,9.41-47.23,12.56-99.39,2.09-151.6,0,0-4.19-25.38-35.38-102.44-18-44.35-49.83-80.72-78-107.21-24.69-30.55-47.11-51-59.47-64.06C689.72,126,678.9,105.61,674.45,92.31c-3.85-1.93-53.14-49.81-57.05-51.63-21.51,33.35-89.16,137.67-57,235.15,14.58,44.17,51.47,90,90.07,115.74,1.69,1.94,23,25,33.09,77.16,10.45,53.85,5,95.86-16.54,158C641.73,681.24,577,735.12,516.3,740.63c-129.67,11.78-177.15-65.11-177.15-65.11C385.49,694,436.72,690.17,467.87,671c31.4-19.43,50.39-33.83,65.81-28.15C548.86,648.43,561,632,550.1,615a78.5,78.5,0,0,0-79.4-34.57c-31.43,5.11-60.23,30-101.41,5.89a86.29,86.29,0,0,1-7.73-5.06c-2.71-1.79,8.83,2.72,6.13.69-8-4.35-22.2-13.84-25.88-17.22-.61-.56,6.22,2.18,5.61,1.62-38.51-31.71-33.7-53.13-32.49-66.57,1-10.75,8-24.52,19.75-30.11,5.69,3.11,9.24,5.48,9.24,5.48s-2.43-5-3.74-7.58c.46-.2.9-.15,1.36-.34,4.66,2.25,15,8.1,20.41,11.67,7.07,5,9.33,9.44,9.33,9.44s1.86-1,.48-5.37c-.5-1.78-2.65-7.45-9.65-13.
17h.44A81.61,81.61,0,0,1,374.42,478c2-7.18,5.53-14.68,4.75-28.09-.48-9.43-.26-11.87-1.92-15.51-1.49-3.13.83-4.35,3.42-1.1a32.5,32.5,0,0,0-2.21-7.4v-.24c3.23-11.24,68.25-40.46,73-43.88A67.2,67.2,0,0,0,470.59,361c3.62-5.76,6.34-13.85,7-26.11.36-8.84-3.76-14.73-69.51-21.62-18-1.77-28.53-14.8-34.53-26.82-1.09-2.59-2.21-4.94-3.33-7.28a57.68,57.68,0,0,1-2.56-8.43c10.75-30.87,28.81-57,55.37-76.7,1.45-1.32-5.78.34-4.34-1,1.69-1.54,12.71-6,14.79-7,2.54-1.2-10.88-6.9-22.73-5.51-12.07,1.36-14.63,2.8-21.07,5.53,2.67-2.66,11.17-6.15,9.18-6.13-13,2-29.18,9.56-43,18.12a10.66,10.66,0,0,1,.83-4.35c-6.44,2.73-22.26,13.79-26.87,23.14a44.29,44.29,0,0,0,.27-5.4,84.17,84.17,0,0,0-13.19,13.82l-.24.22c-37.36-15-70.23-16-98.05-9.28-6.09-6.11-9.06-1.64-22.91-32.07-.94-1.83.72,1.81,0,0-2.28-5.9,1.39,7.87,0,0-23.28,18.37-53.92,39.19-68.63,53.89-.18.59,17.16-4.9,0,0-6,1.72-5.6,5.28-6.51,37.5-.22,2.44,0,5.18-.22,7.38-11.75,15-19.75,27.64-22.78,34.21-15.19,26.18-31.93,67-48.15,131.55A334.82,334.82,0,0,1,75.2,398.36C61.71,432.63,48.67,486.44,46.07,569.3A482.08,482.08,0,0,1,58.6,518.64,473,473,0,0,0,93.33,719.71c9.33,22.82,24.76,57.46,51,95.4C226.9,902,343.31,956,472.21,956,606.79,956,727.64,897.13,810.67,803.64Z" style="fill:url(#linear-gradient-3)"/><path d="M711.1,866.71c162.87-18.86,235-186.7,142.38-190C769.85,674,634,875.61,711.1,866.71Z" style="fill:url(#linear-gradient-4)"/><path d="M865.21,642.42C977.26,577.21,948,436.34,948,436.34s-43.25,50.24-72.62,130.32C846.4,646,797.84,681.81,865.21,642.42Z" style="fill:url(#linear-gradient-5)"/><path d="M509.47,950.06C665.7,999.91,800,876.84,717.21,835.74,642,798.68,435.32,926.49,509.47,950.06Z" style="fill:url(#linear-gradient-6)"/><path 
d="M638.58,21.42l.53-.57A1.7,1.7,0,0,0,638.58,21.42ZM876.85,702.23c3.8-5.36,8.94-22.53,13.48-30.21,27.58-44.52,27.78-80,27.78-80.84,16.66-83.22,15.15-117.2,4.9-180-8.25-50.6-44.32-123.09-75.57-158-32.2-36-9.51-24.25-40.69-50.52-27.33-30.29-53.82-60.29-68.25-72.36C634.22,43.09,636.57,24.58,638.58,21.42c-.34.37-.84.92-1.47,1.64C635.87,18.14,635,14,635,14s-57,57-69,152c-7.83,62,15.38,126.68,49,168a381.62,381.62,0,0,0,59,58h0c25.4,36.48,39.38,81.49,39.38,129.91,0,121.24-98.34,219.53-219.65,219.53a220.14,220.14,0,0,1-49.13-5.52c-57.24-10.92-90.3-39.8-106.78-59.41-9.45-11.23-13.46-19.42-13.46-19.42,51.28,18.37,108,14.53,142.47-4.52,34.75-19.26,55.77-33.55,72.84-27.92,16.82,5.61,30.21-10.67,18.2-27.54-11.77-16.85-42.4-41-87.88-34.29-34.79,5.07-66.66,29.76-112.24,5.84a97.34,97.34,0,0,1-8.55-5c-3-1.77,9.77,2.69,6.79.68-8.87-4.32-24.57-13.73-28.64-17.07-.68-.56,6.88,2.16,6.2,1.6-42.62-31.45-37.3-52.69-36-66,1.07-10.66,8.81-24.32,21.86-29.86,6.3,3.08,10.23,5.43,10.23,5.43s-2.69-4.92-4.14-7.51c.51-.19,1-.15,1.5-.34,5.16,2.23,16.58,8,22.59,11.57,7.83,4.95,10.32,9.36,10.32,9.36s2.06-1,.54-5.33c-.56-1.77-2.93-7.39-10.68-13.07h.48a91.65,91.65,0,0,1,13.13,8.17c2.19-7.12,6.12-14.56,5.25-27.86-.53-9.35-.28-11.78-2.12-15.39-1.65-3.1.92-4.31,3.78-1.09a29.73,29.73,0,0,0-2.44-7.34v-.24c3.57-11.14,75.53-40.12,80.77-43.51a70.24,70.24,0,0,0,21.17-20.63c4-5.72,7-13.73,7.75-25.89.25-5.48-1.44-9.82-20.5-14-11.44-2.49-29.14-4.91-56.43-7.47-19.9-1.76-31.58-14.68-38.21-26.6-1.21-2.57-2.45-4.9-3.68-7.22a53.41,53.41,0,0,1-2.83-8.36,158.47,158.47,0,0,1,61.28-76.06c1.6-1.31-6.4.33-4.8-1,1.87-1.52,14.06-5.93,16.37-6.92,2.81-1.19-12-6.84-25.16-5.47-13.36,1.35-16.19,2.78-23.32,5.49,3-2.64,12.37-6.1,10.16-6.08-14.4,2-32.3,9.48-47.6,18a9.72,9.72,0,0,1,.92-4.31c-7.13,2.71-24.64,13.67-29.73,23a39.79,39.79,0,0,0,.29-5.35,88.55,88.55,0,0,0-14.6,13.7l-.27.22C258.14,196,221.75,195,191,201.72c-6.74-6.06-17.57-15.23-32.89-45.4-1-1.82-1.6,3.75-2.4,2-6-13.81-9.55-36.44-9-52,0,0-12.32,5.61-22.51,29.06-1.89,4.21-3.11,
6.54-4.32,8.87-.56.68,1.27-7.7,1-7.24-1.77,3-6.36,7.19-8.37,12.62-1.38,4-3.32,6.27-4.56,11.29l-.29.46c-.1-1.48.37-6.08,0-5.14A235.4,235.4,0,0,0,95.34,186c-5.49,18-11.88,42.61-12.89,74.57-.24,2.42,0,5.14-.25,7.32-13,14.83-21.86,27.39-25.2,33.91-16.81,26-35.33,66.44-53.29,130.46a319.35,319.35,0,0,1,28.54-50C17.32,416.25,2.89,469.62,0,551.8a436.92,436.92,0,0,1,13.87-50.24C11.29,556.36,17.68,624.3,52.32,701c20.57,45,67.92,136.6,183.62,208h0s39.36,29.3,107,51.26c5,1.81,10.06,3.6,15.23,5.33q-2.43-1-4.71-2A484.9,484.9,0,0,0,492.27,984c175.18.15,226.85-70.2,226.85-70.2l-.51.38q3.71-3.49,7.14-7.26c-27.64,26.08-90.75,27.84-114.3,26,40.22-11.81,66.69-21.81,118.17-41.52q9-3.36,18.48-7.64l2-.94c1.25-.58,2.49-1.13,3.75-1.74a349.3,349.3,0,0,0,70.26-44c51.7-41.3,63-81.56,68.83-108.1-.82,2.54-3.37,8.47-5.17,12.32-13.31,28.48-42.84,46-74.91,61a689.05,689.05,0,0,0,42.38-62.44C865.77,729.39,869,713.15,876.85,702.23Z" style="fill:url(#radial-gradient-2)"/><path d="M813.92,801c21.08-23.24,40-49.82,54.35-80,36.9-77.58,94-206.58,49-341.31C881.77,273.22,833,215,771.11,158.12,670.56,65.76,642.48,24.52,642.48,0c0,0-116.09,129.41-65.74,264.38s153.46,130,221.68,270.87c80.27,165.74-64.95,346.61-185,397.24,7.35-1.63,267-60.38,280.61-208.88C893.68,726.34,887.83,767.41,813.92,801Z" style="fill:url(#linear-gradient-7)"/><path d="M477.59,319.37c.39-8.77-4.16-14.66-76.68-21.46-29.84-2.76-41.26-30.33-44.75-41.94-10.61,27.56-15,56.49-12.64,91.48,1.61,22.92,17,47.52,24.37,62,0,0,1.64-2.13,2.39-2.91,13.86-14.43,71.94-36.42,77.39-39.54C453.69,363.16,476.58,346.44,477.59,319.37Z" style="fill:url(#linear-gradient-8)"/><path d="M477.59,319.37c.39-8.77-4.16-14.66-76.68-21.46-29.84-2.76-41.26-30.33-44.75-41.94-10.61,27.56-15,56.49-12.64,91.48,1.61,22.92,17,47.52,24.37,62,0,0,1.64-2.13,2.39-2.91,13.86-14.43,71.94-36.42,77.39-39.54C453.69,363.16,476.58,346.44,477.59,319.37Z" style="opacity:0.5;isolation:isolate;fill:url(#radial-gradient-3)"/><path 
d="M158.31,156.47c-1-1.82-1.6,3.75-2.4,2-6-13.81-9.58-36.2-8.72-52,0,0-12.32,5.61-22.51,29.06-1.89,4.21-3.11,6.54-4.32,8.86-.56.68,1.27-7.7,1-7.24-1.77,3-6.36,7.19-8.35,12.38-1.65,4.24-3.35,6.52-4.61,11.77-.39,1.43.39-6.32,0-5.38C84.72,201.68,80.19,271,82.69,268,133.17,214.14,191,201.36,191,201.36c-6.15-4.53-19.53-17.63-32.7-44.89Z" style="fill:url(#linear-gradient-9)"/><path d="M349.84,720.1c-69.72-29.77-149-71.75-146-167.14C207.92,427.35,321,452.18,321,452.18c-4.27,1-15.68,9.16-19.72,17.82-4.27,10.83-12.07,35.28,11.55,60.9,37.09,40.19-76.2,95.36,98.66,199.57,4.41,2.4-41-1.43-61.64-10.36Z" style="fill:url(#linear-gradient-10)"/><path d="M325.07,657.5c49.44,17.21,107,14.19,141.52-4.86,23.09-12.85,52.7-33.43,70.92-28.35-15.78-6.24-27.73-9.15-42.1-9.86-2.45,0-5.38,0-8-.32a136,136,0,0,0-15.76.86c-8.9.82-18.77,6.43-27.74,5.53-.48,0,8.7-3.77,8-3.61-4.75,1-9.92,1.21-15.37,1.88-3.47.39-6.45.82-9.89,1-103,8.73-190-55.81-190-55.81-7.41,25,33.17,74.3,88.52,93.57Z" style="opacity:0.5;isolation:isolate;fill:url(#linear-gradient-11)"/><path d="M813.74,801.65c104.16-102.27,156.86-226.58,134.58-366,0,0,8.9,71.5-24.85,144.63,16.21-71.39,18.1-160.11-25-252C841,205.64,746.45,141.11,710.35,114.19,655.66,73.4,633,31.87,632.57,23.3c-16.34,33.48-65.77,148.2-5.31,247,56.64,92.56,145.86,120,208.33,205C950.67,631.67,813.74,801.65,813.74,801.65Z" style="fill:url(#linear-gradient-12)"/><path d="M798.81,535.55C762.41,460.35,717,427.55,674,392c5,7,6.23,9.47,9,14,37.83,40.32,93.61,138.66,53.11,262.11C659.88,900.48,355,791.06,323,760.32,335.93,894.81,561,959.16,707.6,872,791,793,858.47,658.79,798.81,535.55Z" style="fill:url(#linear-gradient-13)"/></g></g></g></g></svg>
+\ No newline at end of file
diff --git a/tools/profiler/tests/browser/head.js b/tools/profiler/tests/browser/head.js
new file mode 100644
index 0000000000..ef0e3128c0
--- /dev/null
+++ b/tools/profiler/tests/browser/head.js
@@ -0,0 +1,159 @@
+/* import-globals-from ../shared-head.js */
+
+Services.scriptloader.loadSubScript(
+  "chrome://mochitests/content/browser/tools/profiler/tests/browser/shared-head.js",
+  this
+);
+
+const BASE_URL = "http://example.com/browser/tools/profiler/tests/browser/";
+const BASE_URL_HTTPS =
+  "https://example.com/browser/tools/profiler/tests/browser/";
+
+registerCleanupFunction(async () => {
+  if (Services.profiler.IsActive()) {
+    info(
+      "The profiler was found to still be running at the end of the test, which means that some error likely occured. Let's stop it to prevent issues with following tests!"
+    );
+    await Services.profiler.StopProfiler();
+  }
+});
+
+/**
+ * This is a helper function that will stop the profiler and returns the main
+ * threads for the parent process and the content process with PID contentPid.
+ * This happens immediately, without waiting for any sampling to happen or
+ * finish. Use waitSamplingAndStopProfilerAndGetThreads below instead to wait
+ * for samples before stopping.
+ * This returns also the full profile in case the caller wants more information.
+ *
+ * @param {number} contentPid
+ * @returns {Promise<{profile, parentThread, contentProcess, contentThread}>}
+ */
+async function stopProfilerNowAndGetThreads(contentPid) {
+  const profile = await stopNowAndGetProfile();
+
+  const parentThread = profile.threads[0];
+  const contentProcess = profile.processes.find(
+    p => p.threads[0].pid == contentPid
+  );
+  if (!contentProcess) {
+    throw new Error(
+      `Could not find the content process with given pid: ${contentPid}`
+    );
+  }
+
+  if (!parentThread) {
+    throw new Error("The parent thread was not found in the profile.");
+  }
+
+  const contentThread = contentProcess.threads[0];
+  if (!contentThread) {
+    throw new Error("The content thread was not found in the profile.");
+  }
+
+  return { profile, parentThread, contentProcess, contentThread };
+}
+
+/**
+ * This is a helper function that will stop the profiler and returns the main
+ * threads for the parent process and the content process with PID contentPid.
+ * As opposed to stopProfilerNowAndGetThreads (with "Now") above, the profiler
+ * in that PID will not stop until there is at least one periodic sample taken.
+ *
+ * @param {number} contentPid
+ * @returns {Promise<{profile, parentThread, contentProcess, contentThread}>}
+ */
+async function waitSamplingAndStopProfilerAndGetThreads(contentPid) {
+  await Services.profiler.waitOnePeriodicSampling();
+
+  return stopProfilerNowAndGetThreads(contentPid);
+}
+
+/** This tries to find the service worker thread by targeting a very specific
+ * UserTiming marker. Indeed we use performance.mark to add this marker from the
+ * service worker's events.
+ * Then from this thread we get its parent thread. Indeed the parent thread is
+ * where all network stuff happens, so this is useful for network marker tests.
+ *
+ * @param {Object} profile
+ * @returns {{ serviceWorkerThread: Object, serviceWorkerParentThread: Object }} the found threads
+ */
+function findServiceWorkerThreads(profile) {
+  const allThreads = [
+    profile.threads,
+    ...profile.processes.map(process => process.threads),
+  ].flat();
+
+  const serviceWorkerThread = allThreads.find(
+    ({ processType, markers }) =>
+      processType === "tab" &&
+      markers.data.some(markerTuple => {
+        const data = markerTuple[markers.schema.data];
+        return (
+          data &&
+          data.type === "UserTiming" &&
+          data.name === "__serviceworker_event"
+        );
+      })
+  );
+
+  if (!serviceWorkerThread) {
+    info(
+      "We couldn't find a service worker thread. Here are all the threads in this profile:"
+    );
+    allThreads.forEach(logInformationForThread.bind(null, ""));
+    return null;
+  }
+
+  const serviceWorkerParentThread = allThreads.find(
+    ({ name, pid }) => pid === serviceWorkerThread.pid && name === "GeckoMain"
+  );
+
+  if (!serviceWorkerParentThread) {
+    info(
+      `We couldn't find a parent thread for the service worker thread (pid: ${serviceWorkerThread.pid}, tid: ${serviceWorkerThread.tid}).`
+    );
+    info("Here are all the threads in this profile:");
+    allThreads.forEach(logInformationForThread.bind(null, ""));
+
+    // Let's write the profile on disk if MOZ_UPLOAD_DIR is present
+    const path = Services.env.get("MOZ_UPLOAD_DIR");
+    if (path) {
+      const profileName = `profile_${Date.now()}.json`;
+      const profilePath = PathUtils.join(path, profileName);
+      info(
+        `We wrote down the profile on disk as an artifact, with name ${profileName}.`
+      );
+      // This function returns a Promise, but we're not waiting on it because
+      // we're in a synchronous function. Hopefully writing will be finished
+      // when the process ends.
+      IOUtils.writeJSON(profilePath, profile).catch(err =>
+        console.error("An error happened when writing the profile on disk", err)
+      );
+    }
+    throw new Error(
+      "We couldn't find a parent thread for the service worker thread. Please read logs to find more information."
+    );
+  }
+
+  return { serviceWorkerThread, serviceWorkerParentThread };
+}
+
+/**
+ * This logs some basic information about the passed thread.
+ *
+ * @param {string} prefix
+ * @param {Object} thread
+ */
+function logInformationForThread(prefix, thread) {
+  if (!thread) {
+    info(prefix + ": thread is null or undefined.");
+    return;
+  }
+
+  const { name, pid, tid, processName, processType } = thread;
+  info(
+    `${prefix}: ` +
+      `name(${name}) pid(${pid}) tid(${tid}) processName(${processName}) processType(${processType})`
+  );
+}
diff --git a/tools/profiler/tests/browser/multi_frame.html b/tools/profiler/tests/browser/multi_frame.html
new file mode 100644
index 0000000000..b2efcedd50
--- /dev/null
+++ b/tools/profiler/tests/browser/multi_frame.html
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>Multi Frame</title>
+</head>
+<body>
+  Multi Frame
+  <iframe src="single_frame.html"></iframe>
+</body>
+</html>
diff --git a/tools/profiler/tests/browser/page_with_resources.html b/tools/profiler/tests/browser/page_with_resources.html
new file mode 100644
index 0000000000..9d2bb8f218
--- /dev/null
+++ b/tools/profiler/tests/browser/page_with_resources.html
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8"/>
+  </head>
+  <body>
+    Testing
+    <img src='firefox-logo-nightly.svg' width="24"/>
+    <img src='redirect.sjs?firefox-logo-nightly.svg' width="24"/>
+  </body>
+</html>
diff --git a/tools/profiler/tests/browser/redirect.sjs b/tools/profiler/tests/browser/redirect.sjs
new file mode 100644
index 0000000000..2a325c3d0b
--- /dev/null
+++ b/tools/profiler/tests/browser/redirect.sjs
@@ -0,0 +1,8 @@
+function handleRequest(request, response) {
+  response.setStatusLine(request.httpVersion, 301, "Moved Permanently");
+  response.setHeader(
+    "Location",
+    decodeURIComponent(request.queryString),
+    false
+  );
+}
diff --git a/tools/profiler/tests/browser/serviceworkers/firefox-logo-nightly.svg b/tools/profiler/tests/browser/serviceworkers/firefox-logo-nightly.svg
new file mode 100644
index 0000000000..f1af370d87
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/firefox-logo-nightly.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 953.37 984"><defs><linearGradient id="linear-gradient" x1="-14706.28" y1="9250.14" x2="-14443.04" y2="9250.14" gradientTransform="matrix(0.76, 0.03, 0.05, -1.12, 11485.47, 11148)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#0083ff"/><stop offset="0.1" stop-color="#0092f8"/><stop offset="0.31" stop-color="#00abeb"/><stop offset="0.52" stop-color="#00bee1"/><stop offset="0.75" stop-color="#00c8dc"/><stop offset="1" stop-color="#00ccda"/></linearGradient><radialGradient id="radial-gradient" cx="-7588.66" cy="8866.53" r="791.23" gradientTransform="matrix(1.23, 0, 0, -1.22, 9958.21, 11048.11)" gradientUnits="userSpaceOnUse"><stop offset="0.02" stop-color="#005fe7"/><stop offset="0.18" stop-color="#0042b4"/><stop offset="0.32" stop-color="#002989"/><stop offset="0.4" stop-color="#002079"/><stop offset="0.47" stop-color="#131d78"/><stop offset="0.66" stop-color="#3b1676"/><stop offset="0.75" stop-color="#4a1475"/></radialGradient><linearGradient id="linear-gradient-2" x1="539.64" y1="254.8" x2="348.2" y2="881.03" gradientTransform="matrix(1, 0, 0, -1, 1, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#000f43" stop-opacity="0.4"/><stop offset="0.48" stop-color="#001962" stop-opacity="0.17"/><stop offset="1" stop-color="#002079" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-3" x1="540.64" y1="254.8" x2="349.2" y2="881.03" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" xlink:href="#linear-gradient-2"/><linearGradient id="linear-gradient-4" x1="-8367.12" y1="7348.87" x2="-8482.36" y2="7357.76" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#812cc9"/><stop offset="1" stop-color="#005fe7"/></linearGradient><linearGradient id="linear-gradient-5" x1="-8449.89" y1="7496.97" x2="-8341.94" y2="7609.09" gradientTransform="matrix(1.22, 0.12, 
0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0.05" stop-color="#005fe7"/><stop offset="0.18" stop-color="#065de6"/><stop offset="0.35" stop-color="#1856e1"/><stop offset="0.56" stop-color="#354adb"/><stop offset="0.78" stop-color="#5d3ad1"/><stop offset="0.95" stop-color="#812cc9"/></linearGradient><linearGradient id="linear-gradient-6" x1="-8653.41" y1="7245.3" x2="-8422.52" y2="7244.76" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#002079"/><stop offset="0.99" stop-color="#a238ff"/></linearGradient><radialGradient id="radial-gradient-2" cx="644.11" cy="599.83" fx="785.0454815336918" fy="470.6889181532662" r="793.95" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0.2" stop-color="#00fdff"/><stop offset="0.26" stop-color="#0af1ff"/><stop offset="0.37" stop-color="#23d2ff"/><stop offset="0.52" stop-color="#4da0ff"/><stop offset="0.69" stop-color="#855bff"/><stop offset="0.77" stop-color="#a238ff"/><stop offset="0.81" stop-color="#a738fd"/><stop offset="0.86" stop-color="#b539f9"/><stop offset="0.9" stop-color="#cd39f1"/><stop offset="0.96" stop-color="#ee3ae6"/><stop offset="0.98" stop-color="#ff3be0"/></radialGradient><linearGradient id="linear-gradient-7" x1="-7458.97" y1="9093.17" x2="-7531.06" y2="8282.84" gradientTransform="matrix(1.23, 0, 0, -1.22, 9958.21, 11048.11)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#00ec00"/><stop offset="0.1" stop-color="#00e244"/><stop offset="0.22" stop-color="#00d694"/><stop offset="0.31" stop-color="#00cfc7"/><stop offset="0.35" stop-color="#00ccda"/><stop offset="0.42" stop-color="#0bc2dd" stop-opacity="0.92"/><stop offset="0.57" stop-color="#29a7e4" stop-opacity="0.72"/><stop offset="0.77" stop-color="#597df0" stop-opacity="0.4"/><stop offset="1" stop-color="#9448ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-8" 
x1="-8926.61" y1="7680.53" x2="-8790.14" y2="7680.53" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#005fe7"/><stop offset="0.46" stop-color="#0071f3" stop-opacity="0.51"/><stop offset="0.83" stop-color="#007efc" stop-opacity="0.14"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient><radialGradient id="radial-gradient-3" cx="-8914.62" cy="7721.05" r="165.97" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0.63" stop-color="#ffe302" stop-opacity="0"/><stop offset="0.67" stop-color="#ffe302" stop-opacity="0.05"/><stop offset="0.75" stop-color="#ffe302" stop-opacity="0.19"/><stop offset="0.86" stop-color="#ffe302" stop-opacity="0.4"/><stop offset="0.99" stop-color="#ffe302" stop-opacity="0.7"/></radialGradient><linearGradient id="linear-gradient-9" x1="214.02" y1="2032.47" x2="96.19" y2="2284.31" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, -250.1, 2306.29)" gradientUnits="userSpaceOnUse"><stop offset="0.19" stop-color="#4a1475" stop-opacity="0.5"/><stop offset="0.62" stop-color="#2277ac" stop-opacity="0.23"/><stop offset="0.94" stop-color="#00ccda" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-10" x1="-38.44" y1="278.18" x2="55.67" y2="171.29" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, 229.04, 745.87)" gradientUnits="userSpaceOnUse"><stop offset="0.01" stop-color="#002079" stop-opacity="0.5"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-11" x1="142.45" y1="96.25" x2="142.5" y2="149.68" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, 229.04, 745.87)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#4a1475" stop-opacity="0.9"/><stop offset="0.18" stop-color="#6720a2" stop-opacity="0.6"/><stop offset="0.38" stop-color="#812acb" stop-opacity="0.34"/><stop offset="0.57" 
stop-color="#9332e8" stop-opacity="0.15"/><stop offset="0.76" stop-color="#9e36f9" stop-opacity="0.04"/><stop offset="0.93" stop-color="#a238ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-12" x1="620.52" y1="947.88" x2="926.18" y2="264.39" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#00ec00" stop-opacity="0"/><stop offset="0.28" stop-color="#00dc6d" stop-opacity="0.5"/><stop offset="0.5" stop-color="#00d1bb" stop-opacity="0.86"/><stop offset="0.6" stop-color="#00ccda"/><stop offset="0.68" stop-color="#04c9db"/><stop offset="0.75" stop-color="#0fc1df"/><stop offset="0.83" stop-color="#23b2e6"/><stop offset="0.9" stop-color="#3e9ef0"/><stop offset="0.98" stop-color="#6184fc"/><stop offset="0.99" stop-color="#6680fe"/></linearGradient><linearGradient id="linear-gradient-13" x1="680.88" y1="554.79" x2="536.1" y2="166.04" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#0083ff"/><stop offset="0.04" stop-color="#0083ff" stop-opacity="0.92"/><stop offset="0.14" stop-color="#0083ff" stop-opacity="0.71"/><stop offset="0.26" stop-color="#0083ff" stop-opacity="0.52"/><stop offset="0.37" stop-color="#0083ff" stop-opacity="0.36"/><stop offset="0.49" stop-color="#0083ff" stop-opacity="0.23"/><stop offset="0.61" stop-color="#0083ff" stop-opacity="0.13"/><stop offset="0.73" stop-color="#0083ff" stop-opacity="0.06"/><stop offset="0.86" stop-color="#0083ff" stop-opacity="0.01"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient></defs><title>firefox-logo-nightly</title><g id="Layer_2" data-name="Layer 2"><g id="Layer_1-2" data-name="Layer 1"><g id="Layer_2-2" data-name="Layer 2"><g id="Firefox"><path d="M770.28,91.56c-23.95,27.88-35.1,90.64-10.82,154.26s61.5,49.8,84.7,114.67c30.62,85.6,16.37,200.59,16.37,200.59s36.81,106.61,62.47-6.63C979.79,341.74,770.28,143.94,770.28,91.56Z" 
style="fill:url(#linear-gradient)"/><path id="_Path_" data-name=" Path " d="M476.92,972.83c245.24,0,443.9-199.74,443.9-446s-198.66-446-443.66-446S33.5,280.51,33.5,526.8C33,773.33,231.92,972.83,476.92,972.83Z" style="fill:url(#radial-gradient)"/><path d="M810.67,803.64a246.8,246.8,0,0,1-30.12,18.18,705.31,705.31,0,0,0,38.3-63c9.46-10.47,18.13-20.65,25.19-31.65,3.44-5.41,7.31-12.08,11.42-19.82,24.92-44.9,52.4-117.56,53.18-192.2v-5.66a257.25,257.25,0,0,0-5.71-55.75c.2,1.43.38,2.86.56,4.29-.22-1.1-.41-2.21-.64-3.31.37,2,.66,4,1,6,5.09,43.22,1.47,85.37-16.68,116.45-.29.45-.58.88-.87,1.32,9.41-47.23,12.56-99.39,2.09-151.6,0,0-4.19-25.38-35.38-102.44-18-44.35-49.83-80.72-78-107.21-24.69-30.55-47.11-51-59.47-64.06C689.72,126,678.9,105.61,674.45,92.31c-3.85-1.93-53.14-49.81-57.05-51.63-21.51,33.35-89.16,137.67-57,235.15,14.58,44.17,51.47,90,90.07,115.74,1.69,1.94,23,25,33.09,77.16,10.45,53.85,5,95.86-16.54,158C641.73,681.24,577,735.12,516.3,740.63c-129.67,11.78-177.15-65.11-177.15-65.11C385.49,694,436.72,690.17,467.87,671c31.4-19.43,50.39-33.83,65.81-28.15C548.86,648.43,561,632,550.1,615a78.5,78.5,0,0,0-79.4-34.57c-31.43,5.11-60.23,30-101.41,5.89a86.29,86.29,0,0,1-7.73-5.06c-2.71-1.79,8.83,2.72,6.13.69-8-4.35-22.2-13.84-25.88-17.22-.61-.56,6.22,2.18,5.61,1.62-38.51-31.71-33.7-53.13-32.49-66.57,1-10.75,8-24.52,19.75-30.11,5.69,3.11,9.24,5.48,9.24,5.48s-2.43-5-3.74-7.58c.46-.2.9-.15,1.36-.34,4.66,2.25,15,8.1,20.41,11.67,7.07,5,9.33,9.44,9.33,9.44s1.86-1,.48-5.37c-.5-1.78-2.65-7.45-9.65-13.17h.44A81.61,81.61,0,0,1,374.42,478c2-7.18,5.53-14.68,4.75-28.09-.48-9.43-.26-11.87-1.92-15.51-1.49-3.13.83-4.35,3.42-1.1a32.5,32.5,0,0,0-2.21-7.4v-.24c3.23-11.24,68.25-40.46,73-43.88A67.2,67.2,0,0,0,470.59,361c3.62-5.76,6.34-13.85,7-26.11.36-8.84-3.76-14.73-69.51-21.62-18-1.77-28.53-14.8-34.53-26.82-1.09-2.59-2.21-4.94-3.33-7.28a57.68,57.68,0,0,1-2.56-8.43c10.75-30.87,28.81-57,55.37-76.7,1.45-1.32-5.78.34-4.34-1,1.69-1.54,12.71-6,14.79-7,2.54-1.2-10.88-6.9-22.73-5.51-12.07,1.36-14.63,2.8-21.
07,5.53,2.67-2.66,11.17-6.15,9.18-6.13-13,2-29.18,9.56-43,18.12a10.66,10.66,0,0,1,.83-4.35c-6.44,2.73-22.26,13.79-26.87,23.14a44.29,44.29,0,0,0,.27-5.4,84.17,84.17,0,0,0-13.19,13.82l-.24.22c-37.36-15-70.23-16-98.05-9.28-6.09-6.11-9.06-1.64-22.91-32.07-.94-1.83.72,1.81,0,0-2.28-5.9,1.39,7.87,0,0-23.28,18.37-53.92,39.19-68.63,53.89-.18.59,17.16-4.9,0,0-6,1.72-5.6,5.28-6.51,37.5-.22,2.44,0,5.18-.22,7.38-11.75,15-19.75,27.64-22.78,34.21-15.19,26.18-31.93,67-48.15,131.55A334.82,334.82,0,0,1,75.2,398.36C61.71,432.63,48.67,486.44,46.07,569.3A482.08,482.08,0,0,1,58.6,518.64,473,473,0,0,0,93.33,719.71c9.33,22.82,24.76,57.46,51,95.4C226.9,902,343.31,956,472.21,956,606.79,956,727.64,897.13,810.67,803.64Z" style="fill:url(#linear-gradient-2)"/><path d="M810.67,803.64a246.8,246.8,0,0,1-30.12,18.18,705.31,705.31,0,0,0,38.3-63c9.46-10.47,18.13-20.65,25.19-31.65,3.44-5.41,7.31-12.08,11.42-19.82,24.92-44.9,52.4-117.56,53.18-192.2v-5.66a257.25,257.25,0,0,0-5.71-55.75c.2,1.43.38,2.86.56,4.29-.22-1.1-.41-2.21-.64-3.31.37,2,.66,4,1,6,5.09,43.22,1.47,85.37-16.68,116.45-.29.45-.58.88-.87,1.32,9.41-47.23,12.56-99.39,2.09-151.6,0,0-4.19-25.38-35.38-102.44-18-44.35-49.83-80.72-78-107.21-24.69-30.55-47.11-51-59.47-64.06C689.72,126,678.9,105.61,674.45,92.31c-3.85-1.93-53.14-49.81-57.05-51.63-21.51,33.35-89.16,137.67-57,235.15,14.58,44.17,51.47,90,90.07,115.74,1.69,1.94,23,25,33.09,77.16,10.45,53.85,5,95.86-16.54,158C641.73,681.24,577,735.12,516.3,740.63c-129.67,11.78-177.15-65.11-177.15-65.11C385.49,694,436.72,690.17,467.87,671c31.4-19.43,50.39-33.83,65.81-28.15C548.86,648.43,561,632,550.1,615a78.5,78.5,0,0,0-79.4-34.57c-31.43,5.11-60.23,30-101.41,5.89a86.29,86.29,0,0,1-7.73-5.06c-2.71-1.79,8.83,2.72,6.13.69-8-4.35-22.2-13.84-25.88-17.22-.61-.56,6.22,2.18,5.61,1.62-38.51-31.71-33.7-53.13-32.49-66.57,1-10.75,8-24.52,19.75-30.11,5.69,3.11,9.24,5.48,9.24,5.48s-2.43-5-3.74-7.58c.46-.2.9-.15,1.36-.34,4.66,2.25,15,8.1,20.41,11.67,7.07,5,9.33,9.44,9.33,9.44s1.86-1,.48-5.37c-.5-1.78-2.65-7.45-9.65-13.
17h.44A81.61,81.61,0,0,1,374.42,478c2-7.18,5.53-14.68,4.75-28.09-.48-9.43-.26-11.87-1.92-15.51-1.49-3.13.83-4.35,3.42-1.1a32.5,32.5,0,0,0-2.21-7.4v-.24c3.23-11.24,68.25-40.46,73-43.88A67.2,67.2,0,0,0,470.59,361c3.62-5.76,6.34-13.85,7-26.11.36-8.84-3.76-14.73-69.51-21.62-18-1.77-28.53-14.8-34.53-26.82-1.09-2.59-2.21-4.94-3.33-7.28a57.68,57.68,0,0,1-2.56-8.43c10.75-30.87,28.81-57,55.37-76.7,1.45-1.32-5.78.34-4.34-1,1.69-1.54,12.71-6,14.79-7,2.54-1.2-10.88-6.9-22.73-5.51-12.07,1.36-14.63,2.8-21.07,5.53,2.67-2.66,11.17-6.15,9.18-6.13-13,2-29.18,9.56-43,18.12a10.66,10.66,0,0,1,.83-4.35c-6.44,2.73-22.26,13.79-26.87,23.14a44.29,44.29,0,0,0,.27-5.4,84.17,84.17,0,0,0-13.19,13.82l-.24.22c-37.36-15-70.23-16-98.05-9.28-6.09-6.11-9.06-1.64-22.91-32.07-.94-1.83.72,1.81,0,0-2.28-5.9,1.39,7.87,0,0-23.28,18.37-53.92,39.19-68.63,53.89-.18.59,17.16-4.9,0,0-6,1.72-5.6,5.28-6.51,37.5-.22,2.44,0,5.18-.22,7.38-11.75,15-19.75,27.64-22.78,34.21-15.19,26.18-31.93,67-48.15,131.55A334.82,334.82,0,0,1,75.2,398.36C61.71,432.63,48.67,486.44,46.07,569.3A482.08,482.08,0,0,1,58.6,518.64,473,473,0,0,0,93.33,719.71c9.33,22.82,24.76,57.46,51,95.4C226.9,902,343.31,956,472.21,956,606.79,956,727.64,897.13,810.67,803.64Z" style="fill:url(#linear-gradient-3)"/><path d="M711.1,866.71c162.87-18.86,235-186.7,142.38-190C769.85,674,634,875.61,711.1,866.71Z" style="fill:url(#linear-gradient-4)"/><path d="M865.21,642.42C977.26,577.21,948,436.34,948,436.34s-43.25,50.24-72.62,130.32C846.4,646,797.84,681.81,865.21,642.42Z" style="fill:url(#linear-gradient-5)"/><path d="M509.47,950.06C665.7,999.91,800,876.84,717.21,835.74,642,798.68,435.32,926.49,509.47,950.06Z" style="fill:url(#linear-gradient-6)"/><path 
d="M638.58,21.42l.53-.57A1.7,1.7,0,0,0,638.58,21.42ZM876.85,702.23c3.8-5.36,8.94-22.53,13.48-30.21,27.58-44.52,27.78-80,27.78-80.84,16.66-83.22,15.15-117.2,4.9-180-8.25-50.6-44.32-123.09-75.57-158-32.2-36-9.51-24.25-40.69-50.52-27.33-30.29-53.82-60.29-68.25-72.36C634.22,43.09,636.57,24.58,638.58,21.42c-.34.37-.84.92-1.47,1.64C635.87,18.14,635,14,635,14s-57,57-69,152c-7.83,62,15.38,126.68,49,168a381.62,381.62,0,0,0,59,58h0c25.4,36.48,39.38,81.49,39.38,129.91,0,121.24-98.34,219.53-219.65,219.53a220.14,220.14,0,0,1-49.13-5.52c-57.24-10.92-90.3-39.8-106.78-59.41-9.45-11.23-13.46-19.42-13.46-19.42,51.28,18.37,108,14.53,142.47-4.52,34.75-19.26,55.77-33.55,72.84-27.92,16.82,5.61,30.21-10.67,18.2-27.54-11.77-16.85-42.4-41-87.88-34.29-34.79,5.07-66.66,29.76-112.24,5.84a97.34,97.34,0,0,1-8.55-5c-3-1.77,9.77,2.69,6.79.68-8.87-4.32-24.57-13.73-28.64-17.07-.68-.56,6.88,2.16,6.2,1.6-42.62-31.45-37.3-52.69-36-66,1.07-10.66,8.81-24.32,21.86-29.86,6.3,3.08,10.23,5.43,10.23,5.43s-2.69-4.92-4.14-7.51c.51-.19,1-.15,1.5-.34,5.16,2.23,16.58,8,22.59,11.57,7.83,4.95,10.32,9.36,10.32,9.36s2.06-1,.54-5.33c-.56-1.77-2.93-7.39-10.68-13.07h.48a91.65,91.65,0,0,1,13.13,8.17c2.19-7.12,6.12-14.56,5.25-27.86-.53-9.35-.28-11.78-2.12-15.39-1.65-3.1.92-4.31,3.78-1.09a29.73,29.73,0,0,0-2.44-7.34v-.24c3.57-11.14,75.53-40.12,80.77-43.51a70.24,70.24,0,0,0,21.17-20.63c4-5.72,7-13.73,7.75-25.89.25-5.48-1.44-9.82-20.5-14-11.44-2.49-29.14-4.91-56.43-7.47-19.9-1.76-31.58-14.68-38.21-26.6-1.21-2.57-2.45-4.9-3.68-7.22a53.41,53.41,0,0,1-2.83-8.36,158.47,158.47,0,0,1,61.28-76.06c1.6-1.31-6.4.33-4.8-1,1.87-1.52,14.06-5.93,16.37-6.92,2.81-1.19-12-6.84-25.16-5.47-13.36,1.35-16.19,2.78-23.32,5.49,3-2.64,12.37-6.1,10.16-6.08-14.4,2-32.3,9.48-47.6,18a9.72,9.72,0,0,1,.92-4.31c-7.13,2.71-24.64,13.67-29.73,23a39.79,39.79,0,0,0,.29-5.35,88.55,88.55,0,0,0-14.6,13.7l-.27.22C258.14,196,221.75,195,191,201.72c-6.74-6.06-17.57-15.23-32.89-45.4-1-1.82-1.6,3.75-2.4,2-6-13.81-9.55-36.44-9-52,0,0-12.32,5.61-22.51,29.06-1.89,4.21-3.11,
6.54-4.32,8.87-.56.68,1.27-7.7,1-7.24-1.77,3-6.36,7.19-8.37,12.62-1.38,4-3.32,6.27-4.56,11.29l-.29.46c-.1-1.48.37-6.08,0-5.14A235.4,235.4,0,0,0,95.34,186c-5.49,18-11.88,42.61-12.89,74.57-.24,2.42,0,5.14-.25,7.32-13,14.83-21.86,27.39-25.2,33.91-16.81,26-35.33,66.44-53.29,130.46a319.35,319.35,0,0,1,28.54-50C17.32,416.25,2.89,469.62,0,551.8a436.92,436.92,0,0,1,13.87-50.24C11.29,556.36,17.68,624.3,52.32,701c20.57,45,67.92,136.6,183.62,208h0s39.36,29.3,107,51.26c5,1.81,10.06,3.6,15.23,5.33q-2.43-1-4.71-2A484.9,484.9,0,0,0,492.27,984c175.18.15,226.85-70.2,226.85-70.2l-.51.38q3.71-3.49,7.14-7.26c-27.64,26.08-90.75,27.84-114.3,26,40.22-11.81,66.69-21.81,118.17-41.52q9-3.36,18.48-7.64l2-.94c1.25-.58,2.49-1.13,3.75-1.74a349.3,349.3,0,0,0,70.26-44c51.7-41.3,63-81.56,68.83-108.1-.82,2.54-3.37,8.47-5.17,12.32-13.31,28.48-42.84,46-74.91,61a689.05,689.05,0,0,0,42.38-62.44C865.77,729.39,869,713.15,876.85,702.23Z" style="fill:url(#radial-gradient-2)"/><path d="M813.92,801c21.08-23.24,40-49.82,54.35-80,36.9-77.58,94-206.58,49-341.31C881.77,273.22,833,215,771.11,158.12,670.56,65.76,642.48,24.52,642.48,0c0,0-116.09,129.41-65.74,264.38s153.46,130,221.68,270.87c80.27,165.74-64.95,346.61-185,397.24,7.35-1.63,267-60.38,280.61-208.88C893.68,726.34,887.83,767.41,813.92,801Z" style="fill:url(#linear-gradient-7)"/><path d="M477.59,319.37c.39-8.77-4.16-14.66-76.68-21.46-29.84-2.76-41.26-30.33-44.75-41.94-10.61,27.56-15,56.49-12.64,91.48,1.61,22.92,17,47.52,24.37,62,0,0,1.64-2.13,2.39-2.91,13.86-14.43,71.94-36.42,77.39-39.54C453.69,363.16,476.58,346.44,477.59,319.37Z" style="fill:url(#linear-gradient-8)"/><path d="M477.59,319.37c.39-8.77-4.16-14.66-76.68-21.46-29.84-2.76-41.26-30.33-44.75-41.94-10.61,27.56-15,56.49-12.64,91.48,1.61,22.92,17,47.52,24.37,62,0,0,1.64-2.13,2.39-2.91,13.86-14.43,71.94-36.42,77.39-39.54C453.69,363.16,476.58,346.44,477.59,319.37Z" style="opacity:0.5;isolation:isolate;fill:url(#radial-gradient-3)"/><path 
d="M158.31,156.47c-1-1.82-1.6,3.75-2.4,2-6-13.81-9.58-36.2-8.72-52,0,0-12.32,5.61-22.51,29.06-1.89,4.21-3.11,6.54-4.32,8.86-.56.68,1.27-7.7,1-7.24-1.77,3-6.36,7.19-8.35,12.38-1.65,4.24-3.35,6.52-4.61,11.77-.39,1.43.39-6.32,0-5.38C84.72,201.68,80.19,271,82.69,268,133.17,214.14,191,201.36,191,201.36c-6.15-4.53-19.53-17.63-32.7-44.89Z" style="fill:url(#linear-gradient-9)"/><path d="M349.84,720.1c-69.72-29.77-149-71.75-146-167.14C207.92,427.35,321,452.18,321,452.18c-4.27,1-15.68,9.16-19.72,17.82-4.27,10.83-12.07,35.28,11.55,60.9,37.09,40.19-76.2,95.36,98.66,199.57,4.41,2.4-41-1.43-61.64-10.36Z" style="fill:url(#linear-gradient-10)"/><path d="M325.07,657.5c49.44,17.21,107,14.19,141.52-4.86,23.09-12.85,52.7-33.43,70.92-28.35-15.78-6.24-27.73-9.15-42.1-9.86-2.45,0-5.38,0-8-.32a136,136,0,0,0-15.76.86c-8.9.82-18.77,6.43-27.74,5.53-.48,0,8.7-3.77,8-3.61-4.75,1-9.92,1.21-15.37,1.88-3.47.39-6.45.82-9.89,1-103,8.73-190-55.81-190-55.81-7.41,25,33.17,74.3,88.52,93.57Z" style="opacity:0.5;isolation:isolate;fill:url(#linear-gradient-11)"/><path d="M813.74,801.65c104.16-102.27,156.86-226.58,134.58-366,0,0,8.9,71.5-24.85,144.63,16.21-71.39,18.1-160.11-25-252C841,205.64,746.45,141.11,710.35,114.19,655.66,73.4,633,31.87,632.57,23.3c-16.34,33.48-65.77,148.2-5.31,247,56.64,92.56,145.86,120,208.33,205C950.67,631.67,813.74,801.65,813.74,801.65Z" style="fill:url(#linear-gradient-12)"/><path d="M798.81,535.55C762.41,460.35,717,427.55,674,392c5,7,6.23,9.47,9,14,37.83,40.32,93.61,138.66,53.11,262.11C659.88,900.48,355,791.06,323,760.32,335.93,894.81,561,959.16,707.6,872,791,793,858.47,658.79,798.81,535.55Z" style="fill:url(#linear-gradient-13)"/></g></g></g></g></svg>
+\ No newline at end of file
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker-utils.js b/tools/profiler/tests/browser/serviceworkers/serviceworker-utils.js
new file mode 100644
index 0000000000..16a9f0c91f
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker-utils.js
@@ -0,0 +1,39 @@
+// Most of this file has been stolen from dom/serviceworkers/test/utils.js.
+
+function waitForState(worker, state) {
+  return new Promise((resolve, reject) => {
+    function onStateChange() {
+      if (worker.state === state) {
+        worker.removeEventListener("statechange", onStateChange);
+        resolve();
+      }
+      if (worker.state === "redundant") {
+        worker.removeEventListener("statechange", onStateChange);
+        reject(new Error("The service worker failed to install."));
+      }
+    }
+
+    // First add an event listener, so we won't miss any change that happens
+    // before we check the current state.
+    worker.addEventListener("statechange", onStateChange);
+
+    // Now check if the worker is already in the desired state.
+    onStateChange();
+  });
+}
+
+async function registerServiceWorkerAndWait(serviceWorkerFile) {
+  if (!serviceWorkerFile) {
+    throw new Error(
+      "No service worker filename has been specified. Please specify a valid filename."
+    );
+  }
+
+  console.log(`...registering the serviceworker "${serviceWorkerFile}"`);
+  const reg = await navigator.serviceWorker.register(`./${serviceWorkerFile}`, {
+    scope: "./",
+  });
+  console.log("...waiting for activation");
+  await waitForState(reg.installing, "activated");
+  console.log("...activated!");
+}
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_cache_first.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_cache_first.js
new file mode 100644
index 0000000000..baa07fd6d8
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_cache_first.js
@@ -0,0 +1,34 @@
+const files = ["serviceworker_page.html", "firefox-logo-nightly.svg"];
+const cacheName = "v1";
+
+self.addEventListener("install", event => {
+  performance.mark("__serviceworker_event");
+  console.log("[SW]:", "Install event");
+
+  event.waitUntil(cacheAssets());
+});
+
+async function cacheAssets() {
+  const cache = await caches.open(cacheName);
+  await cache.addAll(files);
+}
+
+self.addEventListener("fetch", event => {
+  performance.mark("__serviceworker_event");
+  console.log("Handling fetch event for", event.request.url);
+  event.respondWith(handleFetch(event.request));
+});
+
+async function handleFetch(request) {
+  const cachedResponse = await caches.match(request);
+  if (cachedResponse) {
+    console.log("Found response in cache:", cachedResponse);
+
+    return cachedResponse;
+  }
+  console.log("No response found in cache. About to fetch from network...");
+
+  const networkResponse = await fetch(request);
+  console.log("Response from network is:", networkResponse);
+  return networkResponse;
+}
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_no_fetch_handler.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_fetch_handler.js
new file mode 100644
index 0000000000..f656665ca0
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_fetch_handler.js
@@ -0,0 +1,4 @@
+self.addEventListener("install", event => {
+  performance.mark("__serviceworker_event");
+  console.log("[SW]:", "Install event");
+});
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js
new file mode 100644
index 0000000000..255c8269a1
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_no_respondWith_in_fetch_handler.js
@@ -0,0 +1,9 @@
+self.addEventListener("install", event => {
+  performance.mark("__serviceworker_event");
+  console.log("[SW]:", "Install event");
+});
+
+self.addEventListener("fetch", event => {
+  performance.mark("__serviceworker_event");
+  console.log("Handling fetch event for", event.request.url);
+});
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_page.html b/tools/profiler/tests/browser/serviceworkers/serviceworker_page.html
new file mode 100644
index 0000000000..1c2100a9d6
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_page.html
@@ -0,0 +1,10 @@
+<!doctype html>
+<html>
+  <head>
+    <meta charset='utf-8'>
+    <meta name='viewport' content='initial-scale=1'>
+  </head>
+  <body>
+    <img src='firefox-logo-nightly.svg' width="24">
+  </body>
+</html>
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_register.html b/tools/profiler/tests/browser/serviceworkers/serviceworker_register.html
new file mode 100644
index 0000000000..86719787f4
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_register.html
@@ -0,0 +1,9 @@
+<!doctype html>
+<html>
+  <head>
+    <meta charset='utf-8'>
+    <script src='serviceworker-utils.js'></script>
+  </head>
+  <body>
+  </body>
+</html>
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_simple.html b/tools/profiler/tests/browser/serviceworkers/serviceworker_simple.html
new file mode 100644
index 0000000000..f7c32d02c3
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_simple.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8"/>
+  </head>
+  <body>
+    Testing
+  </body>
+</html>
diff --git a/tools/profiler/tests/browser/serviceworkers/serviceworker_synthetized_response.js b/tools/profiler/tests/browser/serviceworkers/serviceworker_synthetized_response.js
new file mode 100644
index 0000000000..891b679a5f
--- /dev/null
+++ b/tools/profiler/tests/browser/serviceworkers/serviceworker_synthetized_response.js
@@ -0,0 +1,27 @@
+self.addEventListener("install", event => {
+  performance.mark("__serviceworker_event");
+  dump("[SW]:", "Install event\n");
+});
+
+self.addEventListener("fetch", event => {
+  performance.mark("__serviceworker_event");
+  dump(`Handling fetch event for ${event.request.url}\n`);
+  event.respondWith(handleFetch(event.request));
+});
+
+async function handleFetch(request) {
+  if (request.url.endsWith("-generated.svg")) {
+    dump(
+      "An icon file that should be generated was requested, let's answer directly.\n"
+    );
+    return new Response(
+      `<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 953.37 984"><defs><linearGradient id="linear-gradient" x1="-14706.28" y1="9250.14" x2="-14443.04" y2="9250.14" gradientTransform="matrix(0.76, 0.03, 0.05, -1.12, 11485.47, 11148)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#0083ff"/><stop offset="0.1" stop-color="#0092f8"/><stop offset="0.31" stop-color="#00abeb"/><stop offset="0.52" stop-color="#00bee1"/><stop offset="0.75" stop-color="#00c8dc"/><stop offset="1" stop-color="#00ccda"/></linearGradient><radialGradient id="radial-gradient" cx="-7588.66" cy="8866.53" r="791.23" gradientTransform="matrix(1.23, 0, 0, -1.22, 9958.21, 11048.11)" gradientUnits="userSpaceOnUse"><stop offset="0.02" stop-color="#005fe7"/><stop offset="0.18" stop-color="#0042b4"/><stop offset="0.32" stop-color="#002989"/><stop offset="0.4" stop-color="#002079"/><stop offset="0.47" stop-color="#131d78"/><stop offset="0.66" stop-color="#3b1676"/><stop offset="0.75" stop-color="#4a1475"/></radialGradient><linearGradient id="linear-gradient-2" x1="539.64" y1="254.8" x2="348.2" y2="881.03" gradientTransform="matrix(1, 0, 0, -1, 1, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#000f43" stop-opacity="0.4"/><stop offset="0.48" stop-color="#001962" stop-opacity="0.17"/><stop offset="1" stop-color="#002079" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-3" x1="540.64" y1="254.8" x2="349.2" y2="881.03" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" xlink:href="#linear-gradient-2"/><linearGradient id="linear-gradient-4" x1="-8367.12" y1="7348.87" x2="-8482.36" y2="7357.76" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#812cc9"/><stop offset="1" stop-color="#005fe7"/></linearGradient><linearGradient id="linear-gradient-5" x1="-8449.89" y1="7496.97" x2="-8341.94" y2="7609.09" gradientTransform="matrix(1.22, 
0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0.05" stop-color="#005fe7"/><stop offset="0.18" stop-color="#065de6"/><stop offset="0.35" stop-color="#1856e1"/><stop offset="0.56" stop-color="#354adb"/><stop offset="0.78" stop-color="#5d3ad1"/><stop offset="0.95" stop-color="#812cc9"/></linearGradient><linearGradient id="linear-gradient-6" x1="-8653.41" y1="7245.3" x2="-8422.52" y2="7244.76" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#002079"/><stop offset="0.99" stop-color="#a238ff"/></linearGradient><radialGradient id="radial-gradient-2" cx="644.11" cy="599.83" fx="785.0454815336918" fy="470.6889181532662" r="793.95" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0.2" stop-color="#00fdff"/><stop offset="0.26" stop-color="#0af1ff"/><stop offset="0.37" stop-color="#23d2ff"/><stop offset="0.52" stop-color="#4da0ff"/><stop offset="0.69" stop-color="#855bff"/><stop offset="0.77" stop-color="#a238ff"/><stop offset="0.81" stop-color="#a738fd"/><stop offset="0.86" stop-color="#b539f9"/><stop offset="0.9" stop-color="#cd39f1"/><stop offset="0.96" stop-color="#ee3ae6"/><stop offset="0.98" stop-color="#ff3be0"/></radialGradient><linearGradient id="linear-gradient-7" x1="-7458.97" y1="9093.17" x2="-7531.06" y2="8282.84" gradientTransform="matrix(1.23, 0, 0, -1.22, 9958.21, 11048.11)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#00ec00"/><stop offset="0.1" stop-color="#00e244"/><stop offset="0.22" stop-color="#00d694"/><stop offset="0.31" stop-color="#00cfc7"/><stop offset="0.35" stop-color="#00ccda"/><stop offset="0.42" stop-color="#0bc2dd" stop-opacity="0.92"/><stop offset="0.57" stop-color="#29a7e4" stop-opacity="0.72"/><stop offset="0.77" stop-color="#597df0" stop-opacity="0.4"/><stop offset="1" stop-color="#9448ff" stop-opacity="0"/></linearGradient><linearGradient 
id="linear-gradient-8" x1="-8926.61" y1="7680.53" x2="-8790.14" y2="7680.53" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#005fe7"/><stop offset="0.46" stop-color="#0071f3" stop-opacity="0.51"/><stop offset="0.83" stop-color="#007efc" stop-opacity="0.14"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient><radialGradient id="radial-gradient-3" cx="-8914.62" cy="7721.05" r="165.97" gradientTransform="matrix(1.22, 0.12, 0.12, -1.22, 10241.06, 10765.32)" gradientUnits="userSpaceOnUse"><stop offset="0.63" stop-color="#ffe302" stop-opacity="0"/><stop offset="0.67" stop-color="#ffe302" stop-opacity="0.05"/><stop offset="0.75" stop-color="#ffe302" stop-opacity="0.19"/><stop offset="0.86" stop-color="#ffe302" stop-opacity="0.4"/><stop offset="0.99" stop-color="#ffe302" stop-opacity="0.7"/></radialGradient><linearGradient id="linear-gradient-9" x1="214.02" y1="2032.47" x2="96.19" y2="2284.31" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, -250.1, 2306.29)" gradientUnits="userSpaceOnUse"><stop offset="0.19" stop-color="#4a1475" stop-opacity="0.5"/><stop offset="0.62" stop-color="#2277ac" stop-opacity="0.23"/><stop offset="0.94" stop-color="#00ccda" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-10" x1="-38.44" y1="278.18" x2="55.67" y2="171.29" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, 229.04, 745.87)" gradientUnits="userSpaceOnUse"><stop offset="0.01" stop-color="#002079" stop-opacity="0.5"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-11" x1="142.45" y1="96.25" x2="142.5" y2="149.68" gradientTransform="matrix(0.99, 0.1, 0.1, -0.99, 229.04, 745.87)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#4a1475" stop-opacity="0.9"/><stop offset="0.18" stop-color="#6720a2" stop-opacity="0.6"/><stop offset="0.38" stop-color="#812acb" stop-opacity="0.34"/><stop 
offset="0.57" stop-color="#9332e8" stop-opacity="0.15"/><stop offset="0.76" stop-color="#9e36f9" stop-opacity="0.04"/><stop offset="0.93" stop-color="#a238ff" stop-opacity="0"/></linearGradient><linearGradient id="linear-gradient-12" x1="620.52" y1="947.88" x2="926.18" y2="264.39" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#00ec00" stop-opacity="0"/><stop offset="0.28" stop-color="#00dc6d" stop-opacity="0.5"/><stop offset="0.5" stop-color="#00d1bb" stop-opacity="0.86"/><stop offset="0.6" stop-color="#00ccda"/><stop offset="0.68" stop-color="#04c9db"/><stop offset="0.75" stop-color="#0fc1df"/><stop offset="0.83" stop-color="#23b2e6"/><stop offset="0.9" stop-color="#3e9ef0"/><stop offset="0.98" stop-color="#6184fc"/><stop offset="0.99" stop-color="#6680fe"/></linearGradient><linearGradient id="linear-gradient-13" x1="680.88" y1="554.79" x2="536.1" y2="166.04" gradientTransform="matrix(1, 0, 0, -1, 0, 984)" gradientUnits="userSpaceOnUse"><stop offset="0" stop-color="#0083ff"/><stop offset="0.04" stop-color="#0083ff" stop-opacity="0.92"/><stop offset="0.14" stop-color="#0083ff" stop-opacity="0.71"/><stop offset="0.26" stop-color="#0083ff" stop-opacity="0.52"/><stop offset="0.37" stop-color="#0083ff" stop-opacity="0.36"/><stop offset="0.49" stop-color="#0083ff" stop-opacity="0.23"/><stop offset="0.61" stop-color="#0083ff" stop-opacity="0.13"/><stop offset="0.73" stop-color="#0083ff" stop-opacity="0.06"/><stop offset="0.86" stop-color="#0083ff" stop-opacity="0.01"/><stop offset="1" stop-color="#0083ff" stop-opacity="0"/></linearGradient></defs><title>firefox-logo-nightly</title><g id="Layer_2" data-name="Layer 2"><g id="Layer_1-2" data-name="Layer 1"><g id="Layer_2-2" data-name="Layer 2"><g id="Firefox"><path d="M770.28,91.56c-23.95,27.88-35.1,90.64-10.82,154.26s61.5,49.8,84.7,114.67c30.62,85.6,16.37,200.59,16.37,200.59s36.81,106.61,62.47-6.63C979.79,341.74,770.28,143.94,770.28,91.56Z" 
style="fill:url(#linear-gradient)"/><path id="_Path_" data-name=" Path " d="M476.92,972.83c245.24,0,443.9-199.74,443.9-446s-198.66-446-443.66-446S33.5,280.51,33.5,526.8C33,773.33,231.92,972.83,476.92,972.83Z" style="fill:url(#radial-gradient)"/><path d="M810.67,803.64a246.8,246.8,0,0,1-30.12,18.18,705.31,705.31,0,0,0,38.3-63c9.46-10.47,18.13-20.65,25.19-31.65,3.44-5.41,7.31-12.08,11.42-19.82,24.92-44.9,52.4-117.56,53.18-192.2v-5.66a257.25,257.25,0,0,0-5.71-55.75c.2,1.43.38,2.86.56,4.29-.22-1.1-.41-2.21-.64-3.31.37,2,.66,4,1,6,5.09,43.22,1.47,85.37-16.68,116.45-.29.45-.58.88-.87,1.32,9.41-47.23,12.56-99.39,2.09-151.6,0,0-4.19-25.38-35.38-102.44-18-44.35-49.83-80.72-78-107.21-24.69-30.55-47.11-51-59.47-64.06C689.72,126,678.9,105.61,674.45,92.31c-3.85-1.93-53.14-49.81-57.05-51.63-21.51,33.35-89.16,137.67-57,235.15,14.58,44.17,51.47,90,90.07,115.74,1.69,1.94,23,25,33.09,77.16,10.45,53.85,5,95.86-16.54,158C641.73,681.24,577,735.12,516.3,740.63c-129.67,11.78-177.15-65.11-177.15-65.11C385.49,694,436.72,690.17,467.87,671c31.4-19.43,50.39-33.83,65.81-28.15C548.86,648.43,561,632,550.1,615a78.5,78.5,0,0,0-79.4-34.57c-31.43,5.11-60.23,30-101.41,5.89a86.29,86.29,0,0,1-7.73-5.06c-2.71-1.79,8.83,2.72,6.13.69-8-4.35-22.2-13.84-25.88-17.22-.61-.56,6.22,2.18,5.61,1.62-38.51-31.71-33.7-53.13-32.49-66.57,1-10.75,8-24.52,19.75-30.11,5.69,3.11,9.24,5.48,9.24,5.48s-2.43-5-3.74-7.58c.46-.2.9-.15,1.36-.34,4.66,2.25,15,8.1,20.41,11.67,7.07,5,9.33,9.44,9.33,9.44s1.86-1,.48-5.37c-.5-1.78-2.65-7.45-9.65-13.17h.44A81.61,81.61,0,0,1,374.42,478c2-7.18,5.53-14.68,4.75-28.09-.48-9.43-.26-11.87-1.92-15.51-1.49-3.13.83-4.35,3.42-1.1a32.5,32.5,0,0,0-2.21-7.4v-.24c3.23-11.24,68.25-40.46,73-43.88A67.2,67.2,0,0,0,470.59,361c3.62-5.76,6.34-13.85,7-26.11.36-8.84-3.76-14.73-69.51-21.62-18-1.77-28.53-14.8-34.53-26.82-1.09-2.59-2.21-4.94-3.33-7.28a57.68,57.68,0,0,1-2.56-8.43c10.75-30.87,28.81-57,55.37-76.7,1.45-1.32-5.78.34-4.34-1,1.69-1.54,12.71-6,14.79-7,2.54-1.2-10.88-6.9-22.73-5.51-12.07,1.36-14.63,2.8-21.
07,5.53,2.67-2.66,11.17-6.15,9.18-6.13-13,2-29.18,9.56-43,18.12a10.66,10.66,0,0,1,.83-4.35c-6.44,2.73-22.26,13.79-26.87,23.14a44.29,44.29,0,0,0,.27-5.4,84.17,84.17,0,0,0-13.19,13.82l-.24.22c-37.36-15-70.23-16-98.05-9.28-6.09-6.11-9.06-1.64-22.91-32.07-.94-1.83.72,1.81,0,0-2.28-5.9,1.39,7.87,0,0-23.28,18.37-53.92,39.19-68.63,53.89-.18.59,17.16-4.9,0,0-6,1.72-5.6,5.28-6.51,37.5-.22,2.44,0,5.18-.22,7.38-11.75,15-19.75,27.64-22.78,34.21-15.19,26.18-31.93,67-48.15,131.55A334.82,334.82,0,0,1,75.2,398.36C61.71,432.63,48.67,486.44,46.07,569.3A482.08,482.08,0,0,1,58.6,518.64,473,473,0,0,0,93.33,719.71c9.33,22.82,24.76,57.46,51,95.4C226.9,902,343.31,956,472.21,956,606.79,956,727.64,897.13,810.67,803.64Z" style="fill:url(#linear-gradient-2)"/><path d="M810.67,803.64a246.8,246.8,0,0,1-30.12,18.18,705.31,705.31,0,0,0,38.3-63c9.46-10.47,18.13-20.65,25.19-31.65,3.44-5.41,7.31-12.08,11.42-19.82,24.92-44.9,52.4-117.56,53.18-192.2v-5.66a257.25,257.25,0,0,0-5.71-55.75c.2,1.43.38,2.86.56,4.29-.22-1.1-.41-2.21-.64-3.31.37,2,.66,4,1,6,5.09,43.22,1.47,85.37-16.68,116.45-.29.45-.58.88-.87,1.32,9.41-47.23,12.56-99.39,2.09-151.6,0,0-4.19-25.38-35.38-102.44-18-44.35-49.83-80.72-78-107.21-24.69-30.55-47.11-51-59.47-64.06C689.72,126,678.9,105.61,674.45,92.31c-3.85-1.93-53.14-49.81-57.05-51.63-21.51,33.35-89.16,137.67-57,235.15,14.58,44.17,51.47,90,90.07,115.74,1.69,1.94,23,25,33.09,77.16,10.45,53.85,5,95.86-16.54,158C641.73,681.24,577,735.12,516.3,740.63c-129.67,11.78-177.15-65.11-177.15-65.11C385.49,694,436.72,690.17,467.87,671c31.4-19.43,50.39-33.83,65.81-28.15C548.86,648.43,561,632,550.1,615a78.5,78.5,0,0,0-79.4-34.57c-31.43,5.11-60.23,30-101.41,5.89a86.29,86.29,0,0,1-7.73-5.06c-2.71-1.79,8.83,2.72,6.13.69-8-4.35-22.2-13.84-25.88-17.22-.61-.56,6.22,2.18,5.61,1.62-38.51-31.71-33.7-53.13-32.49-66.57,1-10.75,8-24.52,19.75-30.11,5.69,3.11,9.24,5.48,9.24,5.48s-2.43-5-3.74-7.58c.46-.2.9-.15,1.36-.34,4.66,2.25,15,8.1,20.41,11.67,7.07,5,9.33,9.44,9.33,9.44s1.86-1,.48-5.37c-.5-1.78-2.65-7.45-9.65-13.
17h.44A81.61,81.61,0,0,1,374.42,478c2-7.18,5.53-14.68,4.75-28.09-.48-9.43-.26-11.87-1.92-15.51-1.49-3.13.83-4.35,3.42-1.1a32.5,32.5,0,0,0-2.21-7.4v-.24c3.23-11.24,68.25-40.46,73-43.88A67.2,67.2,0,0,0,470.59,361c3.62-5.76,6.34-13.85,7-26.11.36-8.84-3.76-14.73-69.51-21.62-18-1.77-28.53-14.8-34.53-26.82-1.09-2.59-2.21-4.94-3.33-7.28a57.68,57.68,0,0,1-2.56-8.43c10.75-30.87,28.81-57,55.37-76.7,1.45-1.32-5.78.34-4.34-1,1.69-1.54,12.71-6,14.79-7,2.54-1.2-10.88-6.9-22.73-5.51-12.07,1.36-14.63,2.8-21.07,5.53,2.67-2.66,11.17-6.15,9.18-6.13-13,2-29.18,9.56-43,18.12a10.66,10.66,0,0,1,.83-4.35c-6.44,2.73-22.26,13.79-26.87,23.14a44.29,44.29,0,0,0,.27-5.4,84.17,84.17,0,0,0-13.19,13.82l-.24.22c-37.36-15-70.23-16-98.05-9.28-6.09-6.11-9.06-1.64-22.91-32.07-.94-1.83.72,1.81,0,0-2.28-5.9,1.39,7.87,0,0-23.28,18.37-53.92,39.19-68.63,53.89-.18.59,17.16-4.9,0,0-6,1.72-5.6,5.28-6.51,37.5-.22,2.44,0,5.18-.22,7.38-11.75,15-19.75,27.64-22.78,34.21-15.19,26.18-31.93,67-48.15,131.55A334.82,334.82,0,0,1,75.2,398.36C61.71,432.63,48.67,486.44,46.07,569.3A482.08,482.08,0,0,1,58.6,518.64,473,473,0,0,0,93.33,719.71c9.33,22.82,24.76,57.46,51,95.4C226.9,902,343.31,956,472.21,956,606.79,956,727.64,897.13,810.67,803.64Z" style="fill:url(#linear-gradient-3)"/><path d="M711.1,866.71c162.87-18.86,235-186.7,142.38-190C769.85,674,634,875.61,711.1,866.71Z" style="fill:url(#linear-gradient-4)"/><path d="M865.21,642.42C977.26,577.21,948,436.34,948,436.34s-43.25,50.24-72.62,130.32C846.4,646,797.84,681.81,865.21,642.42Z" style="fill:url(#linear-gradient-5)"/><path d="M509.47,950.06C665.7,999.91,800,876.84,717.21,835.74,642,798.68,435.32,926.49,509.47,950.06Z" style="fill:url(#linear-gradient-6)"/><path 
d="M638.58,21.42l.53-.57A1.7,1.7,0,0,0,638.58,21.42ZM876.85,702.23c3.8-5.36,8.94-22.53,13.48-30.21,27.58-44.52,27.78-80,27.78-80.84,16.66-83.22,15.15-117.2,4.9-180-8.25-50.6-44.32-123.09-75.57-158-32.2-36-9.51-24.25-40.69-50.52-27.33-30.29-53.82-60.29-68.25-72.36C634.22,43.09,636.57,24.58,638.58,21.42c-.34.37-.84.92-1.47,1.64C635.87,18.14,635,14,635,14s-57,57-69,152c-7.83,62,15.38,126.68,49,168a381.62,381.62,0,0,0,59,58h0c25.4,36.48,39.38,81.49,39.38,129.91,0,121.24-98.34,219.53-219.65,219.53a220.14,220.14,0,0,1-49.13-5.52c-57.24-10.92-90.3-39.8-106.78-59.41-9.45-11.23-13.46-19.42-13.46-19.42,51.28,18.37,108,14.53,142.47-4.52,34.75-19.26,55.77-33.55,72.84-27.92,16.82,5.61,30.21-10.67,18.2-27.54-11.77-16.85-42.4-41-87.88-34.29-34.79,5.07-66.66,29.76-112.24,5.84a97.34,97.34,0,0,1-8.55-5c-3-1.77,9.77,2.69,6.79.68-8.87-4.32-24.57-13.73-28.64-17.07-.68-.56,6.88,2.16,6.2,1.6-42.62-31.45-37.3-52.69-36-66,1.07-10.66,8.81-24.32,21.86-29.86,6.3,3.08,10.23,5.43,10.23,5.43s-2.69-4.92-4.14-7.51c.51-.19,1-.15,1.5-.34,5.16,2.23,16.58,8,22.59,11.57,7.83,4.95,10.32,9.36,10.32,9.36s2.06-1,.54-5.33c-.56-1.77-2.93-7.39-10.68-13.07h.48a91.65,91.65,0,0,1,13.13,8.17c2.19-7.12,6.12-14.56,5.25-27.86-.53-9.35-.28-11.78-2.12-15.39-1.65-3.1.92-4.31,3.78-1.09a29.73,29.73,0,0,0-2.44-7.34v-.24c3.57-11.14,75.53-40.12,80.77-43.51a70.24,70.24,0,0,0,21.17-20.63c4-5.72,7-13.73,7.75-25.89.25-5.48-1.44-9.82-20.5-14-11.44-2.49-29.14-4.91-56.43-7.47-19.9-1.76-31.58-14.68-38.21-26.6-1.21-2.57-2.45-4.9-3.68-7.22a53.41,53.41,0,0,1-2.83-8.36,158.47,158.47,0,0,1,61.28-76.06c1.6-1.31-6.4.33-4.8-1,1.87-1.52,14.06-5.93,16.37-6.92,2.81-1.19-12-6.84-25.16-5.47-13.36,1.35-16.19,2.78-23.32,5.49,3-2.64,12.37-6.1,10.16-6.08-14.4,2-32.3,9.48-47.6,18a9.72,9.72,0,0,1,.92-4.31c-7.13,2.71-24.64,13.67-29.73,23a39.79,39.79,0,0,0,.29-5.35,88.55,88.55,0,0,0-14.6,13.7l-.27.22C258.14,196,221.75,195,191,201.72c-6.74-6.06-17.57-15.23-32.89-45.4-1-1.82-1.6,3.75-2.4,2-6-13.81-9.55-36.44-9-52,0,0-12.32,5.61-22.51,29.06-1.89,4.21-3.11,
6.54-4.32,8.87-.56.68,1.27-7.7,1-7.24-1.77,3-6.36,7.19-8.37,12.62-1.38,4-3.32,6.27-4.56,11.29l-.29.46c-.1-1.48.37-6.08,0-5.14A235.4,235.4,0,0,0,95.34,186c-5.49,18-11.88,42.61-12.89,74.57-.24,2.42,0,5.14-.25,7.32-13,14.83-21.86,27.39-25.2,33.91-16.81,26-35.33,66.44-53.29,130.46a319.35,319.35,0,0,1,28.54-50C17.32,416.25,2.89,469.62,0,551.8a436.92,436.92,0,0,1,13.87-50.24C11.29,556.36,17.68,624.3,52.32,701c20.57,45,67.92,136.6,183.62,208h0s39.36,29.3,107,51.26c5,1.81,10.06,3.6,15.23,5.33q-2.43-1-4.71-2A484.9,484.9,0,0,0,492.27,984c175.18.15,226.85-70.2,226.85-70.2l-.51.38q3.71-3.49,7.14-7.26c-27.64,26.08-90.75,27.84-114.3,26,40.22-11.81,66.69-21.81,118.17-41.52q9-3.36,18.48-7.64l2-.94c1.25-.58,2.49-1.13,3.75-1.74a349.3,349.3,0,0,0,70.26-44c51.7-41.3,63-81.56,68.83-108.1-.82,2.54-3.37,8.47-5.17,12.32-13.31,28.48-42.84,46-74.91,61a689.05,689.05,0,0,0,42.38-62.44C865.77,729.39,869,713.15,876.85,702.23Z" style="fill:url(#radial-gradient-2)"/><path d="M813.92,801c21.08-23.24,40-49.82,54.35-80,36.9-77.58,94-206.58,49-341.31C881.77,273.22,833,215,771.11,158.12,670.56,65.76,642.48,24.52,642.48,0c0,0-116.09,129.41-65.74,264.38s153.46,130,221.68,270.87c80.27,165.74-64.95,346.61-185,397.24,7.35-1.63,267-60.38,280.61-208.88C893.68,726.34,887.83,767.41,813.92,801Z" style="fill:url(#linear-gradient-7)"/><path d="M477.59,319.37c.39-8.77-4.16-14.66-76.68-21.46-29.84-2.76-41.26-30.33-44.75-41.94-10.61,27.56-15,56.49-12.64,91.48,1.61,22.92,17,47.52,24.37,62,0,0,1.64-2.13,2.39-2.91,13.86-14.43,71.94-36.42,77.39-39.54C453.69,363.16,476.58,346.44,477.59,319.37Z" style="fill:url(#linear-gradient-8)"/><path d="M477.59,319.37c.39-8.77-4.16-14.66-76.68-21.46-29.84-2.76-41.26-30.33-44.75-41.94-10.61,27.56-15,56.49-12.64,91.48,1.61,22.92,17,47.52,24.37,62,0,0,1.64-2.13,2.39-2.91,13.86-14.43,71.94-36.42,77.39-39.54C453.69,363.16,476.58,346.44,477.59,319.37Z" style="opacity:0.5;isolation:isolate;fill:url(#radial-gradient-3)"/><path 
d="M158.31,156.47c-1-1.82-1.6,3.75-2.4,2-6-13.81-9.58-36.2-8.72-52,0,0-12.32,5.61-22.51,29.06-1.89,4.21-3.11,6.54-4.32,8.86-.56.68,1.27-7.7,1-7.24-1.77,3-6.36,7.19-8.35,12.38-1.65,4.24-3.35,6.52-4.61,11.77-.39,1.43.39-6.32,0-5.38C84.72,201.68,80.19,271,82.69,268,133.17,214.14,191,201.36,191,201.36c-6.15-4.53-19.53-17.63-32.7-44.89Z" style="fill:url(#linear-gradient-9)"/><path d="M349.84,720.1c-69.72-29.77-149-71.75-146-167.14C207.92,427.35,321,452.18,321,452.18c-4.27,1-15.68,9.16-19.72,17.82-4.27,10.83-12.07,35.28,11.55,60.9,37.09,40.19-76.2,95.36,98.66,199.57,4.41,2.4-41-1.43-61.64-10.36Z" style="fill:url(#linear-gradient-10)"/><path d="M325.07,657.5c49.44,17.21,107,14.19,141.52-4.86,23.09-12.85,52.7-33.43,70.92-28.35-15.78-6.24-27.73-9.15-42.1-9.86-2.45,0-5.38,0-8-.32a136,136,0,0,0-15.76.86c-8.9.82-18.77,6.43-27.74,5.53-.48,0,8.7-3.77,8-3.61-4.75,1-9.92,1.21-15.37,1.88-3.47.39-6.45.82-9.89,1-103,8.73-190-55.81-190-55.81-7.41,25,33.17,74.3,88.52,93.57Z" style="opacity:0.5;isolation:isolate;fill:url(#linear-gradient-11)"/><path d="M813.74,801.65c104.16-102.27,156.86-226.58,134.58-366,0,0,8.9,71.5-24.85,144.63,16.21-71.39,18.1-160.11-25-252C841,205.64,746.45,141.11,710.35,114.19,655.66,73.4,633,31.87,632.57,23.3c-16.34,33.48-65.77,148.2-5.31,247,56.64,92.56,145.86,120,208.33,205C950.67,631.67,813.74,801.65,813.74,801.65Z" style="fill:url(#linear-gradient-12)"/><path d="M798.81,535.55C762.41,460.35,717,427.55,674,392c5,7,6.23,9.47,9,14,37.83,40.32,93.61,138.66,53.11,262.11C659.88,900.48,355,791.06,323,760.32,335.93,894.81,561,959.16,707.6,872,791,793,858.47,658.79,798.81,535.55Z" style="fill:url(#linear-gradient-13)"/></g></g></g></g></svg>`,
+      { headers: { "content-type": "image/svg+xml" } }
+    );
+  }
+
+  dump(
+    `A normal URL ${request.url} has been requested, let's fetch it from the network.\n`
+  );
+  return fetch(request);
+}
diff --git a/tools/profiler/tests/browser/simple.html b/tools/profiler/tests/browser/simple.html
new file mode 100644
index 0000000000..f7c32d02c3
--- /dev/null
+++ b/tools/profiler/tests/browser/simple.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8"/>
+  </head>
+  <body>
+    Testing
+  </body>
+</html>
diff --git a/tools/profiler/tests/browser/single_frame.html b/tools/profiler/tests/browser/single_frame.html
new file mode 100644
index 0000000000..ebdfc41da2
--- /dev/null
+++ b/tools/profiler/tests/browser/single_frame.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>Single Frame</title>
+</head>
+<body>
+  Single Frame
+</body>
+</html>
diff --git a/tools/profiler/tests/chrome/chrome.ini b/tools/profiler/tests/chrome/chrome.ini
new file mode 100644
index 0000000000..7089b8fb8e
--- /dev/null
+++ b/tools/profiler/tests/chrome/chrome.ini
@@ -0,0 +1,8 @@
+[DEFAULT]
+skip-if = tsan # Bug 1804081
+support-files=profiler_test_utils.js
+
+[test_profile_worker_bug_1428076.html]
+skip-if = os == 'android' && processor == 'arm' # Bug 1541291
+[test_profile_worker.html]
+skip-if = os == 'android' && processor == 'arm' # Bug 1541291
diff --git a/tools/profiler/tests/chrome/profiler_test_utils.js b/tools/profiler/tests/chrome/profiler_test_utils.js
new file mode 100644
index 0000000000..d2e4499b34
--- /dev/null
+++ b/tools/profiler/tests/chrome/profiler_test_utils.js
@@ -0,0 +1,66 @@
+"use strict";
+
+(function () {
+  async function startProfiler(settings) {
+    let startPromise = Services.profiler.StartProfiler(
+      settings.entries,
+      settings.interval,
+      settings.features,
+      settings.threads,
+      0,
+      settings.duration
+    );
+
+    info("Parent Profiler has started");
+
+    await startPromise;
+
+    info("Child profilers have started");
+  }
+
+  function getProfile() {
+    const profile = Services.profiler.getProfileData();
+    info(
+      "We got a profile, run the mochitest with `--keep-open true` to see the logged profile in the Web Console."
+    );
+
+    // Run the mochitest with `--keep-open true` to see the logged profile in the
+    // Web console.
+    console.log(profile);
+
+    return profile;
+  }
+
+  async function stopProfiler() {
+    let stopPromise = Services.profiler.StopProfiler();
+    info("Parent profiler has stopped");
+    await stopPromise;
+    info("Child profilers have stopped");
+  }
+
+  function end(error) {
+    if (error) {
+      ok(false, `We got an error: ${error}`);
+    } else {
+      ok(true, "We ran the whole process");
+    }
+    SimpleTest.finish();
+  }
+
+  async function runTest(settings, workload) {
+    SimpleTest.waitForExplicitFinish();
+    try {
+      await startProfiler(settings);
+      await workload();
+      await getProfile();
+      await stopProfiler();
+      await end();
+    } catch (e) {
+      // By catching and handling the error, we're being nice to mochitest
+      // runners: instead of waiting for the timeout, we fail right away.
+      await end(e);
+    }
+  }
+
+  window.runTest = runTest;
+})();
diff --git a/tools/profiler/tests/chrome/test_profile_worker.html b/tools/profiler/tests/chrome/test_profile_worker.html
new file mode 100644
index 0000000000..8e2bae7fbd
--- /dev/null
+++ b/tools/profiler/tests/chrome/test_profile_worker.html
@@ -0,0 +1,66 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=1428076
+-->
+<head>
+  <meta charset="utf-8">
+  <title>Test for Bug 1428076</title>
+  <link rel="stylesheet" type="text/css" href="chrome://global/skin"/>
+  <link rel="stylesheet" type="text/css" href="chrome://mochikit/content/tests/SimpleTest/test.css"/>
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1428076">Mozilla Bug 1428076</a>
+
+<script src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js"></script>
+<script type="application/javascript" src="profiler_test_utils.js"></script>
+<script type="application/javascript">
+/* globals runTest */
+
+"use strict";
+
+const settings = {
+  entries: 1000000, // 9MB
+  interval: 1, // ms
+  features: ["js", "stackwalk", "cpu"],
+  threads: ["GeckoMain", "Compositor", "Worker"], // most common combination
+};
+
+const workerCode = `
+  console.log('hello world');
+  setTimeout(() => postMessage('message from worker'), 50);
+`;
+
+function startWorker() {
+  // We use a Blob for the worker content to avoid an external JS file, and data
+  // URLs seem to be blocked in a chrome environment.
+  const workerContent = new Blob(
+    [ workerCode ],
+    { type: "application/javascript" }
+  );
+  const blobURL = URL.createObjectURL(workerContent);
+
+  // We start a worker and then terminate it right away to trigger our bug.
+  info("Starting the worker...");
+  const myWorker = new Worker(blobURL);
+  return { worker: myWorker, url: blobURL };
+}
+
+function workload() {
+  const { worker, url } = startWorker();
+
+  return new Promise(resolve => {
+    worker.onmessage = () => {
+      info("Got a message, terminating the worker.");
+      worker.terminate();
+      URL.revokeObjectURL(url);
+      resolve();
+    };
+  });
+}
+
+runTest(settings, workload);
+
+</script>
+</body>
+</html>
diff --git a/tools/profiler/tests/chrome/test_profile_worker_bug_1428076.html b/tools/profiler/tests/chrome/test_profile_worker_bug_1428076.html
new file mode 100644
index 0000000000..abe0e5748a
--- /dev/null
+++ b/tools/profiler/tests/chrome/test_profile_worker_bug_1428076.html
@@ -0,0 +1,58 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=1428076
+-->
+<head>
+  <meta charset="utf-8">
+  <title>Test for Bug 1428076</title>
+  <link rel="stylesheet" type="text/css" href="chrome://global/skin"/>
+  <link rel="stylesheet" type="text/css" href="chrome://mochikit/content/tests/SimpleTest/test.css"/>
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1428076">Mozilla Bug 1428076</a>
+
+<script src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js"></script>
+<script type="application/javascript" src="profiler_test_utils.js"></script>
+<script type="application/javascript">
+/** Test for Bug 1428076 **/
+
+/* globals runTest */
+
+"use strict";
+
+const settings = {
+  entries: 1000000, // 9MB
+  interval: 1, // ms
+  features: ["js", "stackwalk"],
+  threads: ["GeckoMain", "Compositor", "Worker"], // most common combination
+};
+
+function workload() {
+  // We use a Blob for the worker content to avoid an external JS file, and data
+  // URLs seem to be blocked in a chrome environment.
+  const workerContent = new Blob(
+    [ "console.log('hello world!')" ],
+    { type: "application/javascript" }
+  );
+  const blobURL = URL.createObjectURL(workerContent);
+
+  // We start a worker and then terminate it right away to trigger our bug.
+  info("Starting the worker, and terminate it right away.");
+  const myWorker = new Worker(blobURL);
+  myWorker.terminate();
+
+  URL.revokeObjectURL(blobURL);
+
+  // We're deferring some little time so that the worker has the time to be
+  // properly cleaned up and the profiler actually saves the worker data.
+  return new Promise(resolve => {
+    setTimeout(resolve, 50);
+  });
+}
+
+runTest(settings, workload);
+
+</script>
+</body>
+</html>
diff --git a/tools/profiler/tests/gtest/GeckoProfiler.cpp b/tools/profiler/tests/gtest/GeckoProfiler.cpp
new file mode 100644
index 0000000000..78456662f5
--- /dev/null
+++ b/tools/profiler/tests/gtest/GeckoProfiler.cpp
@@ -0,0 +1,5099 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file tests a lot of the profiler_*() functions in GeckoProfiler.h.
+// Most of the tests just check that nothing untoward (e.g. crashes, deadlocks)
+// happens when calling these functions. They don't do much inspection of
+// profiler internals.
+
+#include "mozilla/ProfilerThreadPlatformData.h"
+#include "mozilla/ProfilerThreadRegistration.h"
+#include "mozilla/ProfilerThreadRegistrationInfo.h"
+#include "mozilla/ProfilerThreadRegistry.h"
+#include "mozilla/ProfilerUtils.h"
+#include "mozilla/ProgressLogger.h"
+#include "mozilla/UniquePtrExtensions.h"
+
+#include "nsIThread.h"
+#include "nsThreadUtils.h"
+#include "prthread.h"
+
+#include "gtest/gtest.h"
+#include "mozilla/gtest/MozAssertions.h"
+
+#include <thread>
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#  include <processthreadsapi.h>
+#  include <realtimeapiset.h>
+#elif defined(__APPLE__)
+#  include <mach/thread_act.h>
+#endif
+
+#ifdef XP_WIN
+#include "mozilla/WindowsVersion.h"
+#endif
+
+#ifdef MOZ_GECKO_PROFILER
+
+#  include "GeckoProfiler.h"
+#  include "mozilla/ProfilerMarkerTypes.h"
+#  include "mozilla/ProfilerMarkers.h"
+#  include "NetworkMarker.h"
+#  include "platform.h"
+#  include "ProfileBuffer.h"
+#  include "ProfilerControl.h"
+
+#  include "js/Initialization.h"
+#  include "js/Printf.h"
+#  include "jsapi.h"
+#  include "json/json.h"
+#  include "mozilla/Atomics.h"
+#  include "mozilla/BlocksRingBuffer.h"
+#  include "mozilla/DataMutex.h"
+#  include "mozilla/ProfileBufferEntrySerializationGeckoExtensions.h"
+#  include "mozilla/ProfileJSONWriter.h"
+#  include "mozilla/ScopeExit.h"
+#  include "mozilla/net/HttpBaseChannel.h"
+#  include "nsIChannelEventSink.h"
+#  include "nsIThread.h"
+#  include "nsThreadUtils.h"
+
+#  include <cstring>
+#  include <set>
+
+#endif  // MOZ_GECKO_PROFILER
+
+// Note: profiler_init() has already been called in XRE_main(), so we can't
+// test it here. Likewise for profiler_shutdown(), and AutoProfilerInit
+// (which is just an RAII wrapper for profiler_init() and profiler_shutdown()).
+
+using namespace mozilla;
+
+TEST(GeckoProfiler, ProfilerUtils)
+{
+  profiler_init_main_thread_id();
+
+  // Process id: same type and value via profiler_* and baseprofiler::*.
+  static_assert(std::is_same_v<decltype(profiler_current_process_id()),
+                               ProfilerProcessId>);
+  static_assert(
+      std::is_same_v<decltype(profiler_current_process_id()),
+                     decltype(baseprofiler::profiler_current_process_id())>);
+  const ProfilerProcessId pid = profiler_current_process_id();
+  EXPECT_TRUE(pid.IsSpecified());
+  EXPECT_EQ(pid, baseprofiler::profiler_current_process_id());
+
+  // Thread id: same type and value via profiler_* and baseprofiler::*.
+  static_assert(
+      std::is_same_v<decltype(profiler_current_thread_id()), ProfilerThreadId>);
+  static_assert(
+      std::is_same_v<decltype(profiler_current_thread_id()),
+                     decltype(baseprofiler::profiler_current_thread_id())>);
+  EXPECT_EQ(profiler_current_thread_id(),
+            baseprofiler::profiler_current_thread_id());
+
+  // The thread running this test is expected to be the main thread.
+  const ProfilerThreadId thisThreadId = profiler_current_thread_id();
+  const ProfilerThreadId mainThreadId = profiler_main_thread_id();
+  EXPECT_TRUE(thisThreadId.IsSpecified());
+  EXPECT_TRUE(mainThreadId.IsSpecified());
+  EXPECT_EQ(mainThreadId, thisThreadId)
+      << "Test should run on the main thread";
+  EXPECT_TRUE(profiler_is_main_thread());
+
+  // A secondary thread shares the process id, but has its own thread id.
+  std::thread secondaryThread([&]() {
+    EXPECT_EQ(profiler_current_process_id(), pid);
+    const ProfilerThreadId secondaryId = profiler_current_thread_id();
+    EXPECT_TRUE(secondaryId.IsSpecified());
+    EXPECT_NE(secondaryId, mainThreadId);
+    EXPECT_FALSE(profiler_is_main_thread());
+    EXPECT_EQ(baseprofiler::profiler_current_process_id(), pid);
+    EXPECT_EQ(baseprofiler::profiler_current_thread_id(), secondaryId);
+    EXPECT_EQ(baseprofiler::profiler_main_thread_id(), mainThreadId);
+    EXPECT_FALSE(baseprofiler::profiler_is_main_thread());
+  });
+  secondaryThread.join();
+}
+
+TEST(GeckoProfiler, ThreadRegistrationInfo)
+{
+  profiler_init_main_thread_id();
+
+  TimeStamp ts = TimeStamp::Now();
+  {
+    profiler::ThreadRegistrationInfo trInfo{
+        "name", ProfilerThreadId::FromNumber(123), false, ts};
+    EXPECT_STREQ(trInfo.Name(), "name");
+    // Pointer (not content) comparison: the literal itself must not be kept.
+    EXPECT_NE(trInfo.Name(), "name")
+        << "ThreadRegistrationInfo should keep its own copy of the name";
+    EXPECT_EQ(trInfo.RegisterTime(), ts);
+    EXPECT_EQ(trInfo.ThreadId(), ProfilerThreadId::FromNumber(123));
+    EXPECT_EQ(trInfo.IsMainThread(), false);
+  }
+
+  // Make sure the next timestamp will be different from `ts`.
+  while (TimeStamp::Now() == ts) {
+  }
+
+  {
+    profiler::ThreadRegistrationInfo trInfoHere{"Here"};
+    EXPECT_STREQ(trInfoHere.Name(), "Here");
+    EXPECT_NE(trInfoHere.Name(), "Here")
+        << "ThreadRegistrationInfo should keep its own copy of the name";
+    TimeStamp baseRegistrationTime =
+        baseprofiler::detail::GetThreadRegistrationTime();
+    if (baseRegistrationTime) {
+      EXPECT_EQ(trInfoHere.RegisterTime(), baseRegistrationTime);
+    } else {
+      EXPECT_GT(trInfoHere.RegisterTime(), ts);
+    }
+    EXPECT_EQ(trInfoHere.ThreadId(), profiler_current_thread_id());
+    EXPECT_EQ(trInfoHere.ThreadId(), profiler_main_thread_id())
+        << "Gtests are assumed to run on the main thread";
+    EXPECT_EQ(trInfoHere.IsMainThread(), true)
+        << "Gtests are assumed to run on the main thread";
+  }
+
+  {
+    // Sub-thread test.
+    // These will receive sub-thread data (to test move at thread end).
+    TimeStamp tsThread;
+    ProfilerThreadId threadThreadId;
+    UniquePtr<profiler::ThreadRegistrationInfo> trInfoThreadPtr;
+    std::thread testThread([&]() {
+      profiler::ThreadRegistrationInfo trInfoThread{"Thread"};
+      EXPECT_STREQ(trInfoThread.Name(), "Thread");
+      EXPECT_NE(trInfoThread.Name(), "Thread")
+          << "ThreadRegistrationInfo should keep its own copy of the name";
+      EXPECT_GT(trInfoThread.RegisterTime(), ts);
+      EXPECT_EQ(trInfoThread.ThreadId(), profiler_current_thread_id());
+      EXPECT_NE(trInfoThread.ThreadId(), profiler_main_thread_id());
+      EXPECT_EQ(trInfoThread.IsMainThread(), false);
+
+      tsThread = trInfoThread.RegisterTime();
+      threadThreadId = trInfoThread.ThreadId();
+      trInfoThreadPtr =
+          MakeUnique<profiler::ThreadRegistrationInfo>(std::move(trInfoThread));
+    });
+    testThread.join();
+
+    // The info moved out of the (now finished) sub-thread keeps its values.
+    ASSERT_NE(trInfoThreadPtr, nullptr);
+    EXPECT_STREQ(trInfoThreadPtr->Name(), "Thread");
+    EXPECT_EQ(trInfoThreadPtr->RegisterTime(), tsThread);
+    EXPECT_EQ(trInfoThreadPtr->ThreadId(), threadThreadId);
+    EXPECT_EQ(trInfoThreadPtr->IsMainThread(), false)
+        << "Info captured on a sub-thread should not claim to be main-thread";
+  }
+}
+
+// Each individual feature, followed by `Any`; NotProfiled is not included.
+static constexpr ThreadProfilingFeatures scEachAndAnyThreadProfilingFeatures[] =
+    {ThreadProfilingFeatures::CPUUtilization, ThreadProfilingFeatures::Sampling,
+     ThreadProfilingFeatures::Markers, ThreadProfilingFeatures::Any};
+
+TEST(GeckoProfiler, ThreadProfilingFeaturesType)
+{
+  using Features = ThreadProfilingFeatures;
+
+  ASSERT_EQ(static_cast<uint32_t>(Features::Any), 1u + 2u + 4u)
+      << "This test assumes that there are 3 binary choices 1+2+4; "
+         "Is this test up to date?";
+
+  // `Any` is expected to be the union of the three individual features.
+  EXPECT_EQ(Combine(Features::CPUUtilization, Features::Sampling,
+                    Features::Markers),
+            Features::Any);
+
+  // Unlike the file-level list, this one also contains NotProfiled.
+  constexpr Features everyValue[] = {
+      Features::NotProfiled, Features::CPUUtilization, Features::Sampling,
+      Features::Markers, Features::Any};
+
+  for (const Features lhs : everyValue) {
+    // Argument order must not matter for Combine nor Intersect.
+    for (const Features rhs : everyValue) {
+      EXPECT_EQ(Combine(lhs, rhs), Combine(rhs, lhs));
+      EXPECT_EQ(Intersect(lhs, rhs), Intersect(rhs, lhs));
+    }
+
+    // Combine behaves like a bitwise OR: idempotent...
+    EXPECT_EQ(Combine(lhs, lhs), lhs);
+    EXPECT_EQ(Combine(lhs, lhs, lhs), lhs);
+
+    // ...with NotProfiled as the neutral element...
+    EXPECT_EQ(Combine(lhs, Features::NotProfiled), lhs);
+
+    // ...and Any as the absorbing element.
+    EXPECT_EQ(Combine(lhs, Features::Any), Features::Any);
+
+    // Intersect behaves like a bitwise AND: idempotent...
+    EXPECT_EQ(Intersect(lhs, lhs), lhs);
+    EXPECT_EQ(Intersect(lhs, lhs, lhs), lhs);
+
+    // ...with NotProfiled as the absorbing element...
+    EXPECT_EQ(Intersect(lhs, Features::NotProfiled), Features::NotProfiled);
+
+    // ...and Any as the neutral element.
+    EXPECT_EQ(Intersect(lhs, Features::Any), lhs);
+  }
+
+  for (const Features feature : scEachAndAnyThreadProfilingFeatures) {
+    EXPECT_TRUE(DoFeaturesIntersect(feature, feature));
+
+    // Nothing intersects with NotProfiled.
+    EXPECT_FALSE(DoFeaturesIntersect(feature, Features::NotProfiled));
+
+    // Every listed feature intersects with Any.
+    EXPECT_TRUE(DoFeaturesIntersect(feature, Features::Any));
+  }
+}
+
+// Checks what an UnlockedConstReader exposes for a non-main thread registered
+// as "Test thread" between aBeforeRegistration and aAfterRegistration.
+static void TestConstUnlockedConstReader(
+    const profiler::ThreadRegistration::UnlockedConstReader& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  EXPECT_STREQ(aData.Info().Name(), "Test thread");
+  EXPECT_GE(aData.Info().RegisterTime(), aBeforeRegistration);
+  EXPECT_LE(aData.Info().RegisterTime(), aAfterRegistration);
+  EXPECT_EQ(aData.Info().ThreadId(), aThreadId);
+  EXPECT_FALSE(aData.Info().IsMainThread());
+#if (defined(_MSC_VER) || defined(__MINGW32__)) && defined(MOZ_GECKO_PROFILER)
+  HANDLE threadHandle = aData.PlatformDataCRef().ProfiledThread();
+  EXPECT_NE(threadHandle, nullptr);
+  EXPECT_EQ(ProfilerThreadId::FromNumber(::GetThreadId(threadHandle)),
+            aThreadId);
+  // Test calling QueryThreadCycleTime, we cannot assume that it will always
+  // work, but at least it shouldn't crash.
+  ULONG64 cycles;
+  (void)QueryThreadCycleTime(threadHandle, &cycles);
+#elif defined(__APPLE__) && defined(MOZ_GECKO_PROFILER)
+  // Test calling thread_info, we cannot assume that it will always work, but at
+  // least it shouldn't crash.
+  thread_basic_info_data_t threadBasicInfo;
+  mach_msg_type_number_t basicCount = THREAD_BASIC_INFO_COUNT;
+  (void)thread_info(
+      aData.PlatformDataCRef().ProfiledThread(), THREAD_BASIC_INFO,
+      reinterpret_cast<thread_info_t>(&threadBasicInfo), &basicCount);
+#elif (defined(__linux__) || defined(__ANDROID__) || defined(__FreeBSD__)) && \
+    defined(MOZ_GECKO_PROFILER)
+  // Test calling GetClockId, we cannot assume that it will always work, but at
+  // least it shouldn't crash.
+  Maybe<clockid_t> maybeClockId = aData.PlatformDataCRef().GetClockId();
+  if (maybeClockId) {
+    // Test calling clock_gettime, we cannot assume that it will always work,
+    // but at least it shouldn't crash.
+    timespec ts;
+    (void)clock_gettime(*maybeClockId, &ts);
+  }
+#else
+  (void)aData.PlatformDataCRef();
+#endif
+
+  EXPECT_GE(aData.StackTop(), aOnStackObject)
+      << "StackTop should be at &onStackChar, or higher on some platforms";
+}
+
+// UnlockedConstReader checks, plus the const accessors available on aData.
+static void TestConstUnlockedConstReaderAndAtomicRW(
+    const profiler::ThreadRegistration::UnlockedConstReaderAndAtomicRW& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstUnlockedConstReader(aData, aBeforeRegistration, aAfterRegistration,
+                               aOnStackObject, aThreadId);
+  // Only exercises the accessor; the returned stack is not inspected.
+  (void)aData.ProfilingStackCRef();
+  // Expected state of the test thread: not profiled, and not sleeping.
+  EXPECT_EQ(aData.ProfilingFeatures(), ThreadProfilingFeatures::NotProfiled);
+  EXPECT_FALSE(aData.IsSleeping());
+}
+
+// Const checks first, then the sleep-related setters; ends with SetAwake().
+static void TestUnlockedConstReaderAndAtomicRW(
+    profiler::ThreadRegistration::UnlockedConstReaderAndAtomicRW& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstUnlockedConstReaderAndAtomicRW(aData, aBeforeRegistration,
+                                          aAfterRegistration, aOnStackObject,
+                                          aThreadId);
+  (void)aData.ProfilingStackRef();
+
+  EXPECT_FALSE(aData.IsSleeping());
+  aData.SetSleeping();
+  EXPECT_TRUE(aData.IsSleeping());
+  aData.SetAwake();
+  EXPECT_FALSE(aData.IsSleeping());
+
+  aData.ReinitializeOnResume();
+  // While awake, no call is expected to allow duplicating the last sample.
+  EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep());
+  EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep());
+  aData.SetSleeping();
+  // After sleeping, the 2nd+ calls can duplicate.
+  EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep());
+  EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep());
+  EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep());
+  aData.ReinitializeOnResume();
+  // After reinit (and sleeping), the 2nd+ calls can duplicate.
+  EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep());
+  EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep());
+  EXPECT_TRUE(aData.CanDuplicateLastSampleDueToSleep());
+  aData.SetAwake();
+  EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep());
+  EXPECT_FALSE(aData.CanDuplicateLastSampleDueToSleep());
+}
+
+// Const-level checks, plus a compile-time check of GetProfiledThreadData().
+static void TestConstUnlockedRWForLockedProfiler(
+    const profiler::ThreadRegistration::UnlockedRWForLockedProfiler& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstUnlockedConstReaderAndAtomicRW(aData, aBeforeRegistration,
+                                          aAfterRegistration, aOnStackObject,
+                                          aThreadId);
+  // We can't create a PSAutoLock here, so just verify that the call would
+  // compile and return the expected type.
+  static_assert(std::is_same_v<decltype(aData.GetProfiledThreadData(
+                                   std::declval<PSAutoLock>())),
+                               const ProfiledThreadData*>);
+}
+
+// Const-level checks; also, no JSContext is expected on the test thread.
+static void TestConstUnlockedReaderAndAtomicRWOnThread(
+    const profiler::ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
+        aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration,
+                                       aAfterRegistration, aOnStackObject,
+                                       aThreadId);
+  EXPECT_EQ(aData.GetJSContext(), nullptr);
+}
+
+// Delegates to the const checks, then to the non-const lower-level checks.
+static void TestUnlockedRWForLockedProfiler(
+    profiler::ThreadRegistration::UnlockedRWForLockedProfiler& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration,
+                                       aAfterRegistration, aOnStackObject,
+                                       aThreadId);
+  TestUnlockedConstReaderAndAtomicRW(aData, aBeforeRegistration,
+                                     aAfterRegistration, aOnStackObject,
+                                     aThreadId);
+  // No functions to test here.
+}
+
+// Delegates to the const checks, then to the non-const lower-level checks.
+static void TestUnlockedReaderAndAtomicRWOnThread(
+    profiler::ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstUnlockedReaderAndAtomicRWOnThread(aData, aBeforeRegistration,
+                                             aAfterRegistration, aOnStackObject,
+                                             aThreadId);
+  TestUnlockedRWForLockedProfiler(aData, aBeforeRegistration,
+                                  aAfterRegistration, aOnStackObject,
+                                  aThreadId);
+  // No functions to test here.
+}
+
+// Const-level checks; no JS frame buffer nor event target is expected.
+static void TestConstLockedRWFromAnyThread(
+    const profiler::ThreadRegistration::LockedRWFromAnyThread& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstUnlockedReaderAndAtomicRWOnThread(aData, aBeforeRegistration,
+                                             aAfterRegistration, aOnStackObject,
+                                             aThreadId);
+  EXPECT_EQ(aData.GetJsFrameBuffer(), nullptr);
+  EXPECT_EQ(aData.GetEventTarget(), nullptr);
+}
+
+// Const and lower-level checks first, then calls to the non-const functions.
+static void TestLockedRWFromAnyThread(
+    profiler::ThreadRegistration::LockedRWFromAnyThread& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstLockedRWFromAnyThread(aData, aBeforeRegistration, aAfterRegistration,
+                                 aOnStackObject, aThreadId);
+  TestUnlockedReaderAndAtomicRWOnThread(aData, aBeforeRegistration,
+                                        aAfterRegistration, aOnStackObject,
+                                        aThreadId);
+  // We can't create a ProfiledThreadData nor PSAutoLock here, so just verify
+  // that the call would compile and return the expected type.
+  static_assert(std::is_same_v<decltype(aData.SetProfilingFeaturesAndData(
+                                   std::declval<ThreadProfilingFeatures>(),
+                                   std::declval<ProfiledThreadData*>(),
+                                   std::declval<PSAutoLock>())),
+                               void>);
+
+  aData.ResetMainThread(nullptr);
+  // Both outputs start at 1s, and are expected to be overwritten with zero.
+  TimeDuration delay = TimeDuration::FromSeconds(1);
+  TimeDuration running = TimeDuration::FromSeconds(1);
+  aData.GetRunningEventDelay(TimeStamp::Now(), delay, running);
+  EXPECT_TRUE(delay.IsZero());
+  EXPECT_TRUE(running.IsZero());
+
+  aData.StartJSSampling(123u);
+  aData.StopJSSampling();
+}
+
+// Only forwards to the const LockedRWFromAnyThread checks.
+static void TestConstLockedRWOnThread(
+    const profiler::ThreadRegistration::LockedRWOnThread& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstLockedRWFromAnyThread(aData, aBeforeRegistration, aAfterRegistration,
+                                 aOnStackObject, aThreadId);
+  // No functions to test here.
+}
+
+// Const and any-thread checks, then ClearJSContext() and PollJSSampling().
+static void TestLockedRWOnThread(
+    profiler::ThreadRegistration::LockedRWOnThread& aData,
+    const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
+    const void* aOnStackObject,
+    ProfilerThreadId aThreadId = profiler_current_thread_id()) {
+  TestConstLockedRWOnThread(aData, aBeforeRegistration, aAfterRegistration,
+                            aOnStackObject, aThreadId);
+  TestLockedRWFromAnyThread(aData, aBeforeRegistration, aAfterRegistration,
+                            aOnStackObject, aThreadId);
+  // We don't want to really call SetJSContext here, so just verify that
+  // the call would compile and return the expected type.
+  static_assert(
+      std::is_same_v<decltype(aData.SetJSContext(std::declval<JSContext*>())),
+                     void>);
+  aData.ClearJSContext();
+  aData.PollJSSampling();
+}
+
+TEST(GeckoProfiler, ThreadRegistration_DataAccess)
+{
+  using TR = profiler::ThreadRegistration;
+
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test assumes it runs on the main thread";
+
+  // The main thread could already be registered, so everything below runs in a
+  // new thread, where the test controls the actual registration.
+
+  std::thread testThread([&]() {
+    ASSERT_FALSE(TR::IsRegistered())
+    << "A new std::thread should not start registered";
+    EXPECT_FALSE(TR::GetOnThreadPtr());
+    EXPECT_FALSE(TR::WithOnThreadRefOr([&](auto) { return true; }, false));
+    // Only the address of this object is used, see the StackTop() checks.
+    char onStackChar;
+
+    TimeStamp beforeRegistration = TimeStamp::Now();
+    TR tr{"Test thread", &onStackChar};
+    TimeStamp afterRegistration = TimeStamp::Now();
+
+    ASSERT_TRUE(TR::IsRegistered());
+
+    // Note: This test will mostly be about checking the correct access to
+    // thread data, depending on how it's obtained. Not all the functionality
+    // related to that data is tested (e.g., because it involves JS or other
+    // external dependencies that would be difficult to control here).
+
+    auto TestOnThreadRef = [&](TR::OnThreadRef aOnThreadRef) {
+      // To test const-qualified member functions.
+      const TR::OnThreadRef& onThreadCRef = aOnThreadRef;
+      // Data is reached directly (reference or lock object), then by callback.
+      // const UnlockedConstReader (always const)
+
+      TestConstUnlockedConstReader(onThreadCRef.UnlockedConstReaderCRef(),
+                                   beforeRegistration, afterRegistration,
+                                   &onStackChar);
+      onThreadCRef.WithUnlockedConstReader(
+          [&](const TR::UnlockedConstReader& aData) {
+            TestConstUnlockedConstReader(aData, beforeRegistration,
+                                         afterRegistration, &onStackChar);
+          });
+
+      // const UnlockedConstReaderAndAtomicRW
+
+      TestConstUnlockedConstReaderAndAtomicRW(
+          onThreadCRef.UnlockedConstReaderAndAtomicRWCRef(), beforeRegistration,
+          afterRegistration, &onStackChar);
+      onThreadCRef.WithUnlockedConstReaderAndAtomicRW(
+          [&](const TR::UnlockedConstReaderAndAtomicRW& aData) {
+            TestConstUnlockedConstReaderAndAtomicRW(
+                aData, beforeRegistration, afterRegistration, &onStackChar);
+          });
+
+      // non-const UnlockedConstReaderAndAtomicRW
+
+      TestUnlockedConstReaderAndAtomicRW(
+          aOnThreadRef.UnlockedConstReaderAndAtomicRWRef(), beforeRegistration,
+          afterRegistration, &onStackChar);
+      aOnThreadRef.WithUnlockedConstReaderAndAtomicRW(
+          [&](TR::UnlockedConstReaderAndAtomicRW& aData) {
+            TestUnlockedConstReaderAndAtomicRW(aData, beforeRegistration,
+                                               afterRegistration, &onStackChar);
+          });
+
+      // const UnlockedRWForLockedProfiler
+
+      TestConstUnlockedRWForLockedProfiler(
+          onThreadCRef.UnlockedRWForLockedProfilerCRef(), beforeRegistration,
+          afterRegistration, &onStackChar);
+      onThreadCRef.WithUnlockedRWForLockedProfiler(
+          [&](const TR::UnlockedRWForLockedProfiler& aData) {
+            TestConstUnlockedRWForLockedProfiler(
+                aData, beforeRegistration, afterRegistration, &onStackChar);
+          });
+
+      // non-const UnlockedRWForLockedProfiler
+
+      TestUnlockedRWForLockedProfiler(
+          aOnThreadRef.UnlockedRWForLockedProfilerRef(), beforeRegistration,
+          afterRegistration, &onStackChar);
+      aOnThreadRef.WithUnlockedRWForLockedProfiler(
+          [&](TR::UnlockedRWForLockedProfiler& aData) {
+            TestUnlockedRWForLockedProfiler(aData, beforeRegistration,
+                                            afterRegistration, &onStackChar);
+          });
+
+      // const UnlockedReaderAndAtomicRWOnThread
+
+      TestConstUnlockedReaderAndAtomicRWOnThread(
+          onThreadCRef.UnlockedReaderAndAtomicRWOnThreadCRef(),
+          beforeRegistration, afterRegistration, &onStackChar);
+      onThreadCRef.WithUnlockedReaderAndAtomicRWOnThread(
+          [&](const TR::UnlockedReaderAndAtomicRWOnThread& aData) {
+            TestConstUnlockedReaderAndAtomicRWOnThread(
+                aData, beforeRegistration, afterRegistration, &onStackChar);
+          });
+
+      // non-const UnlockedReaderAndAtomicRWOnThread
+
+      TestUnlockedReaderAndAtomicRWOnThread(
+          aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadRef(),
+          beforeRegistration, afterRegistration, &onStackChar);
+      aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread(
+          [&](TR::UnlockedReaderAndAtomicRWOnThread& aData) {
+            TestUnlockedReaderAndAtomicRWOnThread(
+                aData, beforeRegistration, afterRegistration, &onStackChar);
+          });
+
+      // LockedRWFromAnyThread
+      // Note: It cannot directly be accessed on the thread, this will be
+      // tested through LockedRWOnThread.
+
+      // const LockedRWOnThread
+      // The data mutex should be locked only while the locked data is exposed.
+      EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+      {
+        TR::OnThreadRef::ConstRWOnThreadWithLock constRWOnThreadWithLock =
+            onThreadCRef.ConstLockedRWOnThread();
+        EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+        TestConstLockedRWOnThread(constRWOnThreadWithLock.DataCRef(),
+                                  beforeRegistration, afterRegistration,
+                                  &onStackChar);
+      }
+      EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+      onThreadCRef.WithConstLockedRWOnThread(
+          [&](const TR::LockedRWOnThread& aData) {
+            EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+            TestConstLockedRWOnThread(aData, beforeRegistration,
+                                      afterRegistration, &onStackChar);
+          });
+      EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+
+      // non-const LockedRWOnThread
+      // Same locking expectations as in the const case.
+      EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+      {
+        TR::OnThreadRef::RWOnThreadWithLock rwOnThreadWithLock =
+            aOnThreadRef.GetLockedRWOnThread();
+        EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+        TestConstLockedRWOnThread(rwOnThreadWithLock.DataCRef(),
+                                  beforeRegistration, afterRegistration,
+                                  &onStackChar);
+        TestLockedRWOnThread(rwOnThreadWithLock.DataRef(), beforeRegistration,
+                             afterRegistration, &onStackChar);
+      }
+      EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+      aOnThreadRef.WithLockedRWOnThread([&](TR::LockedRWOnThread& aData) {
+        EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+        TestLockedRWOnThread(aData, beforeRegistration, afterRegistration,
+                             &onStackChar);
+      });
+      EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+    };
+    // Run all the checks through each way of obtaining an OnThreadRef.
+    TR::OnThreadPtr onThreadPtr = TR::GetOnThreadPtr();
+    ASSERT_TRUE(onThreadPtr);
+    TestOnThreadRef(*onThreadPtr);
+
+    TR::WithOnThreadRef(
+        [&](TR::OnThreadRef aOnThreadRef) { TestOnThreadRef(aOnThreadRef); });
+
+    EXPECT_TRUE(TR::WithOnThreadRefOr(
+        [&](TR::OnThreadRef aOnThreadRef) {
+          TestOnThreadRef(aOnThreadRef);
+          return true;
+        },
+        false));
+  });
+  testThread.join();
+}
+
+// Name under which the current thread is registered, nullptr if it is not.
+static const char* GetThreadName() {
+  using TR = profiler::ThreadRegistration;
+  auto readName = [](const TR::UnlockedConstReader& aData) {
+    return aData.Info().Name();
+  };
+  auto readNameThroughRef = [readName](TR::OnThreadRef onThreadRef) {
+    return onThreadRef.WithUnlockedConstReader(readName);
+  };
+  return TR::WithOnThreadRefOr(readNameThroughRef, nullptr);
+}
+
+// Name stored in the current thread's PRThread; nullptr if the nsIThread or
+// its PRThread cannot be obtained.
+static const char* GetPRThreadName() {
+  nsIThread* const currentThread = NS_GetCurrentThread();
+  if (!currentThread) {
+    return nullptr;
+  }
+
+  // A call that doesn't fail could still leave the PRThread pointer null.
+  PRThread* nativeThread = nullptr;
+  if (NS_FAILED(currentThread->GetPRThread(&nativeThread)) || !nativeThread) {
+    return nullptr;
+  }
+  return PR_GetThreadName(nativeThread);
+}
+
+TEST(GeckoProfiler, ThreadRegistration_MainThreadName)
+{
+  static constexpr char kMainName[] = "GeckoMain";
+  EXPECT_TRUE(profiler::ThreadRegistration::IsRegistered());
+  EXPECT_STREQ(GetThreadName(), kMainName);
+  // The real thread name (outside the profiler) must *not* be GeckoMain.
+  EXPECT_STRNE(GetPRThreadName(), kMainName);
+}
+
+TEST(GeckoProfiler, ThreadRegistration_NestedRegistrations)
+{
+  using TR = profiler::ThreadRegistration;
+
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test assumes it runs on the main thread";
+
+  // Note that the main thread could already be registered, so we work in a new
+  // thread to test actual registrations that we control.
+
+  std::thread testThread([&]() {
+    ASSERT_FALSE(TR::IsRegistered())
+    << "A new std::thread should not start registered";
+
+    char onStackChar;
+
+    // Blocks {} are mostly for clarity, but some control on-stack registration
+    // lifetimes.
+
+    // On-stack registration.
+    {
+      TR rt{"Test thread #1", &onStackChar};
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #1");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #1");
+    }
+    ASSERT_FALSE(TR::IsRegistered());
+
+    // Off-stack registration.
+    {
+      TR::RegisterThread("Test thread #2", &onStackChar);
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #2");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #2");
+
+      TR::UnregisterThread();
+      ASSERT_FALSE(TR::IsRegistered());
+    }
+
+    // Extra un-registration should be ignored.
+    TR::UnregisterThread();
+    ASSERT_FALSE(TR::IsRegistered());
+
+    // Nested on-stack.
+    {
+      TR rt2{"Test thread #3", &onStackChar};
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #3");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #3");
+
+      {
+        TR rt3{"Test thread #4", &onStackChar};
+        ASSERT_TRUE(TR::IsRegistered());
+        EXPECT_STREQ(GetThreadName(), "Test thread #3")
+            << "Nested registration shouldn't change the name";
+        EXPECT_STREQ(GetPRThreadName(), "Test thread #3")
+            << "Nested registration shouldn't change the PRThread name";
+      }
+      ASSERT_TRUE(TR::IsRegistered())
+      << "Thread should still be registered after nested un-registration";
+      EXPECT_STREQ(GetThreadName(), "Test thread #3")
+          << "Thread should still be registered after nested un-registration";
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #3");
+    }
+    ASSERT_FALSE(TR::IsRegistered());
+
+    // Nested off-stack.
+    {
+      TR::RegisterThread("Test thread #5", &onStackChar);
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #5");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #5");
+
+      {
+        TR::RegisterThread("Test thread #6", &onStackChar);
+        ASSERT_TRUE(TR::IsRegistered());
+        EXPECT_STREQ(GetThreadName(), "Test thread #5")
+            << "Nested registration shouldn't change the name";
+        EXPECT_STREQ(GetPRThreadName(), "Test thread #5")
+            << "Nested registration shouldn't change the PRThread name";
+
+        TR::UnregisterThread();
+        ASSERT_TRUE(TR::IsRegistered())
+        << "Thread should still be registered after nested un-registration";
+        EXPECT_STREQ(GetThreadName(), "Test thread #5")
+            << "Thread should still be registered after nested un-registration";
+        EXPECT_STREQ(GetPRThreadName(), "Test thread #5");
+      }
+
+      TR::UnregisterThread();
+      ASSERT_FALSE(TR::IsRegistered());
+    }
+
+    // Nested on- and off-stack.
+    {
+      TR rt2{"Test thread #7", &onStackChar};
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #7");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #7");
+
+      {
+        TR::RegisterThread("Test thread #8", &onStackChar);
+        ASSERT_TRUE(TR::IsRegistered());
+        EXPECT_STREQ(GetThreadName(), "Test thread #7")
+            << "Nested registration shouldn't change the name";
+        EXPECT_STREQ(GetPRThreadName(), "Test thread #7")
+            << "Nested registration shouldn't change the PRThread name";
+
+        TR::UnregisterThread();
+        ASSERT_TRUE(TR::IsRegistered())
+        << "Thread should still be registered after nested un-registration";
+        EXPECT_STREQ(GetThreadName(), "Test thread #7")
+            << "Thread should still be registered after nested un-registration";
+        EXPECT_STREQ(GetPRThreadName(), "Test thread #7");
+      }
+    }
+    ASSERT_FALSE(TR::IsRegistered());
+
+    // Nested off- and on-stack.
+    {
+      TR::RegisterThread("Test thread #9", &onStackChar);
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #9");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #9");
+
+      {
+        TR rt3{"Test thread #10", &onStackChar};
+        ASSERT_TRUE(TR::IsRegistered());
+        EXPECT_STREQ(GetThreadName(), "Test thread #9")
+            << "Nested registration shouldn't change the name";
+        EXPECT_STREQ(GetPRThreadName(), "Test thread #9")
+            << "Nested registration shouldn't change the PRThread name";
+      }
+      ASSERT_TRUE(TR::IsRegistered())
+      << "Thread should still be registered after nested un-registration";
+      EXPECT_STREQ(GetThreadName(), "Test thread #9")
+          << "Thread should still be registered after nested un-registration";
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #9");
+
+      TR::UnregisterThread();
+      ASSERT_FALSE(TR::IsRegistered());
+    }
+
+    // Excess UnregisterThread with on-stack TR.
+    {
+      TR rt2{"Test thread #11", &onStackChar};
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #11");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #11");
+
+      TR::UnregisterThread();
+      ASSERT_TRUE(TR::IsRegistered())
+      << "On-stack thread should still be registered after off-stack "
+         "un-registration";
+      EXPECT_STREQ(GetThreadName(), "Test thread #11")
+          << "On-stack thread should still be registered after off-stack "
+             "un-registration";
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #11");
+    }
+    ASSERT_FALSE(TR::IsRegistered());
+
+    // Excess on-thread TR destruction with already-unregistered root off-thread
+    // registration.
+    {
+      TR::RegisterThread("Test thread #12", &onStackChar);
+      ASSERT_TRUE(TR::IsRegistered());
+      EXPECT_STREQ(GetThreadName(), "Test thread #12");
+      EXPECT_STREQ(GetPRThreadName(), "Test thread #12");
+
+      {
+        TR rt3{"Test thread #13", &onStackChar};
+        ASSERT_TRUE(TR::IsRegistered());
+        EXPECT_STREQ(GetThreadName(), "Test thread #12")
+            << "Nested registration shouldn't change the name";
+        EXPECT_STREQ(GetPRThreadName(), "Test thread #12")
+            << "Nested registration shouldn't change the PRThread name";
+
+        // Note that we unregister the root registration, while nested `rt3` is
+        // still alive.
+        TR::UnregisterThread();
+        ASSERT_FALSE(TR::IsRegistered())
+        << "UnregisterThread() of the root RegisterThread() should always work";
+
+        // At the end of this block, `rt3` will be destroyed, but nothing
+        // should happen.
+      }
+      ASSERT_FALSE(TR::IsRegistered());
+    }
+
+    ASSERT_FALSE(TR::IsRegistered());
+  });
+  testThread.join();
+}
+
+TEST(GeckoProfiler, ThreadRegistry_DataAccess)
+{
+  using TR = profiler::ThreadRegistration;
+  using TRy = profiler::ThreadRegistry;
+  // Checks access to a registered thread's data through ThreadRegistry.
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test assumes it runs on the main thread";
+
+  // Note that the main thread could already be registered, so we work in a new
+  // thread to test an actual registration that we control.
+
+  std::thread testThread([&]() {
+    ASSERT_FALSE(TR::IsRegistered())
+    << "A new std::thread should not start registered";
+    EXPECT_FALSE(TR::GetOnThreadPtr());
+    EXPECT_FALSE(TR::WithOnThreadRefOr([&](auto) { return true; }, false));
+
+    char onStackChar;  // Its address is handed to the registration below.
+    // Timestamps bracketing the registration, passed to the Test* helpers.
+    TimeStamp beforeRegistration = TimeStamp::Now();
+    TR tr{"Test thread", &onStackChar};
+    TimeStamp afterRegistration = TimeStamp::Now();
+
+    ASSERT_TRUE(TR::IsRegistered());
+
+    // Note: This test will mostly be about checking the correct access to
+    // thread data, depending on how it's obtained. Not all the functionality
+    // related to that data is tested (e.g., because it involves JS or other
+    // external dependencies that would be difficult to control here.)
+
+    const ProfilerThreadId testThreadId = profiler_current_thread_id();
+
+    auto testThroughRegistry = [&]() {  // Invoked from both threads below.
+      auto TestOffThreadRef = [&](TRy::OffThreadRef aOffThreadRef) {
+        // To test const-qualified member functions.
+        const TRy::OffThreadRef& offThreadCRef = aOffThreadRef;
+
+        // const UnlockedConstReader (always const)
+
+        TestConstUnlockedConstReader(offThreadCRef.UnlockedConstReaderCRef(),
+                                     beforeRegistration, afterRegistration,
+                                     &onStackChar, testThreadId);
+        offThreadCRef.WithUnlockedConstReader(
+            [&](const TR::UnlockedConstReader& aData) {
+              TestConstUnlockedConstReader(aData, beforeRegistration,
+                                           afterRegistration, &onStackChar,
+                                           testThreadId);
+            });
+
+        // const UnlockedConstReaderAndAtomicRW
+
+        TestConstUnlockedConstReaderAndAtomicRW(
+            offThreadCRef.UnlockedConstReaderAndAtomicRWCRef(),
+            beforeRegistration, afterRegistration, &onStackChar, testThreadId);
+        offThreadCRef.WithUnlockedConstReaderAndAtomicRW(
+            [&](const TR::UnlockedConstReaderAndAtomicRW& aData) {
+              TestConstUnlockedConstReaderAndAtomicRW(
+                  aData, beforeRegistration, afterRegistration, &onStackChar,
+                  testThreadId);
+            });
+
+        // non-const UnlockedConstReaderAndAtomicRW
+
+        TestUnlockedConstReaderAndAtomicRW(
+            aOffThreadRef.UnlockedConstReaderAndAtomicRWRef(),
+            beforeRegistration, afterRegistration, &onStackChar, testThreadId);
+        aOffThreadRef.WithUnlockedConstReaderAndAtomicRW(
+            [&](TR::UnlockedConstReaderAndAtomicRW& aData) {
+              TestUnlockedConstReaderAndAtomicRW(aData, beforeRegistration,
+                                                 afterRegistration,
+                                                 &onStackChar, testThreadId);
+            });
+
+        // const UnlockedRWForLockedProfiler
+
+        TestConstUnlockedRWForLockedProfiler(
+            offThreadCRef.UnlockedRWForLockedProfilerCRef(), beforeRegistration,
+            afterRegistration, &onStackChar, testThreadId);
+        offThreadCRef.WithUnlockedRWForLockedProfiler(
+            [&](const TR::UnlockedRWForLockedProfiler& aData) {
+              TestConstUnlockedRWForLockedProfiler(aData, beforeRegistration,
+                                                   afterRegistration,
+                                                   &onStackChar, testThreadId);
+            });
+
+        // non-const UnlockedRWForLockedProfiler
+
+        TestUnlockedRWForLockedProfiler(
+            aOffThreadRef.UnlockedRWForLockedProfilerRef(), beforeRegistration,
+            afterRegistration, &onStackChar, testThreadId);
+        aOffThreadRef.WithUnlockedRWForLockedProfiler(
+            [&](TR::UnlockedRWForLockedProfiler& aData) {
+              TestUnlockedRWForLockedProfiler(aData, beforeRegistration,
+                                              afterRegistration, &onStackChar,
+                                              testThreadId);
+            });
+
+        // UnlockedReaderAndAtomicRWOnThread
+        // Note: It cannot directly be accessed off the thread, this will be
+        // tested through LockedRWFromAnyThread.
+
+        // const LockedRWFromAnyThread
+
+        EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+        {
+          TRy::OffThreadRef::ConstRWFromAnyThreadWithLock
+              constRWFromAnyThreadWithLock =
+                  offThreadCRef.ConstLockedRWFromAnyThread();
+          if (profiler_current_thread_id() == testThreadId) {  // On-thread only.
+            EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+          }
+          TestConstLockedRWFromAnyThread(
+              constRWFromAnyThreadWithLock.DataCRef(), beforeRegistration,
+              afterRegistration, &onStackChar, testThreadId);
+        }
+        EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+        offThreadCRef.WithConstLockedRWFromAnyThread(
+            [&](const TR::LockedRWFromAnyThread& aData) {
+              if (profiler_current_thread_id() == testThreadId) {
+                EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+              }
+              TestConstLockedRWFromAnyThread(aData, beforeRegistration,
+                                             afterRegistration, &onStackChar,
+                                             testThreadId);
+            });
+        EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+
+        // non-const LockedRWFromAnyThread
+
+        EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+        {
+          TRy::OffThreadRef::RWFromAnyThreadWithLock rwFromAnyThreadWithLock =
+              aOffThreadRef.GetLockedRWFromAnyThread();
+          if (profiler_current_thread_id() == testThreadId) {  // On-thread only.
+            EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+          }
+          TestLockedRWFromAnyThread(rwFromAnyThreadWithLock.DataRef(),
+                                    beforeRegistration, afterRegistration,
+                                    &onStackChar, testThreadId);
+        }
+        EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+        aOffThreadRef.WithLockedRWFromAnyThread(
+            [&](TR::LockedRWFromAnyThread& aData) {
+              if (profiler_current_thread_id() == testThreadId) {
+                EXPECT_TRUE(TR::IsDataMutexLockedOnCurrentThread());
+              }
+              TestLockedRWFromAnyThread(aData, beforeRegistration,
+                                        afterRegistration, &onStackChar,
+                                        testThreadId);
+            });
+        EXPECT_FALSE(TR::IsDataMutexLockedOnCurrentThread());
+
+        // LockedRWOnThread
+        // Note: It can never be accessed off the thread.
+      };
+
+      int ranTest = 0;  // Number of times the callback below ran.
+      TRy::WithOffThreadRef(testThreadId, [&](TRy::OffThreadRef aOffThreadRef) {
+        TestOffThreadRef(aOffThreadRef);
+        ++ranTest;
+      });
+      EXPECT_EQ(ranTest, 1);
+      // Same lookup, through the variant that takes a fallback value (`false`).
+      EXPECT_TRUE(TRy::WithOffThreadRefOr(
+          testThreadId,
+          [&](TRy::OffThreadRef aOffThreadRef) {
+            TestOffThreadRef(aOffThreadRef);
+            return true;
+          },
+          false));
+      // Find the test thread by iterating over a temporary LockedRegistry.
+      ranTest = 0;
+      EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread());
+      for (TRy::OffThreadRef offThreadRef : TRy::LockedRegistry{}) {
+        EXPECT_TRUE(TRy::IsRegistryMutexLockedOnCurrentThread() ||
+                    !TR::IsRegistered());
+        if (offThreadRef.UnlockedConstReaderCRef().Info().ThreadId() ==
+            testThreadId) {
+          TestOffThreadRef(offThreadRef);
+          ++ranTest;
+        }
+      }
+      EXPECT_EQ(ranTest, 1);
+      EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread());
+      // Same search, with a named LockedRegistry that outlives the loop.
+      {
+        ranTest = 0;
+        EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread());
+        TRy::LockedRegistry lockedRegistry{};
+        EXPECT_TRUE(TRy::IsRegistryMutexLockedOnCurrentThread() ||
+                    !TR::IsRegistered());
+        for (TRy::OffThreadRef offThreadRef : lockedRegistry) {
+          if (offThreadRef.UnlockedConstReaderCRef().Info().ThreadId() ==
+              testThreadId) {
+            TestOffThreadRef(offThreadRef);
+            ++ranTest;
+          }
+        }
+        EXPECT_EQ(ranTest, 1);
+      }
+      EXPECT_FALSE(TRy::IsRegistryMutexLockedOnCurrentThread());
+    };
+
+    // Test on the current thread.
+    testThroughRegistry();
+
+    // Test from another thread.
+    std::thread otherThread([&]() {
+      ASSERT_NE(profiler_current_thread_id(), testThreadId);
+      testThroughRegistry();
+
+      // Test that this unregistered thread is really not registered.
+      int ranTest = 0;
+      TRy::WithOffThreadRef(
+          profiler_current_thread_id(),
+          [&](TRy::OffThreadRef aOffThreadRef) { ++ranTest; });
+      EXPECT_EQ(ranTest, 0);
+
+      EXPECT_FALSE(TRy::WithOffThreadRefOr(
+          profiler_current_thread_id(),
+          [&](TRy::OffThreadRef aOffThreadRef) {
+            ++ranTest;
+            return true;
+          },
+          false));
+      EXPECT_EQ(ranTest, 0);
+    });
+    otherThread.join();
+  });
+  testThread.join();
+}
+
+// Stress test: one thread registers and unregisters itself in a loop for about
+// one second, while a second thread keeps looking it up through the registry
+// and locking its data. The counters are only checked for being non-zero.
+TEST(GeckoProfiler, ThreadRegistration_RegistrationEdgeCases)
+{
+  using TR = profiler::ThreadRegistration;
+  using TRy = profiler::ThreadRegistry;
+
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test assumes it runs on the main thread";
+
+  // Note that the main thread could already be registered, so we work in a new
+  // thread to test an actual registration that we control.
+
+  // Each counter is written by a single thread, and only read here after that
+  // thread has been joined.
+  int registrationCount = 0;
+  int otherThreadLoops = 0;
+  int otherThreadReads = 0;
+
+  // This thread will register and unregister in a loop, with some pauses.
+  // Another thread will attempt to access the test thread, and lock its data.
+  // The main goal is to check edge cases around (un)registrations.
+  std::thread testThread([&]() {
+    const ProfilerThreadId testThreadId = profiler_current_thread_id();
+
+    const TimeStamp endTestAt = TimeStamp::Now() + TimeDuration::FromSeconds(1);
+
+    std::thread otherThread([&]() {
+      // Initial sleep so that testThread can start its loop.
+      PR_Sleep(PR_MillisecondsToInterval(1));
+
+      while (TimeStamp::Now() < endTestAt) {
+        ++otherThreadLoops;
+
+        TRy::WithOffThreadRef(testThreadId, [&](TRy::OffThreadRef
+                                                    aOffThreadRef) {
+          if (otherThreadLoops % 1000 == 0) {
+            PR_Sleep(PR_MillisecondsToInterval(1));
+          }
+          TRy::OffThreadRef::RWFromAnyThreadWithLock rwFromAnyThreadWithLock =
+              aOffThreadRef.GetLockedRWFromAnyThread();
+          ++otherThreadReads;
+          if (otherThreadReads % 1000 == 0) {
+            PR_Sleep(PR_MillisecondsToInterval(1));
+          }
+        });
+      }
+    });
+
+    // One registration/unregistration cycle. It lives in its own lambda so that
+    // a fatal ASSERT_* failure only returns from this lambda: returning from
+    // the enclosing thread function would destroy the still-joinable
+    // `otherThread`, which calls std::terminate() and hides the failure.
+    bool cycleCompleted = true;
+    auto registerOnce = [&]() {
+      cycleCompleted = false;
+
+      ASSERT_FALSE(TR::IsRegistered())
+      << "A new std::thread should not start registered";
+      EXPECT_FALSE(TR::GetOnThreadPtr());
+      EXPECT_FALSE(TR::WithOnThreadRefOr([&](auto) { return true; }, false));
+
+      char onStackChar;
+
+      // `tr` unregisters the thread again when it goes out of scope at the end
+      // of this cycle.
+      TR tr{"Test thread", &onStackChar};
+      ++registrationCount;
+
+      ASSERT_TRUE(TR::IsRegistered());
+
+      int ranTest = 0;
+      TRy::WithOffThreadRef(testThreadId, [&](TRy::OffThreadRef aOffThreadRef) {
+        if (registrationCount % 2000 == 0) {
+          PR_Sleep(PR_MillisecondsToInterval(1));
+        }
+        ++ranTest;
+      });
+      EXPECT_EQ(ranTest, 1);
+
+      if (registrationCount % 1000 == 0) {
+        PR_Sleep(PR_MillisecondsToInterval(1));
+      }
+
+      cycleCompleted = true;
+    };
+
+    // Stop early if a cycle was cut short by a fatal failure.
+    while (cycleCompleted && TimeStamp::Now() < endTestAt) {
+      registerOnce();
+    }
+
+    // Always reached, even after a fatal failure in a cycle.
+    otherThread.join();
+  });
+
+  testThread.join();
+
+  // It's difficult to guess what these numbers should be, but they definitely
+  // should be non-zero. The main goal was to test that nothing goes wrong.
+  EXPECT_GT(registrationCount, 0);
+  EXPECT_GT(otherThreadLoops, 0);
+  EXPECT_GT(otherThreadReads, 0);
+}
+
+#ifdef MOZ_GECKO_PROFILER
+
+// Round-trips several string types through a BlocksRingBuffer that is handed
+// the middle third of a larger array, then checks that the bytes on either
+// side of that third were left untouched.
+TEST(BaseProfiler, BlocksRingBuffer)
+{
+  constexpr uint32_t kRingBytes = 256;
+  constexpr size_t kStorageBytes = kRingBytes * 3;
+  uint8_t storage[kStorageBytes];
+  // Recognizable fill pattern, re-checked at the end of the test.
+  for (size_t pos = 0; pos != kStorageBytes; ++pos) {
+    storage[pos] = uint8_t('A' + pos);
+  }
+  BlocksRingBuffer ring(BlocksRingBuffer::ThreadSafety::WithMutex,
+                        storage + kRingBytes, MakePowerOfTwo32<kRingBytes>());
+
+  {
+    // One object of each string flavor; they are all written as one entry.
+    nsCString narrowStr("nsCString"_ns);
+    nsString wideStr(u"nsString"_ns);
+    nsAutoCString autoNarrowStr("nsAutoCString"_ns);
+    nsAutoString autoWideStr(u"nsAutoString"_ns);
+    nsAutoCStringN<8> autoNarrowStr8("nsAutoCStringN"_ns);
+    nsAutoStringN<8> autoWideStr8(u"nsAutoStringN"_ns);
+    JS::UniqueChars uniqueChars = JS_smprintf("%s", "JS::UniqueChars");
+
+    ring.PutObjects(narrowStr, wideStr, autoNarrowStr, autoWideStr,
+                    autoNarrowStr8, autoWideStr8, uniqueChars);
+  }
+
+  // Read the objects back in the order in which they were written.
+  ring.ReadEach([](ProfileBufferEntryReader& aReader) {
+    ASSERT_EQ(aReader.ReadObject<nsCString>(), "nsCString"_ns);
+    ASSERT_EQ(aReader.ReadObject<nsString>(), u"nsString"_ns);
+    ASSERT_EQ(aReader.ReadObject<nsAutoCString>(), "nsAutoCString"_ns);
+    ASSERT_EQ(aReader.ReadObject<nsAutoString>(), u"nsAutoString"_ns);
+    ASSERT_EQ(aReader.ReadObject<nsAutoCStringN<8>>(), "nsAutoCStringN"_ns);
+    ASSERT_EQ(aReader.ReadObject<nsAutoStringN<8>>(), u"nsAutoStringN"_ns);
+    auto readChars = aReader.ReadObject<JS::UniqueChars>();
+    ASSERT_TRUE(static_cast<bool>(readChars));
+    ASSERT_TRUE(!strcmp(readChars.get(), "JS::UniqueChars"));
+  });
+
+  // The first and last thirds of the array must still hold the fill pattern.
+  for (size_t pos = 0; pos != kRingBytes; ++pos) {
+    ASSERT_EQ(storage[pos], uint8_t('A' + pos));
+  }
+  for (size_t pos = kRingBytes * 2; pos != kStorageBytes; ++pos) {
+    ASSERT_EQ(storage[pos], uint8_t('A' + pos));
+  }
+}
+
+// Common JSON checks.
+
+// Expects the given JSON text to be non-empty and to contain no JSON
+// whitespace (space, tab, CR, LF) outside of double-quoted property names and
+// strings. A backslash inside a string shields the character that follows it.
+void JSONWhitespaceCheck(const char* aOutput) {
+  ASSERT_NE(aOutput, nullptr);
+
+  enum class Context { Plain, Quoted, QuotedEscape };
+  Context context = Context::Plain;
+  size_t whitespaces = 0;
+  const char* cursor = aOutput;
+  for (; *cursor != '\0'; ++cursor) {
+    const char ch = *cursor;
+
+    if (context == Context::QuotedEscape) {
+      // Whatever follows a backslash is consumed as-is.
+      context = Context::Quoted;
+    } else if (context == Context::Quoted) {
+      if (ch == '"') {
+        context = Context::Plain;
+      } else if (ch == '\\') {
+        context = Context::QuotedEscape;
+      }
+    } else {
+      // Outside of strings: count whitespace, watch for an opening quote.
+      switch (ch) {
+        case '\n':
+        case '\r':
+        case ' ':
+        case '\t':
+          ++whitespaces;
+          break;
+        case '"':
+          context = Context::Quoted;
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  // The cursor stopped on the terminating NUL, so this is the text length.
+  const size_t length = static_cast<size_t>(cursor - aOutput);
+
+  EXPECT_EQ(whitespaces, 0u);
+  EXPECT_GT(length, 0u);
+}
+
+// Does the GETTER return a non-null TYPE? (Non-critical, re-evaluates GETTER)
+#  define EXPECT_HAS_JSON(GETTER, TYPE)              \
+    do {                                             \
+      if ((GETTER).isNull()) {                       \
+        EXPECT_FALSE((GETTER).isNull())              \
+            << #GETTER " doesn't exist or is null";  \
+      } else if (!(GETTER).is##TYPE()) {             \
+        EXPECT_TRUE((GETTER).is##TYPE())             \
+            << #GETTER " didn't return type " #TYPE; \
+      }                                              \
+    } while (false)
+
+// Does the GETTER return a non-null TYPE? (Critical, evaluates GETTER twice)
+#  define ASSERT_HAS_JSON(GETTER, TYPE) \
+    do {                                \
+      ASSERT_FALSE((GETTER).isNull());  \
+      ASSERT_TRUE((GETTER).is##TYPE()); \
+    } while (false)
+
+// Does the GETTER return a non-null TYPE? (Critical; expands to 2 statements)
+// If yes, store the reference to Json::Value into VARIABLE.
+#  define GET_JSON(VARIABLE, GETTER, TYPE) \
+    ASSERT_HAS_JSON(GETTER, TYPE);         \
+    const Json::Value& VARIABLE = (GETTER)
+
+// Does the GETTER return a non-null TYPE? (Critical; expands to 2 statements)
+// If yes, store the value as `const TYPE` into VARIABLE.
+#  define GET_JSON_VALUE(VARIABLE, GETTER, TYPE) \
+    ASSERT_HAS_JSON(GETTER, TYPE);               \
+    const auto VARIABLE = (GETTER).as##TYPE()
+
+// Non-const GET_JSON_VALUE: VARIABLE is a mutable copy of the value.
+#  define GET_JSON_MUTABLE_VALUE(VARIABLE, GETTER, TYPE) \
+    ASSERT_HAS_JSON(GETTER, TYPE);                       \
+    auto VARIABLE = (GETTER).as##TYPE()
+
+// Checks that the GETTER's value is present, is of the expected TYPE, and has
+// the expected VALUE. (Non-critical) VALUE is evaluated only if TYPE matches.
+#  define EXPECT_EQ_JSON(GETTER, TYPE, VALUE)        \
+    do {                                             \
+      if ((GETTER).isNull()) {                       \
+        EXPECT_FALSE((GETTER).isNull())              \
+            << #GETTER " doesn't exist or is null";  \
+      } else if (!(GETTER).is##TYPE()) {             \
+        EXPECT_TRUE((GETTER).is##TYPE())             \
+            << #GETTER " didn't return type " #TYPE; \
+      } else {                                       \
+        EXPECT_EQ((GETTER).as##TYPE(), (VALUE));     \
+      }                                              \
+    } while (false)
+
+// Checks that the GETTER's value is present, and is a valid index into the
+// STRINGTABLE array, pointing at the expected STRING. (Non-critical)
+#  define EXPECT_EQ_STRINGTABLE(GETTER, STRINGTABLE, STRING)                 \
+    do {                                                                     \
+      if ((GETTER).isNull()) {                                               \
+        EXPECT_FALSE((GETTER).isNull())                                      \
+            << #GETTER " doesn't exist or is null";                          \
+      } else if (!(GETTER).isUInt()) {                                       \
+        EXPECT_TRUE((GETTER).isUInt()) << #GETTER " didn't return an index"; \
+      } else {                                                               \
+        EXPECT_LT((GETTER).asUInt(), (STRINGTABLE).size());                  \
+        EXPECT_EQ_JSON((STRINGTABLE)[(GETTER).asUInt()], String, (STRING));  \
+      }                                                                      \
+    } while (false)
+
+// Checks that the GETTER's value is present, is a non-empty array, and has an
+// element of the expected TYPE equal to VALUE. Scanning stops with a failure
+// at the first element that is not a TYPE. (Non-critical)
+// Note: GETTER is evaluated several times.
+#  define EXPECT_JSON_ARRAY_CONTAINS(GETTER, TYPE, VALUE)                     \
+    do {                                                                      \
+      if ((GETTER).isNull()) {                                                \
+        EXPECT_FALSE((GETTER).isNull())                                       \
+            << #GETTER " doesn't exist or is null";                           \
+      } else if (!(GETTER).isArray()) {                                       \
+        EXPECT_TRUE((GETTER).isArray()) << #GETTER " is not an array";        \
+      } else if (const Json::ArrayIndex size = (GETTER).size(); size == 0u) { \
+        EXPECT_NE(size, 0u) << #GETTER " is an empty array";                  \
+      } else {                                                                \
+        bool found = false;                                                   \
+        for (Json::ArrayIndex i = 0; i < size; ++i) {                         \
+          if (!(GETTER)[i].is##TYPE()) {                                      \
+            EXPECT_TRUE((GETTER)[i].is##TYPE())                               \
+                << #GETTER "[" << i << "] is not " #TYPE;                     \
+            break;                                                            \
+          }                                                                   \
+          if ((GETTER)[i].as##TYPE() == (VALUE)) {                            \
+            found = true;                                                     \
+            break;                                                            \
+          }                                                                   \
+        }                                                                     \
+        EXPECT_TRUE(found) << #GETTER " doesn't contain " #VALUE;             \
+      }                                                                       \
+    } while (false)
+
+// Checks that the GETTER's value is present and is an array (possibly empty)
+// in which no element equals VALUE. Scanning stops with a failure at the first
+// element that is not a TYPE. (Non-critical)
+// Note: GETTER is evaluated several times.
+#  define EXPECT_JSON_ARRAY_EXCLUDES(GETTER, TYPE, VALUE)               \
+    do {                                                                \
+      if ((GETTER).isNull()) {                                          \
+        EXPECT_FALSE((GETTER).isNull())                                 \
+            << #GETTER " doesn't exist or is null";                     \
+      } else if (!(GETTER).isArray()) {                                 \
+        EXPECT_TRUE((GETTER).isArray()) << #GETTER " is not an array";  \
+      } else {                                                          \
+        const Json::ArrayIndex size = (GETTER).size();                  \
+        for (Json::ArrayIndex i = 0; i < size; ++i) {                   \
+          if (!(GETTER)[i].is##TYPE()) {                                \
+            EXPECT_TRUE((GETTER)[i].is##TYPE())                         \
+                << #GETTER "[" << i << "] is not " #TYPE;               \
+            break;                                                      \
+          }                                                             \
+          if ((GETTER)[i].as##TYPE() == (VALUE)) {                      \
+            EXPECT_TRUE((GETTER)[i].as##TYPE() != (VALUE))              \
+                << #GETTER " contains " #VALUE;                         \
+            break;                                                      \
+          }                                                             \
+        }                                                               \
+      }                                                                 \
+    } while (false)
+
+// Check that the given process root contains all the expected properties.
+static void JSONRootCheck(const Json::Value& aRoot,
+                          bool aWithMainThread = true) {  // thread 0 check
+  ASSERT_TRUE(aRoot.isObject());
+
+  EXPECT_HAS_JSON(aRoot["libs"], Array);
+
+  GET_JSON(meta, aRoot["meta"], Object);
+  EXPECT_HAS_JSON(meta["version"], UInt);
+  EXPECT_HAS_JSON(meta["startTime"], Double);
+  EXPECT_HAS_JSON(meta["profilingStartTime"], Double);
+  EXPECT_HAS_JSON(meta["contentEarliestTime"], Double);
+  EXPECT_HAS_JSON(meta["profilingEndTime"], Double);
+
+  EXPECT_HAS_JSON(aRoot["pages"], Array);
+
+  EXPECT_HAS_JSON(aRoot["profilerOverhead"], Object);
+
+  // "counters" is only present if there is any data to report.
+  // Tests that expect "counters" should test for its presence first.
+  if (aRoot.isMember("counters")) {
+    // We have "counters", test their overall validity.
+    GET_JSON(counters, aRoot["counters"], Array);
+    for (const Json::Value& counter : counters) {
+      ASSERT_TRUE(counter.isObject());
+      EXPECT_HAS_JSON(counter["name"], String);
+      EXPECT_HAS_JSON(counter["category"], String);
+      EXPECT_HAS_JSON(counter["description"], String);
+      GET_JSON(sampleGroups, counter["sample_groups"], Array);
+      for (const Json::Value& sampleGroup : sampleGroups) {
+        ASSERT_TRUE(sampleGroup.isObject());
+        EXPECT_HAS_JSON(sampleGroup["id"], UInt);
+
+        GET_JSON(samples, sampleGroup["samples"], Object);
+        GET_JSON(samplesSchema, samples["schema"], Object);
+        EXPECT_GE(samplesSchema.size(), 3u);
+        GET_JSON_VALUE(samplesTime, samplesSchema["time"], UInt);
+        GET_JSON_VALUE(samplesNumber, samplesSchema["number"], UInt);
+        GET_JSON_VALUE(samplesCount, samplesSchema["count"], UInt);
+        GET_JSON(samplesData, samples["data"], Array);
+        double previousTime = 0.0;  // Sample times must be non-decreasing.
+        for (const Json::Value& sample : samplesData) {
+          ASSERT_TRUE(sample.isArray());
+          GET_JSON_VALUE(time, sample[samplesTime], Double);
+          EXPECT_GE(time, previousTime);
+          previousTime = time;
+          if (sample.isValidIndex(samplesNumber)) {
+            EXPECT_HAS_JSON(sample[samplesNumber], UInt64);
+          }
+          if (sample.isValidIndex(samplesCount)) {
+            EXPECT_HAS_JSON(sample[samplesCount], Int64);
+          }
+        }
+      }
+    }
+  }
+  // Check each thread, following sample -> stack -> frame -> string indices.
+  GET_JSON(threads, aRoot["threads"], Array);
+  const Json::ArrayIndex threadCount = threads.size();
+  for (Json::ArrayIndex i = 0; i < threadCount; ++i) {
+    GET_JSON(thread, threads[i], Object);
+    EXPECT_HAS_JSON(thread["processType"], String);
+    EXPECT_HAS_JSON(thread["name"], String);
+    EXPECT_HAS_JSON(thread["registerTime"], Double);
+    GET_JSON(samples, thread["samples"], Object);
+    EXPECT_HAS_JSON(thread["markers"], Object);
+    EXPECT_HAS_JSON(thread["pid"], Int64);
+    EXPECT_HAS_JSON(thread["tid"], Int64);
+    GET_JSON(stackTable, thread["stackTable"], Object);
+    GET_JSON(frameTable, thread["frameTable"], Object);
+    GET_JSON(stringTable, thread["stringTable"], Array);
+
+    GET_JSON(stackTableSchema, stackTable["schema"], Object);
+    EXPECT_GE(stackTableSchema.size(), 2u);
+    GET_JSON_VALUE(stackTablePrefix, stackTableSchema["prefix"], UInt);
+    GET_JSON_VALUE(stackTableFrame, stackTableSchema["frame"], UInt);
+    GET_JSON(stackTableData, stackTable["data"], Array);
+
+    GET_JSON(frameTableSchema, frameTable["schema"], Object);
+    EXPECT_GE(frameTableSchema.size(), 1u);
+    GET_JSON_VALUE(frameTableLocation, frameTableSchema["location"], UInt);
+    GET_JSON(frameTableData, frameTable["data"], Array);
+
+    GET_JSON(samplesSchema, samples["schema"], Object);
+    GET_JSON_VALUE(sampleStackIndex, samplesSchema["stack"], UInt);
+    GET_JSON(samplesData, samples["data"], Array);
+    for (const Json::Value& sample : samplesData) {
+      ASSERT_TRUE(sample.isArray());
+      if (sample.isValidIndex(sampleStackIndex)) {
+        if (!sample[sampleStackIndex].isNull()) {
+          GET_JSON_MUTABLE_VALUE(stack, sample[sampleStackIndex], UInt);
+          EXPECT_TRUE(stackTableData.isValidIndex(stack));
+          for (;;) {  // Walk up the "prefix" chain until the root frame.
+            // `stack` (from the sample, or from the callee frame's "prefix" in
+            // the previous loop) points into the stackTable.
+            GET_JSON(stackTableEntry, stackTableData[stack], Array);
+            GET_JSON_VALUE(frame, stackTableEntry[stackTableFrame], UInt);
+
+            // The stackTable entry's "frame" points into the frameTable.
+            EXPECT_TRUE(frameTableData.isValidIndex(frame));
+            GET_JSON(frameTableEntry, frameTableData[frame], Array);
+            GET_JSON_VALUE(location, frameTableEntry[frameTableLocation], UInt);
+
+            // The frameTable entry's "location" points at a string.
+            EXPECT_TRUE(stringTable.isValidIndex(location));
+
+            // The stackTable entry's "prefix" is null for the root frame.
+            if (stackTableEntry[stackTablePrefix].isNull()) {
+              break;
+            }
+            // Otherwise it recursively points at the caller in the stackTable.
+            GET_JSON_VALUE(prefix, stackTableEntry[stackTablePrefix], UInt);
+            EXPECT_TRUE(stackTableData.isValidIndex(prefix));
+            stack = prefix;
+          }
+        }
+      }
+    }
+  }
+  // When requested, the first thread must be named "GeckoMain".
+  if (aWithMainThread) {
+    ASSERT_GT(threadCount, 0u);
+    GET_JSON(thread0, threads[0], Object);
+    EXPECT_EQ_JSON(thread0["name"], String, "GeckoMain");
+  }
+
+  EXPECT_HAS_JSON(aRoot["pausedRanges"], Array);
+  // "processes" is optional; each entry is checked as a root of its own.
+  const Json::Value& processes = aRoot["processes"];
+  if (!processes.isNull()) {
+    ASSERT_TRUE(processes.isArray());
+    const Json::ArrayIndex processCount = processes.size();
+    for (Json::ArrayIndex i = 0; i < processCount; ++i) {
+      GET_JSON(process, processes[i], Object);
+      JSONRootCheck(process, aWithMainThread);
+    }
+  }
+  // The profiling log must hold exactly one entry, keyed by decimal digits.
+  GET_JSON(profilingLog, aRoot["profilingLog"], Object);
+  EXPECT_EQ(profilingLog.size(), 1u);
+  for (auto it = profilingLog.begin(); it != profilingLog.end(); ++it) {
+    // The key should be a pid.
+    const auto key = it.name();
+    for (const auto letter : key) {
+      EXPECT_GE(letter, '0');
+      EXPECT_LE(letter, '9');
+    }
+    // And the value should be an object.
+    GET_JSON(logForPid, profilingLog[key], Object);
+    // Its content is not defined, but we expect at least these:
+    EXPECT_HAS_JSON(logForPid["profilingLogBegin_TSms"], Double);
+    EXPECT_HAS_JSON(logForPid["profilingLogEnd_TSms"], Double);
+  }
+}
+
+// Validates a JSON string: `aOutput` must be non-null, is run through the
+// whitespace check, parsed, and run through the generic root checks. The
+// parsed root is finally passed to the caller-provided `aJSONCheckFunction`.
+template <typename JSONCheckFunction>
+void JSONOutputCheck(const char* aOutput,
+                     JSONCheckFunction&& aJSONCheckFunction) {
+  ASSERT_NE(aOutput, nullptr);
+  JSONWhitespaceCheck(aOutput);
+
+  // Parse up to the terminating null; parse error text is not collected.
+  Json::Value root;
+  Json::CharReaderBuilder readerBuilder;
+  const std::unique_ptr<Json::CharReader> jsonReader(
+      readerBuilder.newCharReader());
+  const char* const outputEnd = strchr(aOutput, '\0');
+  ASSERT_TRUE(jsonReader->parse(aOutput, outputEnd, &root, nullptr));
+
+  JSONRootCheck(root);
+  std::forward<JSONCheckFunction>(aJSONCheckFunction)(root);
+}
+
+// Waits until a callback installed with profiler_callback_after_sampling() has
+// stored a state. Returns `static_cast<SamplingState>(-1)` if not installed.
+static SamplingState WaitForSamplingState() {
+  Atomic<int> samplingState{-1};
+  if (!profiler_callback_after_sampling([&](SamplingState aSamplingState) {
+        samplingState = static_cast<int>(aSamplingState);
+      })) {
+    return static_cast<SamplingState>(-1);
+  }
+  // Yield between polls so that waiting does not burn a whole core.
+  while (samplingState == -1) {
+    std::this_thread::yield();
+  }
+  return static_cast<SamplingState>(static_cast<int>(samplingState));
+}
+
+using StrVec = Vector<const char*>;
+
+// Asserts that the profiler is inactive and reports zeroed/empty start params.
+static void InactiveFeaturesAndParamsCheck() {
+  ASSERT_FALSE(profiler_is_active());
+  ASSERT_FALSE(profiler_feature_active(ProfilerFeature::MainThreadIO));
+  ASSERT_FALSE(profiler_feature_active(ProfilerFeature::NativeAllocations));
+
+  int entryCount;
+  Maybe<double> maybeDuration;
+  double samplingInterval;
+  uint32_t featureBits;
+  StrVec threadFilters;
+  uint64_t tabID;
+  profiler_get_start_params(&entryCount, &maybeDuration, &samplingInterval,
+                            &featureBits, &threadFilters, &tabID);
+
+  ASSERT_TRUE(entryCount == 0);
+  ASSERT_TRUE(maybeDuration == Nothing());
+  ASSERT_TRUE(samplingInterval == 0);
+  ASSERT_TRUE(featureBits == 0);
+  ASSERT_TRUE(threadFilters.empty());
+  ASSERT_TRUE(tabID == 0);
+}
+
+// Asserts that the profiler's current start params match the expected ones.
+static void ActiveParamsCheck(int aEntries, double aInterval,
+                              uint32_t aFeatures, const char** aFilters,
+                              size_t aFiltersLen, uint64_t aActiveTabID,
+                              const Maybe<double>& aDuration = Nothing()) {
+  int gotEntries;
+  Maybe<double> gotDuration;
+  double gotInterval;
+  uint32_t gotFeatures;
+  StrVec gotFilters;
+  uint64_t gotActiveTabID;
+  profiler_get_start_params(&gotEntries, &gotDuration, &gotInterval,
+                            &gotFeatures, &gotFilters, &gotActiveTabID);
+
+  ASSERT_TRUE(gotEntries == aEntries);
+  ASSERT_TRUE(gotDuration == aDuration);
+  ASSERT_TRUE(gotInterval == aInterval);
+  ASSERT_TRUE(gotFeatures == aFeatures);
+  ASSERT_TRUE(gotFilters.length() == aFiltersLen);
+  ASSERT_TRUE(gotActiveTabID == aActiveTabID);
+  for (size_t i = 0; i != aFiltersLen; ++i) {
+    ASSERT_TRUE(strcmp(gotFilters[i], aFilters[i]) == 0);
+  }
+}
+
+TEST(GeckoProfiler, FeaturesAndParams)
+{
+  InactiveFeaturesAndParamsCheck();
+
+  // Start params must be reported back. Try a couple of features and filters.
+  {
+    uint32_t features = ProfilerFeature::JS;
+    const char* filters[] = {"GeckoMain", "Compositor"};
+
+#  define PROFILER_DEFAULT_DURATION 20 /* seconds, for tests only */
+    profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                   features, filters, MOZ_ARRAY_LENGTH(filters), 100,
+                   Some(PROFILER_DEFAULT_DURATION));
+
+    ASSERT_TRUE(profiler_is_active());
+    ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::MainThreadIO));
+    ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::IPCMessages));
+
+    ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(),
+                      PROFILER_DEFAULT_INTERVAL, features, filters,
+                      MOZ_ARRAY_LENGTH(filters), 100,
+                      Some(PROFILER_DEFAULT_DURATION));
+
+    profiler_stop();
+
+    InactiveFeaturesAndParamsCheck();
+  }
+
+  // Try some different features and filters.
+  {
+    uint32_t features =
+        ProfilerFeature::MainThreadIO | ProfilerFeature::IPCMessages;
+    const char* filters[] = {"GeckoMain", "Foo", "Bar"};
+
+    // Testing with some arbitrary buffer size (as could be provided by
+    // external code), which we convert to the appropriate power of 2.
+    profiler_start(PowerOfTwo32(999999), 3, features, filters,
+                   MOZ_ARRAY_LENGTH(filters), 123, Some(25.0));
+
+    ASSERT_TRUE(profiler_is_active());
+    ASSERT_TRUE(profiler_feature_active(ProfilerFeature::MainThreadIO));
+    ASSERT_TRUE(profiler_feature_active(ProfilerFeature::IPCMessages));
+
+    ActiveParamsCheck(int(PowerOfTwo32(999999).Value()), 3, features, filters,
+                      MOZ_ARRAY_LENGTH(filters), 123, Some(25.0));
+
+    profiler_stop();
+
+    InactiveFeaturesAndParamsCheck();
+  }
+
+  // Try with no duration: `Nothing()` is passed and expected to be reported.
+  {
+    uint32_t features =
+        ProfilerFeature::MainThreadIO | ProfilerFeature::IPCMessages;
+    const char* filters[] = {"GeckoMain", "Foo", "Bar"};
+
+    profiler_start(PowerOfTwo32(999999), 3, features, filters,
+                   MOZ_ARRAY_LENGTH(filters), 0, Nothing());
+
+    ASSERT_TRUE(profiler_is_active());
+    ASSERT_TRUE(profiler_feature_active(ProfilerFeature::MainThreadIO));
+    ASSERT_TRUE(profiler_feature_active(ProfilerFeature::IPCMessages));
+
+    ActiveParamsCheck(int(PowerOfTwo32(999999).Value()), 3, features, filters,
+                      MOZ_ARRAY_LENGTH(filters), 0, Nothing());
+
+    profiler_stop();
+
+    InactiveFeaturesAndParamsCheck();
+  }
+
+  // Try all supported features, and filters that match all threads.
+  {
+    uint32_t availableFeatures = profiler_get_available_features();
+    const char* filters[] = {""};
+
+    profiler_start(PowerOfTwo32(88888), 10, availableFeatures, filters,
+                   MOZ_ARRAY_LENGTH(filters), 0, Some(15.0));
+
+    ASSERT_TRUE(profiler_is_active());
+    ASSERT_TRUE(profiler_feature_active(ProfilerFeature::MainThreadIO));
+    ASSERT_TRUE(profiler_feature_active(ProfilerFeature::IPCMessages));
+
+    ActiveParamsCheck(PowerOfTwo32(88888).Value(), 10, availableFeatures,
+                      filters, MOZ_ARRAY_LENGTH(filters), 0, Some(15.0));
+
+    // Don't call profiler_stop() here, the next scope restarts the profiler.
+  }
+
+  // Try no features, and filters that match no threads.
+  {
+    uint32_t features = 0;
+    const char* filters[] = {"NoThreadWillMatchThis"};
+
+    // Second profiler_start() call in a row without an intervening
+    // profiler_stop(); this will do an implicit profiler_stop() and restart.
+    profiler_start(PowerOfTwo32(0), 0, features, filters,
+                   MOZ_ARRAY_LENGTH(filters), 0, Some(0.0));
+
+    ASSERT_TRUE(profiler_is_active());
+    ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::MainThreadIO));
+    ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::IPCMessages));
+
+    // Entries and interval default on 0; a 0 duration reads back as Nothing.
+    ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(),
+                      PROFILER_DEFAULT_INTERVAL, features, filters,
+                      MOZ_ARRAY_LENGTH(filters), 0, Nothing());
+
+    profiler_stop();
+
+    InactiveFeaturesAndParamsCheck();
+
+    // These extra calls are no-ops, the profiler is already stopped.
+    profiler_stop();
+    profiler_stop();
+
+    InactiveFeaturesAndParamsCheck();
+  }
+}
+
+TEST(GeckoProfiler, EnsureStarted)
+{
+  InactiveFeaturesAndParamsCheck();
+
+  uint32_t features = ProfilerFeature::JS;
+  const char* filters[] = {"GeckoMain", "Compositor"};
+  {
+    // Inactive -> Active (PROFILER_DEFAULT_DURATION: see FeaturesAndParams)
+    profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                            features, filters, MOZ_ARRAY_LENGTH(filters), 0,
+                            Some(PROFILER_DEFAULT_DURATION));
+
+    ActiveParamsCheck(
+        PROFILER_DEFAULT_ENTRIES.Value(), PROFILER_DEFAULT_INTERVAL, features,
+        filters, MOZ_ARRAY_LENGTH(filters), 0, Some(PROFILER_DEFAULT_DURATION));
+  }
+
+  {
+    // Active -> Active with same settings
+
+    Maybe<ProfilerBufferInfo> info0 = profiler_get_buffer_info();
+    ASSERT_TRUE(info0->mRangeEnd > 0);
+
+    // First, sleep so that some samples get written into the buffer.
+    PR_Sleep(PR_MillisecondsToInterval(500));
+
+    Maybe<ProfilerBufferInfo> info1 = profiler_get_buffer_info();
+    ASSERT_TRUE(info1->mRangeEnd > info0->mRangeEnd);
+
+    // Call profiler_ensure_started with the same settings as before.
+    // This operation must not clear our buffer!
+    profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                            features, filters, MOZ_ARRAY_LENGTH(filters), 0,
+                            Some(PROFILER_DEFAULT_DURATION));
+
+    ActiveParamsCheck(
+        PROFILER_DEFAULT_ENTRIES.Value(), PROFILER_DEFAULT_INTERVAL, features,
+        filters, MOZ_ARRAY_LENGTH(filters), 0, Some(PROFILER_DEFAULT_DURATION));
+
+    // Check that our position in the buffer stayed the same or advanced, but
+    // not by much, and the range-start after profiler_ensure_started shouldn't
+    // have passed the range-end before.
+    Maybe<ProfilerBufferInfo> info2 = profiler_get_buffer_info();
+    ASSERT_TRUE(info2->mRangeEnd >= info1->mRangeEnd);
+    ASSERT_TRUE(info2->mRangeEnd - info1->mRangeEnd <
+                info1->mRangeEnd - info0->mRangeEnd);
+    ASSERT_TRUE(info2->mRangeStart < info1->mRangeEnd);
+  }
+
+  {
+    // Active -> Active with *different* settings
+
+    Maybe<ProfilerBufferInfo> info1 = profiler_get_buffer_info();
+
+    // Call profiler_ensure_started with a different feature set than the one
+    // it's currently running with. This is supposed to stop and restart the
+    // profiler, thereby discarding the buffer contents.
+    uint32_t differentFeatures = features | ProfilerFeature::CPUUtilization;
+    profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                            differentFeatures, filters,
+                            MOZ_ARRAY_LENGTH(filters), 0);
+
+    ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(),
+                      PROFILER_DEFAULT_INTERVAL, differentFeatures, filters,
+                      MOZ_ARRAY_LENGTH(filters), 0);
+
+    // Check that the buffer was cleared, so its range-start should be at/after
+    // its range-end before.
+    Maybe<ProfilerBufferInfo> info2 = profiler_get_buffer_info();
+    ASSERT_TRUE(info2->mRangeStart >= info1->mRangeEnd);
+  }
+
+  {
+    // Active -> Inactive
+
+    profiler_stop();
+
+    InactiveFeaturesAndParamsCheck();
+  }
+}
+
+TEST(GeckoProfiler, MultiRegistration)
+{
+  // Smoke test: every scenario below runs on its own short-lived thread and
+  // only has to complete without crashing. Nothing here verifies whether the
+  // threads actually get profiled or not.
+
+  // Scenario 1: register, then let the thread function return without ever
+  // unregistering.
+  std::thread registerOnly([]() {
+    char stackTop;
+    profiler_register_thread("thread, no unreg", &stackTop);
+  });
+  registerOnly.join();
+
+  // Scenario 2: unregister a thread that was never registered.
+  std::thread unregisterOnly([]() { profiler_unregister_thread(); });
+  unregisterOnly.join();
+
+  // Scenario 3: two complete register/unregister cycles in a row, reusing the
+  // same stack-top address.
+  std::thread registerTwoCycles([]() {
+    char stackTop;
+    profiler_register_thread("thread 1st", &stackTop);
+    profiler_unregister_thread();
+    profiler_register_thread("thread 2nd", &stackTop);
+    profiler_unregister_thread();
+  });
+  registerTwoCycles.join();
+
+  // Scenario 4: register twice in a row (under different names), then
+  // unregister only once.
+  std::thread registerTwice([]() {
+    char stackTop;
+    profiler_register_thread("thread once", &stackTop);
+    profiler_register_thread("thread again", &stackTop);
+    profiler_unregister_thread();
+  });
+  registerTwice.join();
+
+  // Scenario 5: register once, then unregister twice in a row; the second
+  // unregistration happens on an already-unregistered thread.
+  std::thread unregisterTwice([]() {
+    char stackTop;
+    profiler_register_thread("thread to unreg twice", &stackTop);
+    profiler_unregister_thread();
+    profiler_unregister_thread();
+  });
+  unregisterTwice.join();
+}
+
+TEST(GeckoProfiler, DifferentThreads)
+{
+  InactiveFeaturesAndParamsCheck();
+
+  nsCOMPtr<nsIThread> thread;
+  nsresult rv = NS_NewNamedThread("GeckoProfGTest", getter_AddRefs(thread));
+  ASSERT_NS_SUCCEEDED(rv);
+
+  // Control the profiler from runnables dispatched to a background thread, and
+  // verify flags on the main thread.
+  {
+    uint32_t features = ProfilerFeature::JS;
+    const char* filters[] = {"GeckoMain", "Compositor"};
+
+    NS_DispatchAndSpinEventLoopUntilComplete(
+        "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread,
+        NS_NewRunnableFunction(
+            "GeckoProfiler_DifferentThreads_Test::TestBody", [&]() {
+              profiler_start(PROFILER_DEFAULT_ENTRIES,
+                             PROFILER_DEFAULT_INTERVAL, features, filters,
+                             MOZ_ARRAY_LENGTH(filters), 0);
+            }));
+
+    ASSERT_TRUE(profiler_is_active());
+    ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::MainThreadIO));
+    ASSERT_TRUE(!profiler_feature_active(ProfilerFeature::IPCMessages));
+
+    ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(),
+                      PROFILER_DEFAULT_INTERVAL, features, filters,
+                      MOZ_ARRAY_LENGTH(filters), 0);
+
+    NS_DispatchAndSpinEventLoopUntilComplete(
+        "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread,
+        NS_NewRunnableFunction("GeckoProfiler_DifferentThreads_Test::TestBody",
+                               [&]() { profiler_stop(); }));
+
+    InactiveFeaturesAndParamsCheck();
+  }
+
+  // Control the profiler on the main thread, and verify flags from runnables
+  // dispatched to the background thread.
+  {
+    uint32_t features = ProfilerFeature::JS;
+    const char* filters[] = {"GeckoMain", "Compositor"};
+
+    profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                   features, filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+    NS_DispatchAndSpinEventLoopUntilComplete(
+        "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread,
+        NS_NewRunnableFunction(
+            "GeckoProfiler_DifferentThreads_Test::TestBody", [&]() {
+              ASSERT_TRUE(profiler_is_active());
+              ASSERT_TRUE(
+                  !profiler_feature_active(ProfilerFeature::MainThreadIO));
+              ASSERT_TRUE(
+                  !profiler_feature_active(ProfilerFeature::IPCMessages));
+
+              ActiveParamsCheck(PROFILER_DEFAULT_ENTRIES.Value(),
+                                PROFILER_DEFAULT_INTERVAL, features, filters,
+                                MOZ_ARRAY_LENGTH(filters), 0);
+            }));
+
+    profiler_stop();
+
+    NS_DispatchAndSpinEventLoopUntilComplete(
+        "GeckoProfiler_DifferentThreads_Test::TestBody"_ns, thread,
+        NS_NewRunnableFunction("GeckoProfiler_DifferentThreads_Test::TestBody",
+                               [&]() { InactiveFeaturesAndParamsCheck(); }));
+  }
+
+  thread->Shutdown();
+}
+
+TEST(GeckoProfiler, GetBacktrace)
+{
+  // No backtrace is expected while the profiler is inactive (also at the end).
+  ASSERT_TRUE(!profiler_get_backtrace());
+
+  {
+    uint32_t walkFeature = ProfilerFeature::StackWalk;
+    const char* mainOnly[] = {"GeckoMain"};
+    profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                   walkFeature, mainOnly, MOZ_ARRAY_LENGTH(mainOnly), 0);
+
+    static const int kBacktraceCount = 100;
+    // This first batch is destroyed while the profiler is still active.
+    {
+      UniqueProfilerBacktrace activeBatch[kBacktraceCount];
+      for (UniqueProfilerBacktrace& backtrace : activeBatch) {
+        backtrace = profiler_get_backtrace();
+        ASSERT_TRUE(backtrace);
+      }
+    }
+
+    // This second batch is only destroyed after the profiler has stopped.
+    UniqueProfilerBacktrace lateBatch[kBacktraceCount];
+    for (UniqueProfilerBacktrace& backtrace : lateBatch) {
+      backtrace = profiler_get_backtrace();
+      ASSERT_TRUE(backtrace);
+    }
+
+    profiler_stop();
+  }
+
+  ASSERT_TRUE(!profiler_get_backtrace());
+}
+
+TEST(GeckoProfiler, Pause)
+{
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test must run on the main thread";
+
+  uint32_t features = ProfilerFeature::StackWalk;
+  const char* filters[] = {"GeckoMain", "Profiled GeckoProfiler.Pause"};
+
+  ASSERT_TRUE(!profiler_is_paused());
+  for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) {
+    ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+    ASSERT_TRUE(
+        !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+    ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_current_thread_id(),
+                                                   features));
+    ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_main_thread_id(),
+                                                   features));
+  }
+  // Each phase re-checks on a new thread: unregistered, "Ignored", "Profiled".
+  std::thread{[&]() {
+    {
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Ignored GeckoProfiler.Pause - before start");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Profiled GeckoProfiler.Pause - before start");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+  }}.join();
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  ASSERT_TRUE(!profiler_is_paused());
+  for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) {
+    ASSERT_TRUE(profiler_thread_is_being_profiled(features));
+    ASSERT_TRUE(
+        profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+    ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_current_thread_id(),
+                                                  features));
+  }
+
+  std::thread{[&]() {
+    {
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(),
+                                                      features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Ignored GeckoProfiler.Pause - after start");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(),
+                                                      features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Profiled GeckoProfiler.Pause - after start");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(),
+                                                      features));
+      }
+    }
+  }}.join();
+
+  // Check that we are writing samples while not paused.
+  Maybe<ProfilerBufferInfo> info1 = profiler_get_buffer_info();
+  PR_Sleep(PR_MillisecondsToInterval(500));
+  Maybe<ProfilerBufferInfo> info2 = profiler_get_buffer_info();
+  ASSERT_TRUE(info1->mRangeEnd != info2->mRangeEnd);
+
+  // Check that we are writing markers while not paused.
+  ASSERT_TRUE(profiler_thread_is_being_profiled_for_markers());
+  ASSERT_TRUE(
+      profiler_thread_is_being_profiled_for_markers(ProfilerThreadId{}));
+  ASSERT_TRUE(profiler_thread_is_being_profiled_for_markers(
+      profiler_current_thread_id()));
+  ASSERT_TRUE(
+      profiler_thread_is_being_profiled_for_markers(profiler_main_thread_id()));
+  info1 = profiler_get_buffer_info();
+  PROFILER_MARKER_UNTYPED("Not paused", OTHER, {});
+  info2 = profiler_get_buffer_info();
+  ASSERT_TRUE(info1->mRangeEnd != info2->mRangeEnd);
+
+  profiler_pause();
+
+  ASSERT_TRUE(profiler_is_paused());
+  for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) {
+    ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+    ASSERT_TRUE(
+        !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+    ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_current_thread_id(),
+                                                   features));
+  }
+  ASSERT_TRUE(!profiler_thread_is_being_profiled_for_markers());
+  ASSERT_TRUE(
+      !profiler_thread_is_being_profiled_for_markers(ProfilerThreadId{}));
+  ASSERT_TRUE(!profiler_thread_is_being_profiled_for_markers(
+      profiler_current_thread_id()));
+  ASSERT_TRUE(!profiler_thread_is_being_profiled_for_markers(
+      profiler_main_thread_id()));
+
+  std::thread{[&]() {
+    {
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Ignored GeckoProfiler.Pause - after pause");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Profiled GeckoProfiler.Pause - after pause");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+  }}.join();
+
+  // Check that we are not writing samples while paused.
+  info1 = profiler_get_buffer_info();
+  PR_Sleep(PR_MillisecondsToInterval(500));
+  info2 = profiler_get_buffer_info();
+  ASSERT_TRUE(info1->mRangeEnd == info2->mRangeEnd);
+
+  // Check that we are not writing markers while paused.
+  info1 = profiler_get_buffer_info();
+  PROFILER_MARKER_UNTYPED("Paused", OTHER, {});
+  info2 = profiler_get_buffer_info();
+  ASSERT_TRUE(info1->mRangeEnd == info2->mRangeEnd);
+  PROFILER_MARKER_UNTYPED("Paused v2", OTHER, {});
+  Maybe<ProfilerBufferInfo> info3 = profiler_get_buffer_info();
+  ASSERT_TRUE(info2->mRangeEnd == info3->mRangeEnd);
+
+  profiler_resume();
+
+  ASSERT_TRUE(!profiler_is_paused());
+  for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) {
+    ASSERT_TRUE(profiler_thread_is_being_profiled(features));
+    ASSERT_TRUE(
+        profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+    ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_current_thread_id(),
+                                                  features));
+  }
+
+  std::thread{[&]() {
+    {
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(),
+                                                      features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Ignored GeckoProfiler.Pause - after resume");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(),
+                                                      features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Profiled GeckoProfiler.Pause - after resume");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(profiler_thread_is_being_profiled(profiler_main_thread_id(),
+                                                      features));
+      }
+    }
+  }}.join();
+
+  profiler_stop();
+
+  ASSERT_TRUE(!profiler_is_paused());
+  for (ThreadProfilingFeatures features : scEachAndAnyThreadProfilingFeatures) {
+    ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+    ASSERT_TRUE(
+        !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+    ASSERT_TRUE(!profiler_thread_is_being_profiled(profiler_current_thread_id(),
+                                                   features));
+  }
+
+  std::thread{[&]() {
+    {
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD("Ignored GeckoProfiler.Pause - after stop");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+    {
+      AUTO_PROFILER_REGISTER_THREAD(
+          "Profiled GeckoProfiler.Pause - after stop");
+      for (ThreadProfilingFeatures features :
+           scEachAndAnyThreadProfilingFeatures) {
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(features));
+        ASSERT_TRUE(
+            !profiler_thread_is_being_profiled(ProfilerThreadId{}, features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_current_thread_id(), features));
+        ASSERT_TRUE(!profiler_thread_is_being_profiled(
+            profiler_main_thread_id(), features));
+      }
+    }
+  }}.join();
+}
+
+TEST(GeckoProfiler, Markers)
+{
+  uint32_t features = ProfilerFeature::StackWalk;
+  const char* filters[] = {"GeckoMain"};
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  PROFILER_MARKER("tracing event", OTHER, {}, Tracing, "A");
+  PROFILER_MARKER("tracing start", OTHER, MarkerTiming::IntervalStart(),
+                  Tracing, "A");
+  PROFILER_MARKER("tracing end", OTHER, MarkerTiming::IntervalEnd(), Tracing,
+                  "A");
+
+  auto bt = profiler_capture_backtrace();
+  PROFILER_MARKER("tracing event with stack", OTHER,
+                  MarkerStack::TakeBacktrace(std::move(bt)), Tracing, "B");
+
+  { AUTO_PROFILER_TRACING_MARKER("C", "auto tracing", OTHER); }
+
+  PROFILER_MARKER_UNTYPED("M1", OTHER, {});
+  PROFILER_MARKER_UNTYPED("M3", OTHER, {});
+
+  // Create three strings: two that are the maximum allowed length, and one that
+  // is one char longer.
+  static const size_t kMax = ProfileBuffer::kMaxFrameKeyLength;
+  UniquePtr<char[]> okstr1 = MakeUnique<char[]>(kMax);
+  UniquePtr<char[]> okstr2 = MakeUnique<char[]>(kMax);
+  UniquePtr<char[]> longstr = MakeUnique<char[]>(kMax + 1);
+  UniquePtr<char[]> longstrCut = MakeUnique<char[]>(kMax + 1);
+  for (size_t i = 0; i < kMax; i++) {
+    okstr1[i] = 'a';
+    okstr2[i] = 'b';
+    longstr[i] = 'c';
+    longstrCut[i] = 'c';
+  }
+  okstr1[kMax - 1] = '\0';
+  okstr2[kMax - 1] = '\0';
+  longstr[kMax] = '\0';
+  longstrCut[kMax] = '\0';
+  // Should be output as-is.
+  AUTO_PROFILER_LABEL_DYNAMIC_CSTR("", LAYOUT, "");
+  AUTO_PROFILER_LABEL_DYNAMIC_CSTR("", LAYOUT, okstr1.get());
+  // Should be output as label + space + okstr2.
+  AUTO_PROFILER_LABEL_DYNAMIC_CSTR("okstr2", LAYOUT, okstr2.get());
+  // Should be output with kMax length, ending with "...\0".
+  AUTO_PROFILER_LABEL_DYNAMIC_CSTR("", LAYOUT, longstr.get());
+  ASSERT_EQ(longstrCut[kMax - 4], 'c');
+  longstrCut[kMax - 4] = '.';
+  ASSERT_EQ(longstrCut[kMax - 3], 'c');
+  longstrCut[kMax - 3] = '.';
+  ASSERT_EQ(longstrCut[kMax - 2], 'c');
+  longstrCut[kMax - 2] = '.';
+  ASSERT_EQ(longstrCut[kMax - 1], 'c');
+  longstrCut[kMax - 1] = '\0';
+
+  // Test basic markers 2.0.
+  EXPECT_TRUE(
+      profiler_add_marker("default-templated markers 2.0 with empty options",
+                          geckoprofiler::category::OTHER, {}));
+
+  PROFILER_MARKER_UNTYPED(
+      "default-templated markers 2.0 with option", OTHER,
+      MarkerStack::TakeBacktrace(profiler_capture_backtrace()));
+
+  PROFILER_MARKER("explicitly-default-templated markers 2.0 with empty options",
+                  OTHER, {}, NoPayload);
+
+  EXPECT_TRUE(profiler_add_marker(
+      "explicitly-default-templated markers 2.0 with option",
+      geckoprofiler::category::OTHER, {},
+      ::geckoprofiler::markers::NoPayload{}));
+
+  // Used in markers below.
+  TimeStamp ts1 = TimeStamp::Now();
+
+  // Sleep briefly to ensure a sample is taken and the pending markers are
+  // processed.
+  PR_Sleep(PR_MillisecondsToInterval(500));
+
+  // Used in markers below.
+  TimeStamp ts2 = TimeStamp::Now();
+  // ts1 and ts2 should be different thanks to the sleep.
+  EXPECT_NE(ts1, ts2);
+
+  // Test most marker payloads.
+
+  // Keep this one first! (It's used to record `ts1` and `ts2`, to compare
+  // to serialized numbers in other markers.)
+  EXPECT_TRUE(profiler_add_marker("FirstMarker", geckoprofiler::category::OTHER,
+                                  MarkerTiming::Interval(ts1, ts2),
+                                  geckoprofiler::markers::TextMarker{},
+                                  "First Marker"));
+
+  // User-defined marker type with different properties, and fake schema.
+  struct GtestMarker {
+    static constexpr Span<const char> MarkerTypeName() {
+      return MakeStringSpan("markers-gtest");  // Checked below as payload "type".
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int aInt,
+        double aDouble, const mozilla::ProfilerString8View& aText,
+        const mozilla::ProfilerString8View& aUniqueText,
+        const mozilla::TimeStamp& aTime) {
+      aWriter.NullProperty("null");
+      aWriter.BoolProperty("bool-false", false);
+      aWriter.BoolProperty("bool-true", true);
+      aWriter.IntProperty("int", aInt);
+      aWriter.DoubleProperty("double", aDouble);
+      aWriter.StringProperty("text", aText);
+      aWriter.UniqueStringProperty("unique text", aUniqueText);
+      aWriter.UniqueStringProperty("unique text again", aUniqueText);  // Same string: same index expected.
+      aWriter.TimeProperty("time", aTime);
+    }
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      // Note: This is a test function that is not intended to output a schema
+      // that correctly matches the StreamJSONMarkerData data above! Instead we
+      // only test that it outputs the expected JSON at the end.
+      using MS = mozilla::MarkerSchema;
+      MS schema{MS::Location::MarkerChart,      MS::Location::MarkerTable,
+                MS::Location::TimelineOverview, MS::Location::TimelineMemory,
+                MS::Location::TimelineIPC,      MS::Location::TimelineFileIO,
+                MS::Location::StackChart};
+      // All label functions.
+      schema.SetChartLabel("chart label");
+      schema.SetTooltipLabel("tooltip label");
+      schema.SetTableLabel("table label");
+      // All data functions, all formats, all "searchable" values.
+      schema.AddKeyFormat("key with url", MS::Format::Url);
+      schema.AddKeyLabelFormat("key with label filePath", "label filePath",
+                               MS::Format::FilePath);
+      schema.AddKeyFormatSearchable("key with string not-searchable",
+                                    MS::Format::String,
+                                    MS::Searchable::NotSearchable);
+      schema.AddKeyLabelFormatSearchable("key with label duration searchable",
+                                         "label duration", MS::Format::Duration,
+                                         MS::Searchable::Searchable);
+      schema.AddKeyFormat("key with time", MS::Format::Time);
+      schema.AddKeyFormat("key with seconds", MS::Format::Seconds);
+      schema.AddKeyFormat("key with milliseconds", MS::Format::Milliseconds);
+      schema.AddKeyFormat("key with microseconds", MS::Format::Microseconds);
+      schema.AddKeyFormat("key with nanoseconds", MS::Format::Nanoseconds);
+      schema.AddKeyFormat("key with bytes", MS::Format::Bytes);
+      schema.AddKeyFormat("key with percentage", MS::Format::Percentage);
+      schema.AddKeyFormat("key with integer", MS::Format::Integer);
+      schema.AddKeyFormat("key with decimal", MS::Format::Decimal);
+      schema.AddStaticLabelValue("static label", "static value");
+      return schema;
+    }
+  };
+  EXPECT_TRUE(
+      profiler_add_marker("Gtest custom marker", geckoprofiler::category::OTHER,
+                          MarkerTiming::Interval(ts1, ts2), GtestMarker{}, 42,
+                          43.0, "gtest text", "gtest unique text", ts1));
+
+  // User-defined marker type with no data, special frontend schema.
+  struct GtestSpecialMarker {
+    static constexpr Span<const char> MarkerTypeName() {
+      return MakeStringSpan("markers-gtest-special");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter) {}  // Intentionally empty: this marker type has no data.
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      return mozilla::MarkerSchema::SpecialFrontendLocation{};
+    }
+  };
+  EXPECT_TRUE(profiler_add_marker("Gtest special marker",
+                                  geckoprofiler::category::OTHER, {},
+                                  GtestSpecialMarker{}));
+
+  // User-defined marker type that is never used, so it shouldn't appear in the
+  // output.
+  struct GtestUnusedMarker {
+    static constexpr Span<const char> MarkerTypeName() {
+      return MakeStringSpan("markers-gtest-unused");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter) {}  // Intentionally empty: no marker of this type is added.
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      return mozilla::MarkerSchema::SpecialFrontendLocation{};
+    }
+  };
+
+  // Make sure the compiler doesn't complain about this unused struct.
+  mozilla::Unused << GtestUnusedMarker{};
+
+  // Other markers in alphabetical order of payload class names.
+
+  nsCOMPtr<nsIURI> uri;
+  ASSERT_TRUE(
+      NS_SUCCEEDED(NS_NewURI(getter_AddRefs(uri), "http://mozilla.org/"_ns)));
+  // The marker name will be "Load <aChannelId>: <aURI>".
+  profiler_add_network_marker(
+      /* nsIURI* aURI */ uri,
+      /* const nsACString& aRequestMethod */ "GET"_ns,
+      /* int32_t aPriority */ 34,
+      /* uint64_t aChannelId */ 1,
+      /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_START,
+      /* mozilla::TimeStamp aStart */ ts1,
+      /* mozilla::TimeStamp aEnd */ ts2,
+      /* int64_t aCount */ 56,
+      /* mozilla::net::CacheDisposition aCacheDisposition */
+      net::kCacheHit,
+      /* uint64_t aInnerWindowID */ 78,
+      /* bool aIsPrivateBrowsing */ false
+      /* const mozilla::net::TimingStruct* aTimings = nullptr */
+      /* mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aSource =
+         nullptr */
+      /* const mozilla::Maybe<nsDependentCString>& aContentType =
+         mozilla::Nothing() */
+      /* nsIURI* aRedirectURI = nullptr */
+      /* uint64_t aRedirectChannelId = 0 */
+  );
+
+  profiler_add_network_marker(
+      /* nsIURI* aURI */ uri,
+      /* const nsACString& aRequestMethod */ "GET"_ns,
+      /* int32_t aPriority */ 34,
+      /* uint64_t aChannelId */ 2,
+      /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_STOP,
+      /* mozilla::TimeStamp aStart */ ts1,
+      /* mozilla::TimeStamp aEnd */ ts2,
+      /* int64_t aCount */ 56,
+      /* mozilla::net::CacheDisposition aCacheDisposition */
+      net::kCacheUnresolved,
+      /* uint64_t aInnerWindowID */ 78,
+      /* bool aIsPrivateBrowsing */ false,
+      /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr,
+      /* mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aSource =
+         nullptr */
+      nullptr,
+      /* const mozilla::Maybe<nsDependentCString>& aContentType =
+         mozilla::Nothing() */
+      Some(nsDependentCString("text/html")),
+      /* nsIURI* aRedirectURI = nullptr */ nullptr,
+      /* uint32_t aRedirectFlags = 0 */ 0);
+
+  nsCOMPtr<nsIURI> redirectURI;
+  ASSERT_TRUE(NS_SUCCEEDED(
+      NS_NewURI(getter_AddRefs(redirectURI), "http://example.com/"_ns)));
+  profiler_add_network_marker(
+      /* nsIURI* aURI */ uri,
+      /* const nsACString& aRequestMethod */ "GET"_ns,
+      /* int32_t aPriority */ 34,
+      /* uint64_t aChannelId */ 3,
+      /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT,
+      /* mozilla::TimeStamp aStart */ ts1,
+      /* mozilla::TimeStamp aEnd */ ts2,
+      /* int64_t aCount */ 56,
+      /* mozilla::net::CacheDisposition aCacheDisposition */
+      net::kCacheUnresolved,
+      /* uint64_t aInnerWindowID */ 78,
+      /* bool aIsPrivateBrowsing */ false,
+      /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr,
+      /* mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aSource =
+         nullptr */
+      nullptr,
+      /* const mozilla::Maybe<nsDependentCString>& aContentType =
+         mozilla::Nothing() */
+      mozilla::Nothing(),
+      /* nsIURI* aRedirectURI = nullptr */ redirectURI,
+      /* uint32_t aRedirectFlags = 0 */
+      nsIChannelEventSink::REDIRECT_TEMPORARY,
+      /* uint64_t aRedirectChannelId = 0 */ 103);
+
+  profiler_add_network_marker(
+      /* nsIURI* aURI */ uri,
+      /* const nsACString& aRequestMethod */ "GET"_ns,
+      /* int32_t aPriority */ 34,
+      /* uint64_t aChannelId */ 4,
+      /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT,
+      /* mozilla::TimeStamp aStart */ ts1,
+      /* mozilla::TimeStamp aEnd */ ts2,
+      /* int64_t aCount */ 56,
+      /* mozilla::net::CacheDisposition aCacheDisposition */
+      net::kCacheUnresolved,
+      /* uint64_t aInnerWindowID */ 78,
+      /* bool aIsPrivateBrowsing */ false,
+      /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr,
+      /* mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aSource =
+         nullptr */
+      nullptr,
+      /* const mozilla::Maybe<nsDependentCString>& aContentType =
+         mozilla::Nothing() */
+      mozilla::Nothing(),
+      /* nsIURI* aRedirectURI = nullptr */ redirectURI,
+      /* uint32_t aRedirectFlags = 0 */
+      nsIChannelEventSink::REDIRECT_PERMANENT,
+      /* uint64_t aRedirectChannelId = 0 */ 104);
+
+  profiler_add_network_marker(
+      /* nsIURI* aURI */ uri,
+      /* const nsACString& aRequestMethod */ "GET"_ns,
+      /* int32_t aPriority */ 34,
+      /* uint64_t aChannelId */ 5,
+      /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT,
+      /* mozilla::TimeStamp aStart */ ts1,
+      /* mozilla::TimeStamp aEnd */ ts2,
+      /* int64_t aCount */ 56,
+      /* mozilla::net::CacheDisposition aCacheDisposition */
+      net::kCacheUnresolved,
+      /* uint64_t aInnerWindowID */ 78,
+      /* bool aIsPrivateBrowsing */ false,
+      /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr,
+      /* mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aSource =
+         nullptr */
+      nullptr,
+      /* const mozilla::Maybe<nsDependentCString>& aContentType =
+         mozilla::Nothing() */
+      mozilla::Nothing(),
+      /* nsIURI* aRedirectURI = nullptr */ redirectURI,
+      /* uint32_t aRedirectFlags = 0 */ nsIChannelEventSink::REDIRECT_INTERNAL,
+      /* uint64_t aRedirectChannelId = 0 */ 105);
+
+  profiler_add_network_marker(
+      /* nsIURI* aURI */ uri,
+      /* const nsACString& aRequestMethod */ "GET"_ns,
+      /* int32_t aPriority */ 34,
+      /* uint64_t aChannelId */ 6,
+      /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_REDIRECT,
+      /* mozilla::TimeStamp aStart */ ts1,
+      /* mozilla::TimeStamp aEnd */ ts2,
+      /* int64_t aCount */ 56,
+      /* mozilla::net::CacheDisposition aCacheDisposition */
+      net::kCacheUnresolved,
+      /* uint64_t aInnerWindowID */ 78,
+      /* bool aIsPrivateBrowsing */ false,
+      /* const mozilla::net::TimingStruct* aTimings = nullptr */ nullptr,
+      /* mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aSource =
+         nullptr */
+      nullptr,
+      /* const mozilla::Maybe<nsDependentCString>& aContentType =
+         mozilla::Nothing() */
+      mozilla::Nothing(),
+      /* nsIURI* aRedirectURI = nullptr */ redirectURI,
+      /* uint32_t aRedirectFlags = 0 */ nsIChannelEventSink::REDIRECT_INTERNAL |
+          nsIChannelEventSink::REDIRECT_STS_UPGRADE,
+      /* uint64_t aRedirectChannelId = 0 */ 106);
+  profiler_add_network_marker(
+      /* nsIURI* aURI */ uri,
+      /* const nsACString& aRequestMethod */ "GET"_ns,
+      /* int32_t aPriority */ 34,
+      /* uint64_t aChannelId */ 7,
+      /* NetworkLoadType aType */ net::NetworkLoadType::LOAD_START,
+      /* mozilla::TimeStamp aStart */ ts1,
+      /* mozilla::TimeStamp aEnd */ ts2,
+      /* int64_t aCount */ 56,
+      /* mozilla::net::CacheDisposition aCacheDisposition */
+      net::kCacheUnresolved,
+      /* uint64_t aInnerWindowID */ 78,
+      /* bool aIsPrivateBrowsing */ true
+      /* const mozilla::net::TimingStruct* aTimings = nullptr */
+      /* mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> aSource =
+         nullptr */
+      /* const mozilla::Maybe<nsDependentCString>& aContentType =
+         mozilla::Nothing() */
+      /* nsIURI* aRedirectURI = nullptr */
+      /* uint64_t aRedirectChannelId = 0 */
+  );
+
+  EXPECT_TRUE(profiler_add_marker(
+      "Text in main thread with stack", geckoprofiler::category::OTHER,
+      {MarkerStack::Capture(), MarkerTiming::Interval(ts1, ts2)},
+      geckoprofiler::markers::TextMarker{}, ""));
+  EXPECT_TRUE(profiler_add_marker(
+      "Text from main thread with stack", geckoprofiler::category::OTHER,
+      MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()),
+      geckoprofiler::markers::TextMarker{}, ""));
+
+  std::thread registeredThread([]() {
+    AUTO_PROFILER_REGISTER_THREAD("Marker test sub-thread");
+    // Marker in non-profiled thread won't be stored.
+    EXPECT_FALSE(profiler_add_marker(
+        "Text in registered thread with stack", geckoprofiler::category::OTHER,
+        MarkerStack::Capture(), geckoprofiler::markers::TextMarker{}, ""));
+    // Marker will be stored in main thread, with stack from registered thread.
+    EXPECT_TRUE(profiler_add_marker(
+        "Text from registered thread with stack",
+        geckoprofiler::category::OTHER,
+        MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()),
+        geckoprofiler::markers::TextMarker{}, ""));
+  });
+  registeredThread.join();
+
+  std::thread unregisteredThread([]() {
+    // Marker in unregistered thread won't be stored.
+    EXPECT_FALSE(profiler_add_marker("Text in unregistered thread with stack",
+                                     geckoprofiler::category::OTHER,
+                                     MarkerStack::Capture(),
+                                     geckoprofiler::markers::TextMarker{}, ""));
+    // Marker will be stored in main thread, but stack cannot be captured in an
+    // unregistered thread.
+    EXPECT_TRUE(profiler_add_marker(
+        "Text from unregistered thread with stack",
+        geckoprofiler::category::OTHER,
+        MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()),
+        geckoprofiler::markers::TextMarker{}, ""));
+  });
+  unregisteredThread.join();
+
+  EXPECT_TRUE(profiler_add_marker("Tracing", geckoprofiler::category::OTHER, {},
+                                  geckoprofiler::markers::Tracing{},
+                                  "category"));
+
+  EXPECT_TRUE(profiler_add_marker("Text", geckoprofiler::category::OTHER, {},
+                                  geckoprofiler::markers::TextMarker{},
+                                  "Text text"));
+
+  // Ensure that we evaluate to false for markers with very long texts by
+  // testing against a ~3 MiB string. A string of this size should exceed the
+  // available buffer chunks (max: 2) and be discarded.
+  EXPECT_FALSE(profiler_add_marker("Text", geckoprofiler::category::OTHER, {},
+                                   geckoprofiler::markers::TextMarker{},
+                                   std::string(3 * 1024 * 1024, 'x')));
+
+  EXPECT_TRUE(profiler_add_marker(
+      "MediaSample", geckoprofiler::category::OTHER, {},
+      geckoprofiler::markers::MediaSampleMarker{}, 123, 456, 789));
+
+  SpliceableChunkedJSONWriter w{FailureLatchInfallibleSource::Singleton()};
+  w.Start();
+  EXPECT_TRUE(::profiler_stream_json_for_this_process(w).isOk());
+  w.End();
+
+  EXPECT_FALSE(w.Failed());
+
+  UniquePtr<char[]> profile = w.ChunkedWriteFunc().CopyData();
+  ASSERT_TRUE(!!profile.get());
+
+  // Expected markers, in order.
+  enum State {
+    S_tracing_event,
+    S_tracing_start,
+    S_tracing_end,
+    S_tracing_event_with_stack,
+    S_tracing_auto_tracing_start,
+    S_tracing_auto_tracing_end,
+    S_M1,
+    S_M3,
+    S_Markers2DefaultEmptyOptions,
+    S_Markers2DefaultWithOptions,
+    S_Markers2ExplicitDefaultEmptyOptions,
+    S_Markers2ExplicitDefaultWithOptions,
+    S_FirstMarker,
+    S_CustomMarker,
+    S_SpecialMarker,
+    S_NetworkMarkerPayload_start,
+    S_NetworkMarkerPayload_stop,
+    S_NetworkMarkerPayload_redirect_temporary,
+    S_NetworkMarkerPayload_redirect_permanent,
+    S_NetworkMarkerPayload_redirect_internal,
+    S_NetworkMarkerPayload_redirect_internal_sts,
+    S_NetworkMarkerPayload_private_browsing,
+
+    S_TextWithStack,
+    S_TextToMTWithStack,
+    S_RegThread_TextToMTWithStack,
+    S_UnregThread_TextToMTWithStack,
+
+    S_LAST,
+  } state = State(0);
+
+  // These will be set when first read from S_FirstMarker, then
+  // compared in following markers.
+  // TODO: Compute these values from the timestamps.
+  double ts1Double = 0.0;
+  double ts2Double = 0.0;
+
+  JSONOutputCheck(profile.get(), [&](const Json::Value& root) {
+    {
+      GET_JSON(threads, root["threads"], Array);
+      ASSERT_EQ(threads.size(), 1u);
+
+      {
+        GET_JSON(thread0, threads[0], Object);
+
+        // Keep a reference to the string table in this block, it will be used
+        // below.
+        GET_JSON(stringTable, thread0["stringTable"], Array);
+        ASSERT_TRUE(stringTable.isArray());
+
+        // Test the expected labels in the string table.
+        bool foundEmpty = false;
+        bool foundOkstr1 = false;
+        bool foundOkstr2 = false;
+        const std::string okstr2Label = std::string("okstr2 ") + okstr2.get();
+        bool foundTooLong = false;
+        for (const auto& s : stringTable) {
+          ASSERT_TRUE(s.isString());
+          std::string sString = s.asString();
+          if (sString.empty()) {
+            EXPECT_FALSE(foundEmpty);
+            foundEmpty = true;
+          } else if (sString == okstr1.get()) {
+            EXPECT_FALSE(foundOkstr1);
+            foundOkstr1 = true;
+          } else if (sString == okstr2Label) {
+            EXPECT_FALSE(foundOkstr2);
+            foundOkstr2 = true;
+          } else if (sString == longstrCut.get()) {
+            EXPECT_FALSE(foundTooLong);
+            foundTooLong = true;
+          } else {
+            EXPECT_NE(sString, longstr.get());
+          }
+        }
+        EXPECT_TRUE(foundEmpty);
+        EXPECT_TRUE(foundOkstr1);
+        EXPECT_TRUE(foundOkstr2);
+        EXPECT_TRUE(foundTooLong);
+
+        {
+          GET_JSON(markers, thread0["markers"], Object);
+
+          {
+            GET_JSON(data, markers["data"], Array);
+
+            for (const Json::Value& marker : data) {
+              // Name the indexes into the marker tuple:
+              // [name, startTime, endTime, phase, category, payload]
+              const unsigned int NAME = 0u;
+              const unsigned int START_TIME = 1u;
+              const unsigned int END_TIME = 2u;
+              const unsigned int PHASE = 3u;
+              const unsigned int CATEGORY = 4u;
+              const unsigned int PAYLOAD = 5u;
+
+              const unsigned int PHASE_INSTANT = 0;
+              const unsigned int PHASE_INTERVAL = 1;
+              const unsigned int PHASE_START = 2;
+              const unsigned int PHASE_END = 3;
+
+              const unsigned int SIZE_WITHOUT_PAYLOAD = 5u;
+              const unsigned int SIZE_WITH_PAYLOAD = 6u;
+
+              ASSERT_TRUE(marker.isArray());
+              // The payload is optional.
+              ASSERT_GE(marker.size(), SIZE_WITHOUT_PAYLOAD);
+              ASSERT_LE(marker.size(), SIZE_WITH_PAYLOAD);
+
+              // root.threads[0].markers.data[i] is an array with 5 or 6
+              // elements.
+
+              ASSERT_TRUE(marker[NAME].isUInt());  // name id
+              GET_JSON(name, stringTable[marker[NAME].asUInt()], String);
+              std::string nameString = name.asString();
+
+              EXPECT_TRUE(marker[START_TIME].isNumeric());
+              EXPECT_TRUE(marker[END_TIME].isNumeric());
+              EXPECT_TRUE(marker[PHASE].isUInt());
+              EXPECT_TRUE(marker[PHASE].asUInt() < 4);
+              EXPECT_TRUE(marker[CATEGORY].isUInt());
+
+#  define EXPECT_TIMING_INSTANT                  \
+    EXPECT_NE(marker[START_TIME].asDouble(), 0); \
+    EXPECT_EQ(marker[END_TIME].asDouble(), 0);   \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INSTANT);
+#  define EXPECT_TIMING_INTERVAL                 \
+    EXPECT_NE(marker[START_TIME].asDouble(), 0); \
+    EXPECT_NE(marker[END_TIME].asDouble(), 0);   \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INTERVAL);
+#  define EXPECT_TIMING_START                    \
+    EXPECT_NE(marker[START_TIME].asDouble(), 0); \
+    EXPECT_EQ(marker[END_TIME].asDouble(), 0);   \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_START);
+#  define EXPECT_TIMING_END                      \
+    EXPECT_EQ(marker[START_TIME].asDouble(), 0); \
+    EXPECT_NE(marker[END_TIME].asDouble(), 0);   \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_END);
+
+#  define EXPECT_TIMING_INSTANT_AT(t)            \
+    EXPECT_EQ(marker[START_TIME].asDouble(), t); \
+    EXPECT_EQ(marker[END_TIME].asDouble(), 0);   \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INSTANT);
+#  define EXPECT_TIMING_INTERVAL_AT(start, end)      \
+    EXPECT_EQ(marker[START_TIME].asDouble(), start); \
+    EXPECT_EQ(marker[END_TIME].asDouble(), end);     \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_INTERVAL);
+#  define EXPECT_TIMING_START_AT(start)              \
+    EXPECT_EQ(marker[START_TIME].asDouble(), start); \
+    EXPECT_EQ(marker[END_TIME].asDouble(), 0);       \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_START);
+#  define EXPECT_TIMING_END_AT(end)              \
+    EXPECT_EQ(marker[START_TIME].asDouble(), 0); \
+    EXPECT_EQ(marker[END_TIME].asDouble(), end); \
+    EXPECT_EQ(marker[PHASE].asUInt(), PHASE_END);
+
+              if (marker.size() == SIZE_WITHOUT_PAYLOAD) {
+                // root.threads[0].markers.data[i] is an array with 5 elements,
+                // so there is no payload.
+                if (nameString == "M1") {
+                  ASSERT_EQ(state, S_M1);
+                  state = State(state + 1);
+                } else if (nameString == "M3") {
+                  ASSERT_EQ(state, S_M3);
+                  state = State(state + 1);
+                } else if (nameString ==
+                           "default-templated markers 2.0 with empty options") {
+                  EXPECT_EQ(state, S_Markers2DefaultEmptyOptions);
+                  state = State(S_Markers2DefaultEmptyOptions + 1);
+// TODO: Re-enable this when bug 1646714 lands, and check for stack.
+#  if 0
+              } else if (nameString ==
+                         "default-templated markers 2.0 with option") {
+                EXPECT_EQ(state, S_Markers2DefaultWithOptions);
+                state = State(S_Markers2DefaultWithOptions + 1);
+#  endif
+                } else if (nameString ==
+                           "explicitly-default-templated markers 2.0 with "
+                           "empty "
+                           "options") {
+                  EXPECT_EQ(state, S_Markers2ExplicitDefaultEmptyOptions);
+                  state = State(S_Markers2ExplicitDefaultEmptyOptions + 1);
+                } else if (nameString ==
+                           "explicitly-default-templated markers 2.0 with "
+                           "option") {
+                  EXPECT_EQ(state, S_Markers2ExplicitDefaultWithOptions);
+                  state = State(S_Markers2ExplicitDefaultWithOptions + 1);
+                }
+              } else {
+                // root.threads[0].markers.data[i] is an array with 6 elements,
+                // so there is a payload.
+                GET_JSON(payload, marker[PAYLOAD], Object);
+
+                // root.threads[0].markers.data[i][PAYLOAD] is an object
+                // (payload).
+
+                // It should at least have a "type" string.
+                GET_JSON(type, payload["type"], String);
+                std::string typeString = type.asString();
+
+                if (nameString == "tracing event") {
+                  EXPECT_EQ(state, S_tracing_event);
+                  state = State(S_tracing_event + 1);
+                  EXPECT_EQ(typeString, "tracing");
+                  EXPECT_TIMING_INSTANT;
+                  EXPECT_EQ_JSON(payload["category"], String, "A");
+                  EXPECT_TRUE(payload["stack"].isNull());
+
+                } else if (nameString == "tracing start") {
+                  EXPECT_EQ(state, S_tracing_start);
+                  state = State(S_tracing_start + 1);
+                  EXPECT_EQ(typeString, "tracing");
+                  EXPECT_TIMING_START;
+                  EXPECT_EQ_JSON(payload["category"], String, "A");
+                  EXPECT_TRUE(payload["stack"].isNull());
+
+                } else if (nameString == "tracing end") {
+                  EXPECT_EQ(state, S_tracing_end);
+                  state = State(S_tracing_end + 1);
+                  EXPECT_EQ(typeString, "tracing");
+                  EXPECT_TIMING_END;
+                  EXPECT_EQ_JSON(payload["category"], String, "A");
+                  EXPECT_TRUE(payload["stack"].isNull());
+
+                } else if (nameString == "tracing event with stack") {
+                  EXPECT_EQ(state, S_tracing_event_with_stack);
+                  state = State(S_tracing_event_with_stack + 1);
+                  EXPECT_EQ(typeString, "tracing");
+                  EXPECT_TIMING_INSTANT;
+                  EXPECT_EQ_JSON(payload["category"], String, "B");
+                  EXPECT_TRUE(payload["stack"].isObject());
+
+                } else if (nameString == "auto tracing") {
+                  switch (state) {
+                    case S_tracing_auto_tracing_start:
+                      state = State(S_tracing_auto_tracing_start + 1);
+                      EXPECT_EQ(typeString, "tracing");
+                      EXPECT_TIMING_START;
+                      EXPECT_EQ_JSON(payload["category"], String, "C");
+                      EXPECT_TRUE(payload["stack"].isNull());
+                      break;
+                    case S_tracing_auto_tracing_end:
+                      state = State(S_tracing_auto_tracing_end + 1);
+                      EXPECT_EQ(typeString, "tracing");
+                      EXPECT_TIMING_END;
+                      EXPECT_EQ_JSON(payload["category"], String, "C");
+                      ASSERT_TRUE(payload["stack"].isNull());
+                      break;
+                    default:
+                      EXPECT_TRUE(state == S_tracing_auto_tracing_start ||
+                                  state == S_tracing_auto_tracing_end);
+                      break;
+                  }
+
+                } else if (nameString ==
+                           "default-templated markers 2.0 with option") {
+                  // TODO: Remove this when bug 1646714 lands.
+                  EXPECT_EQ(state, S_Markers2DefaultWithOptions);
+                  state = State(S_Markers2DefaultWithOptions + 1);
+                  EXPECT_EQ(typeString, "NoPayloadUserData");
+                  EXPECT_FALSE(payload["stack"].isNull());
+
+                } else if (nameString == "FirstMarker") {
+                  // Record start and end times, to compare with timestamps in
+                  // following markers.
+                  EXPECT_EQ(state, S_FirstMarker);
+                  ts1Double = marker[START_TIME].asDouble();
+                  ts2Double = marker[END_TIME].asDouble();
+                  state = State(S_FirstMarker + 1);
+                  EXPECT_EQ(typeString, "Text");
+                  EXPECT_EQ_JSON(payload["name"], String, "First Marker");
+
+                } else if (nameString == "Gtest custom marker") {
+                  EXPECT_EQ(state, S_CustomMarker);
+                  state = State(S_CustomMarker + 1);
+                  EXPECT_EQ(typeString, "markers-gtest");
+                  EXPECT_EQ(payload.size(), 1u + 9u);
+                  EXPECT_TRUE(payload["null"].isNull());
+                  EXPECT_EQ_JSON(payload["bool-false"], Bool, false);
+                  EXPECT_EQ_JSON(payload["bool-true"], Bool, true);
+                  EXPECT_EQ_JSON(payload["int"], Int64, 42);
+                  EXPECT_EQ_JSON(payload["double"], Double, 43.0);
+                  EXPECT_EQ_JSON(payload["text"], String, "gtest text");
+                  // Unique strings can be fetched from the string table.
+                  ASSERT_TRUE(payload["unique text"].isUInt());
+                  auto textIndex = payload["unique text"].asUInt();
+                  GET_JSON(uniqueText, stringTable[textIndex], String);
+                  ASSERT_TRUE(uniqueText.isString());
+                  ASSERT_EQ(uniqueText.asString(), "gtest unique text");
+                  // The duplicate unique text should have the exact same index.
+                  EXPECT_EQ_JSON(payload["unique text again"], UInt, textIndex);
+                  EXPECT_EQ_JSON(payload["time"], Double, ts1Double);
+
+                } else if (nameString == "Gtest special marker") {
+                  EXPECT_EQ(state, S_SpecialMarker);
+                  state = State(S_SpecialMarker + 1);
+                  EXPECT_EQ(typeString, "markers-gtest-special");
+                  EXPECT_EQ(payload.size(), 1u) << "Only 'type' in the payload";
+
+                } else if (nameString == "Load 1: http://mozilla.org/") {
+                  EXPECT_EQ(state, S_NetworkMarkerPayload_start);
+                  state = State(S_NetworkMarkerPayload_start + 1);
+                  EXPECT_EQ(typeString, "Network");
+                  EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double);
+                  EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["id"], Int64, 1);
+                  EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/");
+                  EXPECT_EQ_JSON(payload["requestMethod"], String, "GET");
+                  EXPECT_EQ_JSON(payload["pri"], Int64, 34);
+                  EXPECT_EQ_JSON(payload["count"], Int64, 56);
+                  EXPECT_EQ_JSON(payload["cache"], String, "Hit");
+                  EXPECT_TRUE(payload["isPrivateBrowsing"].isNull());
+                  EXPECT_TRUE(payload["RedirectURI"].isNull());
+                  EXPECT_TRUE(payload["redirectType"].isNull());
+                  EXPECT_TRUE(payload["isHttpToHttpsRedirect"].isNull());
+                  EXPECT_TRUE(payload["redirectId"].isNull());
+                  EXPECT_TRUE(payload["contentType"].isNull());
+
+                } else if (nameString == "Load 2: http://mozilla.org/") {
+                  EXPECT_EQ(state, S_NetworkMarkerPayload_stop);
+                  state = State(S_NetworkMarkerPayload_stop + 1);
+                  EXPECT_EQ(typeString, "Network");
+                  EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double);
+                  EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["id"], Int64, 2);
+                  EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/");
+                  EXPECT_EQ_JSON(payload["requestMethod"], String, "GET");
+                  EXPECT_EQ_JSON(payload["pri"], Int64, 34);
+                  EXPECT_EQ_JSON(payload["count"], Int64, 56);
+                  EXPECT_EQ_JSON(payload["cache"], String, "Unresolved");
+                  EXPECT_TRUE(payload["isPrivateBrowsing"].isNull());
+                  EXPECT_TRUE(payload["RedirectURI"].isNull());
+                  EXPECT_TRUE(payload["redirectType"].isNull());
+                  EXPECT_TRUE(payload["isHttpToHttpsRedirect"].isNull());
+                  EXPECT_TRUE(payload["redirectId"].isNull());
+                  EXPECT_EQ_JSON(payload["contentType"], String, "text/html");
+
+                } else if (nameString == "Load 3: http://mozilla.org/") {
+                  EXPECT_EQ(state, S_NetworkMarkerPayload_redirect_temporary);
+                  state = State(S_NetworkMarkerPayload_redirect_temporary + 1);
+                  EXPECT_EQ(typeString, "Network");
+                  EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double);
+                  EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["id"], Int64, 3);
+                  EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/");
+                  EXPECT_EQ_JSON(payload["requestMethod"], String, "GET");
+                  EXPECT_EQ_JSON(payload["pri"], Int64, 34);
+                  EXPECT_EQ_JSON(payload["count"], Int64, 56);
+                  EXPECT_EQ_JSON(payload["cache"], String, "Unresolved");
+                  EXPECT_TRUE(payload["isPrivateBrowsing"].isNull());
+                  EXPECT_EQ_JSON(payload["RedirectURI"], String,
+                                 "http://example.com/");
+                  EXPECT_EQ_JSON(payload["redirectType"], String, "Temporary");
+                  EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, false);
+                  EXPECT_EQ_JSON(payload["redirectId"], Int64, 103);
+                  EXPECT_TRUE(payload["contentType"].isNull());
+
+                } else if (nameString == "Load 4: http://mozilla.org/") {
+                  EXPECT_EQ(state, S_NetworkMarkerPayload_redirect_permanent);
+                  state = State(S_NetworkMarkerPayload_redirect_permanent + 1);
+                  EXPECT_EQ(typeString, "Network");
+                  EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double);
+                  EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["id"], Int64, 4);
+                  EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/");
+                  EXPECT_EQ_JSON(payload["requestMethod"], String, "GET");
+                  EXPECT_EQ_JSON(payload["pri"], Int64, 34);
+                  EXPECT_EQ_JSON(payload["count"], Int64, 56);
+                  EXPECT_EQ_JSON(payload["cache"], String, "Unresolved");
+                  EXPECT_TRUE(payload["isPrivateBrowsing"].isNull());
+                  EXPECT_EQ_JSON(payload["RedirectURI"], String,
+                                 "http://example.com/");
+                  EXPECT_EQ_JSON(payload["redirectType"], String, "Permanent");
+                  EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, false);
+                  EXPECT_EQ_JSON(payload["redirectId"], Int64, 104);
+                  EXPECT_TRUE(payload["contentType"].isNull());
+
+                } else if (nameString == "Load 5: http://mozilla.org/") {
+                  EXPECT_EQ(state, S_NetworkMarkerPayload_redirect_internal);
+                  state = State(S_NetworkMarkerPayload_redirect_internal + 1);
+                  EXPECT_EQ(typeString, "Network");
+                  EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double);
+                  EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["id"], Int64, 5);
+                  EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/");
+                  EXPECT_EQ_JSON(payload["requestMethod"], String, "GET");
+                  EXPECT_EQ_JSON(payload["pri"], Int64, 34);
+                  EXPECT_EQ_JSON(payload["count"], Int64, 56);
+                  EXPECT_EQ_JSON(payload["cache"], String, "Unresolved");
+                  EXPECT_TRUE(payload["isPrivateBrowsing"].isNull());
+                  EXPECT_EQ_JSON(payload["RedirectURI"], String,
+                                 "http://example.com/");
+                  EXPECT_EQ_JSON(payload["redirectType"], String, "Internal");
+                  EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, false);
+                  EXPECT_EQ_JSON(payload["redirectId"], Int64, 105);
+                  EXPECT_TRUE(payload["contentType"].isNull());
+
+                } else if (nameString == "Load 6: http://mozilla.org/") {
+                  EXPECT_EQ(state,
+                            S_NetworkMarkerPayload_redirect_internal_sts);
+                  state =
+                      State(S_NetworkMarkerPayload_redirect_internal_sts + 1);
+                  EXPECT_EQ(typeString, "Network");
+                  EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double);
+                  EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["id"], Int64, 6);
+                  EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/");
+                  EXPECT_EQ_JSON(payload["requestMethod"], String, "GET");
+                  EXPECT_EQ_JSON(payload["pri"], Int64, 34);
+                  EXPECT_EQ_JSON(payload["count"], Int64, 56);
+                  EXPECT_EQ_JSON(payload["cache"], String, "Unresolved");
+                  EXPECT_TRUE(payload["isPrivateBrowsing"].isNull());
+                  EXPECT_EQ_JSON(payload["RedirectURI"], String,
+                                 "http://example.com/");
+                  EXPECT_EQ_JSON(payload["redirectType"], String, "Internal");
+                  EXPECT_EQ_JSON(payload["isHttpToHttpsRedirect"], Bool, true);
+                  EXPECT_EQ_JSON(payload["redirectId"], Int64, 106);
+                  EXPECT_TRUE(payload["contentType"].isNull());
+
+                } else if (nameString == "Load 7: http://mozilla.org/") {
+                  EXPECT_EQ(state, S_NetworkMarkerPayload_private_browsing);
+                  state = State(S_NetworkMarkerPayload_private_browsing + 1);
+                  EXPECT_EQ(typeString, "Network");
+                  EXPECT_EQ_JSON(payload["startTime"], Double, ts1Double);
+                  EXPECT_EQ_JSON(payload["endTime"], Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["id"], Int64, 7);
+                  EXPECT_EQ_JSON(payload["URI"], String, "http://mozilla.org/");
+                  EXPECT_EQ_JSON(payload["requestMethod"], String, "GET");
+                  EXPECT_EQ_JSON(payload["pri"], Int64, 34);
+                  EXPECT_EQ_JSON(payload["count"], Int64, 56);
+                  EXPECT_EQ_JSON(payload["cache"], String, "Unresolved");
+                  EXPECT_EQ_JSON(payload["isPrivateBrowsing"], Bool, true);
+                  EXPECT_TRUE(payload["RedirectURI"].isNull());
+                  EXPECT_TRUE(payload["redirectType"].isNull());
+                  EXPECT_TRUE(payload["isHttpToHttpsRedirect"].isNull());
+                  EXPECT_TRUE(payload["redirectId"].isNull());
+                  EXPECT_TRUE(payload["contentType"].isNull());
+                } else if (nameString == "Text in main thread with stack") {
+                  EXPECT_EQ(state, S_TextWithStack);
+                  state = State(S_TextWithStack + 1);
+                  EXPECT_EQ(typeString, "Text");
+                  EXPECT_FALSE(payload["stack"].isNull());
+                  EXPECT_TIMING_INTERVAL_AT(ts1Double, ts2Double);
+                  EXPECT_EQ_JSON(payload["name"], String, "");
+
+                } else if (nameString == "Text from main thread with stack") {
+                  EXPECT_EQ(state, S_TextToMTWithStack);
+                  state = State(S_TextToMTWithStack + 1);
+                  EXPECT_EQ(typeString, "Text");
+                  EXPECT_FALSE(payload["stack"].isNull());
+                  EXPECT_EQ_JSON(payload["name"], String, "");
+
+                } else if (nameString ==
+                           "Text in registered thread with stack") {
+                  ADD_FAILURE()
+                      << "Unexpected 'Text in registered thread with stack'";
+
+                } else if (nameString ==
+                           "Text from registered thread with stack") {
+                  EXPECT_EQ(state, S_RegThread_TextToMTWithStack);
+                  state = State(S_RegThread_TextToMTWithStack + 1);
+                  EXPECT_EQ(typeString, "Text");
+                  EXPECT_FALSE(payload["stack"].isNull());
+                  EXPECT_EQ_JSON(payload["name"], String, "");
+
+                } else if (nameString ==
+                           "Text in unregistered thread with stack") {
+                  ADD_FAILURE()
+                      << "Unexpected 'Text in unregistered thread with stack'";
+
+                } else if (nameString ==
+                           "Text from unregistered thread with stack") {
+                  EXPECT_EQ(state, S_UnregThread_TextToMTWithStack);
+                  state = State(S_UnregThread_TextToMTWithStack + 1);
+                  EXPECT_EQ(typeString, "Text");
+                  EXPECT_TRUE(payload["stack"].isNull());
+                  EXPECT_EQ_JSON(payload["name"], String, "");
+                }
+              }  // marker with payload
+            }    // for (marker : data)
+          }      // markers.data
+        }        // markers
+      }          // thread0
+    }            // threads
+    // We should have read all expected markers.
+    EXPECT_EQ(state, S_LAST);
+
+    {
+      GET_JSON(meta, root["meta"], Object);
+
+      {
+        GET_JSON(markerSchema, meta["markerSchema"], Array);
+
+        std::set<std::string> testedSchemaNames;
+
+        for (const Json::Value& schema : markerSchema) {
+          GET_JSON(name, schema["name"], String);
+          const std::string nameString = name.asString();
+
+          GET_JSON(display, schema["display"], Array);
+
+          GET_JSON(data, schema["data"], Array);
+
+          EXPECT_TRUE(
+              testedSchemaNames
+                  .insert(std::string(nameString.data(), nameString.size()))
+                  .second)
+              << "Each schema name should be unique (inserted once in the set)";
+
+          if (nameString == "Text") {
+            EXPECT_EQ(display.size(), 2u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 1u);
+
+            ASSERT_TRUE(data[0u].isObject());
+            EXPECT_EQ_JSON(data[0u]["key"], String, "name");
+            EXPECT_EQ_JSON(data[0u]["label"], String, "Details");
+            EXPECT_EQ_JSON(data[0u]["format"], String, "string");
+
+          } else if (nameString == "NoPayloadUserData") {
+            // TODO: Remove this when bug 1646714 lands.
+            EXPECT_EQ(display.size(), 2u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 0u);
+
+          } else if (nameString == "FileIO") {
+            // These are defined in ProfilerIOInterposeObserver.cpp
+
+          } else if (nameString == "tracing") {
+            EXPECT_EQ(display.size(), 3u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+            EXPECT_EQ(display[2u].asString(), "timeline-overview");
+
+            ASSERT_EQ(data.size(), 1u);
+
+            ASSERT_TRUE(data[0u].isObject());
+            EXPECT_EQ_JSON(data[0u]["key"], String, "category");
+            EXPECT_EQ_JSON(data[0u]["label"], String, "Type");
+            EXPECT_EQ_JSON(data[0u]["format"], String, "string");
+
+          } else if (nameString == "BHR-detected hang") {
+            EXPECT_EQ(display.size(), 2u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 0u);
+
+          } else if (nameString == "MainThreadLongTask") {
+            EXPECT_EQ(display.size(), 2u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 1u);
+
+            ASSERT_TRUE(data[0u].isObject());
+            EXPECT_EQ_JSON(data[0u]["key"], String, "category");
+            EXPECT_EQ_JSON(data[0u]["label"], String, "Type");
+            EXPECT_EQ_JSON(data[0u]["format"], String, "string");
+
+          } else if (nameString == "Log") {
+            EXPECT_EQ(display.size(), 1u);
+            EXPECT_EQ(display[0u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 2u);
+
+            ASSERT_TRUE(data[0u].isObject());
+            EXPECT_EQ_JSON(data[0u]["key"], String, "module");
+            EXPECT_EQ_JSON(data[0u]["label"], String, "Module");
+            EXPECT_EQ_JSON(data[0u]["format"], String, "string");
+
+            ASSERT_TRUE(data[1u].isObject());
+            EXPECT_EQ_JSON(data[1u]["key"], String, "name");
+            EXPECT_EQ_JSON(data[1u]["label"], String, "Name");
+            EXPECT_EQ_JSON(data[1u]["format"], String, "string");
+
+          } else if (nameString == "MediaSample") {
+            EXPECT_EQ(display.size(), 2u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 3u);
+
+            ASSERT_TRUE(data[0u].isObject());
+            EXPECT_EQ_JSON(data[0u]["key"], String, "sampleStartTimeUs");
+            EXPECT_EQ_JSON(data[0u]["label"], String, "Sample start time");
+            EXPECT_EQ_JSON(data[0u]["format"], String, "microseconds");
+
+            ASSERT_TRUE(data[1u].isObject());
+            EXPECT_EQ_JSON(data[1u]["key"], String, "sampleEndTimeUs");
+            EXPECT_EQ_JSON(data[1u]["label"], String, "Sample end time");
+            EXPECT_EQ_JSON(data[1u]["format"], String, "microseconds");
+
+            ASSERT_TRUE(data[2u].isObject());
+            EXPECT_EQ_JSON(data[2u]["key"], String, "queueLength");
+            EXPECT_EQ_JSON(data[2u]["label"], String, "Queue length");
+            EXPECT_EQ_JSON(data[2u]["format"], String, "integer");
+
+          } else if (nameString == "VideoFallingBehind") {
+            EXPECT_EQ(display.size(), 2u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 2u);
+
+            ASSERT_TRUE(data[0u].isObject());
+            EXPECT_EQ_JSON(data[0u]["key"], String, "videoFrameStartTimeUs");
+            EXPECT_EQ_JSON(data[0u]["label"], String, "Video frame start time");
+            EXPECT_EQ_JSON(data[0u]["format"], String, "microseconds");
+
+            ASSERT_TRUE(data[1u].isObject());
+            EXPECT_EQ_JSON(data[1u]["key"], String, "mediaCurrentTimeUs");
+            EXPECT_EQ_JSON(data[1u]["label"], String, "Media current time");
+            EXPECT_EQ_JSON(data[1u]["format"], String, "microseconds");
+
+          } else if (nameString == "Budget") {
+            EXPECT_EQ(display.size(), 2u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+
+            ASSERT_EQ(data.size(), 0u);
+
+          } else if (nameString == "markers-gtest") {
+            EXPECT_EQ(display.size(), 7u);
+            EXPECT_EQ(display[0u].asString(), "marker-chart");
+            EXPECT_EQ(display[1u].asString(), "marker-table");
+            EXPECT_EQ(display[2u].asString(), "timeline-overview");
+            EXPECT_EQ(display[3u].asString(), "timeline-memory");
+            EXPECT_EQ(display[4u].asString(), "timeline-ipc");
+            EXPECT_EQ(display[5u].asString(), "timeline-fileio");
+            EXPECT_EQ(display[6u].asString(), "stack-chart");
+
+            EXPECT_EQ_JSON(schema["chartLabel"], String, "chart label");
+            EXPECT_EQ_JSON(schema["tooltipLabel"], String, "tooltip label");
+            EXPECT_EQ_JSON(schema["tableLabel"], String, "table label");
+
+            ASSERT_EQ(data.size(), 14u);
+
+            ASSERT_TRUE(data[0u].isObject());
+            EXPECT_EQ_JSON(data[0u]["key"], String, "key with url");
+            EXPECT_TRUE(data[0u]["label"].isNull());
+            EXPECT_EQ_JSON(data[0u]["format"], String, "url");
+            EXPECT_TRUE(data[0u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[1u].isObject());
+            EXPECT_EQ_JSON(data[1u]["key"], String, "key with label filePath");
+            EXPECT_EQ_JSON(data[1u]["label"], String, "label filePath");
+            EXPECT_EQ_JSON(data[1u]["format"], String, "file-path");
+            EXPECT_TRUE(data[1u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[2u].isObject());
+            EXPECT_EQ_JSON(data[2u]["key"], String,
+                           "key with string not-searchable");
+            EXPECT_TRUE(data[2u]["label"].isNull());
+            EXPECT_EQ_JSON(data[2u]["format"], String, "string");
+            EXPECT_EQ_JSON(data[2u]["searchable"], Bool, false);
+
+            ASSERT_TRUE(data[3u].isObject());
+            EXPECT_EQ_JSON(data[3u]["key"], String,
+                           "key with label duration searchable");
+            EXPECT_TRUE(data[3u]["label duration"].isNull());
+            EXPECT_EQ_JSON(data[3u]["format"], String, "duration");
+            EXPECT_EQ_JSON(data[3u]["searchable"], Bool, true);
+
+            ASSERT_TRUE(data[4u].isObject());
+            EXPECT_EQ_JSON(data[4u]["key"], String, "key with time");
+            EXPECT_TRUE(data[4u]["label"].isNull());
+            EXPECT_EQ_JSON(data[4u]["format"], String, "time");
+            EXPECT_TRUE(data[4u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[5u].isObject());
+            EXPECT_EQ_JSON(data[5u]["key"], String, "key with seconds");
+            EXPECT_TRUE(data[5u]["label"].isNull());
+            EXPECT_EQ_JSON(data[5u]["format"], String, "seconds");
+            EXPECT_TRUE(data[5u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[6u].isObject());
+            EXPECT_EQ_JSON(data[6u]["key"], String, "key with milliseconds");
+            EXPECT_TRUE(data[6u]["label"].isNull());
+            EXPECT_EQ_JSON(data[6u]["format"], String, "milliseconds");
+            EXPECT_TRUE(data[6u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[7u].isObject());
+            EXPECT_EQ_JSON(data[7u]["key"], String, "key with microseconds");
+            EXPECT_TRUE(data[7u]["label"].isNull());
+            EXPECT_EQ_JSON(data[7u]["format"], String, "microseconds");
+            EXPECT_TRUE(data[7u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[8u].isObject());
+            EXPECT_EQ_JSON(data[8u]["key"], String, "key with nanoseconds");
+            EXPECT_TRUE(data[8u]["label"].isNull());
+            EXPECT_EQ_JSON(data[8u]["format"], String, "nanoseconds");
+            EXPECT_TRUE(data[8u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[9u].isObject());
+            EXPECT_EQ_JSON(data[9u]["key"], String, "key with bytes");
+            EXPECT_TRUE(data[9u]["label"].isNull());
+            EXPECT_EQ_JSON(data[9u]["format"], String, "bytes");
+            EXPECT_TRUE(data[9u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[10u].isObject());
+            EXPECT_EQ_JSON(data[10u]["key"], String, "key with percentage");
+            EXPECT_TRUE(data[10u]["label"].isNull());
+            EXPECT_EQ_JSON(data[10u]["format"], String, "percentage");
+            EXPECT_TRUE(data[10u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[11u].isObject());
+            EXPECT_EQ_JSON(data[11u]["key"], String, "key with integer");
+            EXPECT_TRUE(data[11u]["label"].isNull());
+            EXPECT_EQ_JSON(data[11u]["format"], String, "integer");
+            EXPECT_TRUE(data[11u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[12u].isObject());
+            EXPECT_EQ_JSON(data[12u]["key"], String, "key with decimal");
+            EXPECT_TRUE(data[12u]["label"].isNull());
+            EXPECT_EQ_JSON(data[12u]["format"], String, "decimal");
+            EXPECT_TRUE(data[12u]["searchable"].isNull());
+
+            ASSERT_TRUE(data[13u].isObject());
+            EXPECT_EQ_JSON(data[13u]["label"], String, "static label");
+            EXPECT_EQ_JSON(data[13u]["value"], String, "static value");
+
+          } else if (nameString == "markers-gtest-special") {
+            EXPECT_EQ(display.size(), 0u);
+            ASSERT_EQ(data.size(), 0u);
+
+          } else if (nameString == "markers-gtest-unused") {
+            ADD_FAILURE() << "Schema for GtestUnusedMarker should not be here";
+
+          } else {
+            printf("FYI: Unknown marker schema '%s'\n", nameString.c_str());
+          }
+        }
+
+        // Check that we've got all expected schema.
+        EXPECT_TRUE(testedSchemaNames.find("Text") != testedSchemaNames.end());
+        EXPECT_TRUE(testedSchemaNames.find("tracing") !=
+                    testedSchemaNames.end());
+        EXPECT_TRUE(testedSchemaNames.find("MediaSample") !=
+                    testedSchemaNames.end());
+      }  // markerSchema
+    }    // meta
+  });
+
+  Maybe<ProfilerBufferInfo> info = profiler_get_buffer_info();
+  EXPECT_TRUE(info.isSome());
+  printf("Profiler buffer range: %llu .. %llu (%llu bytes)\n",
+         static_cast<unsigned long long>(info->mRangeStart),
+         static_cast<unsigned long long>(info->mRangeEnd),
+         // sizeof(ProfileBufferEntry) == 9
+         (static_cast<unsigned long long>(info->mRangeEnd) -
+          static_cast<unsigned long long>(info->mRangeStart)) *
+             9);
+  printf("Stats:         min(us) .. mean(us) .. max(us)  [count]\n");
+  printf("- Intervals:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+         info->mIntervalsUs.min, info->mIntervalsUs.sum / info->mIntervalsUs.n,
+         info->mIntervalsUs.max, info->mIntervalsUs.n);
+  printf("- Overheads:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+         info->mOverheadsUs.min, info->mOverheadsUs.sum / info->mOverheadsUs.n,
+         info->mOverheadsUs.max, info->mOverheadsUs.n);
+  printf("  - Locking:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+         info->mLockingsUs.min, info->mLockingsUs.sum / info->mLockingsUs.n,
+         info->mLockingsUs.max, info->mLockingsUs.n);
+  printf("  - Clearning: %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+         info->mCleaningsUs.min, info->mCleaningsUs.sum / info->mCleaningsUs.n,
+         info->mCleaningsUs.max, info->mCleaningsUs.n);
+  printf("  - Counters:  %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+         info->mCountersUs.min, info->mCountersUs.sum / info->mCountersUs.n,
+         info->mCountersUs.max, info->mCountersUs.n);
+  printf("  - Threads:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+         info->mThreadsUs.min, info->mThreadsUs.sum / info->mThreadsUs.n,
+         info->mThreadsUs.max, info->mThreadsUs.n);
+
+  profiler_stop();
+
+  // Try to add markers while the profiler is stopped.
+  PROFILER_MARKER_UNTYPED("marker after profiler_stop", OTHER);
+
+  // Warning: this could be racy
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  // This last marker shouldn't get streamed.
+  SpliceableChunkedJSONWriter w2{FailureLatchInfallibleSource::Singleton()};
+  w2.Start();
+  EXPECT_TRUE(::profiler_stream_json_for_this_process(w2).isOk());
+  w2.End();
+  EXPECT_FALSE(w2.Failed());
+  UniquePtr<char[]> profile2 = w2.ChunkedWriteFunc().CopyData();
+  ASSERT_TRUE(!!profile2.get());
+  EXPECT_TRUE(
+      std::string_view(profile2.get()).find("marker after profiler_stop") ==
+      std::string_view::npos);
+
+  profiler_stop();
+}
+
+#  define COUNTER_NAME "TestCounter"
+#  define COUNTER_DESCRIPTION "Test of counters in profiles"
+#  define COUNTER_NAME2 "Counter2"
+#  define COUNTER_DESCRIPTION2 "Second Test of counters in profiles"
+// Compared with JSON "category"/"description" in the Counters test below.
+PROFILER_DEFINE_COUNT_TOTAL(TestCounter, COUNTER_NAME, COUNTER_DESCRIPTION);
+PROFILER_DEFINE_COUNT_TOTAL(TestCounter2, COUNTER_NAME2, COUNTER_DESCRIPTION2);
+
+TEST(GeckoProfiler, Counters)
+{
+  uint32_t features = 0;  // No feature bits set.
+  const char* filters[] = {"GeckoMain"};
+
+  // This test records values into two test counters while the profiler runs,
+  // then checks that the "counters" array of the profile JSON holds exactly
+  // the expected samples, in order, and nothing from a counter not yet used.
+  struct NumberAndCount {
+    uint64_t mNumber;  // Expected entry in the sample's "number" column.
+    int64_t mCount;    // Expected entry in the sample's "count" column.
+  };
+  // Values for TestCounter; {0u, 0} entries presumably match idle samplings.
+  int64_t testCounters[] = {10, 7, -17};
+  NumberAndCount expectedTestCounters[] = {{1u, 10}, {0u, 0}, {1u, 7},
+                                           {0u, 0},  {0u, 0}, {1u, -17},
+                                           {0u, 0},  {0u, 0}};
+  constexpr size_t expectedTestCountersCount =
+      MOZ_ARRAY_LENGTH(expectedTestCounters);
+
+  bool expectCounter2 = false;  // Set to true once TestCounter2 is used.
+  int64_t testCounters2[] = {10};
+  NumberAndCount expectedTestCounters2[] = {{1u, 10}, {0u, 0}};
+  constexpr size_t expectedTestCounters2Count =
+      MOZ_ARRAY_LENGTH(expectedTestCounters2);
+  // Walks aRoot["counters"], matching every sample against the tables above.
+  auto checkCountersInJSON = [&](const Json::Value& aRoot) {
+    size_t nextExpectedTestCounter = 0u;   // Next table entry to match.
+    size_t nextExpectedTestCounter2 = 0u;  // Same, for TestCounter2.
+
+    GET_JSON(counters, aRoot["counters"], Array);
+    for (const Json::Value& counter : counters) {
+      ASSERT_TRUE(counter.isObject());
+      GET_JSON_VALUE(name, counter["name"], String);
+      if (name == "TestCounter") {
+        EXPECT_EQ_JSON(counter["category"], String, COUNTER_NAME);
+        EXPECT_EQ_JSON(counter["description"], String, COUNTER_DESCRIPTION);
+        GET_JSON(sampleGroups, counter["sample_groups"], Array);
+        for (const Json::Value& sampleGroup : sampleGroups) {
+          ASSERT_TRUE(sampleGroup.isObject());
+          EXPECT_EQ_JSON(sampleGroup["id"], UInt, 0u);
+          // The schema maps column names to indices into each sample array.
+          GET_JSON(samples, sampleGroup["samples"], Object);
+          GET_JSON(samplesSchema, samples["schema"], Object);
+          EXPECT_GE(samplesSchema.size(), 3u);
+          GET_JSON_VALUE(samplesNumber, samplesSchema["number"], UInt);
+          GET_JSON_VALUE(samplesCount, samplesSchema["count"], UInt);
+          GET_JSON(samplesData, samples["data"], Array);
+          for (const Json::Value& sample : samplesData) {
+            ASSERT_TRUE(sample.isArray());
+            ASSERT_LT(nextExpectedTestCounter, expectedTestCountersCount);
+            EXPECT_EQ_JSON(
+                sample[samplesNumber], UInt64,
+                expectedTestCounters[nextExpectedTestCounter].mNumber);
+            EXPECT_EQ_JSON(
+                sample[samplesCount], Int64,
+                expectedTestCounters[nextExpectedTestCounter].mCount);
+            ++nextExpectedTestCounter;
+          }
+        }
+      } else if (name == "TestCounter2") {
+        EXPECT_TRUE(expectCounter2);  // Must not appear before it is used.
+
+        EXPECT_EQ_JSON(counter["category"], String, COUNTER_NAME2);
+        EXPECT_EQ_JSON(counter["description"], String, COUNTER_DESCRIPTION2);
+        GET_JSON(sampleGroups, counter["sample_groups"], Array);
+        for (const Json::Value& sampleGroup : sampleGroups) {
+          ASSERT_TRUE(sampleGroup.isObject());
+          EXPECT_EQ_JSON(sampleGroup["id"], UInt, 0u);
+
+          GET_JSON(samples, sampleGroup["samples"], Object);
+          GET_JSON(samplesSchema, samples["schema"], Object);
+          EXPECT_GE(samplesSchema.size(), 3u);
+          GET_JSON_VALUE(samplesNumber, samplesSchema["number"], UInt);
+          GET_JSON_VALUE(samplesCount, samplesSchema["count"], UInt);
+          GET_JSON(samplesData, samples["data"], Array);
+          for (const Json::Value& sample : samplesData) {
+            ASSERT_TRUE(sample.isArray());
+            ASSERT_LT(nextExpectedTestCounter2, expectedTestCounters2Count);
+            EXPECT_EQ_JSON(
+                sample[samplesNumber], UInt64,
+                expectedTestCounters2[nextExpectedTestCounter2].mNumber);
+            EXPECT_EQ_JSON(
+                sample[samplesCount], Int64,
+                expectedTestCounters2[nextExpectedTestCounter2].mCount);
+            ++nextExpectedTestCounter2;
+          }
+        }
+      }
+    }
+    // Every expected sample must have been consumed (none missing).
+    EXPECT_EQ(nextExpectedTestCounter, expectedTestCountersCount);
+    if (expectCounter2) {
+      EXPECT_EQ(nextExpectedTestCounter2, expectedTestCounters2Count);
+    }
+  };
+
+  // Inactive -> Active
+  profiler_ensure_started(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                          features, filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  // Output all "TestCounter"s, with increasing delays (to test different
+  // numbers of counter samplings).
+  int samplingWaits = 2;
+  for (int64_t counter : testCounters) {
+    AUTO_PROFILER_COUNT_TOTAL(TestCounter, counter);
+    for (int i = 0; i < samplingWaits; ++i) {
+      ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+    }
+    ++samplingWaits;
+  }
+
+  // Verify we got "TestCounter" in the output, but not "TestCounter2" yet.
+  UniquePtr<char[]> profile = profiler_get_profile();
+  JSONOutputCheck(profile.get(), checkCountersInJSON);
+
+  // Now introduce TestCounter2.
+  expectCounter2 = true;
+  for (int64_t counter2 : testCounters2) {
+    AUTO_PROFILER_COUNT_TOTAL(TestCounter2, counter2);
+    ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+    ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+  }
+
+  // Verify we got both "TestCounter" and "TestCounter2" in the output.
+  profile = profiler_get_profile();
+  JSONOutputCheck(profile.get(), checkCountersInJSON);
+
+  profiler_stop();
+}
+
+TEST(GeckoProfiler, Time)
+{
+  const char* threadFilters[] = {"GeckoMain"};
+  const uint32_t featureMask = ProfilerFeature::StackWalk;
+
+  // Back-to-back readings taken before starting must not go backwards.
+  const double preStartA = profiler_time();
+  const double preStartB = profiler_time();
+  ASSERT_TRUE(preStartB >= preStartA);
+
+  // Starting the profiler restarts the timer behind profiler_time().
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                 featureMask, threadFilters, MOZ_ARRAY_LENGTH(threadFilters),
+                 0);
+  const double runningA = profiler_time();
+  const double runningB = profiler_time();
+  ASSERT_TRUE(runningB >= runningA);
+
+  profiler_stop();
+  const double postStopA = profiler_time();
+  const double postStopB = profiler_time();
+  ASSERT_TRUE(postStopA >= runningB && postStopB >= preStartA);
+}
+
+TEST(GeckoProfiler, GetProfile)
+{
+  uint32_t features = ProfilerFeature::StackWalk;
+  const char* filters[] = {"GeckoMain"};
+  // Expect no profile before the profiler is started.
+  ASSERT_TRUE(!profiler_get_profile());
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  mozilla::Maybe<uint32_t> activeFeatures = profiler_features_if_active();
+  ASSERT_TRUE(activeFeatures.isSome());
+  // Not all platforms support stack-walking; use the actually-active set.
+  const bool hasStackWalk = ProfilerFeature::HasStackWalk(*activeFeatures);
+  // meta.configuration must list the active features and the thread filter.
+  UniquePtr<char[]> profile = profiler_get_profile();
+  JSONOutputCheck(profile.get(), [&](const Json::Value& aRoot) {
+    GET_JSON(meta, aRoot["meta"], Object);
+    {
+      GET_JSON(configuration, meta["configuration"], Object);
+      {
+        GET_JSON(features, configuration["features"], Array);
+        {
+          EXPECT_EQ(features.size(), (hasStackWalk ? 1u : 0u));
+          if (hasStackWalk) {
+            EXPECT_JSON_ARRAY_CONTAINS(features, String, "stackwalk");
+          }
+        }
+        GET_JSON(threads, configuration["threads"], Array);
+        {
+          EXPECT_EQ(threads.size(), 1u);
+          EXPECT_JSON_ARRAY_CONTAINS(threads, String, "GeckoMain");
+        }
+      }
+    }
+  });
+
+  profiler_stop();
+  // Expect no profile once the profiler has been stopped.
+  ASSERT_TRUE(!profiler_get_profile());
+}
+
+TEST(GeckoProfiler, StreamJSONForThisProcess)
+{
+  uint32_t features = ProfilerFeature::StackWalk;
+  const char* filters[] = {"GeckoMain"};
+  // A writer backed by the infallible failure latch should never look failed.
+  SpliceableChunkedJSONWriter w{FailureLatchInfallibleSource::Singleton()};
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Fallible());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure());
+  MOZ_RELEASE_ASSERT(&w.ChunkedWriteFunc().SourceFailureLatch() ==
+                     &mozilla::FailureLatchInfallibleSource::Singleton());
+  MOZ_RELEASE_ASSERT(
+      &std::as_const(w.ChunkedWriteFunc()).SourceFailureLatch() ==
+      &mozilla::FailureLatchInfallibleSource::Singleton());
+  MOZ_RELEASE_ASSERT(!w.Fallible());
+  MOZ_RELEASE_ASSERT(!w.Failed());
+  MOZ_RELEASE_ASSERT(!w.GetFailure());
+  MOZ_RELEASE_ASSERT(&w.SourceFailureLatch() ==
+                     &mozilla::FailureLatchInfallibleSource::Singleton());
+  MOZ_RELEASE_ASSERT(&std::as_const(w).SourceFailureLatch() ==
+                     &mozilla::FailureLatchInfallibleSource::Singleton());
+  // Streaming is expected to fail while the profiler is not running.
+  ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure());
+  MOZ_RELEASE_ASSERT(!w.Failed());
+  MOZ_RELEASE_ASSERT(!w.GetFailure());
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+  // With the profiler running, streaming between Start()/End() must succeed.
+  w.Start();
+  ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isOk());
+  w.End();
+
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure());
+  MOZ_RELEASE_ASSERT(!w.Failed());
+  MOZ_RELEASE_ASSERT(!w.GetFailure());
+
+  UniquePtr<char[]> profile = w.ChunkedWriteFunc().CopyData();
+
+  JSONOutputCheck(profile.get(), [](const Json::Value&) {});
+
+  profiler_stop();
+  // Once the profiler is stopped, streaming is expected to fail again.
+  ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr());
+}
+
+// Forward declaration (no definition in this test code) of the internal version
+// of profiler_stream_json_for_this_process, which allows being called from a
+// non-main thread of the parent process, at the risk of an incomplete profile.
+ProfilerResult<ProfileGenerationAdditionalInformation>
+do_profiler_stream_json_for_this_process(
+    SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown,
+    ProfilerCodeAddressService* aService,
+    mozilla::ProgressLogger aProgressLogger);
+
+TEST(GeckoProfiler, StreamJSONForThisProcessThreaded)
+{
+  // Like StreamJSONForThisProcess, but doing some calls on a background thread.
+  nsCOMPtr<nsIThread> thread;
+  nsresult rv = NS_NewNamedThread("GeckoProfGTest", getter_AddRefs(thread));
+  ASSERT_NS_SUCCEEDED(rv);
+
+  uint32_t features = ProfilerFeature::StackWalk;
+  const char* filters[] = {"GeckoMain"};
+
+  SpliceableChunkedJSONWriter w{FailureLatchInfallibleSource::Singleton()};
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Fallible());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure());
+  MOZ_RELEASE_ASSERT(&w.ChunkedWriteFunc().SourceFailureLatch() ==
+                     &mozilla::FailureLatchInfallibleSource::Singleton());
+  MOZ_RELEASE_ASSERT(
+      &std::as_const(w.ChunkedWriteFunc()).SourceFailureLatch() ==
+      &mozilla::FailureLatchInfallibleSource::Singleton());
+  MOZ_RELEASE_ASSERT(!w.Fallible());
+  MOZ_RELEASE_ASSERT(!w.Failed());
+  MOZ_RELEASE_ASSERT(!w.GetFailure());
+  MOZ_RELEASE_ASSERT(&w.SourceFailureLatch() ==
+                     &mozilla::FailureLatchInfallibleSource::Singleton());
+  MOZ_RELEASE_ASSERT(&std::as_const(w).SourceFailureLatch() ==
+                     &mozilla::FailureLatchInfallibleSource::Singleton());
+  // Streaming is expected to fail while the profiler is not running.
+  ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure());
+  MOZ_RELEASE_ASSERT(!w.Failed());
+  MOZ_RELEASE_ASSERT(!w.GetFailure());
+
+  // Start the profiler on the main thread.
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  // Stream through the internal entry point on the background thread.
+  NS_DispatchAndSpinEventLoopUntilComplete(
+      "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody"_ns,
+      thread,
+      NS_NewRunnableFunction(
+          "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody",
+          [&]() {
+            w.Start();
+            ASSERT_TRUE(::do_profiler_stream_json_for_this_process(
+                            w, /* double aSinceTime */ 0.0,
+                            /* bool aIsShuttingDown */ false,
+                            /* ProfilerCodeAddressService* aService */ nullptr,
+                            mozilla::ProgressLogger{})
+                            .isOk());
+            w.End();
+          }));
+
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().Failed());
+  MOZ_RELEASE_ASSERT(!w.ChunkedWriteFunc().GetFailure());
+  MOZ_RELEASE_ASSERT(!w.Failed());
+  MOZ_RELEASE_ASSERT(!w.GetFailure());
+
+  UniquePtr<char[]> profile = w.ChunkedWriteFunc().CopyData();
+
+  JSONOutputCheck(profile.get(), [](const Json::Value&) {});
+
+  // Stop the profiler on the background thread, where streaming through the
+  // internal entry point is then expected to fail.
+  NS_DispatchAndSpinEventLoopUntilComplete(
+      "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody"_ns,
+      thread,
+      NS_NewRunnableFunction(
+          "GeckoProfiler_StreamJSONForThisProcessThreaded_Test::TestBody",
+          [&]() {
+            profiler_stop();
+            ASSERT_TRUE(::do_profiler_stream_json_for_this_process(
+                            w, /* double aSinceTime */ 0.0,
+                            /* bool aIsShuttingDown */ false,
+                            /* ProfilerCodeAddressService* aService */ nullptr,
+                            mozilla::ProgressLogger{})
+                            .isErr());
+          }));
+  thread->Shutdown();
+
+  // Call profiler_stream_json_for_this_process on the main thread.
+  ASSERT_TRUE(::profiler_stream_json_for_this_process(w).isErr());
+}
+
+TEST(GeckoProfiler, ProfilingStack)
+{
+  uint32_t features = ProfilerFeature::StackWalk;
+  const char* filters[] = {"GeckoMain"};
+  // A static label pushed before the profiler is started.
+  AUTO_PROFILER_LABEL("A::B", OTHER);
+  // Dynamic labels built from a C string, an nsCString and a UTF-16 string.
+  UniqueFreePtr<char> dynamic(strdup("dynamic"));
+  {
+    AUTO_PROFILER_LABEL_DYNAMIC_CSTR("A::C", JS, dynamic.get());
+    AUTO_PROFILER_LABEL_DYNAMIC_NSCSTRING("A::C2", JS,
+                                          nsDependentCString(dynamic.get()));
+    AUTO_PROFILER_LABEL_DYNAMIC_LOSSY_NSSTRING(
+        "A::C3", JS, NS_ConvertUTF8toUTF16(dynamic.get()));
+    // Start with labels already in scope; a backtrace must be obtainable.
+    profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                   features, filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+    ASSERT_TRUE(profiler_get_backtrace());
+  }
+  // Labels created directly through the AutoProfilerLabel class.
+  AutoProfilerLabel label1("A", nullptr, JS::ProfilingCategoryPair::DOM);
+  AutoProfilerLabel label2("A", dynamic.get(),
+                           JS::ProfilingCategoryPair::NETWORK);
+  ASSERT_TRUE(profiler_get_backtrace());
+
+  profiler_stop();
+
+  ASSERT_TRUE(!profiler_get_profile());
+}
+
+TEST(GeckoProfiler, Bug1355807)
+{
+  uint32_t features = ProfilerFeature::JS;
+  const char* manyThreadsFilter[] = {""};
+  const char* fewThreadsFilter[] = {"GeckoMain"};
+  // Regression test: restart the profiler twice without stopping in between.
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 manyThreadsFilter, MOZ_ARRAY_LENGTH(manyThreadsFilter), 0);
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 fewThreadsFilter, MOZ_ARRAY_LENGTH(fewThreadsFilter), 0);
+
+  // In bug 1355807 this caused an assertion failure in StopJSSampling().
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 fewThreadsFilter, MOZ_ARRAY_LENGTH(fewThreadsFilter), 0);
+
+  profiler_stop();
+}
+
+// Stack collector for tests: it only counts the callbacks it receives, so a
+// test can check whether the main-thread hook ran and whether frames were seen.
+class GTestStackCollector final : public ProfilerStackCollector {
+ public:
+  void SetIsMainThread() override { mSetIsMainThread++; }
+
+  void CollectNativeLeafAddr(void* aAddr) override { mFrames++; }
+  void CollectJitReturnAddr(void* aAddr) override { mFrames++; }
+  void CollectWasmFrame(const char* aLabel) override { mFrames++; }
+  void CollectProfilingStackFrame(
+      const js::ProfilingStackFrame& aFrame) override {
+    mFrames++;
+  }
+
+  int mSetIsMainThread = 0;  // Number of SetIsMainThread() calls.
+  int mFrames = 0;           // Number of frames of any kind collected.
+};
+
+// Samples thread aTidToSample from aSamplingThread and checks what was seen.
+void DoSuspendAndSample(ProfilerThreadId aTidToSample,
+                        nsIThread* aSamplingThread) {
+  NS_DispatchAndSpinEventLoopUntilComplete(
+      "GeckoProfiler_SuspendAndSample_Test::TestBody"_ns, aSamplingThread,
+      NS_NewRunnableFunction(
+          "GeckoProfiler_SuspendAndSample_Test::TestBody", [&]() {
+            const uint32_t sampledFeatures = ProfilerFeature::CPUUtilization;
+            GTestStackCollector frameCounter;
+            profiler_suspend_and_sample_thread(aTidToSample, sampledFeatures,
+                                               frameCounter,
+                                               /* sampleNative = */ true);
+            const bool sampledMain = aTidToSample == profiler_main_thread_id();
+            ASSERT_TRUE(frameCounter.mSetIsMainThread == sampledMain);
+            ASSERT_TRUE(frameCounter.mFrames > 0);
+          }));
+}
+
+TEST(GeckoProfiler, SuspendAndSample)
+{
+  nsCOMPtr<nsIThread> thread;
+  nsresult rv = NS_NewNamedThread("GeckoProfGTest", getter_AddRefs(thread));
+  ASSERT_NS_SUCCEEDED(rv);
+  // Sampling is requested from a helper thread, targeting this test's thread.
+  ProfilerThreadId tid = profiler_current_thread_id();
+
+  ASSERT_TRUE(!profiler_is_active());
+
+  // Suspend and sample while the profiler is inactive.
+  DoSuspendAndSample(tid, thread);
+  // Same, but with a default-constructed ProfilerThreadId.
+  DoSuspendAndSample(ProfilerThreadId{}, thread);
+
+  uint32_t features = ProfilerFeature::JS;
+  const char* filters[] = {"GeckoMain", "Compositor"};
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  ASSERT_TRUE(profiler_is_active());
+
+  // Suspend and sample while the profiler is active.
+  DoSuspendAndSample(tid, thread);
+  // Same, but with a default-constructed ProfilerThreadId.
+  DoSuspendAndSample(ProfilerThreadId{}, thread);
+
+  profiler_stop();
+
+  ASSERT_TRUE(!profiler_is_active());
+}
+
+TEST(GeckoProfiler, PostSamplingCallback)
+{
+  const char* filters[] = {"GeckoMain"};
+  // While inactive, the callback is expected to be refused and never run.
+  ASSERT_TRUE(!profiler_is_active());
+  ASSERT_TRUE(!profiler_callback_after_sampling(
+      [&](SamplingState) { ASSERT_TRUE(false); }));
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                 ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters),
+                 0);
+  {
+    // Stack sampling -> This label should appear at least once.
+    AUTO_PROFILER_LABEL("PostSamplingCallback completed", OTHER);
+    ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+  }
+  UniquePtr<char[]> profileCompleted = profiler_get_profile();
+  JSONOutputCheck(profileCompleted.get(), [](const Json::Value& aRoot) {
+    GET_JSON(threads, aRoot["threads"], Array);
+    {
+      GET_JSON(thread0, threads[0], Object);
+      {
+        EXPECT_JSON_ARRAY_CONTAINS(thread0["stringTable"], String,
+                                   "PostSamplingCallback completed");
+      }
+    }
+  });
+
+  profiler_pause();
+  {
+    // Paused -> This label should not appear.
+    AUTO_PROFILER_LABEL("PostSamplingCallback paused", OTHER);
+    ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingPaused);
+  }
+  UniquePtr<char[]> profilePaused = profiler_get_profile();
+  JSONOutputCheck(profilePaused.get(), [](const Json::Value& aRoot) {});
+  // This string shouldn't appear *anywhere* in the profile.
+  ASSERT_FALSE(strstr(profilePaused.get(), "PostSamplingCallback paused"));
+
+  profiler_resume();
+  {
+    // Stack sampling -> This label should appear at least once.
+    AUTO_PROFILER_LABEL("PostSamplingCallback resumed", OTHER);
+    ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+  }
+  UniquePtr<char[]> profileResumed = profiler_get_profile();
+  JSONOutputCheck(profileResumed.get(), [](const Json::Value& aRoot) {
+    GET_JSON(threads, aRoot["threads"], Array);
+    {
+      GET_JSON(thread0, threads[0], Object);
+      {
+        EXPECT_JSON_ARRAY_CONTAINS(thread0["stringTable"], String,
+                                   "PostSamplingCallback resumed");
+      }
+    }
+  });
+  // Restart without stopping first, this time with stack sampling disabled.
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                 ProfilerFeature::StackWalk | ProfilerFeature::NoStackSampling,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+  {
+    // No stack sampling -> This label should not appear.
+    AUTO_PROFILER_LABEL("PostSamplingCallback completed (no stacks)", OTHER);
+    ASSERT_EQ(WaitForSamplingState(), SamplingState::NoStackSamplingCompleted);
+  }
+  UniquePtr<char[]> profileNoStacks = profiler_get_profile();
+  JSONOutputCheck(profileNoStacks.get(), [](const Json::Value& aRoot) {});
+  // This string shouldn't appear *anywhere* in the profile.
+  ASSERT_FALSE(strstr(profileNoStacks.get(),
+                      "PostSamplingCallback completed (no stacks)"));
+
+  // Note: There is no non-racy way to test for SamplingState::JustStopped, as
+  // it would require coordination between `profiler_stop()` and another thread
+  // doing `profiler_callback_after_sampling()` at just the right moment.
+
+  profiler_stop();
+  ASSERT_TRUE(!profiler_is_active());
+  ASSERT_TRUE(!profiler_callback_after_sampling(
+      [&](SamplingState) { ASSERT_TRUE(false); }));
+}
+
+TEST(GeckoProfiler, ProfilingStateCallback)
+{
+  const char* filters[] = {"GeckoMain"};
+
+  ASSERT_TRUE(!profiler_is_active());
+  // Each notification is recorded along with the id given to its callback.
+  struct ProfilingStateAndId {
+    ProfilingState mProfilingState;
+    int mId;
+  };
+  DataMutex<Vector<ProfilingStateAndId>> states{"Profiling states"};
+  auto CreateCallback = [&states](int id) {
+    return [id, &states](ProfilingState aProfilingState) {
+      auto lockedStates = states.Lock();
+      ASSERT_TRUE(
+          lockedStates->append(ProfilingStateAndId{aProfilingState, id}));
+    };
+  };
+  auto CheckStatesIsEmpty = [&states]() {
+    auto lockedStates = states.Lock();
+    EXPECT_TRUE(lockedStates->empty());
+  };
+  auto CheckStatesOnlyContains = [&states](ProfilingState aProfilingState,
+                                           int aId) {
+    auto lockedStates = states.Lock();
+    EXPECT_EQ(lockedStates->length(), 1u);
+    if (lockedStates->length() >= 1u) {
+      EXPECT_EQ((*lockedStates)[0].mProfilingState, aProfilingState);
+      EXPECT_EQ((*lockedStates)[0].mId, aId);
+    }
+    lockedStates->clear();
+  };
+  // Callback 1 is added while the profiler is inactive: nothing is notified.
+  profiler_add_state_change_callback(AllProfilingStates(), CreateCallback(1),
+                                     1);
+  // This is in case of error, and it also exercises the (allowed) removal of
+  // unknown callback ids.
+  auto cleanup1 = mozilla::MakeScopeExit(
+      []() { profiler_remove_state_change_callback(1); });
+  CheckStatesIsEmpty();
+
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                 ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters),
+                 0);
+
+  CheckStatesOnlyContains(ProfilingState::Started, 1);
+
+  profiler_add_state_change_callback(AllProfilingStates(), CreateCallback(2),
+                                     2);
+  // This is in case of error, and it also exercises the (allowed) removal of
+  // unknown callback ids.
+  auto cleanup2 = mozilla::MakeScopeExit(
+      []() { profiler_remove_state_change_callback(2); });
+  CheckStatesOnlyContains(ProfilingState::AlreadyActive, 2);
+
+  profiler_remove_state_change_callback(2);
+  CheckStatesOnlyContains(ProfilingState::RemovingCallback, 2);
+  // Note: The actual removal is effectively tested below, by not seeing any
+  // more invocations of the 2nd callback.
+
+  ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+  UniquePtr<char[]> profileCompleted = profiler_get_profile();
+  CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1);
+  JSONOutputCheck(profileCompleted.get(), [](const Json::Value& aRoot) {});
+
+  profiler_pause();
+  CheckStatesOnlyContains(ProfilingState::Pausing, 1);
+  UniquePtr<char[]> profilePaused = profiler_get_profile();
+  CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1);
+  JSONOutputCheck(profilePaused.get(), [](const Json::Value& aRoot) {});
+
+  profiler_resume();
+  CheckStatesOnlyContains(ProfilingState::Resumed, 1);
+  ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+  UniquePtr<char[]> profileResumed = profiler_get_profile();
+  CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1);
+  JSONOutputCheck(profileResumed.get(), [](const Json::Value& aRoot) {});
+
+  // This effectively stops the profiler before restarting it, but
+  // ProfilingState::Stopping is not notified. See `profiler_start` for details.
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                 ProfilerFeature::StackWalk | ProfilerFeature::NoStackSampling,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+  CheckStatesOnlyContains(ProfilingState::Started, 1);
+  ASSERT_EQ(WaitForSamplingState(), SamplingState::NoStackSamplingCompleted);
+  UniquePtr<char[]> profileNoStacks = profiler_get_profile();
+  CheckStatesOnlyContains(ProfilingState::GeneratingProfile, 1);
+  JSONOutputCheck(profileNoStacks.get(), [](const Json::Value& aRoot) {});
+
+  profiler_stop();
+  CheckStatesOnlyContains(ProfilingState::Stopping, 1);
+  ASSERT_TRUE(!profiler_is_active());
+
+  profiler_remove_state_change_callback(1);
+  CheckStatesOnlyContains(ProfilingState::RemovingCallback, 1);
+
+  // Note: ProfilingState::ShuttingDown cannot be tested here, and the profiler
+  // can only be shut down once per process.
+}
+
+TEST(GeckoProfiler, BaseProfilerHandOff)
+{
+  const char* filters[] = {"GeckoMain"};
+  // Checks that Base Profiler data is carried over when Gecko Profiler starts.
+  ASSERT_TRUE(!baseprofiler::profiler_is_active());
+  ASSERT_TRUE(!profiler_is_active());
+  // These markers precede any profiler, so they must not show up later.
+  BASE_PROFILER_MARKER_UNTYPED("Base marker before base profiler", OTHER, {});
+  PROFILER_MARKER_UNTYPED("Gecko marker before base profiler", OTHER, {});
+
+  // Start the Base Profiler.
+  baseprofiler::profiler_start(
+      PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+      ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters));
+
+  ASSERT_TRUE(baseprofiler::profiler_is_active());
+  ASSERT_TRUE(!profiler_is_active());
+
+  // Add at least a marker, which should go straight into the buffer.
+  const auto info0 = baseprofiler::profiler_get_buffer_info();
+  ASSERT_TRUE(info0.isSome());
+  BASE_PROFILER_MARKER_UNTYPED("Base marker during base profiler", OTHER, {});
+  const auto info1 = baseprofiler::profiler_get_buffer_info();
+  ASSERT_TRUE(info1.isSome());
+  ASSERT_GT(info1->mRangeEnd, info0->mRangeEnd);
+
+  PROFILER_MARKER_UNTYPED("Gecko marker during base profiler", OTHER, {});
+
+  // Start the Gecko Profiler, which should grab the Base Profiler profile and
+  // stop it.
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                 ProfilerFeature::StackWalk, filters, MOZ_ARRAY_LENGTH(filters),
+                 0);
+
+  ASSERT_TRUE(!baseprofiler::profiler_is_active());
+  ASSERT_TRUE(profiler_is_active());
+
+  BASE_PROFILER_MARKER_UNTYPED("Base marker during gecko profiler", OTHER, {});
+  PROFILER_MARKER_UNTYPED("Gecko marker during gecko profiler", OTHER, {});
+
+  // Write some Gecko Profiler samples.
+  ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+
+  // Check that the Gecko Profiler profile contains at least the Base Profiler
+  // main thread samples.
+  UniquePtr<char[]> profile = profiler_get_profile();
+
+  profiler_stop();
+  ASSERT_TRUE(!profiler_is_active());
+  // Markers added after the profile was captured must not show up either.
+  BASE_PROFILER_MARKER_UNTYPED("Base marker after gecko profiler", OTHER, {});
+  PROFILER_MARKER_UNTYPED("Gecko marker after gecko profiler", OTHER, {});
+
+  JSONOutputCheck(profile.get(), [](const Json::Value& aRoot) {
+    GET_JSON(threads, aRoot["threads"], Array);
+    {
+      bool found = false;
+      for (const Json::Value& thread : threads) {
+        ASSERT_TRUE(thread.isObject());
+        GET_JSON(name, thread["name"], String);
+        if (name.asString() == "GeckoMain") {
+          found = true;
+          EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String,
+                                     "Base marker before base profiler");
+          EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String,
+                                     "Gecko marker before base profiler");
+          EXPECT_JSON_ARRAY_CONTAINS(thread["stringTable"], String,
+                                     "Base marker during base profiler");
+          EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String,
+                                     "Gecko marker during base profiler");
+          EXPECT_JSON_ARRAY_CONTAINS(thread["stringTable"], String,
+                                     "Base marker during gecko profiler");
+          EXPECT_JSON_ARRAY_CONTAINS(thread["stringTable"], String,
+                                     "Gecko marker during gecko profiler");
+          EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String,
+                                     "Base marker after gecko profiler");
+          EXPECT_JSON_ARRAY_EXCLUDES(thread["stringTable"], String,
+                                     "Gecko marker after gecko profiler");
+          break;
+        }
+      }
+      EXPECT_TRUE(found);
+    }
+  });
+}
+
+// Returns the name string of a single ProfilerFeature value, or "?".
+static std::string_view GetFeatureName(uint32_t feature) {
+  switch (feature) {
+#  define FEATURE_NAME(n_, str_, Name_, desc_) \
+    case ProfilerFeature::Name_:               \
+      return str_;
+    PROFILER_FOR_EACH_FEATURE(FEATURE_NAME)
+
+#  undef FEATURE_NAME
+    // Anything that is not exactly one known feature value.
+    default:
+      return "?";
+  }
+}
+
+TEST(GeckoProfiler, FeatureCombinations)
+{
+  // Bug 1845606: this test is skipped on Windows versions before Windows 8.
+#  ifdef XP_WIN
+  if (!IsWin8OrLater()) {
+    return;
+  }
+#  endif
+
+  const char* filters[] = {"*"};
+
+  // List of features to test. Every combination of up to 3 of them will be
+  // tested, so be careful not to add too many to keep the test run at a
+  // reasonable time.
+  uint32_t featureList[] = {ProfilerFeature::JS,
+                            ProfilerFeature::Screenshots,
+                            ProfilerFeature::StackWalk,
+                            ProfilerFeature::NoStackSampling,
+                            ProfilerFeature::NativeAllocations,
+                            ProfilerFeature::CPUUtilization,
+                            ProfilerFeature::CPUAllThreads,
+                            ProfilerFeature::SamplingAllThreads,
+                            ProfilerFeature::MarkersAllThreads,
+                            ProfilerFeature::UnregisteredThreads};
+  constexpr uint32_t featureCount = uint32_t(MOZ_ARRAY_LENGTH(featureList));
+  // Runs one start / sample / get-profile / stop cycle with `features`.
+  auto testFeatures = [&](uint32_t features,
+                          const std::string& featuresString) {
+    SCOPED_TRACE(featuresString.c_str());
+
+    ASSERT_TRUE(!profiler_is_active());
+
+    profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                   features, filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+    ASSERT_TRUE(profiler_is_active());
+
+    // Write some Gecko Profiler samples.
+    EXPECT_EQ(WaitForSamplingState(),
+              (((features & ProfilerFeature::NoStackSampling) != 0) &&
+               ((features & (ProfilerFeature::CPUUtilization |
+                             ProfilerFeature::CPUAllThreads)) == 0))
+                  ? SamplingState::NoStackSamplingCompleted
+                  : SamplingState::SamplingCompleted);
+
+    // Check that the profile looks valid. Note that we don't test feature-
+    // specific changes.
+    UniquePtr<char[]> profile = profiler_get_profile();
+    JSONOutputCheck(profile.get(), [](const Json::Value& aRoot) {});
+
+    profiler_stop();
+    ASSERT_TRUE(!profiler_is_active());
+  };
+
+  testFeatures(0, "Features: (none)");
+  // Then all combinations of one, two and three distinct listed features.
+  for (uint32_t f1 = 0u; f1 < featureCount; ++f1) {
+    const uint32_t features1 = featureList[f1];
+    std::string features1String = "Features: ";
+    features1String += GetFeatureName(featureList[f1]);
+
+    testFeatures(features1, features1String);
+    // Pairs: the first feature's bits combined with each later feature.
+    for (uint32_t f2 = f1 + 1u; f2 < featureCount; ++f2) {
+      const uint32_t features12 = features1 | featureList[f2];
+      std::string features12String = features1String + " ";
+      features12String += GetFeatureName(featureList[f2]);
+
+      testFeatures(features12, features12String);
+
+      for (uint32_t f3 = f2 + 1u; f3 < featureCount; ++f3) {
+        const uint32_t features123 = features12 | featureList[f3];
+        std::string features123String = features12String + " ";
+        features123String += GetFeatureName(featureList[f3]);
+
+        testFeatures(features123, features123String);
+      }
+    }
+  }
+}
+
+// Reports how many samples aThread's sample table holds, and the sum of all
+// non-null "threadCPUDelta" values found in them.
+static void CountCPUDeltas(const Json::Value& aThread, size_t& aOutSamplings,
+                           uint64_t& aOutCPUDeltaSum) {
+  GET_JSON(sampleTable, aThread["samples"], Object);
+  // Find which column of each sample row holds the CPU delta.
+  GET_JSON(sampleSchema, sampleTable["schema"], Object);
+  GET_JSON(deltaColumnJson, sampleSchema["threadCPUDelta"], UInt);
+  const Json::ArrayIndex deltaColumn = deltaColumnJson.asUInt();
+
+  // Reset the outputs before counting.
+  aOutSamplings = 0;
+  aOutCPUDeltaSum = 0;
+  GET_JSON(rows, sampleTable["data"], Array);
+  aOutSamplings = rows.size();
+
+  for (const Json::Value& row : rows) {
+    ASSERT_TRUE(row.isArray());
+    // Skip rows that lack the column, or that hold null there.
+    if (!row.isValidIndex(deltaColumn) || row[deltaColumn].isNull()) {
+      continue;
+    }
+    GET_JSON(delta, row[deltaColumn], UInt64);
+    aOutCPUDeltaSum += uint64_t(delta.asUInt64());
+  }
+}
+
+TEST(GeckoProfiler, CPUUsage)
+{
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test assumes it runs on the main thread";
+
+  const char* filters[] = {"GeckoMain", "Idle test", "Busy test"};
+
+  enum class TestThreadsState {
+    // Initial state, while constructing and starting the idle thread.
+    STARTING,
+    // Set by the idle thread just before running its main mostly-idle loop.
+    RUNNING1,
+    RUNNING2,
+    // Set by the main thread when it wants the idle thread to stop.
+    STOPPING
+  };
+  Atomic<TestThreadsState> testThreadsState{TestThreadsState::STARTING};
+
+  std::thread idle([&]() {
+    AUTO_PROFILER_REGISTER_THREAD("Idle test");
+    // Add a label to ensure that we have a non-empty stack, even if native
+    // stack-walking is not available.
+    AUTO_PROFILER_LABEL("Idle test", PROFILER);
+    ASSERT_TRUE(testThreadsState.compareExchange(TestThreadsState::STARTING,
+                                                 TestThreadsState::RUNNING1) ||
+                testThreadsState.compareExchange(TestThreadsState::RUNNING1,
+                                                 TestThreadsState::RUNNING2));
+
+    while (testThreadsState != TestThreadsState::STOPPING) {
+      // Sleep for multiple profiler intervals, so the profiler should have
+      // samples with zero CPU utilization.
+      PR_Sleep(PR_MillisecondsToInterval(PROFILER_DEFAULT_INTERVAL * 10));
+    }
+  });
+
+  std::thread busy([&]() {
+    AUTO_PROFILER_REGISTER_THREAD("Busy test");
+    // Add a label to ensure that we have a non-empty stack, even if native
+    // stack-walking is not available.
+    AUTO_PROFILER_LABEL("Busy test", PROFILER);
+    ASSERT_TRUE(testThreadsState.compareExchange(TestThreadsState::STARTING,
+                                                 TestThreadsState::RUNNING1) ||
+                testThreadsState.compareExchange(TestThreadsState::RUNNING1,
+                                                 TestThreadsState::RUNNING2));
+
+    while (testThreadsState != TestThreadsState::STOPPING) {
+      // Stay busy!
+    }
+  });
+
+  // Wait for idle thread to start running its main loop.
+  while (testThreadsState != TestThreadsState::RUNNING2) {
+    PR_Sleep(PR_MillisecondsToInterval(1));
+  }
+
+  // We want to ensure that CPU usage numbers are present whether or not we are
+  // collecting stack samples.
+  static constexpr bool scTestsWithOrWithoutStackSampling[] = {false, true};
+  for (const bool testWithNoStackSampling : scTestsWithOrWithoutStackSampling) {
+    ASSERT_TRUE(!profiler_is_active());
+    ASSERT_TRUE(!profiler_callback_after_sampling(
+        [&](SamplingState) { ASSERT_TRUE(false); }));
+
+    profiler_start(
+        PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+        ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization |
+            (testWithNoStackSampling ? ProfilerFeature::NoStackSampling : 0),
+        filters, MOZ_ARRAY_LENGTH(filters), 0);
+    // Grab a few samples, each with a different label on the stack.
+#  define SAMPLE_LABEL_PREFIX "CPUUsage sample label "
+    static constexpr const char* scSampleLabels[] = {
+        SAMPLE_LABEL_PREFIX "0", SAMPLE_LABEL_PREFIX "1",
+        SAMPLE_LABEL_PREFIX "2", SAMPLE_LABEL_PREFIX "3",
+        SAMPLE_LABEL_PREFIX "4", SAMPLE_LABEL_PREFIX "5",
+        SAMPLE_LABEL_PREFIX "6", SAMPLE_LABEL_PREFIX "7",
+        SAMPLE_LABEL_PREFIX "8", SAMPLE_LABEL_PREFIX "9"};
+    static constexpr size_t scSampleLabelCount =
+        (sizeof(scSampleLabels) / sizeof(scSampleLabels[0]));
+    // We'll do two samplings for each label.
+    static constexpr size_t scMinSamplings = scSampleLabelCount * 2;
+
+    for (const char* sampleLabel : scSampleLabels) {
+      AUTO_PROFILER_LABEL(sampleLabel, OTHER);
+      ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+      // Note: There could have been a delay before this label above, where the
+      // profiler could have sampled the stack and missed the label. By forcing
+      // another sampling now, the label is guaranteed to be present.
+      ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+    }
+
+    UniquePtr<char[]> profile = profiler_get_profile();
+
+    if (testWithNoStackSampling) {
+      // If we are testing nostacksampling, we shouldn't find this label prefix
+      // in the profile.
+      EXPECT_FALSE(strstr(profile.get(), SAMPLE_LABEL_PREFIX));
+    } else {
+      // In normal sampling mode, we should find all labels.
+      for (const char* sampleLabel : scSampleLabels) {
+        EXPECT_TRUE(strstr(profile.get(), sampleLabel));
+      }
+    }
+
+    JSONOutputCheck(profile.get(), [testWithNoStackSampling](
+                                       const Json::Value& aRoot) {
+      // Check that the "cpu" feature is present.
+      GET_JSON(meta, aRoot["meta"], Object);
+      {
+        GET_JSON(configuration, meta["configuration"], Object);
+        {
+          GET_JSON(features, configuration["features"], Array);
+          { EXPECT_JSON_ARRAY_CONTAINS(features, String, "cpu"); }
+        }
+      }
+
+      {
+        GET_JSON(sampleUnits, meta["sampleUnits"], Object);
+        {
+          EXPECT_EQ_JSON(sampleUnits["time"], String, "ms");
+          EXPECT_EQ_JSON(sampleUnits["eventDelay"], String, "ms");
+#  if defined(GP_OS_windows) || defined(GP_OS_darwin) || \
+      defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+          // Note: The exact string is not important here.
+          EXPECT_TRUE(sampleUnits["threadCPUDelta"].isString())
+              << "There should be a sampleUnits.threadCPUDelta on this "
+                 "platform";
+#  else
+        EXPECT_FALSE(sampleUnits.isMember("threadCPUDelta"))
+            << "Unexpected sampleUnits.threadCPUDelta on this platform";;
+#  endif
+        }
+      }
+
+      bool foundMain = false;
+      bool foundIdle = false;
+      uint64_t idleThreadCPUDeltaSum = 0u;
+      bool foundBusy = false;
+      uint64_t busyThreadCPUDeltaSum = 0u;
+
+      // Check that the sample schema contains "threadCPUDelta".
+      GET_JSON(threads, aRoot["threads"], Array);
+      for (const Json::Value& thread : threads) {
+        ASSERT_TRUE(thread.isObject());
+        GET_JSON(name, thread["name"], String);
+        if (name.asString() == "GeckoMain") {
+          foundMain = true;
+          GET_JSON(samples, thread["samples"], Object);
+          {
+            Json::ArrayIndex stackIndex = 0;
+            Json::ArrayIndex threadCPUDeltaIndex = 0;
+            GET_JSON(schema, samples["schema"], Object);
+            {
+              GET_JSON(jsonStackIndex, schema["stack"], UInt);
+              stackIndex = jsonStackIndex.asUInt();
+              GET_JSON(jsonThreadCPUDeltaIndex, schema["threadCPUDelta"], UInt);
+              threadCPUDeltaIndex = jsonThreadCPUDeltaIndex.asUInt();
+            }
+
+            std::set<uint64_t> stackLeaves;  // To count distinct leaves.
+            unsigned threadCPUDeltaCount = 0;
+            GET_JSON(data, samples["data"], Array);
+            if (testWithNoStackSampling) {
+              // When not sampling stacks, the first sampling loop will have no
+              // running times, so it won't output anything.
+              EXPECT_GE(data.size(), scMinSamplings - 1);
+            } else {
+              EXPECT_GE(data.size(), scMinSamplings);
+            }
+            for (const Json::Value& sample : data) {
+              ASSERT_TRUE(sample.isArray());
+              if (sample.isValidIndex(stackIndex)) {
+                if (!sample[stackIndex].isNull()) {
+                  GET_JSON(stack, sample[stackIndex], UInt64);
+                  stackLeaves.insert(stack.asUInt64());
+                }
+              }
+              if (sample.isValidIndex(threadCPUDeltaIndex)) {
+                if (!sample[threadCPUDeltaIndex].isNull()) {
+                  EXPECT_TRUE(sample[threadCPUDeltaIndex].isUInt64());
+                  ++threadCPUDeltaCount;
+                }
+              }
+            }
+
+            if (testWithNoStackSampling) {
+              // in nostacksampling mode, there should only be one kind of stack
+              // leaf (the root).
+              EXPECT_EQ(stackLeaves.size(), 1u);
+            } else {
+              // in normal sampling mode, there should be at least one kind of
+              // stack leaf for each distinct label.
+              EXPECT_GE(stackLeaves.size(), scSampleLabelCount);
+            }
+
+#  if defined(GP_OS_windows) || defined(GP_OS_darwin) || \
+      defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+            EXPECT_GE(threadCPUDeltaCount, data.size() - 1u)
+                << "There should be 'threadCPUDelta' values in all but 1 "
+                   "samples";
+#  else
+          // All "threadCPUDelta" data should be absent or null on unsupported
+          // platforms.
+          EXPECT_EQ(threadCPUDeltaCount, 0u);
+#  endif
+          }
+        } else if (name.asString() == "Idle test") {
+          foundIdle = true;
+          size_t samplings;
+          CountCPUDeltas(thread, samplings, idleThreadCPUDeltaSum);
+          if (testWithNoStackSampling) {
+            // When not sampling stacks, the first sampling loop will have no
+            // running times, so it won't output anything.
+            EXPECT_GE(samplings, scMinSamplings - 1);
+          } else {
+            EXPECT_GE(samplings, scMinSamplings);
+          }
+#  if !(defined(GP_OS_windows) || defined(GP_OS_darwin) || \
+        defined(GP_OS_linux) || defined(GP_OS_android) ||  \
+        defined(GP_OS_freebsd))
+          // All "threadCPUDelta" data should be absent or null on unsupported
+          // platforms.
+          EXPECT_EQ(idleThreadCPUDeltaSum, 0u);
+#  endif
+        } else if (name.asString() == "Busy test") {
+          foundBusy = true;
+          size_t samplings;
+          CountCPUDeltas(thread, samplings, busyThreadCPUDeltaSum);
+          if (testWithNoStackSampling) {
+            // When not sampling stacks, the first sampling loop will have no
+            // running times, so it won't output anything.
+            EXPECT_GE(samplings, scMinSamplings - 1);
+          } else {
+            EXPECT_GE(samplings, scMinSamplings);
+          }
+#  if !(defined(GP_OS_windows) || defined(GP_OS_darwin) || \
+        defined(GP_OS_linux) || defined(GP_OS_android) ||  \
+        defined(GP_OS_freebsd))
+          // All "threadCPUDelta" data should be absent or null on unsupported
+          // platforms.
+          EXPECT_EQ(busyThreadCPUDeltaSum, 0u);
+#  endif
+        }
+      }
+
+      EXPECT_TRUE(foundMain);
+      EXPECT_TRUE(foundIdle);
+      EXPECT_TRUE(foundBusy);
+      EXPECT_LE(idleThreadCPUDeltaSum, busyThreadCPUDeltaSum);
+    });
+
+    // Note: There is no non-racy way to test for SamplingState::JustStopped, as
+    // it would require coordination between `profiler_stop()` and another
+    // thread doing `profiler_callback_after_sampling()` at just the right
+    // moment.
+
+    profiler_stop();
+    ASSERT_TRUE(!profiler_is_active());
+    ASSERT_TRUE(!profiler_callback_after_sampling(
+        [&](SamplingState) { ASSERT_TRUE(false); }));
+  }
+
+  testThreadsState = TestThreadsState::STOPPING;
+  busy.join();
+  idle.join();
+}
+
+TEST(GeckoProfiler, AllThreads)
+{
+  // Bug 1845606
+  #ifdef XP_WIN
+  if (!IsWin8OrLater()) {
+    return;
+  }
+  #endif
+
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test assumes it runs on the main thread";
+
+  ASSERT_EQ(static_cast<uint32_t>(ThreadProfilingFeatures::Any), 1u + 2u + 4u)
+      << "This test assumes that there are 3 binary choices 1+2+4; "
+         "Is this test up to date?";
+
+  for (uint32_t threadFeaturesBinary = 0u;
+       threadFeaturesBinary <=
+       static_cast<uint32_t>(ThreadProfilingFeatures::Any);
+       ++threadFeaturesBinary) {
+    ThreadProfilingFeatures threadFeatures =
+        static_cast<ThreadProfilingFeatures>(threadFeaturesBinary);
+    const bool threadCPU = DoFeaturesIntersect(
+        threadFeatures, ThreadProfilingFeatures::CPUUtilization);
+    const bool threadSampling =
+        DoFeaturesIntersect(threadFeatures, ThreadProfilingFeatures::Sampling);
+    const bool threadMarkers =
+        DoFeaturesIntersect(threadFeatures, ThreadProfilingFeatures::Markers);
+
+    ASSERT_TRUE(!profiler_is_active());
+
+    uint32_t features = ProfilerFeature::StackWalk;
+    std::string featuresString = "Features: StackWalk Threads";
+    if (threadCPU) {
+      features |= ProfilerFeature::CPUAllThreads;
+      featuresString += " CPUAllThreads";
+    }
+    if (threadSampling) {
+      features |= ProfilerFeature::SamplingAllThreads;
+      featuresString += " SamplingAllThreads";
+    }
+    if (threadMarkers) {
+      features |= ProfilerFeature::MarkersAllThreads;
+      featuresString += " MarkersAllThreads";
+    }
+
+    SCOPED_TRACE(featuresString.c_str());
+
+    const char* filters[] = {"GeckoMain", "Selected"};
+
+    EXPECT_FALSE(profiler_thread_is_being_profiled(
+        ThreadProfilingFeatures::CPUUtilization));
+    EXPECT_FALSE(
+        profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling));
+    EXPECT_FALSE(
+        profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers));
+    EXPECT_FALSE(profiler_thread_is_being_profiled_for_markers());
+
+    profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                   features, filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+    EXPECT_TRUE(profiler_thread_is_being_profiled(
+        ThreadProfilingFeatures::CPUUtilization));
+    EXPECT_TRUE(
+        profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling));
+    EXPECT_TRUE(
+        profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers));
+    EXPECT_TRUE(profiler_thread_is_being_profiled_for_markers());
+
+    // This will signal all threads to stop spinning.
+    Atomic<bool> stopThreads{false};
+
+    Atomic<int> selectedThreadSpins{0};
+    std::thread selectedThread([&]() {
+      AUTO_PROFILER_REGISTER_THREAD("Selected test thread");
+      // Add a label to ensure that we have a non-empty stack, even if native
+      // stack-walking is not available.
+      AUTO_PROFILER_LABEL("Selected test thread", PROFILER);
+      EXPECT_TRUE(profiler_thread_is_being_profiled(
+          ThreadProfilingFeatures::CPUUtilization));
+      EXPECT_TRUE(
+          profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling));
+      EXPECT_TRUE(
+          profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers));
+      EXPECT_TRUE(profiler_thread_is_being_profiled_for_markers());
+      while (!stopThreads) {
+        PROFILER_MARKER_UNTYPED("Spinning Selected!", PROFILER);
+        ++selectedThreadSpins;
+        PR_Sleep(PR_MillisecondsToInterval(1));
+      }
+    });
+
+    Atomic<int> unselectedThreadSpins{0};
+    std::thread unselectedThread([&]() {
+      AUTO_PROFILER_REGISTER_THREAD("Registered test thread");
+      // Add a label to ensure that we have a non-empty stack, even if native
+      // stack-walking is not available.
+      AUTO_PROFILER_LABEL("Registered test thread", PROFILER);
+      // This thread is *not* selected for full profiling, but it may still be
+      // profiled depending on the -allthreads features.
+      EXPECT_EQ(profiler_thread_is_being_profiled(
+                    ThreadProfilingFeatures::CPUUtilization),
+                threadCPU);
+      EXPECT_EQ(
+          profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling),
+          threadSampling);
+      EXPECT_EQ(
+          profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers),
+          threadMarkers);
+      EXPECT_EQ(profiler_thread_is_being_profiled_for_markers(), threadMarkers);
+      while (!stopThreads) {
+        PROFILER_MARKER_UNTYPED("Spinning Registered!", PROFILER);
+        ++unselectedThreadSpins;
+        PR_Sleep(PR_MillisecondsToInterval(1));
+      }
+    });
+
+    Atomic<int> unregisteredThreadSpins{0};
+    std::thread unregisteredThread([&]() {
+      // No `AUTO_PROFILER_REGISTER_THREAD` here.
+      EXPECT_FALSE(profiler_thread_is_being_profiled(
+          ThreadProfilingFeatures::CPUUtilization));
+      EXPECT_FALSE(
+          profiler_thread_is_being_profiled(ThreadProfilingFeatures::Sampling));
+      EXPECT_FALSE(
+          profiler_thread_is_being_profiled(ThreadProfilingFeatures::Markers));
+      EXPECT_FALSE(profiler_thread_is_being_profiled_for_markers());
+      while (!stopThreads) {
+        PROFILER_MARKER_UNTYPED("Spinning Unregistered!", PROFILER);
+        ++unregisteredThreadSpins;
+        PR_Sleep(PR_MillisecondsToInterval(1));
+      }
+    });
+
+    // Wait for all threads to have started at least one spin.
+    while (selectedThreadSpins == 0 || unselectedThreadSpins == 0 ||
+           unregisteredThreadSpins == 0) {
+      PR_Sleep(PR_MillisecondsToInterval(1));
+    }
+
+    // Wait until the sampler has done at least one loop.
+    ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+
+    // Restart the spin counts, and wait until *every* thread has done at least
+    // one more spin (hence `||`: keep waiting while any counter is still 0).
+    // Since spins are increased after PROFILER_MARKER calls, in the worst case
+    // each thread will have attempted to record at least one marker.
+    selectedThreadSpins = 0;
+    unselectedThreadSpins = 0;
+    unregisteredThreadSpins = 0;
+    while (selectedThreadSpins < 1 || unselectedThreadSpins < 1 ||
+           unregisteredThreadSpins < 1) {
+      ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+    }
+
+    profiler_pause();
+    UniquePtr<char[]> profile = profiler_get_profile();
+
+    profiler_stop();
+    stopThreads = true;
+    unregisteredThread.join();
+    unselectedThread.join();
+    selectedThread.join();
+
+    JSONOutputCheck(profile.get(), [&](const Json::Value& aRoot) {
+      GET_JSON(threads, aRoot["threads"], Array);
+      int foundMain = 0;
+      int foundSelected = 0;
+      int foundSelectedMarker = 0;
+      int foundUnselected = 0;
+      int foundUnselectedMarker = 0;
+      for (const Json::Value& thread : threads) {
+        ASSERT_TRUE(thread.isObject());
+        GET_JSON(stringTable, thread["stringTable"], Array);
+        GET_JSON(name, thread["name"], String);
+        if (name.asString() == "GeckoMain") {
+          ++foundMain;
+          // Don't check the main thread further in this test.
+
+        } else if (name.asString() == "Selected test thread") {
+          ++foundSelected;
+
+          GET_JSON(samples, thread["samples"], Object);
+          GET_JSON(samplesData, samples["data"], Array);
+          EXPECT_GT(samplesData.size(), 0u);
+
+          GET_JSON(markers, thread["markers"], Object);
+          GET_JSON(markersData, markers["data"], Array);
+          for (const Json::Value& marker : markersData) {
+            const unsigned int NAME = 0u;
+            ASSERT_TRUE(marker[NAME].isUInt());  // name id
+            GET_JSON(name, stringTable[marker[NAME].asUInt()], String);
+            if (name == "Spinning Selected!") {
+              ++foundSelectedMarker;
+            }
+          }
+        } else if (name.asString() == "Registered test thread") {
+          ++foundUnselected;
+
+          GET_JSON(samples, thread["samples"], Object);
+          GET_JSON(samplesData, samples["data"], Array);
+          if (threadCPU || threadSampling) {
+            EXPECT_GT(samplesData.size(), 0u);
+          } else {
+            EXPECT_EQ(samplesData.size(), 0u);
+          }
+
+          GET_JSON(markers, thread["markers"], Object);
+          GET_JSON(markersData, markers["data"], Array);
+          for (const Json::Value& marker : markersData) {
+            const unsigned int NAME = 0u;
+            ASSERT_TRUE(marker[NAME].isUInt());  // name id
+            GET_JSON(name, stringTable[marker[NAME].asUInt()], String);
+            if (name == "Spinning Registered!") {
+              ++foundUnselectedMarker;
+            }
+          }
+
+        } else {
+          EXPECT_STRNE(name.asString().c_str(),
+                       "Unregistered test thread label");
+        }
+      }
+      EXPECT_EQ(foundMain, 1);
+      EXPECT_EQ(foundSelected, 1);
+      EXPECT_GT(foundSelectedMarker, 0);
+      EXPECT_EQ(foundUnselected,
+                (threadCPU || threadSampling || threadMarkers) ? 1 : 0)
+          << "Unselected thread should only be present if at least one of the "
+             "allthreads feature is on";
+      if (threadMarkers) {
+        EXPECT_GT(foundUnselectedMarker, 0);
+      } else {
+        EXPECT_EQ(foundUnselectedMarker, 0);
+      }
+    });
+  }
+}
+
+TEST(GeckoProfiler, FailureHandling)
+{
+  profiler_init_main_thread_id();
+  ASSERT_TRUE(profiler_is_main_thread())
+  << "This test assumes it runs on the main thread";
+
+  uint32_t features = ProfilerFeature::StackWalk;
+  const char* filters[] = {"GeckoMain"};
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+
+  // User-defined marker type whose JSON streaming function always sets the
+  // writer failure "boom!".
+  struct GtestFailingMarker {
+    static constexpr Span<const char> MarkerTypeName() {
+      return MakeStringSpan("markers-gtest-failing");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter) {
+      aWriter.SetFailure("boom!");
+    }
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      return mozilla::MarkerSchema::SpecialFrontendLocation{};
+    }
+  };
+  EXPECT_TRUE(profiler_add_marker("Gtest failing marker",
+                                  geckoprofiler::category::OTHER, {},
+                                  GtestFailingMarker{}));
+
+  ASSERT_EQ(WaitForSamplingState(), SamplingState::SamplingCompleted);
+  profiler_pause();  // Paused before streaming the profile below.
+
+  FailureLatchSource failureLatch;
+  SpliceableChunkedJSONWriter w{failureLatch};
+  EXPECT_FALSE(w.Failed());  // A fresh writer reports no failure.
+  ASSERT_FALSE(w.GetFailure());
+
+  w.Start();
+  EXPECT_FALSE(w.Failed());  // Start() alone must not fail either.
+  ASSERT_FALSE(w.GetFailure());
+
+  // The marker will cause a failure during this function call.
+  EXPECT_FALSE(::profiler_stream_json_for_this_process(w).isOk());
+  EXPECT_TRUE(w.Failed());
+  ASSERT_TRUE(w.GetFailure());
+  EXPECT_EQ(strcmp(w.GetFailure(), "boom!"), 0);
+
+  // Already failed, check that we don't crash or reset the failure.
+  EXPECT_FALSE(::profiler_stream_json_for_this_process(w).isOk());
+  EXPECT_TRUE(w.Failed());
+  ASSERT_TRUE(w.GetFailure());
+  EXPECT_EQ(strcmp(w.GetFailure(), "boom!"), 0);
+
+  w.End();
+
+  profiler_stop();
+
+  EXPECT_TRUE(w.Failed());  // Still failed after End() and profiler_stop().
+  ASSERT_TRUE(w.GetFailure());
+  EXPECT_EQ(strcmp(w.GetFailure(), "boom!"), 0);
+
+  UniquePtr<char[]> profile = w.ChunkedWriteFunc().CopyData();
+  ASSERT_EQ(profile.get(), nullptr);  // No data is expected after a failure.
+}
+
+TEST(GeckoProfiler, NoMarkerStacks)
+{
+  uint32_t features = ProfilerFeature::NoMarkerStacks;
+  const char* filters[] = {"GeckoMain"};
+
+  ASSERT_TRUE(!profiler_get_profile());
+
+  // Make sure that profiler_capture_backtrace returns nullptr when the profiler
+  // is not active.
+  ASSERT_TRUE(!profiler_capture_backtrace());
+
+  {
+    // Start the profiler without the NoMarkerStacks feature and make sure we
+    // capture stacks.
+    profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL,
+                   /* features */ 0, filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+    ASSERT_TRUE(profiler_capture_backtrace());
+    profiler_stop();
+  }
+
+  // Start the profiler with the NoMarkerStacks feature and make sure we
+  // don't capture stacks.
+  profiler_start(PROFILER_DEFAULT_ENTRIES, PROFILER_DEFAULT_INTERVAL, features,
+                 filters, MOZ_ARRAY_LENGTH(filters), 0);
+
+  // Make sure that the active features include the NoMarkerStacks feature.
+  mozilla::Maybe<uint32_t> activeFeatures = profiler_features_if_active();
+  ASSERT_TRUE(activeFeatures.isSome());
+  ASSERT_TRUE(ProfilerFeature::HasNoMarkerStacks(*activeFeatures));
+
+  // Make sure we don't capture stacks.
+  ASSERT_TRUE(!profiler_capture_backtrace());
+
+  // Add a marker that requests a stack capture, to check it gets no stack.
+  EXPECT_TRUE(profiler_add_marker(
+      "Text with stack", geckoprofiler::category::OTHER, MarkerStack::Capture(),
+      geckoprofiler::markers::TextMarker{}, ""));
+
+  UniquePtr<char[]> profile = profiler_get_profile();
+  JSONOutputCheck(profile.get(), [&](const Json::Value& aRoot) {
+    // Check that the meta.configuration.features array contains
+    // "nomarkerstacks".
+    GET_JSON(meta, aRoot["meta"], Object);
+    {
+      GET_JSON(configuration, meta["configuration"], Object);
+      {
+        GET_JSON(features, configuration["features"], Array);
+        {
+          EXPECT_EQ(features.size(), 1u);
+          EXPECT_JSON_ARRAY_CONTAINS(features, String, "nomarkerstacks");
+        }
+      }
+    }
+
+    // Make sure that the marker we captured doesn't have a stack.
+    GET_JSON(threads, aRoot["threads"], Array);
+    {
+      ASSERT_EQ(threads.size(), 1u);
+      GET_JSON(thread0, threads[0], Object);
+      {
+        GET_JSON(markers, thread0["markers"], Object);
+        {
+          GET_JSON(data, markers["data"], Array);
+          {
+            const unsigned int NAME = 0u;
+            const unsigned int PAYLOAD = 5u;
+            bool foundMarker = false;
+            GET_JSON(stringTable, thread0["stringTable"], Array);
+
+            for (const Json::Value& marker : data) {
+              // Even though we only added one marker, some markers like
+              // NotifyObservers are being added as well. Let's iterate over
+              // them and make sure that we have the one we added explicitly and
+              // check its stack doesn't exist.
+              GET_JSON(name, stringTable[marker[NAME].asUInt()], String);
+              std::string nameString = name.asString();
+
+              if (nameString == "Text with stack") {
+                // Make sure that the marker doesn't have a stack.
+                foundMarker = true;
+                EXPECT_FALSE(marker[PAYLOAD].isNull());
+                EXPECT_TRUE(marker[PAYLOAD]["stack"].isNull());
+              }
+            }
+
+            EXPECT_TRUE(foundMarker);
+          }
+        }
+      }
+    }
+  });
+
+  profiler_stop();
+
+  ASSERT_TRUE(!profiler_get_profile());
+}
+
+#endif  // MOZ_GECKO_PROFILER
diff --git a/tools/profiler/tests/gtest/LulTest.cpp b/tools/profiler/tests/gtest/LulTest.cpp
new file mode 100644
index 0000000000..159a366567
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTest.cpp
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "mozilla/Atomics.h"
+#include "LulMain.h"
+#include "GeckoProfiler.h"       // for TracingKind
+#include "platform-linux-lul.h"  // for read_procmaps
+
+// Set this to 0 to make LUL be completely silent during tests.
+// Set it to 1 to get logging output from LUL, presumably for
+// the purpose of debugging it.
+#define DEBUG_LUL_TEST 0
+
+// LUL needs a callback for its logging sink. Writes `str` to stderr with a
+// "LUL-in-gtest: " prefix; does nothing unless DEBUG_LUL_TEST is non-zero.
+static void gtest_logging_sink_for_LulIntegration(const char* str) {
+  if (DEBUG_LUL_TEST == 0) {
+    return;
+  }
+  // Ignore any trailing \n, since the format string adds one anyway. The
+  // string is trimmed via a printf precision instead of a strdup() copy, so
+  // there is no allocation that could fail and be dereferenced as null.
+  // (Precision is an int; assumes log lines are shorter than INT_MAX.)
+  size_t n = strlen(str);
+  if (n > 0 && str[n - 1] == '\n') {
+    --n;
+  }
+  fprintf(stderr, "LUL-in-gtest: %.*s\n", static_cast<int>(n), str);
+}
+
+TEST(LulIntegration, unwind_consistency)
+{
+  // Set up LUL and get it to read unwind info for libxul.so, which is
+  // all we care about here, plus (incidentally) practically every
+  // other object in the process too.
+  lul::LUL* lul = new lul::LUL(gtest_logging_sink_for_LulIntegration);
+  read_procmaps(lul);
+
+  // Run unwind tests and receive information about how many there
+  // were and how many were successful. EXPECT_EQ prints the actual counts.
+  lul->EnableUnwinding();
+  int nTests = 0, nTestsPassed = 0;
+  RunLulUnitTests(&nTests, &nTestsPassed, lul);
+  EXPECT_EQ(nTests, 6) << "Unexpected number of tests";
+  EXPECT_EQ(nTestsPassed, nTests) << "Not all tests passed";
+
+  delete lul;  // Only non-fatal EXPECT_* above, so this is always reached.
+}
diff --git a/tools/profiler/tests/gtest/LulTestDwarf.cpp b/tools/profiler/tests/gtest/LulTestDwarf.cpp
new file mode 100644
index 0000000000..55373ec093
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTestDwarf.cpp
@@ -0,0 +1,2733 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+#include "LulDwarfInt.h"
+#include "LulTestInfrastructure.h"
+
+using lul_test::CFISection;
+using lul_test::test_assembler::kBigEndian;
+using lul_test::test_assembler::kLittleEndian;
+using lul_test::test_assembler::Label;
+using testing::_;
+using testing::InSequence;
+using testing::Return;
+using testing::Sequence;
+using testing::Test;
+
+#define PERHAPS_WRITE_DEBUG_FRAME_FILE(name, section) /**/
+#define PERHAPS_WRITE_EH_FRAME_FILE(name, section)    /**/
+
+// Set this to 0 to make LUL be completely silent during tests.
+// Set it to 1 to get logging output from LUL, presumably for
+// the purpose of debugging it.
+#define DEBUG_LUL_TEST_DWARF 0
+
+// LUL needs a callback for its logging sink. Writes `str` to stderr with a
+// "LUL-in-gtest: " prefix; does nothing unless DEBUG_LUL_TEST_DWARF != 0.
+static void gtest_logging_sink_for_LulTestDwarf(const char* str) {
+  if (DEBUG_LUL_TEST_DWARF == 0) {
+    return;
+  }
+  // Ignore any trailing \n, since the format string adds one anyway. The
+  // string is trimmed via a printf precision instead of a strdup() copy, so
+  // there is no allocation that could fail and be dereferenced as null.
+  // (Precision is an int; assumes log lines are shorter than INT_MAX.)
+  size_t n = strlen(str);
+  if (n > 0 && str[n - 1] == '\n') {
+    --n;
+  }
+  fprintf(stderr, "LUL-in-gtest: %.*s\n", static_cast<int>(n), str);
+}
+
+namespace lul {
+
+class MockCallFrameInfoHandler : public CallFrameInfo::Handler {
+ public:  // gmock methods so tests can set expectations on each callback.
+  MOCK_METHOD6(Entry,
+               bool(size_t offset, uint64 address, uint64 length, uint8 version,
+                    const std::string& augmentation, unsigned return_address));
+  MOCK_METHOD2(UndefinedRule, bool(uint64 address, int reg));
+  MOCK_METHOD2(SameValueRule, bool(uint64 address, int reg));
+  MOCK_METHOD4(OffsetRule,
+               bool(uint64 address, int reg, int base_register, long offset));
+  MOCK_METHOD4(ValOffsetRule,
+               bool(uint64 address, int reg, int base_register, long offset));
+  MOCK_METHOD3(RegisterRule, bool(uint64 address, int reg, int base_register));
+  MOCK_METHOD3(ExpressionRule,
+               bool(uint64 address, int reg, const ImageSlice& expression));
+  MOCK_METHOD3(ValExpressionRule,
+               bool(uint64 address, int reg, const ImageSlice& expression));
+  MOCK_METHOD0(End, bool());
+  MOCK_METHOD2(PersonalityRoutine, bool(uint64 address, bool indirect));
+  MOCK_METHOD2(LanguageSpecificDataArea, bool(uint64 address, bool indirect));
+  MOCK_METHOD0(SignalHandler, bool());
+};
+
+class MockCallFrameErrorReporter : public CallFrameInfo::Reporter {
+ public:  // gmock methods for the error/warning reports checked by tests.
+  MockCallFrameErrorReporter()
+      : Reporter(gtest_logging_sink_for_LulTestDwarf, "mock filename",
+                 "mock section") {}  // Placeholder file/section names.
+  MOCK_METHOD2(Incomplete, void(uint64, CallFrameInfo::EntryKind));
+  MOCK_METHOD1(EarlyEHTerminator, void(uint64));
+  MOCK_METHOD2(CIEPointerOutOfRange, void(uint64, uint64));
+  MOCK_METHOD2(BadCIEId, void(uint64, uint64));
+  MOCK_METHOD2(UnrecognizedVersion, void(uint64, int version));
+  MOCK_METHOD2(UnrecognizedAugmentation, void(uint64, const string&));
+  MOCK_METHOD2(InvalidPointerEncoding, void(uint64, uint8));
+  MOCK_METHOD2(UnusablePointerEncoding, void(uint64, uint8));
+  MOCK_METHOD2(RestoreInCIE, void(uint64, uint64));
+  MOCK_METHOD3(BadInstruction, void(uint64, CallFrameInfo::EntryKind, uint64));
+  MOCK_METHOD3(NoCFARule, void(uint64, CallFrameInfo::EntryKind, uint64));
+  MOCK_METHOD3(EmptyStateStack, void(uint64, CallFrameInfo::EntryKind, uint64));
+  MOCK_METHOD3(ClearingCFARule, void(uint64, CallFrameInfo::EntryKind, uint64));
+};
+
+struct CFIFixture {
+  enum { kCFARegister = CallFrameInfo::Handler::kCFARegister };
+
+  CFIFixture() {
+    // Default expectations for the data handler.
+    //
+    // - Leave Entry and End without expectations, as it's probably a
+    //   good idea to set those explicitly in each test.
+    //
+    // - Expect the *Rule functions to not be called,
+    //   so that each test can simply list the calls they expect.
+    //
+    // I gather I could use StrictMock for this, but the manual seems
+    // to suggest using that only as a last resort, and this isn't so
+    // bad.
+    EXPECT_CALL(handler, UndefinedRule(_, _)).Times(0);
+    EXPECT_CALL(handler, SameValueRule(_, _)).Times(0);
+    EXPECT_CALL(handler, OffsetRule(_, _, _, _)).Times(0);
+    EXPECT_CALL(handler, ValOffsetRule(_, _, _, _)).Times(0);
+    EXPECT_CALL(handler, RegisterRule(_, _, _)).Times(0);
+    EXPECT_CALL(handler, ExpressionRule(_, _, _)).Times(0);
+    EXPECT_CALL(handler, ValExpressionRule(_, _, _)).Times(0);
+    EXPECT_CALL(handler, PersonalityRoutine(_, _)).Times(0);
+    EXPECT_CALL(handler, LanguageSpecificDataArea(_, _)).Times(0);
+    EXPECT_CALL(handler, SignalHandler()).Times(0);
+
+    // Default expectations for the error/warning reporter: no report at all.
+    EXPECT_CALL(reporter, Incomplete(_, _)).Times(0);
+    EXPECT_CALL(reporter, EarlyEHTerminator(_)).Times(0);
+    EXPECT_CALL(reporter, CIEPointerOutOfRange(_, _)).Times(0);
+    EXPECT_CALL(reporter, BadCIEId(_, _)).Times(0);
+    EXPECT_CALL(reporter, UnrecognizedVersion(_, _)).Times(0);
+    EXPECT_CALL(reporter, UnrecognizedAugmentation(_, _)).Times(0);
+    EXPECT_CALL(reporter, InvalidPointerEncoding(_, _)).Times(0);
+    EXPECT_CALL(reporter, UnusablePointerEncoding(_, _)).Times(0);
+    EXPECT_CALL(reporter, RestoreInCIE(_, _)).Times(0);
+    EXPECT_CALL(reporter, BadInstruction(_, _, _)).Times(0);
+    EXPECT_CALL(reporter, NoCFARule(_, _, _)).Times(0);
+    EXPECT_CALL(reporter, EmptyStateStack(_, _, _)).Times(0);
+    EXPECT_CALL(reporter, ClearingCFARule(_, _, _)).Times(0);
+  }
+
+  MockCallFrameInfoHandler handler;  // Passed to CallFrameInfo by the tests.
+  MockCallFrameErrorReporter reporter;  // Likewise, as the error reporter.
+};
+
+class LulDwarfCFI : public CFIFixture, public Test {};  // TEST_F fixture.
+
+TEST_F(LulDwarfCFI, EmptyRegion) {
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+  static const char data[1] = {42};  // Dummy byte; parser is given length 0.
+
+  ByteReader reader(ENDIANNESS_BIG);
+  CallFrameInfo parser(data, 0, &reader, &handler, &reporter);
+  EXPECT_TRUE(parser.Start());  // Expect success with no handler calls.
+}
+
+TEST_F(LulDwarfCFI, IncompleteLength32) {
+  CFISection section(kBigEndian, 8);
+  section
+      // Not even long enough for an initial length.
+      .D16(0xa0f)
+      // Padding to keep valgrind happy. We subtract these off when we
+      // construct the parser.
+      .D16(0);
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown))
+      .WillOnce(Return());
+
+  string contents;
+  ASSERT_TRUE(section.GetContents(&contents));
+
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(8);
+  CallFrameInfo parser(contents.data(), contents.size() - 2, &reader, &handler,
+                       &reporter);  // "- 2" drops the D16 padding.
+  EXPECT_FALSE(parser.Start());
+}
+
+TEST_F(LulDwarfCFI, IncompleteLength64) {
+  CFISection section(kLittleEndian, 4);
+  section
+      // An incomplete 64-bit DWARF initial length.
+      .D32(0xffffffff)
+      .D32(0x71fbaec2)
+      // Padding to keep valgrind happy. We subtract these off when we
+      // construct the parser.
+      .D32(0);
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown))
+      .WillOnce(Return());
+
+  string contents;
+  ASSERT_TRUE(section.GetContents(&contents));
+
+  ByteReader reader(ENDIANNESS_LITTLE);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size() - 4, &reader, &handler,
+                       &reporter);  // "- 4" drops the D32 padding.
+  EXPECT_FALSE(parser.Start());
+}
+
+TEST_F(LulDwarfCFI, IncompleteId32) {
+  CFISection section(kBigEndian, 8);
+  section
+      .D32(3)  // Initial length, not long enough for id
+      .D8(0xd7)
+      .D8(0xe5)
+      .D8(0xf1)  // incomplete id
+      .CIEHeader(8727, 3983, 8889, 3, "")
+      .FinishEntry();
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown))
+      .WillOnce(Return());  // Expected exactly once.
+
+  string contents;
+  ASSERT_TRUE(section.GetContents(&contents));
+
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(8);
+  CallFrameInfo parser(contents.data(), contents.size(), &reader, &handler,
+                       &reporter);
+  EXPECT_FALSE(parser.Start());
+}
+
+// An entry whose id field holds a bogus value must be reported through
+// CIEPointerOutOfRange, and the parse must fail.
+TEST_F(LulDwarfCFI, BadId32) {
+  CFISection section(kBigEndian, 8);
+  // Initial length.
+  section.D32(0x100);
+  // Bogus ID.
+  section.D32(0xe802fade);
+  // Filler so that the stated length is true.
+  section.Append(0x100 - 4, 0x42);
+  section.CIEHeader(1672, 9872, 8529, 3, "");
+  section.FinishEntry();
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(8);
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, CIEPointerOutOfRange(_, 0xe802fade)).WillOnce(Return());
+
+  string bytes;
+  ASSERT_TRUE(section.GetContents(&bytes));
+
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_FALSE(cfi_parser.Start());
+}
+
+// A section containing nothing but one CIE parses successfully without a
+// single call reaching the handler.
+TEST_F(LulDwarfCFI, SingleCIE) {
+  CFISection section(kLittleEndian, 4);
+  // The CIE body is ten DW_CFA_nop bytes.
+  section.CIEHeader(0xffe799a8, 0x3398dcdd, 0x6e9683de, 3, "")
+      .Append(10, lul::DW_CFA_nop);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("SingleCIE", section);
+
+  ByteReader byte_reader(ENDIANNESS_LITTLE);
+  byte_reader.SetAddressSize(4);
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// A CIE followed by a single FDE that cites it: the handler sees exactly one
+// Entry/End pair carrying the FDE's range and the CIE's parameters.
+TEST_F(LulDwarfCFI, OneFDE) {
+  CFISection section(kBigEndian, 4);
+  Label cie_mark;
+  section.Mark(&cie_mark);
+  section.CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "");
+  section.FinishEntry();
+  section.FDEHeader(cie_mark, 0x7714740d, 0x3d5a10cd);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("OneFDE", section);
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler, Entry(_, 0x7714740d, 0x3d5a10cd, 3, "", 0x6b6efb87))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// Two FDEs cite the same CIE, one placed before the CIE and one after it.
+// Each FDE must produce its own Entry/End pair.
+TEST_F(LulDwarfCFI, TwoFDEsOneCIE) {
+  CFISection section(kBigEndian, 4);
+  Label shared_cie;
+
+  // First FDE. readelf complains about this one because it makes a forward
+  // reference to its CIE.
+  section.FDEHeader(shared_cie, 0xa42744df, 0xa3b42121);
+  section.FinishEntry();
+
+  // The CIE itself.
+  section.Mark(&shared_cie);
+  section.CIEHeader(0x04f7dc7b, 0x3d00c05f, 0xbd43cb59, 3, "");
+  section.FinishEntry();
+
+  // Second FDE.
+  section.FDEHeader(shared_cie, 0x6057d391, 0x700f608d);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsOneCIE", section);
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  {
+    // Expectations for the first FDE.
+    InSequence first_fde_order;
+    EXPECT_CALL(handler, Entry(_, 0xa42744df, 0xa3b42121, 3, "", 0xbd43cb59))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+  {
+    // Expectations for the second FDE.
+    InSequence second_fde_order;
+    EXPECT_CALL(handler, Entry(_, 0x6057d391, 0x700f608d, 3, "", 0xbd43cb59))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// Two CIEs and two FDEs, with each FDE citing a different CIE. Every Entry
+// call must carry the version and return column of the CIE its FDE cites.
+TEST_F(LulDwarfCFI, TwoFDEsTwoCIEs) {
+  CFISection section(kLittleEndian, 8);
+  Label first_cie;
+  Label second_cie;
+
+  // First CIE.
+  section.Mark(&first_cie);
+  section.CIEHeader(0x694d5d45, 0x4233221b, 0xbf45e65a, 3, "");
+  section.FinishEntry();
+
+  // First FDE, which cites the second CIE. readelf complains about this one
+  // because it makes a forward reference to its CIE.
+  section.FDEHeader(second_cie, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL);
+  section.FinishEntry();
+
+  // Second FDE, which cites the first CIE.
+  section.FDEHeader(first_cie, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL);
+  section.FinishEntry();
+
+  // Second CIE.
+  section.Mark(&second_cie);
+  section.CIEHeader(0xfba3fad7, 0x6287e1fd, 0x61d2c581, 2, "");
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsTwoCIEs", section);
+
+  ByteReader byte_reader(ENDIANNESS_LITTLE);
+  byte_reader.SetAddressSize(8);
+
+  {
+    // First FDE: parameters come from the second CIE.
+    InSequence first_fde_order;
+    EXPECT_CALL(handler, Entry(_, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL,
+                               2, "", 0x61d2c581))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+  {
+    // Second FDE: parameters come from the first CIE.
+    InSequence second_fde_order;
+    EXPECT_CALL(handler, Entry(_, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL,
+                               3, "", 0xbf45e65a))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// A CIE with an unrecognized version number: the FDE citing it is skipped and
+// the problem is reported, while a later well-formed CIE/FDE pair is still
+// visited. Start() is expected to return false overall.
+TEST_F(LulDwarfCFI, BadVersion) {
+  CFISection section(kBigEndian, 4);
+  Label unsupported_cie;
+  Label supported_cie;
+
+  section.Mark(&unsupported_cie);
+  section.CIEHeader(0xca878cf0, 0x7698ec04, 0x7b616f54, 0x52, "");
+  section.FinishEntry();
+  // This entry should be skipped, as its CIE specifies a version we don't
+  // recognize.
+  section.FDEHeader(unsupported_cie, 0x08852292, 0x2204004a);
+  section.FinishEntry();
+
+  // Despite the above, this entry should be visited.
+  section.Mark(&supported_cie);
+  section.CIEHeader(0x7c3ae7c9, 0xb9b9a512, 0x96cb3264, 3, "");
+  section.FinishEntry();
+  section.FDEHeader(supported_cie, 0x2094735a, 0x6e875501);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("BadVersion", section);
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  EXPECT_CALL(reporter, UnrecognizedVersion(_, 0x52)).WillOnce(Return());
+
+  {
+    InSequence ordered;
+    // No mention of the first FDE is expected; only the second one gets an
+    // Entry call.
+    EXPECT_CALL(handler, Entry(_, 0x2094735a, 0x6e875501, 3, "", 0x96cb3264))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_FALSE(cfi_parser.Start());
+}
+
+// A CIE with an unrecognized augmentation string: the FDE citing it is
+// skipped and the problem is reported, while a later well-formed CIE/FDE pair
+// is still visited. Start() is expected to return false overall.
+TEST_F(LulDwarfCFI, BadAugmentation) {
+  CFISection section(kBigEndian, 4);
+  Label unsupported_cie;
+  Label supported_cie;
+
+  section.Mark(&unsupported_cie);
+  section.CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "spaniels!");
+  section.FinishEntry();
+  // This entry should be skipped, as its CIE specifies an augmentation we
+  // don't recognize.
+  section.FDEHeader(unsupported_cie, 0x7714740d, 0x3d5a10cd);
+  section.FinishEntry();
+
+  // Despite the above, this entry should be visited.
+  section.Mark(&supported_cie);
+  section.CIEHeader(0xf8bc4399, 0x8cf09931, 0xf2f519b2, 3, "");
+  section.FinishEntry();
+  section.FDEHeader(supported_cie, 0x7bf0fda0, 0xcbcd28d8);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("BadAugmentation", section);
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "spaniels!"))
+      .WillOnce(Return());
+
+  {
+    InSequence ordered;
+    // No mention of the first FDE is expected; only the second one gets an
+    // Entry call.
+    EXPECT_CALL(handler, Entry(_, 0x7bf0fda0, 0xcbcd28d8, 3, "", 0xf2f519b2))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_FALSE(cfi_parser.Start());
+}
+
+// In CFI version 1 (DWARF2) the return address column is a single byte,
+// whereas version 3 (DWARF3) encodes it as a ULEB128. This test covers the
+// version 1 form.
+TEST_F(LulDwarfCFI, CIEVersion1ReturnColumn) {
+  CFISection section(kBigEndian, 4);
+  Label cie_mark;
+
+  // CIE in the version 1 format: the return column is a ubyte. The value
+  // 0x9f is chosen because it is parsed differently as a ubyte and as a
+  // ULEB128.
+  section.Mark(&cie_mark);
+  section.CIEHeader(0xbcdea24f, 0x5be28286, 0x9f, 1, "");
+  section.FinishEntry();
+
+  // FDE citing that CIE.
+  section.FDEHeader(cie_mark, 0xb8d347b5, 0x825e55dc);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion1ReturnColumn", section);
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler, Entry(_, 0xb8d347b5, 0x825e55dc, 1, "", 0x9f))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// In CFI version 1 (DWARF2) the return address column is a single byte,
+// whereas version 3 (DWARF3) encodes it as a ULEB128. This test covers the
+// version 3 form.
+TEST_F(LulDwarfCFI, CIEVersion3ReturnColumn) {
+  CFISection section(kBigEndian, 4);
+  Label cie_mark;
+
+  // CIE in the version 3 format: the return column is a ULEB128. The value
+  // 0x89 is chosen because it is parsed differently as a ubyte and as a
+  // ULEB128.
+  section.Mark(&cie_mark);
+  section.CIEHeader(0x0ab4758d, 0xc010fdf7, 0x89, 3, "");
+  section.FinishEntry();
+
+  // FDE citing that CIE.
+  section.FDEHeader(cie_mark, 0x86763f2b, 0x2a66dc23);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion3ReturnColumn", section);
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler, Entry(_, 0x86763f2b, 0x2a66dc23, 3, "", 0x89))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string bytes;
+  EXPECT_TRUE(section.GetContents(&bytes));
+  CallFrameInfo cfi_parser(bytes.data(), bytes.size(), &byte_reader, &handler,
+                           &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// Fixture shared by the tests that exercise individual call frame
+// instructions. It holds the parameters of a stock CIE/FDE pair, can emit
+// that pair into a CFISection (StockCIEAndFDE), and can run a finished
+// section through a CallFrameInfo parser (ParseSection).
+struct CFIInsnFixture : public CFIFixture {
+  // Every data member is given a definite value here. code_factor,
+  // fde_start and fde_size depend on the section's address size and receive
+  // their real values from StockCIEAndFDE (or directly from a test); they
+  // are zeroed so that they are never read while indeterminate.
+  CFIInsnFixture()
+      : CFIFixture(),
+        code_factor(0),
+        data_factor(0xb6f),
+        return_register(0x9be1ed9f),
+        version(3),
+        cfa_base_register(0x383a3aa),
+        cfa_offset(0xf748),
+        fde_start(0),
+        fde_size(0) {}
+
+  // Prepare SECTION to receive FDE instructions.
+  //
+  // - Append a stock CIE header that establishes the fixture's
+  //   code_factor, data_factor, return_register, version, and
+  //   augmentation values.
+  // - Have the CIE set up a CFA rule using cfa_base_register and
+  //   cfa_offset.
+  // - Append a stock FDE header, referring to the above CIE, for the
+  //   fde_size bytes at fde_start. Choose fde_start and fde_size
+  //   appropriately for the section's address size.
+  // - Set appropriate expectations on handler in sequence s for the
+  //   frame description entry and the CIE's CFA rule.
+  //
+  // Note that code_factor, fde_start and fde_size are overwritten
+  // unconditionally, so values assigned to them before this call are lost.
+  //
+  // On return, SECTION is ready to have FDE instructions appended to
+  // it, and its FinishEntry member called.
+  void StockCIEAndFDE(CFISection* section) {
+    // Choose appropriate constants for our address size.
+    if (section->AddressSize() == 4) {
+      fde_start = 0xc628ecfbU;
+      fde_size = 0x5dee04a2;
+      code_factor = 0x60b;
+    } else {
+      assert(section->AddressSize() == 8);
+      fde_start = 0x0005c57ce7806bd3ULL;
+      fde_size = 0x2699521b5e333100ULL;
+      code_factor = 0x01008e32855274a8ULL;
+    }
+
+    // Create the CIE, whose only instruction defines the CFA rule.
+    (*section)
+        .Mark(&cie_label)
+        .CIEHeader(code_factor, data_factor, return_register, version, "")
+        .D8(lul::DW_CFA_def_cfa)
+        .ULEB128(cfa_base_register)
+        .ULEB128(cfa_offset)
+        .FinishEntry();
+
+    // Create the FDE header; the caller appends the instructions.
+    section->FDEHeader(cie_label, fde_start, fde_size);
+
+    // Expect an Entry call for the FDE and a ValOffsetRule call for the
+    // CIE's CFA rule.
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .InSequence(s)
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister,
+                                       cfa_base_register, cfa_offset))
+        .InSequence(s)
+        .WillOnce(Return(true));
+  }
+
+  // Run the contents of SECTION through a CallFrameInfo parser,
+  // expecting parser.Start to return SUCCEEDS.  Caller may optionally
+  // supply, via READER, its own ByteReader.  If that's absent, a
+  // local one matching the section's endianness is used.  Either way the
+  // reader's address size is set from the section.
+  void ParseSection(CFISection* section, bool succeeds = true,
+                    ByteReader* reader = nullptr) {
+    string contents;
+    EXPECT_TRUE(section->GetContents(&contents));
+
+    // Translate the section's endianness into the reader's enumeration.
+    const bool is_big_endian = section->endianness() == kBigEndian;
+    assert(is_big_endian || section->endianness() == kLittleEndian);
+    const lul::Endianness endianness =
+        is_big_endian ? ENDIANNESS_BIG : ENDIANNESS_LITTLE;
+
+    ByteReader local_reader(endianness);
+    ByteReader* reader_to_use = reader ? reader : &local_reader;
+    reader_to_use->SetAddressSize(section->AddressSize());
+    CallFrameInfo parser(contents.data(), contents.size(), reader_to_use,
+                         &handler, &reporter);
+    if (succeeds)
+      EXPECT_TRUE(parser.Start());
+    else
+      EXPECT_FALSE(parser.Start());
+  }
+
+  Label cie_label;             // Marks the stock CIE emitted by StockCIEAndFDE.
+  Sequence s;                  // Orders the handler expectations.
+  uint64 code_factor;          // CIE code alignment factor.
+  int data_factor;             // CIE data alignment factor.
+  unsigned return_register;    // CIE return address column.
+  unsigned version;            // CIE version number.
+  unsigned cfa_base_register;  // Base register of the stock CFA rule.
+  int cfa_offset;              // Offset of the stock CFA rule.
+  uint64 fde_start, fde_size;  // Address range covered by the stock FDE.
+};
+
+// Fixture class named by the LulDwarfCFIInsn TEST_F cases: it combines the
+// CFIInsnFixture helpers with the Test base class.
+class LulDwarfCFIInsn : public CFIInsnFixture, public Test {};
+
+// DW_CFA_set_loc: a rule issued afterwards must be reported at the address
+// supplied by the instruction.
+TEST_F(LulDwarfCFIInsn, DW_CFA_set_loc) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_set_loc);
+  section.D32(0xb1ee3e7a);
+  // A DW_CFA_def_cfa follows to force a handler call through which the
+  // effect of the DW_CFA_set_loc can be checked.
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x4defb431);
+  section.ULEB128(0x6d17b0ee);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_set_loc", section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(0xb1ee3e7a, kCFARegister, 0x4defb431, 0x6d17b0ee))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc: the delta embedded in the opcode byte, scaled by the
+// code factor, is added to the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_advance_loc | 0x2a);
+  // A DW_CFA_def_cfa follows to force a handler call through which the
+  // effect of the DW_CFA_advance_loc can be checked.
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x5bbb3715);
+  section.ULEB128(0x0186c7bf);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc", section);
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + 0x2a * code_factor,
+                                     kCFARegister, 0x5bbb3715, 0x0186c7bf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc1: a one-byte delta, scaled by the code factor, is added
+// to the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc1) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_advance_loc1);
+  section.D8(0xd8);
+  // The following DW_CFA_def_cfa makes the new location observable.
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x69d5696a);
+  section.ULEB128(0x1eb7fc93);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc1", section);
+
+  EXPECT_CALL(handler, ValOffsetRule((fde_start + 0xd8 * code_factor),
+                                     kCFARegister, 0x69d5696a, 0x1eb7fc93))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc2: a two-byte delta, scaled by the code factor, is added
+// to the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc2) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_advance_loc2);
+  section.D16(0x3adb);
+  // The following DW_CFA_def_cfa makes the new location observable.
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x3a368bed);
+  section.ULEB128(0x3194ee37);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc2", section);
+
+  EXPECT_CALL(handler, ValOffsetRule((fde_start + 0x3adb * code_factor),
+                                     kCFARegister, 0x3a368bed, 0x3194ee37))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc4: a four-byte delta, scaled by the code factor, is added
+// to the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc4) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_advance_loc4);
+  section.D32(0x15813c88);
+  // The following DW_CFA_def_cfa makes the new location observable.
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x135270c5);
+  section.ULEB128(0x24bad7cb);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc4", section);
+
+  EXPECT_CALL(handler, ValOffsetRule((fde_start + 0x15813c88ULL * code_factor),
+                                     kCFARegister, 0x135270c5, 0x24bad7cb))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_MIPS_advance_loc8: an eight-byte delta, scaled by the code factor,
+// is added to the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_MIPS_advance_loc8) {
+  // code_factor is deliberately not assigned here: StockCIEAndFDE
+  // unconditionally selects the code factor for the section's address size,
+  // so any value stored beforehand would be discarded.
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_MIPS_advance_loc8)
+      .D64(0x3c4f3945b92c14ULL)
+      // Use DW_CFA_def_cfa to force a handler call that we can use to
+      // check the effect of the DW_CFA_MIPS_advance_loc8.
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(0xe17ed602)
+      .ULEB128(0x3d162e7f)
+      .FinishEntry();
+
+  // The label matches the test name, as in the other tests.
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_MIPS_advance_loc8", section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule((fde_start + 0x3c4f3945b92c14ULL * code_factor),
+                            kCFARegister, 0xe17ed602, 0x3d162e7f))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa: the CFA rule becomes the given register plus the given
+// unsigned offset, reported at the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x4e363a85);
+  section.ULEB128(0x815f9aa7);
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_def_cfa", section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x4e363a85, 0x815f9aa7))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_sf: the signed offset operand is scaled by the data factor.
+// Exercised once with a positive and once with a negative operand.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_sf) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Positive factored offset.
+  section.D8(lul::DW_CFA_def_cfa_sf);
+  section.ULEB128(0x8ccb32b7);
+  section.LEB128(0x9ea);
+  // Negative factored offset.
+  section.D8(lul::DW_CFA_def_cfa_sf);
+  section.ULEB128(0x9b40f5da);
+  section.LEB128(-0x40a2);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, 0x8ccb32b7,
+                                     0x9ea * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, 0x9b40f5da,
+                                     -0x40a2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_register: only the base register of the CFA rule changes;
+// the offset established by the stock CIE (cfa_offset) is retained.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_register) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_def_cfa_register);
+  section.ULEB128(0x3e7e9363);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x3e7e9363, cfa_offset))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// When the current CFA rule is an expression rather than a base/offset
+// rule, DW_CFA_def_cfa_register should leave it untouched.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_registerBadRule) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  ImageSlice cfa_expr("needle in a haystack");
+  // First replace the CFA rule with an expression...
+  section.D8(lul::DW_CFA_def_cfa_expression);
+  section.Block(cfa_expr);
+  // ...then attempt to change its base register.
+  section.D8(lul::DW_CFA_def_cfa_register);
+  section.ULEB128(0xf1b49e49);
+  section.FinishEntry();
+
+  // The expression rule may be reported any number of times, and is not
+  // tied to the sequence.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, kCFARegister, cfa_expr))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ByteReader expr_reader(ENDIANNESS_BIG);
+  ParseSection(&section, true, &expr_reader);
+}
+
+// DW_CFA_def_cfa_offset: only the offset of the CFA rule changes; the base
+// register established by the stock CIE (cfa_base_register) is retained.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_def_cfa_offset);
+  section.ULEB128(0x1e8e3b9b);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                                     0x1e8e3b9b))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_offset_sf: the signed operand is scaled by the data factor
+// and replaces the CFA rule's offset. Exercised once with a positive and
+// once with a negative operand.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset_sf) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Positive factored offset.
+  section.D8(lul::DW_CFA_def_cfa_offset_sf);
+  section.LEB128(0x970);
+  // Negative factored offset.
+  section.D8(lul::DW_CFA_def_cfa_offset_sf);
+  section.LEB128(-0x2cd);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                                     0x970 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                                     -0x2cd * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// When the current CFA rule is an expression rather than a base/offset
+// rule, DW_CFA_def_cfa_offset should leave it untouched.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offsetBadRule) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  ImageSlice cfa_expr("six ways to Sunday");
+  // First replace the CFA rule with an expression...
+  section.D8(lul::DW_CFA_def_cfa_expression);
+  section.Block(cfa_expr);
+  // ...then attempt to change its offset.
+  section.D8(lul::DW_CFA_def_cfa_offset);
+  section.ULEB128(0x1e8e3b9b);
+  section.FinishEntry();
+
+  // The expression rule may be reported any number of times, and is not
+  // tied to the sequence.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, kCFARegister, cfa_expr))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ByteReader expr_reader(ENDIANNESS_BIG);
+  ParseSection(&section, true, &expr_reader);
+}
+
+// DW_CFA_def_cfa_expression: the CFA rule becomes the supplied expression
+// block, reported through ValExpressionRule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_expression) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  ImageSlice cfa_expr("eating crow");
+  section.D8(lul::DW_CFA_def_cfa_expression);
+  section.Block(cfa_expr);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, kCFARegister, cfa_expr))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ByteReader expr_reader(ENDIANNESS_LITTLE);
+  ParseSection(&section, true, &expr_reader);
+}
+
+// DW_CFA_undefined: the named register is reported through UndefinedRule at
+// the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_undefined) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_undefined);
+  section.ULEB128(0x300ce45d);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, UndefinedRule(fde_start, 0x300ce45d))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_same_value: the named register is reported through SameValueRule at
+// the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_same_value) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_same_value);
+  section.ULEB128(0x3865a760);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, SameValueRule(fde_start, 0x3865a760))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset: the register number is embedded in the opcode byte and the
+// unsigned operand is scaled by the data factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_offset | 0x2c);
+  section.ULEB128(0x9f6);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x2c, kCFARegister, 0x9f6 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset_extended: the register number is a ULEB128 operand and the
+// unsigned offset operand is scaled by the data factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_offset_extended);
+  section.ULEB128(0x402b);
+  section.ULEB128(0xb48);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x402b, kCFARegister, 0xb48 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset_extended_sf: like DW_CFA_offset_extended, but the offset
+// operand is signed. Exercised once with a positive and once with a negative
+// operand.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended_sf) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // Positive factored offset.
+  section.D8(lul::DW_CFA_offset_extended_sf);
+  section.ULEB128(0x997c23ee);
+  section.LEB128(0x2d00);
+  // Negative factored offset.
+  section.D8(lul::DW_CFA_offset_extended_sf);
+  section.ULEB128(0x9519eb82);
+  section.LEB128(-0xa77);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x997c23ee, kCFARegister,
+                                  0x2d00 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x9519eb82, kCFARegister,
+                                  -0xa77 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_val_offset: the register is reported through ValOffsetRule relative
+// to the CFA, with the unsigned operand scaled by the data factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_val_offset);
+  section.ULEB128(0x623562fe);
+  section.ULEB128(0x673);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x623562fe, kCFARegister,
+                                     0x673 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_val_offset_sf: like DW_CFA_val_offset, but the offset operand is
+// signed. Exercised once with a positive and once with a negative operand.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset_sf) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Positive factored offset.
+  section.D8(lul::DW_CFA_val_offset_sf);
+  section.ULEB128(0x6f4f);
+  section.LEB128(0xaab);
+  // Negative factored offset.
+  section.D8(lul::DW_CFA_val_offset_sf);
+  section.ULEB128(0x2483);
+  section.LEB128(-0x8a2);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x6f4f, kCFARegister,
+                                     0xaab * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x2483, kCFARegister,
+                                     -0x8a2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_register: both register operands are passed through to RegisterRule
+// at the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_register) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  section.D8(lul::DW_CFA_register);
+  section.ULEB128(0x278d18f9);
+  section.ULEB128(0x1a684414);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0x278d18f9, 0x1a684414))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_expression: the register and the expression block are passed
+// through to ExpressionRule at the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_expression) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  ImageSlice reg_expr("plus ça change, plus c'est la même chose");
+  section.D8(lul::DW_CFA_expression);
+  section.ULEB128(0xa1619fb2);
+  section.Block(reg_expr);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ExpressionRule(fde_start, 0xa1619fb2, reg_expr))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ByteReader expr_reader(ENDIANNESS_BIG);
+  ParseSection(&section, true, &expr_reader);
+}
+
+// DW_CFA_val_expression: the register and the expression block are passed
+// through to ValExpressionRule at the FDE's start address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_expression) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  ImageSlice reg_expr("he who has the gold makes the rules");
+  section.D8(lul::DW_CFA_val_expression);
+  section.ULEB128(0xc5e4a9e3);
+  section.Block(reg_expr);
+  section.FinishEntry();
+
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, 0xc5e4a9e3, reg_expr))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ByteReader expr_reader(ENDIANNESS_BIG);
+  ParseSection(&section, true, &expr_reader);
+}
+
+// DW_CFA_restore: after the FDE overrides a register rule that the CIE
+// established, restoring the register must report the CIE's rule again.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restore) {
+  // This test emits its own CIE and FDE instead of the stock pair, so the
+  // fixture parameters are assigned by hand.
+  code_factor = 0x01bd188a9b1fa083ULL;
+  data_factor = -0x1ac8;
+  return_register = 0x8c35b049;
+  version = 2;
+  fde_start = 0x2d70fe998298bbb1ULL;
+  fde_size = 0x46ccc2e63cf0b108ULL;
+
+  CFISection section(kLittleEndian, 8);
+  Label cie_mark;
+
+  section.Mark(&cie_mark);
+  section.CIEHeader(code_factor, data_factor, return_register, version, "");
+  // Provide a CFA rule, because register rules require them.
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x6ca1d50e);
+  section.ULEB128(0x372e38e8);
+  // Provide an offset(N) rule for register 0x3c.
+  section.D8(lul::DW_CFA_offset | 0x3c);
+  section.ULEB128(0xb348);
+  section.FinishEntry();
+
+  // In the FDE...
+  section.FDEHeader(cie_mark, fde_start, fde_size);
+  // At a second address, provide a new offset(N) rule for register 0x3c.
+  section.D8(lul::DW_CFA_advance_loc | 0x13);
+  section.D8(lul::DW_CFA_offset | 0x3c);
+  section.ULEB128(0x9a50);
+  // At a third address, restore the original rule for register 0x3c.
+  section.D8(lul::DW_CFA_advance_loc | 0x01);
+  section.D8(lul::DW_CFA_restore | 0x3c);
+  section.FinishEntry();
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CIE's CFA rule.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start, kCFARegister, 0x6ca1d50e, 0x372e38e8))
+        .WillOnce(Return(true));
+    // CIE's rule for register 0x3c.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start, 0x3c, kCFARegister, 0xb348 * data_factor))
+        .WillOnce(Return(true));
+    // FDE's rule for register 0x3c.
+    EXPECT_CALL(handler, OffsetRule(fde_start + 0x13 * code_factor, 0x3c,
+                                    kCFARegister, 0x9a50 * data_factor))
+        .WillOnce(Return(true));
+    // Restore CIE's rule for register 0x3c.
+    EXPECT_CALL(handler, OffsetRule(fde_start + (0x13 + 0x01) * code_factor,
+                                    0x3c, kCFARegister, 0xb348 * data_factor))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// DW_CFA_restore on a register for which the CIE established no rule: the
+// register is expected to be reported through SameValueRule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restoreNoRule) {
+  // This test emits its own CIE and FDE instead of the stock pair, so the
+  // fixture parameters are assigned by hand.
+  code_factor = 0x005f78143c1c3b82ULL;
+  data_factor = 0x25d0;
+  return_register = 0xe8;
+  version = 1;
+  fde_start = 0x4062e30f;
+  fde_size = 0x5302a389;
+
+  CFISection section(kBigEndian, 4);
+  Label cie_mark;
+
+  section.Mark(&cie_mark);
+  section.CIEHeader(code_factor, data_factor, return_register, version, "");
+  // Provide a CFA rule, because register rules require them.
+  section.D8(lul::DW_CFA_def_cfa);
+  section.ULEB128(0x470aa334);
+  section.ULEB128(0x099ef127);
+  section.FinishEntry();
+
+  // In the FDE...
+  section.FDEHeader(cie_mark, fde_start, fde_size);
+  // At a second address, provide an offset(N) rule for register 0x2c.
+  section.D8(lul::DW_CFA_advance_loc | 0x7);
+  section.D8(lul::DW_CFA_offset | 0x2c);
+  section.ULEB128(0x1f47);
+  // At a third address, restore the (missing) CIE rule for register 0x2c.
+  section.D8(lul::DW_CFA_advance_loc | 0xb);
+  section.D8(lul::DW_CFA_restore | 0x2c);
+  section.FinishEntry();
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CIE's CFA rule.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start, kCFARegister, 0x470aa334, 0x099ef127))
+        .WillOnce(Return(true));
+    // FDE's rule for register 0x2c.
+    EXPECT_CALL(handler, OffsetRule(fde_start + 0x7 * code_factor, 0x2c,
+                                    kCFARegister, 0x1f47 * data_factor))
+        .WillOnce(Return(true));
+    // Restore CIE's (missing) rule for register 0x2c.
+    EXPECT_CALL(handler,
+                SameValueRule(fde_start + (0x7 + 0xb) * code_factor, 0x2c))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// DW_CFA_restore_extended (register given as a ULEB128 operand) puts the
+// CIE's rule for that register back into effect.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restore_extended) {
+  // Operands shared by the assembled section and the expectations below.
+  const auto cfa_base = 0x56fa0edd;
+  const auto cfa_displacement = 0x097f78a5;
+  const auto probed_reg = 0x0f9b8a1c;
+  const auto cie_offset = 0xc979;
+  const auto fde_offset = 0x3b7b;
+  const auto first_advance = 0x3;
+  const auto second_advance = 0x04;
+
+  code_factor = 0x126e;
+  data_factor = -0xd8b;
+  return_register = 0x77711787;
+  version = 3;
+  fde_start = 0x01f55a45;
+  fde_size = 0x452adb80;
+
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(code_factor, data_factor, return_register, version, "",
+                 true /* dwarf64 */)
+      // Register rules require a CFA rule, so the CIE defines one first ...
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(cfa_base)
+      .ULEB128(cfa_displacement)
+      // ... followed by the CIE's own offset(N) rule for probed_reg.
+      .D8(lul::DW_CFA_offset_extended)
+      .ULEB128(probed_reg)
+      .ULEB128(cie_offset)
+      .FinishEntry()
+      // FDE body.
+      .FDEHeader(cie, fde_start, fde_size)
+      // Second address: override probed_reg with a different offset.
+      .D8(lul::DW_CFA_advance_loc | first_advance)
+      .D8(lul::DW_CFA_offset_extended)
+      .ULEB128(probed_reg)
+      .ULEB128(fde_offset)
+      // Third address: go back to the CIE's rule for probed_reg.
+      .D8(lul::DW_CFA_advance_loc | second_advance)
+      .D8(lul::DW_CFA_restore_extended)
+      .ULEB128(probed_reg)
+      .FinishEntry();
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CFA rule taken from the CIE.
+    EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base,
+                                       cfa_displacement))
+        .WillOnce(Return(true));
+    // probed_reg as defined by the CIE.
+    EXPECT_CALL(handler, OffsetRule(fde_start, probed_reg, kCFARegister,
+                                    cie_offset * data_factor))
+        .WillOnce(Return(true));
+    // probed_reg as overridden by the FDE.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start + first_advance * code_factor, probed_reg,
+                           kCFARegister, fde_offset * data_factor))
+        .WillOnce(Return(true));
+    // probed_reg back on the CIE's rule.
+    EXPECT_CALL(handler,
+                OffsetRule(
+                    fde_start + (first_advance + second_advance) * code_factor,
+                    probed_reg, kCFARegister, cie_offset * data_factor))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// Save a rule set, replace it, then DW_CFA_restore_state: only the rules that
+// differ between the discarded and the reinstated set are expected to be
+// reported at the restore address.
+TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_state) {
+  // Offsets of the saved ("incoming") rules for registers 2, 3 and 4.
+  const auto saved_off2 = 0x9806;
+  const auto saved_off3 = 0x995d;
+  const auto saved_off4 = 0x7055;
+  // Operands of the temporary ("outgoing") rules for registers 1, 2, 3, 5.
+  const auto temp_off1 = 0xea1a;
+  const auto temp_src2 = 0x1d2a3767;
+  const auto temp_off3 = 0xdd29;
+  const auto temp_off5 = 0xf1ce;
+
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // A state is created, saved, modified, and then restored. The state that
+  // is overridden by the restore is called the "outgoing" state; the state
+  // that is reinstated is called the "incoming" state.
+  //
+  // Register         outgoing        incoming        expect
+  // 1                offset(N)       no rule         new "same value" rule
+  // 2                register(R)     offset(N)       report changed rule
+  // 3                offset(N)       offset(M)       report changed offset
+  // 4                offset(N)       offset(N)       no report
+  // 5                offset(N)       no rule         new "same value" rule
+  section
+      // Build the "incoming" state and remember it.
+      .D8(lul::DW_CFA_offset | 2)
+      .ULEB128(saved_off2)
+      .D8(lul::DW_CFA_offset | 3)
+      .ULEB128(saved_off3)
+      .D8(lul::DW_CFA_offset | 4)
+      .ULEB128(saved_off4)
+      .D8(lul::DW_CFA_remember_state)
+      // Move to a new address first: an implementation may legitimately keep
+      // only the last rule given for a register at any one address.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      // Build the "outgoing" state, which the restore throws away.
+      .D8(lul::DW_CFA_offset | 1)
+      .ULEB128(temp_off1)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(2)
+      .ULEB128(temp_src2)
+      .D8(lul::DW_CFA_offset | 3)
+      .ULEB128(temp_off3)
+      .D8(lul::DW_CFA_offset | 5)
+      .ULEB128(temp_off5)
+      // Third address: reinstate the incoming state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // The three addresses at which rules are reported.
+  const uint64 at_save = fde_start;
+  const uint64 at_change = at_save + code_factor;
+  const uint64 at_restore = at_change + code_factor;
+
+  // Incoming rules, reported as they are defined.
+  EXPECT_CALL(handler, OffsetRule(at_save, 2, kCFARegister,
+                                  saved_off2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(at_save, 3, kCFARegister,
+                                  saved_off3 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(at_save, 4, kCFARegister,
+                                  saved_off4 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  // Outgoing rules, established after the save.
+  EXPECT_CALL(handler, OffsetRule(at_change, 1, kCFARegister,
+                                  temp_off1 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, RegisterRule(at_change, 2, temp_src2))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(at_change, 3, kCFARegister,
+                                  temp_off3 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(at_change, 5, kCFARegister,
+                                  temp_off5 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  // After the restore: one report per register whose rule differs between
+  // the outgoing and the incoming state (register 4 is identical in both).
+  EXPECT_CALL(handler, SameValueRule(at_restore, 1))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(at_restore, 2, kCFARegister,
+                                  saved_off2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(at_restore, 3, kCFARegister,
+                                  saved_off3 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, SameValueRule(at_restore, 5))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// Restoring a remembered rule set must also report a CFA rule that changed
+// while the set was saved.
+TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_stateCFA) {
+  // CFA offset that is in force only between the save and the restore.
+  const auto temp_cfa_offset = 0x90481102;
+
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section
+      .D8(lul::DW_CFA_remember_state)
+      // Second address: change only the CFA offset.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_def_cfa_offset)
+      .ULEB128(temp_cfa_offset)
+      // Third address: reinstate the remembered state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // The temporary CFA rule ...
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, kCFARegister,
+                                     cfa_base_register, temp_cfa_offset))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // ... and then the fixture's stock CFA rule again after the restore.
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor * 2, kCFARegister,
+                                     cfa_base_register, cfa_offset))
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_nop on either side of a real instruction must produce no handler
+// calls of its own; only the DW_CFA_def_cfa rule is expected.
+TEST_F(LulDwarfCFIInsn, DW_CFA_nop) {
+  const auto cfa_base = 0x3fb8d4f1;
+  const auto cfa_displacement = 0x078dc67b;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_nop)
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(cfa_base)
+      .ULEB128(cfa_displacement)
+      .D8(lul::DW_CFA_nop)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, cfa_base,
+                                     cfa_displacement))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_GNU_window_save: a single instruction is expected to produce rules
+// for registers 8 through 31.
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_window_save) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_GNU_window_save).FinishEntry();
+
+  // The individual rules are deliberately not tied to any sequence.
+
+  // The caller's %o0-%o7 are now the callee's %i0-%i7 (GCC register
+  // numbering).
+  for (int out_reg = 8; out_reg < 16; ++out_reg) {
+    EXPECT_CALL(handler, RegisterRule(fde_start, out_reg, out_reg + 16))
+        .WillOnce(Return(true));
+  }
+  // The caller's %l0-%l7 and %i0-%i7 were saved at the top of its frame.
+  for (int saved_reg = 16; saved_reg < 32; ++saved_reg) {
+    EXPECT_CALL(handler, OffsetRule(fde_start, saved_reg, kCFARegister,
+                                    (saved_reg - 16) * 4))
+        .WillOnce(Return(true));
+  }
+
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_GNU_args_size: no handler call is expected for the instruction
+// itself, but its operand must be consumed so the next instruction parses.
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_args_size) {
+  const auto sentinel_reg = 0x23;
+  const auto sentinel_offset = 0x269;
+
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_GNU_args_size)
+      .ULEB128(0xeddfa520)
+      // Sentinel rule: seeing it reported means the operand above was
+      // parsed properly.
+      .D8(lul::DW_CFA_offset | sentinel_reg)
+      .ULEB128(sentinel_offset)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, OffsetRule(fde_start, sentinel_reg, kCFARegister,
+                                  sentinel_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_GNU_negative_offset_extended: the offset operand is expected to be
+// reported negated (and scaled by the data alignment factor).
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_negative_offset_extended) {
+  const auto probed_reg = 0x430cc87a;
+  const auto offset_magnitude = 0x613;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_GNU_negative_offset_extended)
+      .ULEB128(probed_reg)
+      .ULEB128(offset_magnitude)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, OffsetRule(fde_start, probed_reg, kCFARegister,
+                                  -offset_magnitude * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// Three FDEs share one CIE. The handler declines the second one by returning
+// false from Entry(), so no rules and no End() are expected for it.
+TEST_F(LulDwarfCFIInsn, SkipFDE) {
+  // Values from the shared CIE.
+  const auto cie_version = 2;
+  const auto cie_return_reg = 0xedca5849;
+  const auto cfa_base = 0x42ed390b;
+  const auto cfa_displacement = 0x98f43aad;
+  // Address range of each FDE.
+  const auto first_start = 0xa870ebdd;
+  const auto first_size = 0x60f6aa4;
+  const auto second_start = 0xc534f7c0;
+  const auto second_size = 0xf6552e9;
+  const auto third_start = 0xf681cfc8;
+  const auto third_size = 0x7e4594e;
+  // DW_CFA_register operands of the FDEs that are processed.
+  const auto first_dst = 0x3a860351;
+  const auto first_src = 0x6c9a6bcf;
+  const auto third_dst = 0x26c53934;
+  const auto third_src = 0x18eeb8a4;
+
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section
+      // The CIE that all three FDEs refer to.
+      .Mark(&cie)
+      .CIEHeader(0x010269f2, 0x9177, cie_return_reg, cie_version, "")
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(cfa_base)
+      .ULEB128(cfa_displacement)
+      .FinishEntry()
+      // FDE #1.
+      .FDEHeader(cie, first_start, first_size)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(first_dst)
+      .ULEB128(first_src)
+      .FinishEntry()
+      // FDE #2 (the one the handler skips).
+      .FDEHeader(cie, second_start, second_size, true /* dwarf64 */)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(0x1b62c234)
+      .ULEB128(0x26586b18)
+      .FinishEntry()
+      // FDE #3.
+      .FDEHeader(cie, third_start, third_size)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(third_dst)
+      .ULEB128(third_src)
+      .FinishEntry();
+
+  {
+    InSequence ordered;
+
+    // FDE #1 is processed in full.
+    EXPECT_CALL(handler, Entry(_, first_start, first_size, cie_version, "",
+                               cie_return_reg))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, ValOffsetRule(first_start, kCFARegister, cfa_base,
+                                       cfa_displacement))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, RegisterRule(first_start, first_dst, first_src))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+    // FDE #2 is rejected at Entry(); nothing else is expected for it.
+    EXPECT_CALL(handler, Entry(_, second_start, second_size, cie_version, "",
+                               cie_return_reg))
+        .WillOnce(Return(false));
+
+    // FDE #3 is processed in full.
+    EXPECT_CALL(handler, Entry(_, third_start, third_size, cie_version, "",
+                               cie_return_reg))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, ValOffsetRule(third_start, kCFARegister, cfa_base,
+                                       cfa_displacement))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, RegisterRule(third_start, third_dst, third_src))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// The handler returns false from a rule callback part-way through an entry.
+// The DW_CFA_expression that follows must not be reported, End() is still
+// expected, and the parse as a whole is expected to fail.
+TEST_F(LulDwarfCFIInsn, QuitMidentry) {
+  const auto dst_reg = 0xe0cf850d;
+  const auto src_reg = 0x15aab431;
+
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_register)
+      .ULEB128(dst_reg)
+      .ULEB128(src_reg)
+      // Never reached by the handler: it bails out on the rule above.
+      .D8(lul::DW_CFA_expression)
+      .ULEB128(0x46750aa5)
+      .Block("meat")
+      .FinishEntry();
+
+  EXPECT_CALL(handler, RegisterRule(fde_start, dst_reg, src_reg))
+      .InSequence(s)
+      .WillOnce(Return(false));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section, false);
+}
+
+// Fixture for the TEST_F(LulDwarfCFIRestore, ...) cases below, which exercise
+// DW_CFA_remember_state / DW_CFA_restore_state for each kind of rule. It adds
+// nothing of its own: it only combines CFIInsnFixture with the Test base.
+class LulDwarfCFIRestore : public CFIInsnFixture, public Test {};
+
+TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleUnchanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_undefined)
+      .ULEB128(0x0bac878e)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, UndefinedRule(fde_start, 0x0bac878e))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_undefined)
+      .ULEB128(0x7dedff5f)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_same_value)
+      .ULEB128(0x7dedff5f)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, UndefinedRule(fde_start, 0x7dedff5f))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, SameValueRule(fde_start + code_factor, 0x7dedff5f))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + 2 * code_factor, 0x7dedff5f))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// A "same value" rule that is still in force when the state is restored must
+// not be reported again.
+TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleUnchanged) {
+  const auto probed_reg = 0xadbc9b3a;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_same_value)
+      .ULEB128(probed_reg)
+      // Save, step to the next address, restore: nothing changed in between.
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Only the initial definition is expected before End().
+  EXPECT_CALL(handler, SameValueRule(fde_start, probed_reg))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_same_value)
+      .ULEB128(0x3d90dcb5)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(0x3d90dcb5)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, SameValueRule(fde_start, 0x3d90dcb5))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x3d90dcb5))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, SameValueRule(fde_start + 2 * code_factor, 0x3d90dcb5))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// An offset(N) rule that is still in force when the state is restored must
+// not be reported again.
+TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleUnchanged) {
+  const auto probed_reg = 0x14;
+  const auto probed_offset = 0xb6f;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_offset | probed_reg)
+      .ULEB128(probed_offset)
+      // Save, step to the next address, restore: nothing changed in between.
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Only the initial definition is expected before End().
+  EXPECT_CALL(handler, OffsetRule(fde_start, probed_reg, kCFARegister,
+                                  probed_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_offset | 0x21)
+      .ULEB128(0xeb7)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(0x21)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x21, kCFARegister, 0xeb7 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x21))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, 0x21,
+                                  kCFARegister, 0xeb7 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// An offset(N) rule whose offset alone was changed after the save must be
+// reported with the original offset when the state is restored.
+TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChangedOffset) {
+  const auto probed_reg = 0x21;
+  const auto saved_offset = 0x134;
+  const auto temp_offset = 0xf4f;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_offset | probed_reg)
+      .ULEB128(saved_offset)
+      .D8(lul::DW_CFA_remember_state)
+      // Second address: same kind of rule, different offset.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_offset | probed_reg)
+      .ULEB128(temp_offset)
+      // Third address: reinstate the remembered state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Original offset, temporary offset, then the original offset once more.
+  EXPECT_CALL(handler, OffsetRule(fde_start, probed_reg, kCFARegister,
+                                  saved_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start + code_factor, probed_reg,
+                                  kCFARegister, temp_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, probed_reg,
+                                  kCFARegister, saved_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// A val_offset(N) rule that is still in force when the state is restored
+// must not be reported again.
+TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleUnchanged) {
+  const auto probed_reg = 0x829caee6;
+  const auto probed_offset = 0xe4c;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_offset)
+      .ULEB128(probed_reg)
+      .ULEB128(probed_offset)
+      // Save, step to the next address, restore: nothing changed in between.
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Only the initial definition is expected before End().
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, probed_reg, kCFARegister,
+                                     probed_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_val_offset)
+      .ULEB128(0xf17c36d6)
+      .ULEB128(0xeb7)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(0xf17c36d6)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0xf17c36d6, kCFARegister,
+                                     0xeb7 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xf17c36d6))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, 0xf17c36d6,
+                                     kCFARegister, 0xeb7 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// A val_offset(N) rule whose offset alone was changed after the save must be
+// reported with the original offset when the state is restored.
+TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChangedValOffset) {
+  const auto probed_reg = 0x2cf0ab1b;
+  const auto saved_offset = 0x562;
+  const auto temp_offset = 0xe88;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_offset)
+      .ULEB128(probed_reg)
+      .ULEB128(saved_offset)
+      .D8(lul::DW_CFA_remember_state)
+      // Second address: same kind of rule, different offset.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_val_offset)
+      .ULEB128(probed_reg)
+      .ULEB128(temp_offset)
+      // Third address: reinstate the remembered state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Original offset, temporary offset, then the original offset once more.
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, probed_reg, kCFARegister,
+                                     saved_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, probed_reg,
+                                     kCFARegister, temp_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, probed_reg,
+                                     kCFARegister, saved_offset * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// A register(R) rule that is still in force when the state is restored must
+// not be reported again.
+TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleUnchanged) {
+  const auto probed_reg = 0x77514acc;
+  const auto source_reg = 0x464de4ce;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_register)
+      .ULEB128(probed_reg)
+      .ULEB128(source_reg)
+      // Save, step to the next address, restore: nothing changed in between.
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Only the initial definition is expected before End().
+  EXPECT_CALL(handler, RegisterRule(fde_start, probed_reg, source_reg))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_register)
+      .ULEB128(0xe39acce5)
+      .ULEB128(0x095f1559)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(0xe39acce5)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0xe39acce5, 0x095f1559))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xe39acce5))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler,
+              RegisterRule(fde_start + 2 * code_factor, 0xe39acce5, 0x095f1559))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// A register(R) rule whose source register alone was changed after the save
+// must be reported with the original source when the state is restored.
+TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChangedRegister) {
+  const auto probed_reg = 0xd40e21b1;
+  const auto saved_source = 0x16607d6a;
+  const auto temp_source = 0xbabb4742;
+
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_register)
+      .ULEB128(probed_reg)
+      .ULEB128(saved_source)
+      .D8(lul::DW_CFA_remember_state)
+      // Second address: same kind of rule, different source register.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(probed_reg)
+      .ULEB128(temp_source)
+      // Third address: reinstate the remembered state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Original source, temporary source, then the original source once more.
+  EXPECT_CALL(handler, RegisterRule(fde_start, probed_reg, saved_source))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler,
+              RegisterRule(fde_start + code_factor, probed_reg, temp_source))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, RegisterRule(fde_start + 2 * code_factor, probed_reg,
+                                    saved_source))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// An expression(E) rule that is still in force when the state is restored
+// must not be reported again.
+TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleUnchanged) {
+  ByteReader reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 4);
+  // Stand-in expression bytes; the same slice is assembled into the section
+  // and matched in the expectation, so the two cannot drift apart.
+  ImageSlice dwarf("dwarf");
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_expression)
+      .ULEB128(0x666ae152)
+      // Pass the slice itself (as the sibling expression tests do) rather
+      // than a second copy of the string literal.
+      .Block(dwarf)
+      // Save, step to the next address, restore: nothing changed in between.
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Only the initial definition is expected before End().
+  EXPECT_CALL(handler, ExpressionRule(fde_start, 0x666ae152, dwarf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChanged) {
+  ByteReader reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 4);
+  ImageSlice elf("elf");
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_expression)
+      .ULEB128(0xb5ca5c46)
+      .Block(elf)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(0xb5ca5c46)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ExpressionRule(fde_start, 0xb5ca5c46, elf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler,
+              ExpressionRule(fde_start + 2 * code_factor, 0xb5ca5c46, elf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// An expression(E) rule whose expression alone was changed after the save
+// must be reported with the original expression when the state is restored.
+TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChangedExpression) {
+  const auto probed_reg = 0x500f5739;
+
+  ByteReader reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  // Two distinct stand-in expressions: the saved one and its replacement.
+  ImageSlice smurf("smurf");
+  ImageSlice orc("orc");
+  section
+      .D8(lul::DW_CFA_expression)
+      .ULEB128(probed_reg)
+      .Block(smurf)
+      .D8(lul::DW_CFA_remember_state)
+      // Second address: same kind of rule, different expression.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_expression)
+      .ULEB128(probed_reg)
+      .Block(orc)
+      // Third address: reinstate the remembered state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  EXPECT_CALL(handler, ExpressionRule(fde_start, probed_reg, smurf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ExpressionRule(fde_start + code_factor, probed_reg, orc))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // The restore has to bring the first expression back, not the second.
+  EXPECT_CALL(handler,
+              ExpressionRule(fde_start + 2 * code_factor, probed_reg, smurf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// A val_expression(E) rule that is still in force when the state is restored
+// must not be reported again.
+TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleUnchanged) {
+  const auto probed_reg = 0x666ae152;
+
+  ByteReader reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 4);
+  // Stand-in expression bytes.
+  ImageSlice hideous("hideous");
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_expression)
+      .ULEB128(probed_reg)
+      .Block(hideous)
+      // Save, step to the next address, restore: nothing changed in between.
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  // Only the initial definition is expected before End().
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, probed_reg, hideous))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChanged) {
+  ByteReader reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 4);
+  ImageSlice revolting("revolting");
+  StockCIEAndFDE(&section);
+  section.D8(lul::DW_CFA_val_expression)
+      .ULEB128(0xb5ca5c46)
+      .Block(revolting)
+      .D8(lul::DW_CFA_remember_state)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(0xb5ca5c46)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChanged", section);
+
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, 0xb5ca5c46, revolting))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor,
+                                         0xb5ca5c46, revolting))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// A val_expression(E) rule whose expression alone was changed after the save
+// must be reported with the original expression when the state is restored.
+TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChangedValExpression) {
+  const auto probed_reg = 0x500f5739;
+
+  ByteReader reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 4);
+  // Two distinct stand-in expressions: the saved one and its replacement.
+  ImageSlice repulsive("repulsive");
+  ImageSlice nauseous("nauseous");
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_expression)
+      .ULEB128(probed_reg)
+      .Block(repulsive)
+      .D8(lul::DW_CFA_remember_state)
+      // Second address: same kind of rule, different expression.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_val_expression)
+      .ULEB128(probed_reg)
+      .Block(nauseous)
+      // Third address: reinstate the remembered state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChangedValExpression",
+                                 section);
+
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, probed_reg, repulsive))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler,
+              ValExpressionRule(fde_start + code_factor, probed_reg, nauseous))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // The restore has to bring the first expression back, not the second.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor,
+                                         probed_reg, repulsive))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+// Fixture for the .eh_frame tests. On top of CFIInsnFixture it owns a
+// big-endian, 4-byte-address CFISection (constructed with its third argument
+// set to true) and the base addresses used for encoded pointers.
+struct EHFrameFixture : public CFIInsnFixture {
+  CFISection section;
+  CFISection::EncodedPointerBases encoded_pointer_bases;
+
+  EHFrameFixture() : CFIInsnFixture(), section(kBigEndian, 4, true) {
+    // Stock base addresses, handed to the section here and to the ByteReader
+    // in ParseEHFrameSection.
+    encoded_pointer_bases.cfi = 0x7f496cb2;
+    encoded_pointer_bases.text = 0x540f67b6;
+    encoded_pointer_bases.data = 0xe3eab768;
+    section.SetEncodedPointerBases(encoded_pointer_bases);
+  }
+
+  // Counterpart of CFIInsnFixture::ParseSection for .eh_frame data: parses
+  // |section| as .eh_frame, supplying the stock base addresses, and expects
+  // the parse to succeed or fail according to |succeeds|.
+  void ParseEHFrameSection(CFISection* section, bool succeeds = true) {
+    EXPECT_TRUE(section->ContainsEHFrame());
+    string bytes;
+    EXPECT_TRUE(section->GetContents(&bytes));
+
+    // Translate the assembler's endianness into the reader's. Anything that
+    // is not big-endian is asserted to be little-endian.
+    const bool is_big = section->endianness() == kBigEndian;
+    if (!is_big) {
+      assert(section->endianness() == kLittleEndian);
+    }
+    const lul::Endianness byte_order =
+        is_big ? ENDIANNESS_BIG : ENDIANNESS_LITTLE;
+
+    ByteReader reader(byte_order);
+    reader.SetAddressSize(section->AddressSize());
+    reader.SetCFIDataBase(encoded_pointer_bases.cfi, bytes.data());
+    reader.SetTextBase(encoded_pointer_bases.text);
+    reader.SetDataBase(encoded_pointer_bases.data);
+
+    // The trailing `true` is passed through to CallFrameInfo unchanged from
+    // the original call; presumably it selects .eh_frame parsing.
+    CallFrameInfo parser(bytes.data(), bytes.size(), &reader, &handler,
+                         &reporter, true);
+    if (succeeds) {
+      EXPECT_TRUE(parser.Start());
+    } else {
+      EXPECT_FALSE(parser.Start());
+    }
+  }
+};
+
+// Fixture for the TEST_F(LulDwarfEHFrame, ...) cases below. It adds nothing
+// of its own: it only combines EHFrameFixture with the Test base.
+class LulDwarfEHFrame : public EHFrameFixture, public Test {};
+
+// A CIE, one FDE, and then a zero-length terminator entry. The FDE that
+// follows the terminator must be ignored, and the reporter is expected to be
+// told about the early terminator.
+TEST_F(LulDwarfEHFrame, Terminator) {
+  // Values from the CIE.
+  const auto cie_version = 1;
+  const auto cie_return_reg = 67;
+  const auto cfa_base = 3772;
+  const auto cfa_displacement = 1372;
+  // The only FDE that should be processed.
+  const auto live_start = 0x848037a1;
+  const auto live_size = 0x7b30475e;
+  const auto moved_loc = 0x17713850;
+  const auto undefined_reg = 5721;
+
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(9968, 2466, cie_return_reg, cie_version, "")
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(cfa_base)
+      .ULEB128(cfa_displacement)
+      .FinishEntry()
+      .FDEHeader(cie, live_start, live_size)
+      .D8(lul::DW_CFA_set_loc)
+      .D32(moved_loc)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(undefined_reg)
+      .FinishEntry()
+      .D32(0)  // Zero length: terminates the sequence of entries.
+      // Everything from here on lies beyond the terminator.
+      .FDEHeader(cie, 0xf19629fe, 0x439fb09b)
+      .FinishEntry();
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.Terminator", section);
+
+  EXPECT_CALL(handler, Entry(_, live_start, live_size, cie_version, "",
+                             cie_return_reg))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(live_start, kCFARegister, cfa_base,
+                                     cfa_displacement))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(moved_loc, undefined_reg))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+  // The terminator appears before the end of the section.
+  EXPECT_CALL(reporter, EarlyEHTerminator(_)).InSequence(s).WillOnce(Return());
+
+  ParseEHFrameSection(&section);
+}
+
+// The parser should recognize the Linux Standards Base 'z' augmentations.
+// This test combines all of them ("zSLPR") in one CIE and checks that the
+// handler sees the personality routine, the LSDA, the signal-handler flag
+// and the decoded FDE addresses.
+TEST_F(LulDwarfEHFrame, SimpleFDE) {
+  lul::DwarfPointerEncoding lsda_encoding = lul::DwarfPointerEncoding(
+      lul::DW_EH_PE_indirect | lul::DW_EH_PE_datarel | lul::DW_EH_PE_sdata2);
+  lul::DwarfPointerEncoding fde_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel | lul::DW_EH_PE_udata2);
+
+  section.SetPointerEncoding(fde_encoding);
+  section.SetEncodedPointerBases(encoded_pointer_bases);
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(4873, 7012, 100, 1, "zSLPR")
+      .ULEB128(7)               // Augmentation data length
+      .D8(lsda_encoding)        // LSDA pointer format
+      .D8(lul::DW_EH_PE_pcrel)  // personality pointer format
+      .EncodedPointer(0x97baa00, lul::DW_EH_PE_pcrel)  // and value
+      .D8(fde_encoding)                                // FDE pointer format
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(6706)
+      .ULEB128(31)
+      .FinishEntry()
+      .FDEHeader(cie, 0x540f6b56, 0xf686)
+      .ULEB128(2)                                 // Augmentation data length
+      .EncodedPointer(0xe3eab475, lsda_encoding)  // LSDA pointer, signed
+      .D8(lul::DW_CFA_set_loc)
+      .EncodedPointer(0x540fa4ce, fde_encoding)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(0x675e)
+      .FinishEntry()
+      .D32(0);  // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.SimpleFDE", section);
+
+  EXPECT_CALL(handler, Entry(_, 0x540f6b56, 0xf686, 1, "zSLPR", 100))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // NOTE(review): the boolean in the next two expectations presumably
+  // reports whether the pointer was DW_EH_PE_indirect (only the LSDA
+  // encoding above sets that bit) -- confirm against the handler interface.
+  EXPECT_CALL(handler, PersonalityRoutine(0x97baa00, false))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, LanguageSpecificDataArea(0xe3eab475, true))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, SignalHandler()).InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(0x540f6b56, kCFARegister, 6706, 31))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(0x540fa4ce, 0x675e))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// Check that we can handle an empty 'z' augmentation: both the CIE and the
+// FDE carry a zero-length augmentation data block, and the instructions
+// that follow must still be parsed normally.
+TEST_F(LulDwarfEHFrame, EmptyZ) {
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(5955, 5805, 228, 1, "z")
+      .ULEB128(0)  // Augmentation data length
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(3629)
+      .ULEB128(247)
+      .FinishEntry()
+      .FDEHeader(cie, 0xda007738, 0xfb55c641)
+      .ULEB128(0)  // Augmentation data length
+      .D8(lul::DW_CFA_advance_loc1)
+      .D8(11)
+      .D8(lul::DW_CFA_undefined)
+      .ULEB128(3769)
+      .FinishEntry();
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.EmptyZ", section);
+
+  EXPECT_CALL(handler, Entry(_, 0xda007738, 0xfb55c641, 1, "z", 228))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(0xda007738, kCFARegister, 3629, 247))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // The advance_loc1 delta (11) is scaled by the CIE's first header
+  // argument (5955).
+  EXPECT_CALL(handler, UndefinedRule(0xda007738 + 11 * 5955, 3769))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// Check that we recognize bad 'z' augmentation characters: "zQ" must be
+// reported through UnrecognizedAugmentation, parsing must fail, and no
+// handler calls are expected.
+TEST_F(LulDwarfEHFrame, BadZ) {
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(6937, 1045, 142, 1, "zQ")
+      .ULEB128(0)  // Augmentation data length
+      .D8(lul::DW_CFA_def_cfa)
+      .ULEB128(9006)
+      .ULEB128(7725)
+      .FinishEntry()
+      .FDEHeader(cie, 0x1293efa8, 0x236f53f2)
+      .ULEB128(0)  // Augmentation data length
+      .D8(lul::DW_CFA_advance_loc | 12)
+      .D8(lul::DW_CFA_register)
+      .ULEB128(5667)
+      .ULEB128(3462)
+      .FinishEntry();
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.BadZ", section);
+
+  EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "zQ")).WillOnce(Return());
+
+  // 'false': parser.Start() is expected to fail.
+  ParseEHFrameSection(&section, false);
+}
+
+// The "zL" augmentation: the CIE supplies the encoding of the LSDA pointer
+// and each FDE carries such a pointer in its augmentation data.  Here the
+// pointer is function-relative (DW_EH_PE_funcrel) and the handler must
+// receive the decoded address.
+TEST_F(LulDwarfEHFrame, zL) {
+  Label cie;
+  lul::DwarfPointerEncoding lsda_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_funcrel | lul::DW_EH_PE_udata2);
+  section.Mark(&cie)
+      .CIEHeader(9285, 9959, 54, 1, "zL")
+      .ULEB128(1)         // Augmentation data length
+      .D8(lsda_encoding)  // encoding for LSDA pointer in FDE
+
+      .FinishEntry()
+      .FDEHeader(cie, 0xd40091aa, 0x9aa6e746)
+      .ULEB128(2)                                 // Augmentation data length
+      .EncodedPointer(0xd40099cd, lsda_encoding)  // LSDA pointer
+      .FinishEntry()
+      .D32(0);  // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zL", section);
+
+  EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zL", 54))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, LanguageSpecificDataArea(0xd40099cd, false))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// The "zP" augmentation: the CIE's augmentation data holds the encoding
+// and value of a personality routine pointer.  Here the pointer is
+// data-relative (DW_EH_PE_datarel) and the handler must receive the
+// decoded address.
+TEST_F(LulDwarfEHFrame, zP) {
+  Label cie;
+  lul::DwarfPointerEncoding personality_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_datarel | lul::DW_EH_PE_udata2);
+  section.Mark(&cie)
+      .CIEHeader(1097, 6313, 17, 1, "zP")
+      .ULEB128(3)                // Augmentation data length
+      .D8(personality_encoding)  // encoding for personality routine
+      .EncodedPointer(0xe3eaccac, personality_encoding)  // value
+      .FinishEntry()
+      .FDEHeader(cie, 0x0c8350c9, 0xbef11087)
+      .ULEB128(0)  // Augmentation data length
+      .FinishEntry()
+      .D32(0);  // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zP", section);
+
+  EXPECT_CALL(handler, Entry(_, 0x0c8350c9, 0xbef11087, 1, "zP", 17))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, PersonalityRoutine(0xe3eaccac, false))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// The "zR" augmentation: the CIE's augmentation data names the pointer
+// encoding used for the addresses in its FDEs.  Here they are text-relative
+// (DW_EH_PE_textrel) 2-byte signed values, and the handler must receive the
+// decoded start address and length.
+TEST_F(LulDwarfEHFrame, zR) {
+  Label cie;
+  lul::DwarfPointerEncoding pointer_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel | lul::DW_EH_PE_sdata2);
+  section.SetPointerEncoding(pointer_encoding);
+  section.Mark(&cie)
+      .CIEHeader(8011, 5496, 75, 1, "zR")
+      .ULEB128(1)            // Augmentation data length
+      .D8(pointer_encoding)  // encoding for FDE addresses
+      .FinishEntry()
+      .FDEHeader(cie, 0x540f9431, 0xbd0)
+      .ULEB128(0)  // Augmentation data length
+      .FinishEntry()
+      .D32(0);  // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zR", section);
+
+  EXPECT_CALL(handler, Entry(_, 0x540f9431, 0xbd0, 1, "zR", 75))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// The "zS" augmentation: it adds no augmentation data, but the handler's
+// SignalHandler() must be called for the FDE.
+TEST_F(LulDwarfEHFrame, zS) {
+  Label cie;
+  section.Mark(&cie)
+      .CIEHeader(9217, 7694, 57, 1, "zS")
+      .ULEB128(0)  // Augmentation data length
+      .FinishEntry()
+      .FDEHeader(cie, 0xd40091aa, 0x9aa6e746)
+      .ULEB128(0)  // Augmentation data length
+      .FinishEntry()
+      .D32(0);  // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zS", section);
+
+  EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zS", 57))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, SignalHandler()).InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// These tests require manual inspection of the test output.
+//
+// Fixture owning a real (non-mock) CallFrameInfo::Reporter, constructed
+// with the gtest logging sink and placeholder file and section names.
+struct CFIReporterFixture {
+  CFIReporterFixture()
+      : reporter(gtest_logging_sink_for_LulTestDwarf, "test file name",
+                 "test section name") {}
+  CallFrameInfo::Reporter reporter;
+};
+
+// gtest fixture for the reporter tests: CFIReporterFixture plus Test.
+//
+// Each test below simply invokes one Reporter method with fixed arguments.
+// None of them contains an assertion, so they only check that the call
+// completes.
+class LulDwarfCFIReporter : public CFIReporterFixture, public Test {};
+
+TEST_F(LulDwarfCFIReporter, Incomplete) {
+  reporter.Incomplete(0x0102030405060708ULL, CallFrameInfo::kUnknown);
+}
+
+TEST_F(LulDwarfCFIReporter, EarlyEHTerminator) {
+  reporter.EarlyEHTerminator(0x0102030405060708ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, CIEPointerOutOfRange) {
+  reporter.CIEPointerOutOfRange(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, BadCIEId) {
+  reporter.BadCIEId(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, UnrecognizedVersion) {
+  reporter.UnrecognizedVersion(0x0123456789abcdefULL, 43);
+}
+
+TEST_F(LulDwarfCFIReporter, UnrecognizedAugmentation) {
+  reporter.UnrecognizedAugmentation(0x0123456789abcdefULL, "poodles");
+}
+
+TEST_F(LulDwarfCFIReporter, InvalidPointerEncoding) {
+  reporter.InvalidPointerEncoding(0x0123456789abcdefULL, 0x42);
+}
+
+TEST_F(LulDwarfCFIReporter, UnusablePointerEncoding) {
+  reporter.UnusablePointerEncoding(0x0123456789abcdefULL, 0x42);
+}
+
+TEST_F(LulDwarfCFIReporter, RestoreInCIE) {
+  reporter.RestoreInCIE(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
+}
+
+// The remaining tests also pass an entry kind (kFDE, kCIE or kTerminator).
+TEST_F(LulDwarfCFIReporter, BadInstruction) {
+  reporter.BadInstruction(0x0123456789abcdefULL, CallFrameInfo::kFDE,
+                          0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, NoCFARule) {
+  reporter.NoCFARule(0x0123456789abcdefULL, CallFrameInfo::kCIE,
+                     0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, EmptyStateStack) {
+  reporter.EmptyStateStack(0x0123456789abcdefULL, CallFrameInfo::kTerminator,
+                           0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, ClearingCFARule) {
+  reporter.ClearingCFARule(0x0123456789abcdefULL, CallFrameInfo::kFDE,
+                           0xfedcba9876543210ULL);
+}
+// gtest fixture for the parseDwarfExpr tests; it adds no state of its own.
+class LulDwarfExpr : public Test {};
+
+// gmock stand-in for Summariser, built on a base constructed with
+// null/zero arguments.  The tests that use it set expectations on
+// AddPfxInstr only.
+class MockSummariser : public Summariser {
+ public:
+  MockSummariser() : Summariser(nullptr, 0, nullptr) {}
+  MOCK_METHOD2(Entry, void(uintptr_t, uintptr_t));
+  MOCK_METHOD0(End, void());
+  MOCK_METHOD5(Rule, void(uintptr_t, int, LExprHow, int16_t, int64_t));
+  MOCK_METHOD1(AddPfxInstr, uint32_t(PfxInstr));
+};
+
+// Feeds a DWARF expression made of one opcode of each kind used here to
+// parseDwarfExpr and checks, in order, the PfxInstrs handed to the
+// summariser, bracketed by the PX_Start and PX_End markers.  A
+// non-negative return value is expected.
+TEST_F(LulDwarfExpr, SimpleTransliteration) {
+  MockSummariser summ;
+  ByteReader reader(ENDIANNESS_LITTLE);
+
+  // The DWARF expression under test (little-endian section, size 8).
+  CFISection section(kLittleEndian, 8);
+  section.D8(DW_OP_lit0)
+      .D8(DW_OP_lit31)
+      .D8(DW_OP_breg0 + 17)
+      .LEB128(-1234)
+      .D8(DW_OP_const4s)
+      .D32(0xFEDC9876)
+      .D8(DW_OP_deref)
+      .D8(DW_OP_and)
+      .D8(DW_OP_plus)
+      .D8(DW_OP_minus)
+      .D8(DW_OP_shl)
+      .D8(DW_OP_ge);
+  string expr;
+  bool ok = section.GetContents(&expr);
+  EXPECT_TRUE(ok);
+
+  {
+    InSequence s;
+    // required start marker
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0)));
+    // DW_OP_lit0
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0)));
+    // DW_OP_lit31
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 31)));
+    // DW_OP_breg17 -1234
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_DwReg, 17)));
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, -1234)));
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add)));
+    // DW_OP_const4s 0xFEDC9876
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0xFEDC9876)));
+    // DW_OP_deref
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Deref)));
+    // DW_OP_and
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_And)));
+    // DW_OP_plus
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add)));
+    // DW_OP_minus
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Sub)));
+    // DW_OP_shl
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Shl)));
+    // DW_OP_ge
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_CmpGES)));
+    // required end marker
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_End)));
+  }
+
+  int32_t ix =
+      parseDwarfExpr(&summ, &reader, ImageSlice(expr), false, false, false);
+  EXPECT_TRUE(ix >= 0);
+}
+
+// An opcode the parser does not handle (DW_OP_lo_user - 1) must make
+// parseDwarfExpr return -1, after only the start marker has been emitted.
+TEST_F(LulDwarfExpr, UnknownOpcode) {
+  MockSummariser summ;
+  ByteReader reader(ENDIANNESS_LITTLE);
+
+  CFISection section(kLittleEndian, 8);
+  section.D8(DW_OP_lo_user - 1);
+  string expr;
+  bool ok = section.GetContents(&expr);
+  EXPECT_TRUE(ok);
+
+  {
+    InSequence s;
+    // required start marker
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0)));
+  }
+
+  int32_t ix =
+      parseDwarfExpr(&summ, &reader, ImageSlice(expr), false, false, false);
+  EXPECT_TRUE(ix == -1);
+}
+
+// An operand that runs past the end of the expression (DW_OP_const4s with
+// only three operand bytes) must make parseDwarfExpr return -1.
+TEST_F(LulDwarfExpr, ExpressionOverrun) {
+  MockSummariser summ;
+  ByteReader reader(ENDIANNESS_LITTLE);
+
+  CFISection section(kLittleEndian, 8);
+  section.D8(DW_OP_const4s).D8(0x12).D8(0x34).D8(0x56);
+  string expr;
+  bool ok = section.GetContents(&expr);
+  EXPECT_TRUE(ok);
+
+  {
+    InSequence s;
+    // required start marker
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0)));
+    // DW_OP_const4s followed by 3 (a.k.a. not enough) bytes
+    // We expect PfxInstr(PX_SImm32, not-known-for-sure-32-bit-immediate)
+    // Hence must use _ as the argument.
+    EXPECT_CALL(summ, AddPfxInstr(_));
+  }
+
+  int32_t ix =
+      parseDwarfExpr(&summ, &reader, ImageSlice(expr), false, false, false);
+  EXPECT_TRUE(ix == -1);
+}
+
+// We'll need to mention specific Dwarf registers in the EvaluatePfxExpr tests,
+// and those names are arch-specific, so a bit of macro magic is helpful.
+//   TESTED_REG_STRUCT_NAME: the UnwindRegs field set by the fixture.
+//   TESTED_REG_DWARF_NAME:  the register's Dwarf number, used with PX_DwReg.
+// Compilation fails on any architecture not listed here.
+#if defined(GP_ARCH_arm)
+#  define TESTED_REG_STRUCT_NAME r11
+#  define TESTED_REG_DWARF_NAME DW_REG_ARM_R11
+#elif defined(GP_ARCH_arm64)
+#  define TESTED_REG_STRUCT_NAME x29
+#  define TESTED_REG_DWARF_NAME DW_REG_AARCH64_X29
+#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+#  define TESTED_REG_STRUCT_NAME xbp
+#  define TESTED_REG_DWARF_NAME DW_REG_INTEL_XBP
+#else
+#  error "Unknown plat"
+#endif
+
+// Fixture providing the inputs EvaluatePfxExpr needs: a small stack image,
+// an initial CFA and an initial register set.
+struct EvaluatePfxExprFixture {
+  // Creates:
+  // initial stack, AVMA 0x12345678, at offset 4 bytes = 0xdeadbeef
+  //   (one pointer-sized little-endian word, with 4 filler bytes on
+  //   either side)
+  // initial regs, all zero except TESTED_REG_STRUCT_NAME = 0x14141356
+  //   (that is XBP on x86/amd64, r11 on arm, x29 on arm64)
+  // initial CFA = 0x5432ABCD
+  EvaluatePfxExprFixture() {
+    // The test stack.
+    si.mStartAvma = 0x12345678;
+    si.mLen = 0;
+    // XX appends one byte to the stack image.
+#define XX(_byte)                      \
+  do {                                 \
+    si.mContents[si.mLen++] = (_byte); \
+  } while (0)
+    XX(0x55);
+    XX(0x55);
+    XX(0x55);
+    XX(0x55);
+    if (sizeof(void*) == 8) {
+      // le64
+      XX(0xEF);
+      XX(0xBE);
+      XX(0xAD);
+      XX(0xDE);
+      XX(0);
+      XX(0);
+      XX(0);
+      XX(0);
+    } else {
+      // le32
+      XX(0xEF);
+      XX(0xBE);
+      XX(0xAD);
+      XX(0xDE);
+    }
+    XX(0xAA);
+    XX(0xAA);
+    XX(0xAA);
+    XX(0xAA);
+#undef XX
+    // The initial CFA.
+    initialCFA = TaggedUWord(0x5432ABCD);
+    // The initial register state.
+    memset(&regs, 0, sizeof(regs));
+    regs.TESTED_REG_STRUCT_NAME = TaggedUWord(0x14141356);
+  }
+
+  StackImage si;
+  TaggedUWord initialCFA;
+  UnwindRegs regs;
+};
+
+// gtest fixture for the EvaluatePfxExpr tests: EvaluatePfxExprFixture plus
+// Test.
+class LulDwarfEvaluatePfxExpr : public EvaluatePfxExprFixture, public Test {};
+
+// Runs a sequence that uses every operator exercised here (immediates, a
+// register read, Sub, Add, Deref, And, Shl) and checks the final value.
+// Evaluation starts at index 2, skipping two junk entries.
+//
+// Each comment after a push_back shows the evaluation stack once that
+// instruction has run, bottom of stack first.
+TEST_F(LulDwarfEvaluatePfxExpr, NormalEvaluation) {
+  vector<PfxInstr> instrs;
+  // Put some junk at the start of the insn sequence.
+  instrs.push_back(PfxInstr(PX_End));
+  instrs.push_back(PfxInstr(PX_End));
+
+  // Now the real sequence
+  // stack is empty
+  instrs.push_back(PfxInstr(PX_Start, 1));
+  // 0x5432ABCD
+  instrs.push_back(PfxInstr(PX_SImm32, 0x31415927));
+  // 0x5432ABCD 0x31415927
+  instrs.push_back(PfxInstr(PX_DwReg, TESTED_REG_DWARF_NAME));
+  // 0x5432ABCD 0x31415927 0x14141356
+  instrs.push_back(PfxInstr(PX_SImm32, 42));
+  // 0x5432ABCD 0x31415927 0x14141356 42
+  instrs.push_back(PfxInstr(PX_Sub));
+  // 0x5432ABCD 0x31415927 0x1414132c
+  instrs.push_back(PfxInstr(PX_Add));
+  // 0x5432ABCD 0x45556c53
+  instrs.push_back(PfxInstr(PX_SImm32, si.mStartAvma + 4));
+  // 0x5432ABCD 0x45556c53 0x1234567c
+  instrs.push_back(PfxInstr(PX_Deref));
+  // 0x5432ABCD 0x45556c53 0xdeadbeef
+  instrs.push_back(PfxInstr(PX_SImm32, 0xFE01DC23));
+  // 0x5432ABCD 0x45556c53 0xdeadbeef 0xFE01DC23
+  instrs.push_back(PfxInstr(PX_And));
+  // 0x5432ABCD 0x45556c53 0xde019c23
+  instrs.push_back(PfxInstr(PX_SImm32, 7));
+  // 0x5432ABCD 0x45556c53 0xde019c23 7
+  instrs.push_back(PfxInstr(PX_Shl));
+  // 0x5432ABCD 0x45556c53 0x6f00ce1180
+  instrs.push_back(PfxInstr(PX_SImm32, 0x7fffffff));
+  // 0x5432ABCD 0x45556c53 0x6f00ce1180 7fffffff
+  instrs.push_back(PfxInstr(PX_And));
+  // 0x5432ABCD 0x45556c53 0x00ce1180
+  instrs.push_back(PfxInstr(PX_Add));
+  // 0x5432ABCD 0x46237dd3
+  instrs.push_back(PfxInstr(PX_Sub));
+  // 0xe0f2dfa
+
+  instrs.push_back(PfxInstr(PX_End));
+
+  TaggedUWord res = EvaluatePfxExpr(2 /*offset of start insn*/, &regs,
+                                    initialCFA, &si, instrs);
+  EXPECT_TRUE(res.Valid());
+  EXPECT_TRUE(res.Value() == 0xe0f2dfa);
+}
+
+// Evaluating an empty instruction vector must give an invalid result.
+TEST_F(LulDwarfEvaluatePfxExpr, EmptySequence) {
+  vector<PfxInstr> program;
+  TaggedUWord outcome =
+      EvaluatePfxExpr(0, &regs, initialCFA, &si, program);
+  EXPECT_FALSE(outcome.Valid());
+}
+
+// Starting evaluation at an index that does not hold a PX_Start
+// instruction must give an invalid result.
+TEST_F(LulDwarfEvaluatePfxExpr, BogusStartPoint) {
+  vector<PfxInstr> program;
+  // Three immediates and no start marker anywhere.
+  const int immediates[] = {42, 24, 4224};
+  for (int imm : immediates) {
+    program.push_back(PfxInstr(PX_SImm32, imm));
+  }
+  TaggedUWord outcome =
+      EvaluatePfxExpr(1, &regs, initialCFA, &si, program);
+  EXPECT_FALSE(outcome.Valid());
+}
+
+// A sequence that runs off the end of the vector without a PX_End marker
+// must give an invalid result.
+TEST_F(LulDwarfEvaluatePfxExpr, MissingEndMarker) {
+  vector<PfxInstr> program;
+  const PfxInstr start(PX_Start, 0);
+  const PfxInstr push24(PX_SImm32, 24);
+  program.push_back(start);
+  program.push_back(push24);
+  TaggedUWord outcome =
+      EvaluatePfxExpr(0, &regs, initialCFA, &si, program);
+  EXPECT_FALSE(outcome.Valid());
+}
+
+// PX_Start with argument 0 pushes nothing, so reaching PX_End with an
+// empty stack must give an invalid result.
+TEST_F(LulDwarfEvaluatePfxExpr, StackUnderflow) {
+  vector<PfxInstr> program;
+  const PfxInstr start_without_cfa(PX_Start, 0);
+  program.push_back(start_without_cfa);
+  program.push_back(PfxInstr(PX_End));
+  TaggedUWord outcome =
+      EvaluatePfxExpr(0, &regs, initialCFA, &si, program);
+  EXPECT_FALSE(outcome.Valid());
+}
+
+// PX_Start with argument 1 pushes the initial CFA, so an immediately
+// following PX_End must return exactly that value.
+TEST_F(LulDwarfEvaluatePfxExpr, StackNoUnderflow) {
+  vector<PfxInstr> program;
+  const PfxInstr start_with_cfa(PX_Start, 1 /*push the initial CFA*/);
+  program.push_back(start_with_cfa);
+  program.push_back(PfxInstr(PX_End));
+  TaggedUWord outcome =
+      EvaluatePfxExpr(0, &regs, initialCFA, &si, program);
+  EXPECT_TRUE(outcome.Valid());
+  EXPECT_TRUE(outcome == initialCFA);
+}
+
+// Pushing eleven values (100..110) must give an invalid result; compare
+// StackNoOverflow, where ten pushes still succeed.
+TEST_F(LulDwarfEvaluatePfxExpr, StackOverflow) {
+  vector<PfxInstr> program;
+  program.push_back(PfxInstr(PX_Start, 0));
+  const int kFirstValue = 100;
+  const int kPushCount = 10 + 1;
+  for (int value = kFirstValue; value < kFirstValue + kPushCount; ++value) {
+    program.push_back(PfxInstr(PX_SImm32, value));
+  }
+  program.push_back(PfxInstr(PX_End));
+  TaggedUWord outcome =
+      EvaluatePfxExpr(0, &regs, initialCFA, &si, program);
+  EXPECT_FALSE(outcome.Valid());
+}
+
+// Pushing ten values (100..109) must succeed, and the result is the last
+// value pushed.
+TEST_F(LulDwarfEvaluatePfxExpr, StackNoOverflow) {
+  vector<PfxInstr> program;
+  program.push_back(PfxInstr(PX_Start, 0));
+  const int kFirstValue = 100;
+  const int kPushCount = 10 + 0;
+  for (int value = kFirstValue; value < kFirstValue + kPushCount; ++value) {
+    program.push_back(PfxInstr(PX_SImm32, value));
+  }
+  program.push_back(PfxInstr(PX_End));
+  TaggedUWord outcome =
+      EvaluatePfxExpr(0, &regs, initialCFA, &si, program);
+  EXPECT_TRUE(outcome.Valid());
+  EXPECT_TRUE(outcome == TaggedUWord(109));
+}
+
+// A left shift by a count far larger than the word size (5678) must give
+// an invalid result.
+TEST_F(LulDwarfEvaluatePfxExpr, OutOfRangeShl) {
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_Start, 0));
+  instrs.push_back(PfxInstr(PX_SImm32, 1234));
+  instrs.push_back(PfxInstr(PX_SImm32, 5678));
+  instrs.push_back(PfxInstr(PX_Shl));
+  // Terminate the sequence properly.  Without the end marker the result
+  // would be invalid regardless of the shift (see MissingEndMarker), and
+  // this test could not detect a missing range check.
+  instrs.push_back(PfxInstr(PX_End));
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_FALSE(res.Valid());
+}
+
+// Checks PX_CmpGES on six operand pairs.  The six results are packed into
+// one word, first pair in the most significant position, by shifting a
+// running total left and OR-ing in each comparison result.
+TEST_F(LulDwarfEvaluatePfxExpr, TestCmpGES) {
+  const int32_t lhs[6] = {0, 0, 1, -2, -1, -2};
+  const int32_t rhs[6] = {0, 1, 0, -2, -2, -1};
+  // lhs >= rhs (signed):   t  f  t  t   t    f   = 101110 = 0x2E
+  const unsigned int kNumCases = sizeof(lhs) / sizeof(lhs[0]);
+
+  vector<PfxInstr> program;
+  program.push_back(PfxInstr(PX_Start, 0));
+  // The running total starts out as zero.
+  program.push_back(PfxInstr(PX_SImm32, 0));
+  for (unsigned int c = 0; c != kNumCases; ++c) {
+    // total <<= 1
+    program.push_back(PfxInstr(PX_SImm32, 1));
+    program.push_back(PfxInstr(PX_Shl));
+    // total |= (lhs[c] >= rhs[c])
+    program.push_back(PfxInstr(PX_SImm32, lhs[c]));
+    program.push_back(PfxInstr(PX_SImm32, rhs[c]));
+    program.push_back(PfxInstr(PX_CmpGES));
+    program.push_back(PfxInstr(PX_Or));
+  }
+  program.push_back(PfxInstr(PX_End));
+
+  TaggedUWord outcome =
+      EvaluatePfxExpr(0, &regs, initialCFA, &si, program);
+  EXPECT_TRUE(outcome.Valid());
+  EXPECT_TRUE(outcome == TaggedUWord(0x2E));
+}
+
+}  // namespace lul
diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.cpp b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp
new file mode 100644
index 0000000000..6d49557e9c
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp
@@ -0,0 +1,498 @@
+// Copyright (c) 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Derived from:
+// test_assembler.cc: Implementation of google_breakpad::TestAssembler.
+// See test_assembler.h for details.
+
+// Derived from:
+// cfi_assembler.cc: Implementation of google_breakpad::CFISection class.
+// See cfi_assembler.h for details.
+
+#include "LulTestInfrastructure.h"
+
+#include "LulDwarfInt.h"
+
+#include <cassert>
+
+namespace lul_test {
+namespace test_assembler {
+
+using std::back_insert_iterator;
+
+// A new label with an unconstrained value.
+Label::Label() : value_(new Binding()) {}
+
+// A new label whose value is the constant |value|.
+Label::Label(uint64_t value) : value_(new Binding(value)) {}
+
+// Copies share the source label's binding and take a reference to it.
+Label::Label(const Label& label) : value_(label.value_) {
+  value_->Acquire();
+}
+
+// Drops this label's reference and frees the binding if Release() reports
+// that it should be deleted.
+Label::~Label() {
+  const bool should_delete = value_->Release();
+  if (should_delete) {
+    delete value_;
+  }
+}
+
+// Constrains this label to the constant |value|.
+Label& Label::operator=(uint64_t value) {
+  Binding* const no_base = nullptr;
+  value_->Set(no_base, value);
+  return *this;
+}
+
+// Constrains this label to be equal to |label|.
+Label& Label::operator=(const Label& label) {
+  const uint64_t no_offset = 0;
+  value_->Set(label.value_, no_offset);
+  return *this;
+}
+
+// Returns a new label constrained to this label's value plus |addend|.
+Label Label::operator+(uint64_t addend) const {
+  Label sum;
+  sum.value_->Set(value_, addend);
+  return sum;
+}
+
+// Returns a new label constrained to this label's value minus
+// |subtrahend|; the subtraction is expressed as adding the 64-bit negation.
+Label Label::operator-(uint64_t subtrahend) const {
+  Label difference;
+  const uint64_t negated = uint64_t(0) - subtrahend;
+  difference.value_->Set(value_, negated);
+  return difference;
+}
+
+// When NDEBUG is #defined, assert doesn't evaluate its argument. This
+// means you can't simply use assert to check the return value of a
+// function with necessary side effects.
+//
+// ALWAYS_EVALUATE_AND_ASSERT(x) evaluates x regardless of whether
+// NDEBUG is #defined; when NDEBUG is not #defined, it further asserts
+// that x is true.
+//
+// The NDEBUG form parenthesizes its argument and discards the value, so
+// that it expands safely for any expression and yields void, like assert.
+#ifdef NDEBUG
+#  define ALWAYS_EVALUATE_AND_ASSERT(x) ((void)(x))
+#else
+#  define ALWAYS_EVALUATE_AND_ASSERT(x) assert(x)
+#endif
+
+// Returns this label's value minus |label|'s.  The two labels must be
+// related (see IsKnownOffsetFrom); debug builds assert this.
+uint64_t Label::operator-(const Label& label) const {
+  // Start from zero so that a build with NDEBUG defined returns a
+  // well-defined value, rather than reading an uninitialized variable,
+  // when the labels turn out to be unrelated.
+  uint64_t offset = 0;
+  ALWAYS_EVALUATE_AND_ASSERT(IsKnownOffsetFrom(label, &offset));
+  return offset;
+}
+
+// Returns true if this label's value is a known constant.  In that case
+// the constant is also stored in *value_p, when value_p is non-null.
+bool Label::IsKnownConstant(uint64_t* value_p) const {
+  Binding* root = nullptr;
+  uint64_t constant = 0;
+  value_->Get(&root, &constant);
+  // A null root means the binding resolved to a plain constant.
+  const bool is_constant = (root == nullptr);
+  if (is_constant && value_p != nullptr) {
+    *value_p = constant;
+  }
+  return is_constant;
+}
+
+// Returns true if the difference between this label and |label| is known.
+// In that case the difference (this - label) is also stored in *offset_p,
+// when offset_p is non-null.
+bool Label::IsKnownOffsetFrom(const Label& label, uint64_t* offset_p) const {
+  Binding* other_root = nullptr;
+  uint64_t other_addend = 0;
+  label.value_->Get(&other_root, &other_addend);
+
+  Binding* own_root = nullptr;
+  uint64_t own_addend = 0;
+  value_->Get(&own_root, &own_addend);
+
+  // Get resolves each label to its final ancestor, however indirect the
+  // relation, so related labels end up with the same root.  Two constants
+  // both have a null root, which covers the constant vs. constant case.
+  const bool related = (own_root == other_root);
+  if (related && offset_p != nullptr) {
+    *offset_p = own_addend - other_addend;
+  }
+  return related;
+}
+
+// An unconstrained binding: it is its own base, with a zero addend.
+Label::Binding::Binding() : base_(this), addend_(0), reference_count_(1) {}
+
+// A constant binding: no base, and the value is |addend| itself.
+Label::Binding::Binding(uint64_t addend)
+    : base_(nullptr), addend_(addend), reference_count_(1) {}
+
+// Must only run once the reference count has dropped to zero.  Releases
+// the reference held on a distinct base binding, deleting the base if
+// Release() reports that it should be.
+Label::Binding::~Binding() {
+  assert(reference_count_ == 0);
+  const bool has_distinct_base = (base_ != nullptr) && (base_ != this);
+  if (has_distinct_base && base_->Release()) {
+    delete base_;
+  }
+}
+
+// Constrain this binding's value to be BINDING's value plus ADDEND.  A
+// null BINDING means the value is the constant ADDEND.
+//
+// Three cases are handled:
+//  - this binding is already a constant and so is the new value: the two
+//    constants must agree (asserted);
+//  - this binding is a constant but BINDING is not: the constraint is
+//    turned around and applied to BINDING instead;
+//  - otherwise: the constraint is propagated along this binding's chain
+//    and BINDING (resolved to its final base) becomes the new base.
+void Label::Binding::Set(Binding* binding, uint64_t addend) {
+  if (!base_ && !binding) {
+    // We're equating two constants. This could be okay.
+    assert(addend_ == addend);
+  } else if (!base_) {
+    // We are a known constant, but BINDING may not be, so turn the
+    // tables and try to set BINDING's value instead.
+    binding->Set(NULL, addend_ - addend);
+  } else {
+    if (binding) {
+      // Find binding's final value. Since the final value is always either
+      // completely unconstrained or a constant, never a reference to
+      // another variable (otherwise, it wouldn't be final), this
+      // guarantees we won't create cycles here, even for code like this:
+      //   l = m, m = n, n = l;
+      uint64_t binding_addend;
+      binding->Get(&binding, &binding_addend);
+      addend += binding_addend;
+    }
+
+    // It seems likely that setting a binding to itself is a bug
+    // (although I can imagine this might turn out to be helpful to
+    // permit).
+    assert(binding != this);
+
+    if (base_ != this) {
+      // Set the other bindings on our chain as well. Note that this
+      // is sufficient even though binding relationships form trees:
+      // All binding operations traverse their chains to the end, and
+      // all bindings related to us share some tail of our chain, so
+      // they will see the changes we make here.
+      base_->Set(binding, addend - addend_);
+      // We're not going to use base_ any more.
+      if (base_->Release()) delete base_;
+    }
+
+    // Adopt BINDING as our base. Note that it should be correct to
+    // acquire here, after the release above, even though the usual
+    // reference-counting rules call for acquiring first, and then
+    // releasing: the self-reference assertion above should have
+    // complained if BINDING were 'this' or anywhere along our chain,
+    // so we didn't release BINDING.
+    if (binding) binding->Acquire();
+    base_ = binding;
+    addend_ = addend;
+  }
+}
+
+// Report this binding's final value: *BASE receives the binding at the end
+// of the reference chain (NULL for a constant, or a binding that is its
+// own base when still unconstrained) and *ADDEND the offset from it.
+// As a side effect, a binding that refers to another one is rewritten to
+// refer directly to the end of the chain.
+void Label::Binding::Get(Binding** base, uint64_t* addend) {
+  if (base_ && base_ != this) {
+    // Recurse to find the end of our reference chain (the root of our
+    // tree), and then rewrite every binding along the chain to refer
+    // to it directly, adjusting addends appropriately. (This is why
+    // this member function isn't this-const.)
+    Binding* final_base;
+    uint64_t final_addend;
+    base_->Get(&final_base, &final_addend);
+    // Take the reference on the new base before dropping the one on the
+    // old base.
+    if (final_base) final_base->Acquire();
+    if (base_->Release()) delete base_;
+    base_ = final_base;
+    addend_ += final_addend;
+  }
+  *base = base_;
+  *addend = addend_;
+}
+
+// Writes the low SIZE bytes of NUMBER through DEST, one char at a time, in
+// the byte order given by ENDIANNESS.  SIZE must be greater than zero, and
+// ENDIANNESS must be kLittleEndian or kBigEndian (both asserted).
+template <typename Inserter>
+static inline void InsertEndian(test_assembler::Endianness endianness,
+                                size_t size, uint64_t number, Inserter dest) {
+  assert(size > 0);
+  if (endianness != kLittleEndian) {
+    assert(endianness == kBigEndian);
+    // Most significant byte first: count down from SIZE to 1 and shift
+    // the wanted byte into the low position.
+    for (size_t remaining = size; remaining > 0; --remaining) {
+      const size_t shift = (remaining - 1) * 8;
+      *dest++ = static_cast<char>((number >> shift) & 0xff);
+    }
+    return;
+  }
+  // Least significant byte first: peel bytes off the bottom of NUMBER.
+  for (size_t written = 0; written < size; ++written) {
+    *dest++ = static_cast<char>(number & 0xff);
+    number >>= 8;
+  }
+}
+
+// Appends the low SIZE bytes of NUMBER to this section's contents in the
+// given byte order.  Returns *this so that calls can be chained.
+Section& Section::Append(Endianness endianness, size_t size, uint64_t number) {
+  back_insert_iterator<string> tail(contents_);
+  InsertEndian(endianness, size, number, tail);
+  return *this;
+}
+
+// Appends a SIZE-byte slot holding LABEL's value.  When the value is
+// already known it is written immediately.  Otherwise SIZE zero bytes are
+// reserved and a reference is recorded so that the value can be patched in
+// later.  Returns *this so that calls can be chained.
+Section& Section::Append(Endianness endianness, size_t size,
+                         const Label& label) {
+  uint64_t known_value;
+  if (!label.IsKnownConstant(&known_value)) {
+    // An unset endianness would be caught when the references are
+    // resolved, but it's nicer to find out here.
+    assert(endianness != kUnsetEndian);
+
+    const size_t patch_offset = contents_.size();
+    references_.push_back(Reference(patch_offset, endianness, size, label));
+    contents_.append(size, 0);
+    return *this;
+  }
+  // The value is known, so no entry in references_ is needed.
+  return Append(endianness, size, known_value);
+}
+
+// Map the one-letter prefix used in the short append method names (L or B)
+// to the corresponding Endianness value.
+#define ENDIANNESS_L kLittleEndian
+#define ENDIANNESS_B kBigEndian
+#define ENDIANNESS(e) ENDIANNESS_##e
+
+// Defines Section::<e><bits>(uint<bits>_t), which appends a number of the
+// given width in the byte order selected by e.
+#define DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits)         \
+  Section& Section::e##bits(uint##bits##_t v) {            \
+    InsertEndian(ENDIANNESS(e), bits / 8, v,               \
+                 back_insert_iterator<string>(contents_)); \
+    return *this;                                          \
+  }
+
+// Defines Section::<e><bits>(const Label&), which appends a label of the
+// given width in the byte order selected by e.
+#define DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits) \
+  Section& Section::e##bits(const Label& v) {     \
+    return Append(ENDIANNESS(e), bits / 8, v);    \
+  }
+
+// Define L16, B32, and friends.
+#define DEFINE_SHORT_APPEND_ENDIAN(e, bits)  \
+  DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits) \
+  DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits)
+
+// Only the label overloads are defined for 8 bits; the 8-bit number
+// overloads are presumably defined elsewhere -- TODO confirm in the header.
+DEFINE_SHORT_APPEND_LABEL_ENDIAN(L, 8);
+DEFINE_SHORT_APPEND_LABEL_ENDIAN(B, 8);
+DEFINE_SHORT_APPEND_ENDIAN(L, 16);
+DEFINE_SHORT_APPEND_ENDIAN(L, 32);
+DEFINE_SHORT_APPEND_ENDIAN(L, 64);
+DEFINE_SHORT_APPEND_ENDIAN(B, 16);
+DEFINE_SHORT_APPEND_ENDIAN(B, 32);
+DEFINE_SHORT_APPEND_ENDIAN(B, 64);
+
+// The D<bits> variants use the section's own endianness_ instead of a
+// byte order fixed by the method name.
+#define DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits)           \
+  Section& Section::D##bits(uint##bits##_t v) {            \
+    InsertEndian(endianness_, bits / 8, v,                 \
+                 back_insert_iterator<string>(contents_)); \
+    return *this;                                          \
+  }
+#define DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits) \
+  Section& Section::D##bits(const Label& v) {   \
+    return Append(endianness_, bits / 8, v);    \
+  }
+#define DEFINE_SHORT_APPEND_DEFAULT(bits)  \
+  DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits) \
+  DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits)
+
+DEFINE_SHORT_APPEND_LABEL_DEFAULT(8)
+DEFINE_SHORT_APPEND_DEFAULT(16);
+DEFINE_SHORT_APPEND_DEFAULT(32);
+DEFINE_SHORT_APPEND_DEFAULT(64);
+
+// Appends VALUE encoded as a signed LEB128 number: seven bits per byte,
+// least significant group first, with the top bit set on every byte but
+// the last.  Returns *this so that calls can be chained.
+Section& Section::LEB128(long long value) {
+  // The top seven bits, OR-ed in after shifting a negative value so that
+  // the shift behaves arithmetically.
+  const unsigned long long sign_fill = ~(~0ULL >> 7);
+  // Keep emitting continuation bytes until the rest fits in the seven
+  // signed bits of the final byte.
+  while (!(-0x40 <= value && value <= 0x3f)) {
+    contents_ += (value & 0x7f) | 0x80;
+    const bool negative = value < 0;
+    value >>= 7;
+    if (negative) value |= sign_fill;
+  }
+  contents_ += value & 0x7f;
+  return *this;
+}
+
+// Appends VALUE encoded as an unsigned LEB128 number: seven bits per byte,
+// least significant group first, with the top bit set on every byte but
+// the last.  Returns *this so that calls can be chained.
+Section& Section::ULEB128(uint64_t value) {
+  for (; value > 0x7f; value >>= 7) {
+    contents_ += (value & 0x7f) | 0x80;
+  }
+  // What remains fits in seven bits and is written without the
+  // continuation bit.
+  contents_ += value;
+  return *this;
+}
+
+Section& Section::Align(size_t alignment, uint8_t pad_byte) {
+  // ALIGNMENT must be a nonzero power of two; 0 alone passes the bit test.
+  assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
+  size_t new_size = (contents_.size() + alignment - 1) & ~(alignment - 1);
+  contents_.append(new_size - contents_.size(), pad_byte);
+  assert((contents_.size() & (alignment - 1)) == 0);
+  return *this;
+}
+
+bool Section::GetContents(string* contents) {
+  // Patch each label reference's resolved value into its reserved bytes.
+  for (size_t n = 0; n != references_.size(); ++n) {
+    Reference& ref = references_[n];
+    uint64_t resolved;
+    if (!ref.label.IsKnownConstant(&resolved)) {
+      fprintf(stderr, "Undefined label #%zu at offset 0x%zx\n", n, ref.offset);
+      return false;
+    }
+    assert(ref.offset < contents_.size());
+    assert(contents_.size() - ref.offset >= ref.size);
+    string::iterator patch_at = contents_.begin() + ref.offset;
+    InsertEndian(ref.endianness, ref.size, resolved, patch_at);
+  }
+  contents->clear();
+  contents->swap(contents_);
+  references_.clear();
+  return true;
+}
+
+}  // namespace test_assembler
+}  // namespace lul_test
+
+namespace lul_test {
+
+CFISection& CFISection::CIEHeader(uint64_t code_alignment_factor,
+                                  int data_alignment_factor,
+                                  unsigned return_address_register,
+                                  uint8_t version, const string& augmentation,
+                                  bool dwarf64) {
+  assert(!entry_length_);
+  in_fde_ = false;
+  entry_length_ = new PendingLength();
+  // The length field is written as a label that FinishEntry defines later.
+  if (!dwarf64) {
+    D32(entry_length_->length);
+    entry_length_->start = Here();
+    D32(eh_frame_ ? kEHFrame32CIEIdentifier : kDwarf32CIEIdentifier);
+  } else {
+    D32(kDwarf64InitialLengthMarker);
+    D64(entry_length_->length);
+    entry_length_->start = Here();
+    D64(eh_frame_ ? kEHFrame64CIEIdentifier : kDwarf64CIEIdentifier);
+  }
+  D8(version);
+  AppendCString(augmentation);
+  ULEB128(code_alignment_factor);
+  LEB128(data_alignment_factor);
+  if (version != 1)
+    ULEB128(return_address_register);
+  else
+    D8(return_address_register);  // Version 1 uses a single byte here.
+  return *this;
+}
+
+CFISection& CFISection::FDEHeader(Label cie_pointer, uint64_t initial_location,
+                                  uint64_t address_range, bool dwarf64) {
+  assert(!entry_length_);
+  entry_length_ = new PendingLength();
+  in_fde_ = true;
+  fde_start_address_ = initial_location;
+  // Emit the initial length (a label FinishEntry defines), then the CIE pointer.
+  if (dwarf64) {
+    D32(kDwarf64InitialLengthMarker);
+    D64(entry_length_->length);
+    entry_length_->start = Here();
+    if (eh_frame_)
+      D64(Here() - cie_pointer);  // .eh_frame: offset back to the CIE.
+    else
+      D64(cie_pointer);
+  } else {
+    D32(entry_length_->length);
+    entry_length_->start = Here();
+    if (eh_frame_)
+      D32(Here() - cie_pointer);
+    else
+      D32(cie_pointer);
+  }
+  EncodedPointer(initial_location);
+  // The address range in an .eh_frame section uses the same encoding as the
+  // initial location, but ignores the base address (selected by the upper
+  // nybble of the encoding), as it's a length, not an address that can be
+  // made relative.
+  EncodedPointer(address_range, DwarfPointerEncoding(pointer_encoding_ & 0x0f));
+  return *this;
+}
+
+CFISection& CFISection::FinishEntry() {
+  assert(entry_length_);
+  in_fde_ = false;  // Whichever kind of entry was open, it is closed now.
+  Align(AddressSize(), lul::DW_CFA_nop);  // Pad with no-ops before measuring.
+  entry_length_->length = Here() - entry_length_->start;
+  delete entry_length_;
+  entry_length_ = NULL;
+  return *this;
+}
+
+CFISection& CFISection::EncodedPointer(uint64_t address,
+                                       DwarfPointerEncoding encoding,
+                                       const EncodedPointerBases& bases) {
+  // Omitted data is extremely easy to emit.
+  if (encoding == lul::DW_EH_PE_omit) return *this;
+
+  // If (encoding & lul::DW_EH_PE_indirect) != 0, then we assume
+  // that ADDRESS is the address at which the pointer is stored --- in
+  // other words, that bit has no effect on how we write the pointer.
+  encoding = DwarfPointerEncoding(encoding & ~lul::DW_EH_PE_indirect);
+
+  // Find the base address to which this pointer is relative. The upper
+  // nybble of the encoding specifies this.
+  uint64_t base;
+  switch (encoding & 0xf0) {
+    case lul::DW_EH_PE_absptr:
+      base = 0;
+      break;
+    case lul::DW_EH_PE_pcrel:
+      base = bases.cfi + Size();
+      break;
+    case lul::DW_EH_PE_textrel:
+      base = bases.text;
+      break;
+    case lul::DW_EH_PE_datarel:
+      base = bases.data;
+      break;
+    case lul::DW_EH_PE_funcrel:
+      base = fde_start_address_;
+      break;
+    case lul::DW_EH_PE_aligned:
+      base = 0;
+      break;
+    default:
+      abort();
+  }
+
+  // Make ADDRESS relative. Yes, this is appropriate even for "absptr"
+  // values; see gcc/unwind-pe.h.
+  address -= base;
+
+  // Align the pointer, if required.
+  if ((encoding & 0xf0) == lul::DW_EH_PE_aligned) Align(AddressSize());
+
+  // Append ADDRESS to this section in the appropriate form. For the
+  // fixed-width forms, we don't need to differentiate between signed and
+  // unsigned encodings, because ADDRESS has already been extended to 64
+  // bits before it was passed to us.
+  switch (encoding & 0x0f) {
+    case lul::DW_EH_PE_absptr:
+      Address(address);
+      break;
+
+    case lul::DW_EH_PE_uleb128:
+      ULEB128(address);
+      break;
+
+    case lul::DW_EH_PE_sleb128:
+      LEB128(address);
+      break;
+
+    case lul::DW_EH_PE_udata2:
+    case lul::DW_EH_PE_sdata2:
+      D16(address);
+      break;
+
+    case lul::DW_EH_PE_udata4:
+    case lul::DW_EH_PE_sdata4:
+      D32(address);
+      break;
+
+    case lul::DW_EH_PE_udata8:
+    case lul::DW_EH_PE_sdata8:
+      D64(address);
+      break;
+
+    default:
+      abort();
+  }
+
+  return *this;
+}
+
+}  // namespace lul_test
diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.h b/tools/profiler/tests/gtest/LulTestInfrastructure.h
new file mode 100644
index 0000000000..9faa7ca858
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTestInfrastructure.h
@@ -0,0 +1,736 @@
+// -*- mode: C++ -*-
+
+// Copyright (c) 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Derived from:
+// cfi_assembler.h: Define CFISection, a class for creating properly
+// (and improperly) formatted DWARF CFI data for unit tests.
+
+// Derived from:
+// test-assembler.h: interface to class for building complex binary streams.
+
+// To test the Breakpad symbol dumper and processor thoroughly, for
+// all combinations of host system and minidump processor
+// architecture, we need to be able to easily generate complex test
+// data like debugging information and minidump files.
+//
+// For example, if we want our unit tests to provide full code
+// coverage for stack walking, it may be difficult to persuade the
+// compiler to generate every possible sort of stack walking
+// information that we want to support; there are probably DWARF CFI
+// opcodes that GCC never emits. Similarly, if we want to test our
+// error handling, we will need to generate damaged minidumps or
+// debugging information that (we hope) the client or compiler will
+// never produce on its own.
+//
+// google_breakpad::TestAssembler provides a predictable and
+// (relatively) simple way to generate complex formatted data streams
+// like minidumps and CFI. Furthermore, because TestAssembler is
+// portable, developers without access to (say) Visual Studio or a
+// SPARC assembler can still work on test data for those targets.
+
+#ifndef LUL_TEST_INFRASTRUCTURE_H
+#define LUL_TEST_INFRASTRUCTURE_H
+
+#include "LulDwarfExt.h"
+
+#include <string>
+#include <vector>
+
+using std::string;
+using std::vector;
+
+namespace lul_test {
+namespace test_assembler {
+
+// A Label represents a value not yet known that we need to store in a
+// section. As long as all the labels a section refers to are defined
+// by the time we retrieve its contents as bytes, we can use undefined
+// labels freely in that section's construction.
+//
+// A label can be in one of three states:
+// - undefined,
+// - defined as the sum of some other label and a constant, or
+// - a constant.
+//
+// A label's value never changes, but it can accumulate constraints.
+// Adding labels and integers is permitted, and yields a label.
+// Subtracting a constant from a label is permitted, and also yields a
+// label. Subtracting two labels that have some relationship to each
+// other is permitted, and yields a constant.
+//
+// For example:
+//
+//   Label a;               // a's value is undefined
+//   Label b;               // b's value is undefined
+//   {
+//     Label c = a + 4;     // okay, even though a's value is unknown
+//     b = c + 4;           // also okay; b is now a+8
+//   }
+//   Label d = b - 2;       // okay; d == a+6, even though c is gone
+//   d.IsKnownConstant();   // false: d's value is not yet known
+//   d - a;                 // is 6, even though their values are not known
+//   a = 12;                // now b == 20, and d == 18
+//   d.IsKnownConstant(&v); // true; the uint64_t v is set to 18
+//   b.IsKnownConstant(&v); // true; v is set to 20
+//   d = 10;                // error: d is already defined.
+//
+// Label objects' lifetimes are unconstrained: notice that, in the
+// above example, even though a and b are only related through c, and
+// c goes out of scope, the assignment to a sets b's value as well. In
+// particular, it's not necessary to ensure that a Label lives beyond
+// Sections that refer to it.
+class Label {
+ public:
+  Label();                         // An undefined label.
+  explicit Label(uint64_t value);  // A label with a fixed value
+  Label(const Label& value);       // A label equal to another.
+  ~Label();
+
+  Label& operator=(uint64_t value);
+  Label& operator=(const Label& value);
+  Label operator+(uint64_t addend) const;
+  Label operator-(uint64_t subtrahend) const;
+  uint64_t operator-(const Label& subtrahend) const;
+
+  // We could also provide == and != that work on undefined, but
+  // related, labels.
+
+  // Return true if this label's value is known. If VALUE_P is given,
+  // set *VALUE_P to the known value if returning true.
+  bool IsKnownConstant(uint64_t* value_p = NULL) const;
+
+  // Return true if the offset from LABEL to this label is known. If
+  // OFFSET_P is given, set *OFFSET_P to the offset when returning true.
+  //
+  // You can think of l.IsKnownOffsetFrom(m, &d) as being like 'd = l-m',
+  // except that it also returns a value indicating whether the
+  // subtraction is possible given what we currently know of l and m.
+  // It can be possible even if we don't know l and m's values. For
+  // example:
+  //
+  //   Label l, m;
+  //   m = l + 10;
+  //   l.IsKnownConstant();             // false
+  //   m.IsKnownConstant();             // false
+  //   uint64_t d;
+  //   l.IsKnownOffsetFrom(m, &d);      // true, and sets d to -10.
+  //   l-m                              // -10
+  //   m-l                              // 10
+  //   m.IsKnownConstant()              // still false: m's value is not known
+  bool IsKnownOffsetFrom(const Label& label, uint64_t* offset_p = NULL) const;
+
+ private:
+  // A label's value, or if that is not yet known, how the value is
+  // related to other labels' values. A binding may be:
+  // - a known constant,
+  // - constrained to be equal to some other binding plus a constant, or
+  // - unconstrained, and free to take on any value.
+  //
+  // Many labels may point to a single binding, and each binding may
+  // refer to another, so bindings and labels form trees whose leaves
+  // are labels, whose interior nodes (and roots) are bindings, and
+  // where links point from children to parents. Bindings are
+  // reference counted, allowing labels to be lightweight, copyable,
+  // assignable, placed in containers, and so on.
+  class Binding {
+   public:
+    Binding();
+    explicit Binding(uint64_t addend);
+    ~Binding();
+
+    // Increment our reference count.
+    void Acquire() { reference_count_++; };
+    // Decrement our reference count, and return true if it is zero.
+    bool Release() { return --reference_count_ == 0; }
+
+    // Set this binding to be equal to BINDING + VALUE. If BINDING is
+    // NULL, then set this binding to the known constant VALUE.
+    // Update every binding on this binding's chain to point directly
+    // to BINDING, or to be a constant, with addends adjusted
+    // appropriately.
+    void Set(Binding* binding, uint64_t value);
+
+    // Return what we know about the value of this binding.
+    // - If this binding's value is a known constant, set BASE to
+    //   NULL, and set ADDEND to its value.
+    // - If this binding is not a known constant but related to other
+    //   bindings, set BASE to the binding at the end of the relation
+    //   chain (which will always be unconstrained), and set ADDEND to the
+    //   value to add to that binding's value to get this binding's
+    //   value.
+    // - If this binding is unconstrained, set BASE to this, and leave
+    //   ADDEND unchanged.
+    void Get(Binding** base, uint64_t* addend);
+
+   private:
+    // There are three cases:
+    //
+    // - A binding representing a known constant value has base_ NULL,
+    //   and addend_ equal to the value.
+    //
+    // - A binding representing a completely unconstrained value has
+    //   base_ pointing to this; addend_ is unused.
+    //
+    // - A binding whose value is related to some other binding's
+    //   value has base_ pointing to that other binding, and addend_
+    //   set to the amount to add to that binding's value to get this
+    //   binding's value. We only represent relationships of the form
+    //   x = y+c.
+    //
+    // Thus, the base_ links form a chain terminating in either a
+    // known constant value or a completely unconstrained value. Most
+    // operations on bindings do path compression: they change every
+    // binding on the chain to point directly to the final value,
+    // adjusting addends as appropriate.
+    Binding* base_;
+    uint64_t addend_;
+
+    // The number of Labels and Bindings pointing to this binding.
+    // (When a binding points to itself, indicating a completely
+    // unconstrained binding, that doesn't count as a reference.)
+    int reference_count_;
+  };
+
+  // This label's value.
+  Binding* value_;
+};
+
+// Conventions for representing larger numbers as sequences of bytes.
+enum Endianness {
+  kBigEndian,     // Big-endian: the most significant byte comes first.
+  kLittleEndian,  // Little-endian: the least significant byte comes first.
+  kUnsetEndian,   // Not specified; the default for Section's constructor.
+};
+
+// A section is a sequence of bytes, constructed by appending bytes
+// to the end. Sections have a convenient and flexible set of member
+// functions for appending data in various formats: big-endian and
+// little-endian signed and unsigned values of different sizes;
+// LEB128 and ULEB128 values (see below), and raw blocks of bytes.
+//
+// If you need to append a value to a section that is not convenient
+// to compute immediately, you can create a label, append the
+// label's value to the section, and then set the label's value
+// later, when it's convenient to do so. Once a label's value is
+// known, the section class takes care of updating all previously
+// appended references to it.
+//
+// Once all the labels to which a section refers have had their
+// values determined, you can get a copy of the section's contents
+// as a string.
+//
+// Note that there is no specified "start of section" label. This is
+// because there are typically several different meanings for "the
+// start of a section": the offset of the section within an object
+// file, the address in memory at which the section's content appear,
+// and so on. It's up to the code that uses the Section class to
+// keep track of these explicitly, as they depend on the application.
+class Section {
+ public:
+  explicit Section(Endianness endianness = kUnsetEndian)
+      : endianness_(endianness){};
+
+  // A base class destructor should be either public and virtual,
+  // or protected and nonvirtual.
+  virtual ~Section(){};
+
+  // Return the default endianness of this section.
+  Endianness endianness() const { return endianness_; }
+
+  // Append the bytes of DATA to the end of this section. Return
+  // a reference to this section.
+  Section& Append(const string& data) {
+    contents_.append(data);
+    return *this;
+  };
+
+  // Append data from SLICE to the end of this section. Return
+  // a reference to this section.
+  Section& Append(const lul::ImageSlice& slice) {
+    for (size_t i = 0; i < slice.length_; i++) {
+      contents_.append(1, slice.start_[i]);
+    }
+    return *this;
+  }
+
+  // Append data from CSTRING to the end of this section.  The terminating
+  // zero is not included.  Return a reference to this section.
+  Section& Append(const char* cstring) {
+    for (size_t i = 0; cstring[i] != '\0'; i++) {
+      contents_.append(1, cstring[i]);
+    }
+    return *this;
+  }
+
+  // Append SIZE copies of BYTE to the end of this section. Return a
+  // reference to this section.
+  Section& Append(size_t size, uint8_t byte) {
+    contents_.append(size, (char)byte);
+    return *this;
+  }
+
+  // Append NUMBER to this section. ENDIANNESS is the endianness to
+  // use to write the number. SIZE is the length of the number in
+  // bytes. Return a reference to this section.
+  Section& Append(Endianness endianness, size_t size, uint64_t number);
+  Section& Append(Endianness endianness, size_t size, const Label& label);
+
+  // Append SECTION to the end of this section. The labels SECTION
+  // refers to need not be defined yet.
+  //
+  // Note that this has no effect on any Labels' values, or on
+  // SECTION. If placing SECTION within 'this' provides new
+  // constraints on existing labels' values, then it's up to the
+  // caller to fiddle with those labels as needed.
+  Section& Append(const Section& section);
+
+  // Append the contents of DATA as a series of bytes terminated by
+  // a NUL ('\0') character.
+  Section& AppendCString(const string& data) {
+    Append(data);
+    contents_ += '\0';
+    return *this;
+  }
+
+  // Append VALUE or LABEL to this section, with the given bit width and
+  // endianness. Return a reference to this section.
+  //
+  // The names of these functions have the form <ENDIANNESS><BITWIDTH>:
+  // <ENDIANNESS> is either 'L' (little-endian, least significant byte first),
+  //                        'B' (big-endian, most significant byte first), or
+  //                        'D' (default, the section's default endianness)
+  // <BITWIDTH> is 8, 16, 32, or 64.
+  //
+  // Since endianness doesn't matter for a single byte, all the
+  // <BITWIDTH>=8 functions are equivalent.
+  //
+  // These can be used to write both signed and unsigned values, as
+  // the compiler will properly sign-extend a signed value before
+  // passing it to the function, at which point the function's
+  // behavior is the same either way.
+  Section& L8(uint8_t value) {
+    contents_ += value;
+    return *this;
+  }
+  Section& B8(uint8_t value) {
+    contents_ += value;
+    return *this;
+  }
+  Section& D8(uint8_t value) {
+    contents_ += value;
+    return *this;
+  }
+  Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), &B16(uint16_t),
+      &B32(uint32_t), &B64(uint64_t), &D16(uint16_t), &D32(uint32_t),
+      &D64(uint64_t);
+  Section &L8(const Label& label), &L16(const Label& label),
+      &L32(const Label& label), &L64(const Label& label),
+      &B8(const Label& label), &B16(const Label& label),
+      &B32(const Label& label), &B64(const Label& label),
+      &D8(const Label& label), &D16(const Label& label),
+      &D32(const Label& label), &D64(const Label& label);
+
+  // Append VALUE in a signed LEB128 (Little-Endian Base 128) form.
+  //
+  // The signed LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between -0x40 and 0x3f, then its signed LEB128
+  //   representation is a single byte whose value is N.
+  //
+  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the signed LEB128 representation of N / 128,
+  //   rounded towards negative infinity.
+  //
+  // In other words, we break VALUE into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
+  //
+  // Note that VALUE cannot be a Label (we would have to implement
+  // relaxation).
+  Section& LEB128(long long value);
+
+  // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
+  //
+  // The unsigned LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between 0 and 0x7f, then its unsigned LEB128
+  //   representation is a single byte whose value is N.
+  //
+  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the unsigned LEB128 representation of N /
+  //   128, rounded towards negative infinity.
+  //
+  // Note that VALUE cannot be a Label (we would have to implement
+  // relaxation).
+  Section& ULEB128(uint64_t value);
+
+  // Jump to the next location aligned on an ALIGNMENT-byte boundary,
+  // relative to the start of the section. Fill the gap with PAD_BYTE.
+  // ALIGNMENT must be a power of two. Return a reference to this
+  // section.
+  Section& Align(size_t alignment, uint8_t pad_byte = 0);
+
+  // Return the current size of the section.
+  size_t Size() const { return contents_.size(); }
+
+  // Return a label representing the start of the section.
+  //
+  // It is up to the user whether this label represents the section's
+  // position in an object file, the section's address in memory, or
+  // what have you; some applications may need both, in which case
+  // this simple-minded interface won't be enough. This class only
+  // provides a single start label, for use with the Here and Mark
+  // member functions.
+  //
+  // Ideally, we'd provide this in a subclass that actually knows more
+  // about the application at hand and can provide an appropriate
+  // collection of start labels. But then the appending member
+  // functions like Append and D32 would return a reference to the
+  // base class, not the derived class, and the chaining won't work.
+  // Since the only value here is in pretty notation, that's a fatal
+  // flaw.
+  Label start() const { return start_; }
+
+  // Return a label representing the point at which the next Appended
+  // item will appear in the section, relative to start().
+  Label Here() const { return start_ + Size(); }
+
+  // Set *LABEL to Here, and return a reference to this section.
+  Section& Mark(Label* label) {
+    *label = Here();
+    return *this;
+  }
+
+  // If there are no undefined label references left in this section,
+  // move its contents into *CONTENTS and clear this section. Return
+  // true on success; if a label is still undefined, report it on
+  // stderr and return false.
+  bool GetContents(string* contents);
+
+ private:
+  // Used internally. A reference to a label's value.
+  struct Reference {
+    Reference(size_t set_offset, Endianness set_endianness, size_t set_size,
+              const Label& set_label)
+        : offset(set_offset),
+          endianness(set_endianness),
+          size(set_size),
+          label(set_label) {}
+
+    // The offset of the reference within the section.
+    size_t offset;
+
+    // The endianness of the reference.
+    Endianness endianness;
+
+    // The size of the reference.
+    size_t size;
+
+    // The label to which this is a reference.
+    Label label;
+  };
+
+  // The default endianness of this section.
+  Endianness endianness_;
+
+  // The contents of the section.
+  string contents_;
+
+  // References to labels within those contents.
+  vector<Reference> references_;
+
+  // A label referring to the beginning of the section.
+  Label start_;
+};
+
+}  // namespace test_assembler
+}  // namespace lul_test
+
+namespace lul_test {
+
+using lul::DwarfPointerEncoding;
+using lul_test::test_assembler::Endianness;
+using lul_test::test_assembler::Label;
+using lul_test::test_assembler::Section;
+
+class CFISection : public Section {
+ public:
+  // CFI augmentation strings beginning with 'z', defined by the
+  // Linux/IA-64 C++ ABI, can specify interesting encodings for
+  // addresses appearing in FDE headers and call frame instructions (and
+  // for additional fields whose presence the augmentation string
+  // specifies). In particular, pointers can be specified to be relative
+  // to various base address: the start of the .text section, the
+  // location holding the address itself, and so on. These allow the
+  // frame data to be position-independent even when they live in
+  // write-protected pages. These variants are specified at the
+  // following two URLs:
+  //
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+  //
+  // CFISection leaves the production of well-formed 'z'-augmented CIEs and
+  // FDEs to the user, but does provide EncodedPointer, to emit
+  // properly-encoded addresses for a given pointer encoding.
+  // EncodedPointer uses an instance of this structure to find the base
+  // addresses it should use; you can establish a default for all encoded
+  // pointers appended to this section with SetEncodedPointerBases.
+  struct EncodedPointerBases {
+    EncodedPointerBases() : cfi(), text(), data() {}  // All bases start at 0.
+
+    // The starting address of this CFI section in memory, for
+    // DW_EH_PE_pcrel. DW_EH_PE_pcrel pointers may only be used in data
+    // that is loaded into the program's address space.
+    uint64_t cfi;
+
+    // The starting address of this file's .text section, for DW_EH_PE_textrel.
+    uint64_t text;
+
+    // The starting address of this file's .got or .eh_frame_hdr section,
+    // for DW_EH_PE_datarel.
+    uint64_t data;
+  };
+
+  // Create a CFISection whose endianness is ENDIANNESS, and where
+  // machine addresses are ADDRESS_SIZE bytes long. If EH_FRAME is
+  // true, use the .eh_frame format, as described by the Linux
+  // Standards Base Core Specification, instead of the DWARF CFI
+  // format.
+  CFISection(Endianness endianness, size_t address_size, bool eh_frame = false)
+      : Section(endianness),
+        address_size_(address_size),
+        eh_frame_(eh_frame),
+        pointer_encoding_(lul::DW_EH_PE_absptr),
+        encoded_pointer_bases_(),
+        entry_length_(NULL),
+        in_fde_(false) {
+    // The 'start', 'Here', and 'Mark' members of a CFISection all refer
+    // to section offsets, so the start label is defined to be zero.
+    start() = 0;  // NOTE(review): assigns via the copy start() returns; confirm.
+  }
+
+  // Return this CFISection's address size.
+  size_t AddressSize() const { return address_size_; }
+
+  // Return true if this CFISection uses the .eh_frame format, or
+  // false if it contains ordinary DWARF CFI data.
+  bool ContainsEHFrame() const { return eh_frame_; }
+
+  // Use ENCODING for pointers in calls to FDEHeader and EncodedPointer.
+  void SetPointerEncoding(DwarfPointerEncoding encoding) {
+    pointer_encoding_ = encoding;
+  }
+
+  // Use the addresses in BASES as the base addresses for encoded
+  // pointers in subsequent calls to FDEHeader or EncodedPointer.
+  // This function makes a copy of BASES.
+  void SetEncodedPointerBases(const EncodedPointerBases& bases) {
+    encoded_pointer_bases_ = bases;
+  }
+
+  // Append a Common Information Entry header to this section with the
+  // given values. If dwarf64 is true, use the 64-bit DWARF initial
+  // length format for the CIE's initial length. Return a reference to
+  // this section. You should call FinishEntry after writing the last
+  // instruction for the CIE.
+  //
+  // Before calling this function, you will typically want to use Mark
+  // or Here to make a label to pass to FDEHeader that refers to this
+  // CIE's position in the section.
+  CFISection& CIEHeader(uint64_t code_alignment_factor,
+                        int data_alignment_factor,
+                        unsigned return_address_register, uint8_t version = 3,
+                        const string& augmentation = "", bool dwarf64 = false);
+
+  // Append a Frame Description Entry header to this section with the
+  // given values. If dwarf64 is true, use the 64-bit DWARF initial
+  // length format for the FDE's initial length. Return a reference to
+  // this section. You should call FinishEntry after writing the last
+  // instruction for the FDE.
+  //
+  // This function doesn't support entries that are longer than
+  // 0xffffff00 bytes. (The "initial length" is always a 32-bit
+  // value.) Nor does it support .debug_frame sections longer than
+  // 0xffffff00 bytes.
+  CFISection& FDEHeader(Label cie_pointer, uint64_t initial_location,
+                        uint64_t address_range, bool dwarf64 = false);
+
+  // Note the current position as the end of the last CIE or FDE we
+  // started, after padding with DW_CFA_nops for alignment. This
+  // defines the label representing the entry's length, cited in the
+  // entry's header. Return a reference to this section.
+  CFISection& FinishEntry();
+
+  // Append the contents of BLOCK as a DW_FORM_block value: an
+  // unsigned LEB128 length, followed by that many bytes of data.
+  CFISection& Block(const lul::ImageSlice& block) {
+    ULEB128(block.length_);
+    Append(block);
+    return *this;
+  }
+
+  // Append data from CSTRING as a DW_FORM_block value: an unsigned LEB128
+  // length, followed by that many bytes of data. The terminating zero is not
+  // included.
+  CFISection& Block(const char* cstring) {
+    ULEB128(strlen(cstring));
+    Append(cstring);
+    return *this;
+  }
+
+  // Append ADDRESS to this section, in the appropriate size and
+  // endianness. Return a reference to this section.
+  CFISection& Address(uint64_t address) {
+    Section::Append(endianness(), address_size_, address);
+    return *this;
+  }
+
+  // Append ADDRESS to this section, using ENCODING and BASES. ENCODING
+  // defaults to this section's default encoding, established by
+  // SetPointerEncoding. BASES defaults to this section's bases, set by
+  // SetEncodedPointerBases. If the DW_EH_PE_indirect bit is set in the
+  // encoding, assume that ADDRESS is where the true address is stored.
+  // Return a reference to this section.
+  //
+  // (C++ doesn't let me use default arguments here, because I want to
+  // refer to members of *this in the default argument expression.)
+  CFISection& EncodedPointer(uint64_t address) {
+    return EncodedPointer(address, pointer_encoding_, encoded_pointer_bases_);
+  }
+  CFISection& EncodedPointer(uint64_t address, DwarfPointerEncoding encoding) {
+    return EncodedPointer(address, encoding, encoded_pointer_bases_);
+  }
+  CFISection& EncodedPointer(uint64_t address, DwarfPointerEncoding encoding,
+                             const EncodedPointerBases& bases);
+
+  // Restate some member functions, to keep chaining working nicely.
+  CFISection& Mark(Label* label) {
+    Section::Mark(label);
+    return *this;
+  }
+  CFISection& D8(uint8_t v) {
+    Section::D8(v);
+    return *this;
+  }
+  CFISection& D16(uint16_t v) {
+    Section::D16(v);
+    return *this;
+  }
+  CFISection& D16(Label v) {
+    Section::D16(v);
+    return *this;
+  }
+  CFISection& D32(uint32_t v) {
+    Section::D32(v);
+    return *this;
+  }
+  CFISection& D32(const Label& v) {
+    Section::D32(v);
+    return *this;
+  }
+  CFISection& D64(uint64_t v) {
+    Section::D64(v);
+    return *this;
+  }
+  CFISection& D64(const Label& v) {
+    Section::D64(v);
+    return *this;
+  }
+  CFISection& LEB128(long long v) {
+    Section::LEB128(v);
+    return *this;
+  }
+  CFISection& ULEB128(uint64_t v) {
+    Section::ULEB128(v);
+    return *this;
+  }
+
+ private:
+  // A length value that we've appended to the section, but is not yet
+  // known. LENGTH is the appended value; START is a label referring
+  // to the start of the data whose length was cited.
+  struct PendingLength {
+    Label length;
+    Label start;
+  };
+
+  // Constants used in CFI/.eh_frame data:
+
+  // If the first four bytes of an "initial length" are this constant, then
+  // the data uses the 64-bit DWARF format, and the length itself is the
+  // subsequent eight bytes.
+  static const uint32_t kDwarf64InitialLengthMarker = 0xffffffffU;
+
+  // The CIE identifier for 32- and 64-bit DWARF CFI and .eh_frame data.
+  static const uint32_t kDwarf32CIEIdentifier = ~(uint32_t)0;
+  static const uint64_t kDwarf64CIEIdentifier = ~(uint64_t)0;
+  static const uint32_t kEHFrame32CIEIdentifier = 0;
+  static const uint64_t kEHFrame64CIEIdentifier = 0;
+
+  // The size of a machine address for the data in this section.
+  size_t address_size_;
+
+  // If true, we are generating a Linux .eh_frame section, instead of
+  // a standard DWARF .debug_frame section.
+  bool eh_frame_;
+
+  // The encoding to use for FDE pointers.
+  DwarfPointerEncoding pointer_encoding_;
+
+  // The base addresses to use when emitting encoded pointers.
+  EncodedPointerBases encoded_pointer_bases_;
+
+  // The length value for the current entry.
+  //
+  // Oddly, this must be dynamically allocated. Labels never get new
+  // values; they only acquire constraints on the value they already
+  // have, or assert if you assign them something incompatible. So
+  // each header needs truly fresh Label objects to cite in their
+  // headers and track their positions. The alternative is explicit
+  // destructor invocation and a placement new. Ick.
+  PendingLength* entry_length_;
+
+  // True if we are currently emitting an FDE --- that is, we have
+  // called FDEHeader but have not yet called FinishEntry.
+  bool in_fde_;
+
+  // If in_fde_ is true, this is its starting address. We use this for
+  // emitting DW_EH_PE_funcrel pointers.
+  uint64_t fde_start_address_;
+};
+
+}  // namespace lul_test
+
+#endif  // LUL_TEST_INFRASTRUCTURE_H
diff --git a/tools/profiler/tests/gtest/ThreadProfileTest.cpp b/tools/profiler/tests/gtest/ThreadProfileTest.cpp
new file mode 100644
index 0000000000..b8a15c39b2
--- /dev/null
+++ b/tools/profiler/tests/gtest/ThreadProfileTest.cpp
@@ -0,0 +1,60 @@
+
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef MOZ_GECKO_PROFILER
+
+#  include "ProfileBuffer.h"
+
+#  include "mozilla/PowerOfTwo.h"
+#  include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
+#  include "mozilla/ProfileChunkedBuffer.h"
+
+#  include "gtest/gtest.h"
+
+// Make sure we can record one entry and read it
+TEST(ThreadProfile, InsertOneEntry)
+{
+  mozilla::ProfileBufferChunkManagerWithLocalLimit chunkManager(
+      2 * (1 + uint32_t(sizeof(ProfileBufferEntry))) * 4,
+      2 * (1 + uint32_t(sizeof(ProfileBufferEntry))));
+  mozilla::ProfileChunkedBuffer profileChunkedBuffer(
+      mozilla::ProfileChunkedBuffer::ThreadSafety::WithMutex, chunkManager);
+  auto pb = mozilla::MakeUnique<ProfileBuffer>(profileChunkedBuffer);
+  pb->AddEntry(ProfileBufferEntry::Time(123.1));
+  ProfileBufferEntry entry = pb->GetEntry(pb->BufferRangeStart());
+  ASSERT_TRUE(entry.IsTime());
+  ASSERT_EQ(123.1, entry.GetDouble());
+}
+
+// See if we can insert some entries
+TEST(ThreadProfile, InsertEntriesNoWrap)
+{
+  mozilla::ProfileBufferChunkManagerWithLocalLimit chunkManager(
+      100 * (1 + uint32_t(sizeof(ProfileBufferEntry))),
+      100 * (1 + uint32_t(sizeof(ProfileBufferEntry))) / 4);
+  mozilla::ProfileChunkedBuffer profileChunkedBuffer(
+      mozilla::ProfileChunkedBuffer::ThreadSafety::WithMutex, chunkManager);
+  auto pb = mozilla::MakeUnique<ProfileBuffer>(profileChunkedBuffer);
+  const int test_size = 50;
+  for (int i = 0; i < test_size; i++) {
+    pb->AddEntry(ProfileBufferEntry::Time(i));
+  }
+  int times = 0;
+  uint64_t readPos = pb->BufferRangeStart();
+  while (readPos != pb->BufferRangeEnd()) {
+    ProfileBufferEntry entry = pb->GetEntry(readPos);
+    readPos++;
+    if (entry.GetKind() == ProfileBufferEntry::Kind::INVALID) {
+      continue;
+    }
+    ASSERT_TRUE(entry.IsTime());
+    ASSERT_EQ(times, entry.GetDouble());
+    times++;
+  }
+  ASSERT_EQ(test_size, times);
+}
+
+#endif  // MOZ_GECKO_PROFILER
diff --git a/tools/profiler/tests/gtest/moz.build b/tools/profiler/tests/gtest/moz.build
new file mode 100644
index 0000000000..4eb1fef762
--- /dev/null
+++ b/tools/profiler/tests/gtest/moz.build
@@ -0,0 +1,45 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, you can obtain one at http://mozilla.org/MPL/2.0/.
+
+if (
+    CONFIG["MOZ_GECKO_PROFILER"]
+    and CONFIG["OS_TARGET"] in ("Android", "Linux")
+    and CONFIG["CPU_ARCH"]
+    in (
+        "arm",
+        "aarch64",
+        "x86",
+        "x86_64",
+    )
+):
+    UNIFIED_SOURCES += [
+        "LulTest.cpp",
+        "LulTestDwarf.cpp",
+        "LulTestInfrastructure.cpp",
+    ]
+
+LOCAL_INCLUDES += [
+    "/netwerk/base",
+    "/netwerk/protocol/http",
+    "/toolkit/components/jsoncpp/include",
+    "/tools/profiler/core",
+    "/tools/profiler/gecko",
+    "/tools/profiler/lul",
+]
+
+if CONFIG["OS_TARGET"] != "Android":
+    UNIFIED_SOURCES += [
+        "GeckoProfiler.cpp",
+        "ThreadProfileTest.cpp",
+    ]
+
+USE_LIBS += [
+    "jsoncpp",
+]
+
+include("/ipc/chromium/chromium-config.mozbuild")
+
+FINAL_LIBRARY = "xul-gtest"
diff --git a/tools/profiler/tests/shared-head.js b/tools/profiler/tests/shared-head.js
new file mode 100644
index 0000000000..d1b2f6868a
--- /dev/null
+++ b/tools/profiler/tests/shared-head.js
@@ -0,0 +1,591 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* globals Assert */
+/* globals info */
+
+/**
+ * This file contains utilities that can be shared between xpcshell tests and mochitests.
+ */
+
+// The marker phases.
+const INSTANT = 0;
+const INTERVAL = 1;
+const INTERVAL_START = 2;
+const INTERVAL_END = 3;
+
+// NOTE: `Services` is used below but is not declared in this file; it is
+// presumably provided as a global by the environment loading this script.
+
+const defaultSettings = {
+  entries: 8 * 1024 * 1024, // 8M entries = 64MB
+  interval: 1, // ms
+  features: [],
+  threads: ["GeckoMain"],
+};
+
+// Start the profiler (stopping it first if MOZ_PROFILER_STARTUP had already
+// started it) and return the promise from `StartProfiler`, which will get
+// resolved when all child processes have started their own profiler.
+async function startProfiler(callersSettings) {
+  if (Services.profiler.IsActive()) {
+    Assert.ok(
+      Services.env.exists("MOZ_PROFILER_STARTUP"),
+      "The profiler is active at the begining of the test, " +
+        "the MOZ_PROFILER_STARTUP environment variable should be set."
+    );
+    if (Services.env.exists("MOZ_PROFILER_STARTUP")) {
+      // If the startup profiling environment variable exists, it is likely
+      // that tests are being profiled.
+      // Stop the profiler before starting profiler tests.
+      info(
+        "This test starts and stops the profiler and is not compatible " +
+          "with the use of MOZ_PROFILER_STARTUP. " +
+          "Stopping the profiler before starting the test."
+      );
+      await Services.profiler.StopProfiler();
+    } else {
+      throw new Error(
+        "The profiler must not be active before starting it in a test."
+      );
+    }
+  }
+  const settings = Object.assign({}, defaultSettings, callersSettings);
+  return Services.profiler.StartProfiler(
+    settings.entries,
+    settings.interval,
+    settings.features,
+    settings.threads,
+    0,
+    settings.duration
+  );
+}
+
+function startProfilerForMarkerTests() {
+  return startProfiler({
+    features: ["nostacksampling", "js"],
+    threads: ["GeckoMain", "DOM Worker"],
+  });
+}
+
+/**
+ * This is a helper function to be able to run `await wait(500)`. Unfortunately
+ * this is needed as the act of collecting functions relies on the periodic
+ * sampling of the threads. See:
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=1529053
+ *
+ * @param {number} time
+ * @returns {Promise}
+ */
+function wait(time) {
+  return new Promise(resolve => {
+    // eslint-disable-next-line mozilla/no-arbitrary-setTimeout
+    setTimeout(resolve, time);
+  });
+}
+
+/**
+ * Get the payloads of a type recursively, including from all subprocesses.
+ *
+ * @param {Object} profile The gecko profile.
+ * @param {string} type The marker payload type, e.g. "DiskIO".
+ * @param {Array} payloadTarget The recursive list of payloads.
+ * @return {Array} The final payloads.
+ */
+function getPayloadsOfTypeFromAllThreads(profile, type, payloadTarget = []) {
+  for (const { markers } of profile.threads) {
+    for (const markerTuple of markers.data) {
+      const payload = markerTuple[markers.schema.data];
+      if (payload && payload.type === type) {
+        payloadTarget.push(payload);
+      }
+    }
+  }
+
+  for (const subProcess of profile.processes) {
+    getPayloadsOfTypeFromAllThreads(subProcess, type, payloadTarget);
+  }
+
+  return payloadTarget;
+}
+
+/**
+ * Get the payloads of a type from a single thread.
+ *
+ * @param {Object} thread The thread from a profile.
+ * @param {string} type The marker payload type, e.g. "DiskIO".
+ * @return {Array} The payloads.
+ */
+function getPayloadsOfType(thread, type) {
+  const { markers } = thread;
+  const results = [];
+  for (const markerTuple of markers.data) {
+    const payload = markerTuple[markers.schema.data];
+    if (payload && payload.type === type) {
+      results.push(payload);
+    }
+  }
+  return results;
+}
+
+/**
+ * Applies the marker schema to create individual objects for each marker
+ *
+ * @param {Object} thread The thread from a profile.
+ * @return {InflatedMarker[]} The markers.
+ */
+function getInflatedMarkerData(thread) {
+  const { markers, stringTable } = thread;
+  return markers.data.map(markerTuple => {
+    const marker = {};
+    for (const [key, tupleIndex] of Object.entries(markers.schema)) {
+      marker[key] = markerTuple[tupleIndex];
+      if (key === "name") {
+        // Use the string from the string table.
+        marker[key] = stringTable[marker[key]];
+      }
+    }
+    return marker;
+  });
+}
+
+/**
+ * Applies the marker schema to create individual objects for each marker, then
+ * keeps only the network markers that match the profiler tests.
+ *
+ * @param {Object} thread The thread from a profile.
+ * @return {InflatedMarker[]} The filtered network markers.
+ */
+function getInflatedNetworkMarkers(thread) {
+  const markers = getInflatedMarkerData(thread);
+  return markers.filter(
+    m =>
+      m.data &&
+      m.data.type === "Network" &&
+      // We filter out network markers that aren't related to the test, to
+      // avoid intermittents.
+      m.data.URI.includes("/tools/profiler/")
+  );
+}
+
+/**
+ * From a list of network markers, this returns pairs of start/stop markers.
+ * If a stop marker can't be found for a start marker, this will return an array
+ * of only 1 element.
+ *
+ * @param {InflatedMarker[]} allNetworkMarkers Network markers
+ * @return {InflatedMarker[][]} Pairs of network markers
+ */
+function getPairsOfNetworkMarkers(allNetworkMarkers) {
+  // For each 'start' marker we want to find the next 'stop' or 'redirect'
+  // marker with the same id.
+  const result = [];
+  const mapOfStartMarkers = new Map(); // marker id -> id in result array
+  for (const marker of allNetworkMarkers) {
+    const { data } = marker;
+    if (data.status === "STATUS_START") {
+      if (mapOfStartMarkers.has(data.id)) {
+        const previousMarker = result[mapOfStartMarkers.get(data.id)][0];
+        Assert.ok(
+          false,
+          `We found 2 start markers with the same id ${data.id}, without end marker in-between.` +
+            `The first marker has URI ${previousMarker.data.URI}, the second marker has URI ${data.URI}.` +
+            ` This should not happen.`
+        );
+        continue;
+      }
+
+      mapOfStartMarkers.set(data.id, result.length);
+      result.push([marker]);
+    } else {
+      // STOP or REDIRECT
+      if (!mapOfStartMarkers.has(data.id)) {
+        Assert.ok(
+          false,
+          `We found an end marker without a start marker (id: ${data.id}, URI: ${data.URI}). This should not happen.`
+        );
+        continue;
+      }
+      result[mapOfStartMarkers.get(data.id)].push(marker);
+      mapOfStartMarkers.delete(data.id);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * It can be helpful to force the profiler to collect a JavaScript sample. This
+ * function spins on a while loop until at least one more sample is collected.
+ *
+ * @return {number} The index of the collected sample.
+ */
+function captureAtLeastOneJsSample() {
+  function getProfileSampleCount() {
+    const profile = Services.profiler.getProfileData();
+    return profile.threads[0].samples.data.length;
+  }
+
+  const sampleCount = getProfileSampleCount();
+  // Create an infinite loop until a sample has been collected.
+  while (true) {
+    if (sampleCount < getProfileSampleCount()) {
+      return sampleCount;
+    }
+  }
+}
+
+function isJSONWhitespace(c) {
+  return ["\n", "\r", " ", "\t"].includes(c);
+}
+
+function verifyJSONStringIsCompact(s) {
+  const stateData = 0;
+  const stateString = 1;
+  const stateEscapedChar = 2;
+  let state = stateData;
+  for (let i = 0; i < s.length; ++i) {
+    let c = s[i];
+    switch (state) {
+      case stateData:
+        if (isJSONWhitespace(c)) {
+          Assert.ok(
+            false,
+            `"Unexpected JSON whitespace at index ${i} in profile: <<<${s}>>>"`
+          );
+          return;
+        }
+        if (c == '"') {
+          state = stateString;
+        }
+        break;
+      case stateString:
+        if (c == '"') {
+          state = stateData;
+        } else if (c == "\\") {
+          state = stateEscapedChar;
+        }
+        break;
+      case stateEscapedChar:
+        state = stateString;
+        break;
+    }
+  }
+}
+
+/**
+ * This function pauses the profiler before getting the profile. Then after
+ * getting the data, the profiler is stopped, and all profiler data is removed.
+ * @returns {Promise<Profile>}
+ */
+async function stopNowAndGetProfile() {
+  // Don't await the pause, because each process will handle it before it
+  // receives the following `getProfileDataAsArrayBuffer()`.
+  Services.profiler.Pause();
+
+  const profileArrayBuffer =
+    await Services.profiler.getProfileDataAsArrayBuffer();
+  await Services.profiler.StopProfiler();
+
+  const profileUint8Array = new Uint8Array(profileArrayBuffer);
+  const textDecoder = new TextDecoder("utf-8", { fatal: true });
+  const profileString = textDecoder.decode(profileUint8Array);
+  verifyJSONStringIsCompact(profileString);
+
+  return JSON.parse(profileString);
+}
+
+/**
+ * This function ensures there's at least one sample, then pauses the profiler
+ * before getting the profile. Then after getting the data, the profiler is
+ * stopped, and all profiler data is removed.
+ * @returns {Promise<Profile>}
+ */
+async function waitSamplingAndStopAndGetProfile() {
+  await Services.profiler.waitOnePeriodicSampling();
+  return stopNowAndGetProfile();
+}
+
+/**
+ * Verifies that a marker is an interval marker: its phase is INTERVAL and it
+ * carries numeric start and end times.
+ * @param {InflatedMarker} inflatedMarker
+ * @returns {boolean}
+ */
+function isIntervalMarker(inflatedMarker) {
+  return (
+    inflatedMarker.phase === INTERVAL &&
+    typeof inflatedMarker.startTime === "number" &&
+    typeof inflatedMarker.endTime === "number"
+  );
+}
+
+/**
+ * @param {Profile} profile
+ * @returns {Thread[]}
+ */
+function getThreads(profile) {
+  const threads = [];
+
+  function getThreadsRecursive(process) {
+    for (const thread of process.threads) {
+      threads.push(thread);
+    }
+    for (const subprocess of process.processes) {
+      getThreadsRecursive(subprocess);
+    }
+  }
+
+  getThreadsRecursive(profile);
+  return threads;
+}
+
+/**
+ * Find a specific marker schema from any process of a profile.
+ *
+ * @param {Profile} profile
+ * @param {string} name
+ * @returns {MarkerSchema}
+ */
+function getSchema(profile, name) {
+  {
+    const schema = profile.meta.markerSchema.find(s => s.name === name);
+    if (schema) {
+      return schema;
+    }
+  }
+  for (const subprocess of profile.processes) {
+    const schema = subprocess.meta.markerSchema.find(s => s.name === name);
+    if (schema) {
+      return schema;
+    }
+  }
+  console.error("Parent process schema", profile.meta.markerSchema);
+  for (const subprocess of profile.processes) {
+    console.error("Child process schema", subprocess.meta.markerSchema);
+  }
+  throw new Error(`Could not find a schema for "${name}".`);
+}
+
+/**
+ * This escapes all characters that have a special meaning in RegExps.
+ * This was stolen from https://github.com/sindresorhus/escape-string-regexp and
+ * so it is licence MIT and:
+ * Copyright (c) Sindre Sorhus <sindresorhus@gmail.com> (https://sindresorhus.com).
+ * See the full license in https://raw.githubusercontent.com/sindresorhus/escape-string-regexp/main/license.
+ * @param {string} string The string to be escaped
+ * @returns {string} The result
+ */
+function escapeStringRegexp(string) {
+  if (typeof string !== "string") {
+    throw new TypeError("Expected a string");
+  }
+
+  // Escape characters with special meaning either inside or outside character
+  // sets.  Use a simple backslash escape when it’s always valid, and a `\xnn`
+  // escape when the simpler form would be disallowed by Unicode patterns’
+  // stricter grammar.
+  return string.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
+}
+
+/** ------ Assertions helper ------ */
+/**
+ * This assert helper function makes it easy to check a lot of properties in an
+ * object. We augment Assert.sys.mjs to make it easier to use.
+ */
+Object.assign(Assert, {
+  /*
+   * It checks if the properties on the right are all present in the object on
+   * the left. Note that the object might still have other properties (see
+   * objectContainsOnly below if you want the stricter form).
+   *
+   * The basic form does basic equality on each expected property:
+   *
+   * Assert.objectContains(fixture, {
+   *   foo: "foo",
+   *   bar: 1,
+   *   baz: true,
+   * });
+   *
+   * But it also has a more powerful form with expectations. The available
+   * expectations are:
+   * - any(): this only checks for the existence of the property, not its value
+   * - number(), string(), boolean(), bigint(), function(), symbol(), object():
+   *   this checks if the value is of this type
+   * - objectContains(expected): this applies Assert.objectContains()
+   *   recursively on this property.
+   * - stringContains(needle): this checks if the expected value is included in
+   *   the property value.
+   * - stringMatches(regexp): this checks if the property value matches this
+   *   regexp. The regexp can be passed as a string, to be dynamically built.
+   *
+   * example:
+   *
+   * Assert.objectContains(fixture, {
+   *   name: Expect.stringMatches(`Load \\d+:.*${url}`),
+   *   data: Expect.objectContains({
+   *     status: "STATUS_STOP",
+   *     URI: Expect.stringContains("https://"),
+   *     requestMethod: "GET",
+   *     contentType: Expect.string(),
+   *     startTime: Expect.number(),
+   *     cached: Expect.boolean(),
+   *   }),
+   * });
+   *
+   * Each expectation will translate into one or more Assert call. Therefore if
+   * one expectation fails, this will be clearly visible in the test output.
+   *
+   * Expectations can also be normal functions, for example:
+   *
+   * Assert.objectContains(fixture, {
+   *   number: value => Assert.greater(value, 5)
+   * });
+   *
+   * Note that you'll need to use Assert inside this function.
+   */
+  objectContains(object, expectedProperties) {
+    // Bail out early on non-objects; note that typeof null is "object" too.
+    if (typeof object !== "object" || object === null) {
+      this.ok(
+        false,
+        `The first parameter should be an object, but found: ${object}.`
+      );
+      return;
+    }
+
+    if (typeof expectedProperties !== "object" || expectedProperties === null) {
+      this.ok(
+        false,
+        `The second parameter should be an object, but found: ${expectedProperties}.`
+      );
+      return;
+    }
+
+    for (const key of Object.keys(expectedProperties)) {
+      const expected = expectedProperties[key];
+      if (!(key in object)) {
+        this.report(
+          true,
+          object,
+          expectedProperties,
+          `The object should contain the property "${key}", but it's missing.`
+        );
+        continue;
+      }
+
+      if (typeof expected === "function") {
+        // An expectation function: it runs its own assertions on the value.
+        expected(
+          object[key],
+          `The object should contain the property "${key}" with an expected value and type.`
+        );
+      } else {
+        // Otherwise, we check for equality.
+        this.equal(
+          object[key],
+          expectedProperties[key],
+          `The object should contain the property "${key}" with an expected value.`
+        );
+      }
+    }
+  },
+
+  /**
+   * This is very similar to the previous `objectContains`, but this also looks
+   * at the number of the objects' properties. Thus this will fail if the
+   * objects don't have the same properties exactly.
+   */
+  objectContainsOnly(object, expectedProperties) {
+    // Bail out early on non-objects; note that typeof null is "object" too.
+    if (typeof object !== "object" || object === null) {
+      this.ok(
+        false,
+        `The first parameter should be an object but found: ${object}.`
+      );
+      return;
+    }
+
+    if (typeof expectedProperties !== "object" || expectedProperties === null) {
+      this.ok(
+        false,
+        `The second parameter should be an object but found: ${expectedProperties}.`
+      );
+      return;
+    }
+
+    // In objectContainsOnly, we specifically want to check if all properties
+    // from the fixture object are expected.
+    // We'll be failing a test only for the specific properties that weren't
+    // expected, and only fail with one message, so that the test outputs aren't
+    // spammed.
+    const extraProperties = [];
+    for (const fixtureKey of Object.keys(object)) {
+      if (!(fixtureKey in expectedProperties)) {
+        extraProperties.push(fixtureKey);
+      }
+    }
+
+    if (extraProperties.length) {
+      // Some extra properties have been found.
+      this.report(
+        true,
+        object,
+        expectedProperties,
+        `These properties are present, but shouldn't: "${extraProperties.join(
+          '", "'
+        )}".`
+      );
+    }
+
+    // Now, let's carry on the rest of our work.
+    this.objectContains(object, expectedProperties);
+  },
+});
+
+const Expect = {
+  any:
+    () =>
+    actual => {} /* We don't check anything more than the presence of this property. */,
+};
+
+/* These functions are part of the Assert object, and we want to reuse them. */
+[
+  "stringContains",
+  "stringMatches",
+  "objectContains",
+  "objectContainsOnly",
+].forEach(
+  assertChecker =>
+    (Expect[assertChecker] =
+      expected =>
+      (actual, ...moreArgs) =>
+        Assert[assertChecker](actual, expected, ...moreArgs))
+);
+
+/* These functions will only check for the type. */
+[
+  "number",
+  "string",
+  "boolean",
+  "bigint",
+  "symbol",
+  "object",
+  "function",
+].forEach(type => (Expect[type] = makeTypeChecker(type)));
+
+function makeTypeChecker(type) {
+  return (...unexpectedArgs) => {
+    if (unexpectedArgs.length) {
+      throw new Error(
+        "Type checkers expectations aren't expecting any argument."
+      );
+    }
+    return (actual, message) => {
+      const isCorrect = typeof actual === type;
+      Assert.report(!isCorrect, actual, type, message, "has type");
+    };
+  };
+}
+/* ------ End of assertion helper ------ */
diff --git a/tools/profiler/tests/xpcshell/head.js b/tools/profiler/tests/xpcshell/head.js
new file mode 100644
index 0000000000..ce87b32fd5
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/head.js
@@ -0,0 +1,244 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* import-globals-from ../shared-head.js */
+
+// NOTE: `Services` is not declared in this file; it is presumably provided as
+// a global by the xpcshell environment.
+
+const { AppConstants } = ChromeUtils.importESModule(
+  "resource://gre/modules/AppConstants.sys.mjs"
+);
+const { setTimeout } = ChromeUtils.importESModule(
+  "resource://gre/modules/Timer.sys.mjs"
+);
+
+// Load the shared head
+const sharedHead = do_get_file("shared-head.js", false);
+if (!sharedHead) {
+  throw new Error("Could not load the shared head.");
+}
+Services.scriptloader.loadSubScript(
+  Services.io.newFileURI(sharedHead).spec,
+  this
+);
+
+/**
+ * This function takes a thread, and a sample tuple from the "data" array, and
+ * inflates the frame to be an array of strings.
+ *
+ * @param {Object} thread - The thread from the profile.
+ * @param {Array} sample - The tuple from the thread.samples.data array.
+ * @returns {Array<string>} An array of function names.
+ */
+function getInflatedStackLocations(thread, sample) {
+  let stackTable = thread.stackTable;
+  let frameTable = thread.frameTable;
+  let stringTable = thread.stringTable;
+  let SAMPLE_STACK_SLOT = thread.samples.schema.stack;
+  let STACK_PREFIX_SLOT = stackTable.schema.prefix;
+  let STACK_FRAME_SLOT = stackTable.schema.frame;
+  let FRAME_LOCATION_SLOT = frameTable.schema.location;
+
+  // Build the stack from the raw data and accumulate the locations in
+  // an array.
+  let stackIndex = sample[SAMPLE_STACK_SLOT];
+  let locations = [];
+  while (stackIndex !== null) {
+    let stackEntry = stackTable.data[stackIndex];
+    let frame = frameTable.data[stackEntry[STACK_FRAME_SLOT]];
+    locations.push(stringTable[frame[FRAME_LOCATION_SLOT]]);
+    stackIndex = stackEntry[STACK_PREFIX_SLOT];
+  }
+
+  // The profiler tree is inverted, so reverse the array.
+  return locations.reverse();
+}
+
+/**
+ * This utility matches up stacks to see if they contain a certain sequence of
+ * stack frames. A correctly functioning profiler will have a certain sequence
+ * of stacks, but we can't always determine exactly which stacks will show up
+ * due to implementation changes, as well as memory addresses being arbitrary to
+ * that particular build.
+ *
+ * This function triggers a test failure with a nice debug message when it
+ * fails.
+ *
+ * @param {Array<string>} actualStackFrames - As generated by
+ *     getInflatedStackLocations.
+ * @param {Array<string | RegExp>} expectedStackFrames - Matches a subset of
+ *     actualStackFrames
+ */
+function expectStackToContain(
+  actualStackFrames,
+  expectedStackFrames,
+  message = "The actual stack and expected stack do not match."
+) {
+  // Log the stacks that are being passed to this assertion, as it could be
+  // useful for when these tests fail.
+  console.log("Actual stack: ", actualStackFrames);
+  console.log(
+    "Expected to contain: ",
+    expectedStackFrames.map(s => s.toString())
+  );
+
+  let actualIndex = 0;
+
+  // Start walking the expected stack and look for matches.
+  for (
+    let expectedIndex = 0;
+    expectedIndex < expectedStackFrames.length;
+    expectedIndex++
+  ) {
+    const expectedStackFrame = expectedStackFrames[expectedIndex];
+
+    while (true) {
+      // Make sure that we haven't run out of actual stack frames.
+      if (actualIndex >= actualStackFrames.length) {
+        info(`Could not find a match for: "${expectedStackFrame.toString()}"`);
+        Assert.ok(false, message);
+      }
+
+      const actualStackFrame = actualStackFrames[actualIndex];
+      actualIndex++;
+
+      const itMatches =
+        typeof expectedStackFrame === "string"
+          ? expectedStackFrame === actualStackFrame
+          : actualStackFrame.match(expectedStackFrame);
+
+      if (itMatches) {
+        // We found a match, break out of this loop.
+        break;
+      }
+      // Keep on looping looking for a match.
+    }
+  }
+
+  Assert.ok(true, message);
+}
+
+/**
+ * @param {Thread} thread
+ * @param {string} filename - The filename used to trigger FileIO.
+ * @returns {InflatedMarker[]}
+ */
+function getInflatedFileIOMarkers(thread, filename) {
+  const markers = getInflatedMarkerData(thread);
+  return markers.filter(
+    marker =>
+      marker.data?.type === "FileIO" &&
+      marker.data?.filename?.endsWith(filename)
+  );
+}
+
+/**
+ * Checks properties common to all FileIO markers.
+ *
+ * @param {InflatedMarker[]} markers
+ * @param {string} filename
+ */
+function checkInflatedFileIOMarkers(markers, filename) {
+  greater(markers.length, 0, "Found some markers");
+
+  // See IOInterposeObserver::Observation::ObservedOperationString
+  const validOperations = new Set([
+    "write",
+    "fsync",
+    "close",
+    "stat",
+    "create/open",
+    "read",
+  ]);
+  const validSources = new Set(["PoisonIOInterposer", "NSPRIOInterposer"]);
+
+  for (const marker of markers) {
+    try {
+      ok(
+        marker.name.startsWith("FileIO"),
+        "Has a marker.name that starts with FileIO"
+      );
+      equal(marker.data.type, "FileIO", "Has a marker.data.type");
+      ok(isIntervalMarker(marker), "All FileIO markers are interval markers");
+      ok(
+        validOperations.has(marker.data.operation),
+        `The markers have a known operation - "${marker.data.operation}"`
+      );
+      ok(
+        validSources.has(marker.data.source),
+        `The FileIO marker has a known source "${marker.data.source}"`
+      );
+      ok(marker.data.filename.endsWith(filename));
+      ok(Boolean(marker.data.stack), "A stack was collected");
+    } catch (error) {
+      console.error("Failing inflated FileIO marker:", marker);
+      throw error;
+    }
+  }
+}
+
+/**
+ * Do deep equality checks for schema, but then surface nice errors for a user to know
+ * what to do if the check fails.
+ */
+function checkSchema(actual, expected) {
+  const schemaName = expected.name;
+  info(`Checking marker schema for "${schemaName}"`);
+
+  try {
+    ok(
+      actual,
+      `Schema was found for "${schemaName}". See the test output for more information.`
+    );
+    // Check individual properties to surface easier to debug errors.
+    deepEqual(
+      actual.display,
+      expected.display,
+      `The "display" property for ${schemaName} schema matches. See the test output for more information.`
+    );
+    if (expected.data) {
+      ok(actual.data, `Schema was found for "${schemaName}"`);
+      for (const expectedDatum of expected.data) {
+        const actualDatum = actual.data.find(d => d.key === expectedDatum.key);
+        deepEqual(
+          actualDatum,
+          expectedDatum,
+          `The "${schemaName}" field "${expectedDatum.key}" matches expectations. See the test output for more information.`
+        );
+      }
+      equal(
+        actual.data.length,
+        expected.data.length,
+        "The expected and actual data have the same number of items"
+      );
+    }
+
+    // Finally do a true deep equal (actual first, as the Assert API expects).
+    deepEqual(actual, expected, "The entire schema is deepEqual");
+  } catch (error) {
+    // The test results are not very human readable. This is a bit of a hacky
+    // solution to make it more readable.
+    dump("-----------------------------------------------------\n");
+    dump("The expected marker schema:\n");
+    dump("-----------------------------------------------------\n");
+    dump(JSON.stringify(expected, null, 2));
+    dump("\n");
+    dump("-----------------------------------------------------\n");
+    dump("The actual marker schema:\n");
+    dump("-----------------------------------------------------\n");
+    dump(JSON.stringify(actual, null, 2));
+    dump("\n");
+    dump("-----------------------------------------------------\n");
+    dump("A marker schema was not equal to expectations. If you\n");
+    dump("are modifying the schema, then please copy and paste\n");
+    dump("the new schema into this test.\n");
+    dump("-----------------------------------------------------\n");
+    dump("Copy this: " + JSON.stringify(actual));
+    dump("\n");
+    dump("-----------------------------------------------------\n");
+
+    throw error;
+  }
+}
diff --git a/tools/profiler/tests/xpcshell/test_active_configuration.js b/tools/profiler/tests/xpcshell/test_active_configuration.js
new file mode 100644
index 0000000000..c4336f3f32
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_active_configuration.js
@@ -0,0 +1,115 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async () => {
+  info(
+    "Checking that the profiler can fetch the information about the active " +
+      "configuration that is being used to power the profiler."
+  );
+
+  equal(
+    Services.profiler.activeConfiguration,
+    null,
+    "When the profile is off, there is no active configuration."
+  );
+
+  {
+    info("Start the profiler.");
+    const entries = 10000;
+    const interval = 1;
+    const threads = ["GeckoMain"];
+    const features = ["js"];
+    const activeTabID = 123;
+    await Services.profiler.StartProfiler(
+      entries,
+      interval,
+      features,
+      threads,
+      activeTabID
+    );
+
+    info("Generate the activeConfiguration.");
+    const { activeConfiguration } = Services.profiler;
+    const expectedConfiguration = {
+      interval,
+      threads,
+      features,
+      activeTabID,
+      // The buffer is created as a power of two that can fit all of the entries
+      // into it. If the ratio of entries to buffer size ever changes, this setting
+      // will need to be updated.
+      capacity: Math.pow(2, 14),
+    };
+
+    deepEqual(
+      activeConfiguration,
+      expectedConfiguration,
+      "The active configuration matches configuration given."
+    );
+
+    info("Get the profile.");
+    const profile = Services.profiler.getProfileData();
+    deepEqual(
+      profile.meta.configuration,
+      expectedConfiguration,
+      "The configuration also matches on the profile meta object."
+    );
+  }
+
+  {
+    const entries = 20000;
+    const interval = 0.5;
+    const threads = ["GeckoMain", "DOM Worker"];
+    const features = [];
+    const activeTabID = 111;
+    const duration = 20;
+
+    info("Restart the profiler with a new configuration.");
+    await Services.profiler.StartProfiler(
+      entries,
+      interval,
+      features,
+      threads,
+      activeTabID,
+      // Also start it with duration, this property is optional.
+      duration
+    );
+
+    info("Generate the activeConfiguration.");
+    const { activeConfiguration } = Services.profiler;
+    const expectedConfiguration = {
+      interval,
+      threads,
+      features,
+      activeTabID,
+      duration,
+      // The buffer is created as a power of two that can fit all of the entries
+      // into it. If the ratio of entries to buffer size ever changes, this setting
+      // will need to be updated.
+      capacity: Math.pow(2, 15),
+    };
+
+    deepEqual(
+      activeConfiguration,
+      expectedConfiguration,
+      "The active configuration matches the new configuration."
+    );
+
+    info("Get the profile.");
+    const profile = Services.profiler.getProfileData();
+    deepEqual(
+      profile.meta.configuration,
+      expectedConfiguration,
+      "The configuration also matches on the profile meta object."
+    );
+  }
+
+  await Services.profiler.StopProfiler();
+
+  equal(
+    Services.profiler.activeConfiguration,
+    null,
+    "When the profile is off, there is no active configuration."
+  );
+});
diff --git a/tools/profiler/tests/xpcshell/test_addProfilerMarker.js b/tools/profiler/tests/xpcshell/test_addProfilerMarker.js
new file mode 100644
index 0000000000..b11545a41c
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_addProfilerMarker.js
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that ChromeUtils.addProfilerMarker is working correctly.
+ */
+
+const markerNamePrefix = "test_addProfilerMarker";
+const markerText = "Text payload";
+// The same startTime will be used for all markers with a duration,
+// and we store this value globally so that expectDuration and
+// expectNoDuration can access it. The value isn't set here as we
+// want a start time after the profiler has started
+var startTime;
+
+function expectNoDuration(marker) {
+  Assert.equal(
+    typeof marker.startTime,
+    "number",
+    "startTime should be a number"
+  );
+  Assert.greater(
+    marker.startTime,
+    startTime, // The file-level startTime, assigned by the test task once the profiler runs.
+    "startTime should be after the begining of the test"
+  );
+  Assert.equal(typeof marker.endTime, "number", "endTime should be a number");
+  Assert.equal(marker.endTime, 0, "endTime should be 0"); // Markers without a duration must report a zero endTime.
+}
+
+function expectDuration(marker) {
+  Assert.equal(
+    typeof marker.startTime,
+    "number",
+    "startTime should be a number"
+  );
+  // Floats can cause rounding issues. We've seen up to a 4.17e-5 difference in
+  // intermittent failures, so we are permissive and accept up to 5e-5.
+  Assert.less(
+    Math.abs(marker.startTime - startTime),
+    5e-5,
+    "startTime should be the expected time"
+  );
+  Assert.equal(typeof marker.endTime, "number", "endTime should be a number");
+  Assert.greater(
+    marker.endTime,
+    startTime, // Compared against the file-level startTime, not marker.startTime.
+    "endTime should be after startTime"
+  );
+}
+
+function expectNoData(marker) {
+  Assert.equal(
+    typeof marker.data, // Checks that the property is absent/undefined, not merely empty.
+    "undefined",
+    "The data property should be undefined"
+  );
+}
+
+function expectText(marker) {
+  Assert.equal(
+    typeof marker.data,
+    "object",
+    "The data property should be an object"
+  );
+  Assert.equal(marker.data.type, "Text", "Should be a Text marker");
+  Assert.equal(
+    marker.data.name, // The text payload is read from the "name" key of the data object.
+    markerText,
+    "The payload should contain the expected text"
+  );
+}
+
+function expectNoStack(marker) {
+  Assert.ok(!marker.data || !marker.data.stack, "There should be no stack");
+}
+
+function expectStack(marker, thread) {
+  let stack = marker.data.stack;
+  Assert.ok(!!stack, "There should be a stack");
+
+  // Marker stacks are recorded as a profile of a thread with a single sample,
+  // get the stack id.
+  stack = stack.samples.data[0][stack.samples.schema.stack];
+
+  const stackPrefixCol = thread.stackTable.schema.prefix;
+  const stackFrameCol = thread.stackTable.schema.frame;
+  const frameLocationCol = thread.frameTable.schema.location;
+
+  // Get the entire stack in an array for easier processing.
+  let result = [];
+  while (stack != null) {
+    let stackEntry = thread.stackTable.data[stack];
+    let frame = thread.frameTable.data[stackEntry[stackFrameCol]];
+    result.push(thread.stringTable[frame[frameLocationCol]]);
+    stack = stackEntry[stackPrefixCol];
+  }
+
+  Assert.greaterOrEqual(
+    result.length,
+    1,
+    "There should be at least one frame in the stack"
+  );
+
+  Assert.ok(
+    result.some(frame => frame.includes("testMarker")),
+    "the 'testMarker' function should be visible in the stack"
+  );
+
+  Assert.ok(
+    !result.some(frame => frame.includes("ChromeUtils.addProfilerMarker")),
+    "the 'ChromeUtils.addProfilerMarker' label frame should not be visible in the stack"
+  );
+}
+
+add_task(async () => {
+  startProfilerForMarkerTests();
+  startTime = Cu.now();
+  while (Cu.now() < startTime + 1) {
+    // Busy wait for 1ms to ensure the intentionally set start time of markers
+    // will be significantly different from the time at which the marker is
+    // recorded.
+  }
+  info("startTime used for markers with durations: " + startTime);
+
+  /* Each call to testMarker will record a marker with a unique name.
+   * The testFunctions and testCases objects contain respectively test
+   * functions to verify that the marker found in the captured profile
+   * matches expectations, and a string that can be printed to describe
+   * in which way ChromeUtils.addProfilerMarker was called. */
+  let testFunctions = {};
+  let testCases = {};
+  let markerId = 0;
+  function testMarker(args, checks) {
+    let name = markerNamePrefix + markerId++;
+    ChromeUtils.addProfilerMarker(name, ...args);
+    testFunctions[name] = checks;
+    testCases[name] = `ChromeUtils.addProfilerMarker(${[name, ...args]
+      .toSource()
+      .slice(1, -1)})`; // slice(1, -1) drops the first and last character of the array source text.
+  }
+
+  info("Record markers without options object.");
+  testMarker([], m => {
+    expectNoDuration(m);
+    expectNoData(m);
+  });
+  testMarker([startTime], m => {
+    expectDuration(m);
+    expectNoData(m);
+  });
+  testMarker([undefined, markerText], m => {
+    expectNoDuration(m);
+    expectText(m);
+  });
+  testMarker([startTime, markerText], m => {
+    expectDuration(m);
+    expectText(m);
+  });
+
+  info("Record markers providing the duration as the startTime property.");
+  testMarker([{ startTime }], m => {
+    expectDuration(m);
+    expectNoData(m);
+  });
+  testMarker([{}, markerText], m => {
+    expectNoDuration(m);
+    expectText(m);
+  });
+  testMarker([{ startTime }, markerText], m => {
+    expectDuration(m);
+    expectText(m);
+  });
+
+  info("Record markers to test the captureStack property.");
+  const captureStack = true;
+  testMarker([], expectNoStack);
+  testMarker([startTime, markerText], expectNoStack);
+  testMarker([{ captureStack: false }], expectNoStack);
+  testMarker([{ captureStack }], expectStack);
+  testMarker([{ startTime, captureStack }], expectStack);
+  testMarker([{ captureStack }, markerText], expectStack);
+  testMarker([{ startTime, captureStack }, markerText], expectStack);
+
+  info("Record markers to test the category property");
+  function testCategory(args, expectedCategory) {
+    testMarker(args, marker => {
+      Assert.equal(marker.category, expectedCategory);
+    });
+  }
+  testCategory([], "JavaScript");
+  testCategory([{ category: "Test" }], "Test");
+  testCategory([{ category: "Test" }, markerText], "Test");
+  testCategory([{ category: "JavaScript" }], "JavaScript");
+  testCategory([{ category: "Other" }], "Other");
+  testCategory([{ category: "DOM" }], "DOM");
+  testCategory([{ category: "does not exist" }], "Other"); // An unknown category name is expected to come back as "Other".
+
+  info("Capture the profile");
+  const profile = await stopNowAndGetProfile();
+  const mainThread = profile.threads.find(({ name }) => name === "GeckoMain");
+  const markers = getInflatedMarkerData(mainThread).filter(m =>
+    m.name.startsWith(markerNamePrefix)
+  );
+  Assert.equal(
+    markers.length,
+    Object.keys(testFunctions).length,
+    `Found ${markers.length} test markers in the captured profile`
+  );
+
+  for (let marker of markers) {
+    marker.category = profile.meta.categories[marker.category].name; // Swap the category index for its name so the checks can compare names.
+    info(`${testCases[marker.name]} -> ${marker.toSource()}`);
+
+    testFunctions[marker.name](marker, mainThread);
+    delete testFunctions[marker.name]; // Checked entries are removed; leftovers are markers that never showed up.
+  }
+
+  Assert.equal(0, Object.keys(testFunctions).length, "all markers were found");
+});
diff --git a/tools/profiler/tests/xpcshell/test_asm.js b/tools/profiler/tests/xpcshell/test_asm.js
new file mode 100644
index 0000000000..ced36ce429
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_asm.js
@@ -0,0 +1,76 @@
+// Check that asm.js code shows up on the stack.
+add_task(async () => {
+  // This test assumes that it's starting on an empty profiler stack.
+  // (Note that the other profiler tests also assume the profiler
+  // isn't already started.)
+  Assert.ok(!Services.profiler.IsActive());
+
+  let jsFuns = Cu.getJSTestingFunctions();
+  if (!jsFuns.isAsmJSCompilationAvailable()) {
+    return; // Nothing to check when asm.js compilation is unavailable.
+  }
+
+  const ms = 10;
+  await Services.profiler.StartProfiler(10000, ms, ["js"]);
+
+  let stack = null;
+  function ffi_function() {
+    var delayMS = 5;
+    while (1) {
+      let then = Date.now();
+      do {
+        // do nothing
+      } while (Date.now() - then < delayMS);
+
+      var thread0 = Services.profiler.getProfileData().threads[0];
+
+      if (delayMS > 30000) {
+        return; // Give up once the spin delay has grown past 30 seconds.
+      }
+
+      delayMS *= 2; // Spin twice as long on the next attempt.
+
+      if (!thread0.samples.data.length) {
+        continue;
+      }
+
+      var lastSample = thread0.samples.data[thread0.samples.data.length - 1];
+      stack = String(getInflatedStackLocations(thread0, lastSample));
+      if (stack.includes("trampoline")) {
+        return; // The most recent sample shows a trampoline frame; stop spinning.
+      }
+    }
+  }
+
+  function asmjs_module(global, ffis) {
+    "use asm";
+    var ffi = ffis.ffi;
+    function asmjs_function() {
+      ffi();
+    }
+    return asmjs_function;
+  }
+
+  Assert.ok(jsFuns.isAsmJSModule(asmjs_module));
+
+  var asmjs_function = asmjs_module(null, { ffi: ffi_function });
+  Assert.ok(jsFuns.isAsmJSFunction(asmjs_function));
+
+  asmjs_function();
+
+  Assert.notEqual(stack, null);
+
+  var i1 = stack.indexOf("entry trampoline");
+  Assert.ok(i1 !== -1);
+  var i2 = stack.indexOf("asmjs_function");
+  Assert.ok(i2 !== -1);
+  var i3 = stack.indexOf("exit trampoline");
+  Assert.ok(i3 !== -1);
+  var i4 = stack.indexOf("ffi_function");
+  Assert.ok(i4 !== -1);
+  Assert.ok(i1 < i2); // The four names must appear in this order in the stringified stack.
+  Assert.ok(i2 < i3);
+  Assert.ok(i3 < i4);
+
+  await Services.profiler.StopProfiler();
+});
diff --git a/tools/profiler/tests/xpcshell/test_assertion_helper.js b/tools/profiler/tests/xpcshell/test_assertion_helper.js
new file mode 100644
index 0000000000..baa4c34818
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_assertion_helper.js
@@ -0,0 +1,162 @@
+add_task(function setup() {
+  // With the default reporter, an assertion doesn't throw if it fails, it
+  // merely reports the result to the reporter and then goes on. But in this
+  // test we want a failure to really throw, so that we can actually assert
+  // that it throws in case of failures!
+  // That's why we disable the default reporter here.
+  // I noticed that this line needs to be in an add_task (or possibly run_test)
+  // function. If put outside this will crash the test.
+  Assert.setReporter(null);
+});
+
+add_task(function test_objectContains() {
+  const fixture = {
+    foo: "foo",
+    bar: "bar",
+  };
+
+  Assert.objectContains(fixture, { foo: "foo" }, "Matches one property value");
+  Assert.objectContains(
+    fixture,
+    { foo: "foo", bar: "bar" },
+    "Matches both properties"
+  );
+  Assert.objectContainsOnly(
+    fixture,
+    { foo: "foo", bar: "bar" },
+    "Matches both properties"
+  );
+  Assert.throws(
+    () => Assert.objectContainsOnly(fixture, { foo: "foo" }), // "bar" exists on the fixture but is not listed here.
+    /AssertionError/,
+    "Fails if some properties are missing"
+  );
+  Assert.throws(
+    () => Assert.objectContains(fixture, { foo: "bar" }),
+    /AssertionError/,
+    "Fails if the value for a present property is wrong"
+  );
+  Assert.throws(
+    () => Assert.objectContains(fixture, { hello: "world" }),
+    /AssertionError/,
+    "Fails if an expected property is missing"
+  );
+  Assert.throws(
+    () => Assert.objectContains(fixture, { foo: "foo", hello: "world" }),
+    /AssertionError/,
+    "Fails if some properties are present but others are missing"
+  );
+});
+
+add_task(function test_objectContains_expectations() {
+  const fixture = {
+    foo: "foo",
+    bar: "bar",
+    num: 42,
+    nested: {
+      nestedFoo: "nestedFoo",
+      nestedBar: "nestedBar",
+    },
+  };
+
+  Assert.objectContains(
+    fixture,
+    {
+      foo: Expect.stringMatches(/^fo/),
+      bar: Expect.stringContains("ar"),
+      num: Expect.number(),
+      nested: Expect.objectContainsOnly({
+        nestedFoo: Expect.stringMatches(/[Ff]oo/),
+        nestedBar: Expect.stringMatches(/[Bb]ar/),
+      }),
+    },
+    "Supports expectations"
+  );
+  Assert.objectContainsOnly(
+    fixture,
+    {
+      foo: Expect.stringMatches(/^fo/),
+      bar: Expect.stringContains("ar"),
+      num: Expect.number(),
+      nested: Expect.objectContains({
+        nestedFoo: Expect.stringMatches(/[Ff]oo/),
+      }),
+    },
+    "Supports expectations"
+  );
+
+  Assert.objectContains(fixture, {
+    num: val => Assert.greater(val, 40), // A plain function as expectation; presumably called with the actual value (42 here).
+  });
+
+  // Failed expectations
+  Assert.throws(
+    () =>
+      Assert.objectContains(fixture, {
+        foo: Expect.stringMatches(/bar/),
+      }),
+    /AssertionError/,
+    "Expect.stringMatches shouldn't match when the value is unexpected"
+  );
+  Assert.throws(
+    () =>
+      Assert.objectContains(fixture, {
+        foo: Expect.stringContains("bar"),
+      }),
+    /AssertionError/,
+    "Expect.stringContains shouldn't match when the value is unexpected"
+  );
+  Assert.throws(
+    () =>
+      Assert.objectContains(fixture, {
+        foo: Expect.number(),
+      }),
+    /AssertionError/,
+    "Expect.number shouldn't match when the value isn't a number"
+  );
+  Assert.throws(
+    () =>
+      Assert.objectContains(fixture, {
+        nested: Expect.objectContains({
+          nestedFoo: "bar",
+        }),
+      }),
+    /AssertionError/,
+    "Expect.objectContains should throw when the value is unexpected"
+  );
+
+  Assert.throws(
+    () =>
+      Assert.objectContains(fixture, {
+        num: val => Assert.less(val, 40), // fixture.num is 42, so this inner assertion is the one expected to fail.
+      }),
+    /AssertionError/,
+    "Expect.objectContains should throw when a function assertion fails"
+  );
+});
+
+add_task(function test_type_expectations() {
+  const fixture = {
+    any: "foo",
+    string: "foo",
+    number: 42,
+    boolean: true,
+    bigint: 42n,
+    symbol: Symbol("foo"),
+    object: { foo: "foo" },
+    function1() {}, // Method shorthand form.
+    function2: () => {}, // Arrow function form.
+  };
+
+  Assert.objectContains(fixture, {
+    any: Expect.any(),
+    string: Expect.string(),
+    number: Expect.number(),
+    boolean: Expect.boolean(),
+    bigint: Expect.bigint(),
+    symbol: Expect.symbol(),
+    object: Expect.object(),
+    function1: Expect.function(), // Both function forms go through the same expectation.
+    function2: Expect.function(),
+  });
+});
diff --git a/tools/profiler/tests/xpcshell/test_enterjit_osr.js b/tools/profiler/tests/xpcshell/test_enterjit_osr.js
new file mode 100644
index 0000000000..86845ddc76
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_enterjit_osr.js
@@ -0,0 +1,52 @@
+// Check that the EnterJIT frame, added by the JIT trampoline and
+// usable by a native unwinder to resume unwinding after encountering
+// JIT code, is pushed as expected.
+function run_test() {
+  // This test assumes that it's starting on an empty profiler stack.
+  // (Note that the other profiler tests also assume the profiler
+  // isn't already started.)
+  Assert.ok(!Services.profiler.IsActive());
+
+  const ms = 5;
+  Services.profiler.StartProfiler(10000, ms, ["js"]);
+
+  function has_arbitrary_name_in_stack() {
+    // A frame for this function, whose name contains |arbitrary_name|, has been
+    // pushed.  Do a sequence of increasingly long spins until we get a sample.
+    var delayMS = 5;
+    while (1) {
+      info("loop: ms = " + delayMS);
+      const then = Date.now();
+      do {
+        let n = 10000;
+        // eslint-disable-next-line no-empty
+        while (--n) {} // OSR happens here
+        // Spin in the hope of getting a sample.
+      } while (Date.now() - then < delayMS);
+      let profile = Services.profiler.getProfileData().threads[0];
+
+      // Go through all of the stacks, and search for this function name.
+      for (const sample of profile.samples.data) {
+        const stack = getInflatedStackLocations(profile, sample);
+        info(`The following stack was found: ${stack}`);
+        for (var i = 0; i < stack.length; i++) {
+          if (stack[i].match(/arbitrary_name/)) {
+            // This JS sample was correctly found.
+            return true;
+          }
+        }
+      }
+
+      // Continue running this function with an increasingly long delay.
+      delayMS *= 2;
+      if (delayMS > 30000) {
+        return false; // Give up once the next spin would exceed 30 seconds.
+      }
+    }
+  }
+  Assert.ok(
+    has_arbitrary_name_in_stack(),
+    "A JS frame was found before the test timeout."
+  );
+  Services.profiler.StopProfiler();
+}
diff --git a/tools/profiler/tests/xpcshell/test_enterjit_osr_disabling.js b/tools/profiler/tests/xpcshell/test_enterjit_osr_disabling.js
new file mode 100644
index 0000000000..558c9b0c3b
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_enterjit_osr_disabling.js
@@ -0,0 +1,14 @@
+function run_test() {
+  Assert.ok(!Services.profiler.IsActive());
+
+  Services.profiler.StartProfiler(100, 10, ["js"]); // Arguments: entries, interval, features.
+  // The function is entered with the profiler enabled
+  (function () {
+    Services.profiler.StopProfiler();
+    let n = 10000;
+    // eslint-disable-next-line no-empty
+    while (--n) {} // OSR happens here with the profiler disabled.
+    // An assertion will fail when this function returns, if the
+    // profiler stack was misbalanced.
+  })();
+}
diff --git a/tools/profiler/tests/xpcshell/test_enterjit_osr_enabling.js b/tools/profiler/tests/xpcshell/test_enterjit_osr_enabling.js
new file mode 100644
index 0000000000..313d939caf
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_enterjit_osr_enabling.js
@@ -0,0 +1,14 @@
+function run_test() {
+  Assert.ok(!Services.profiler.IsActive());
+
+  // The function is entered with the profiler disabled.
+  (function () {
+    Services.profiler.StartProfiler(100, 10, ["js"]); // Arguments: entries, interval, features.
+    let n = 10000;
+    // eslint-disable-next-line no-empty
+    while (--n) {} // OSR happens here with the profiler enabled.
+    // An assertion will fail when this function returns, if the
+    // profiler stack was misbalanced.
+  })();
+  Services.profiler.StopProfiler();
+}
diff --git a/tools/profiler/tests/xpcshell/test_feature_fileioall.js b/tools/profiler/tests/xpcshell/test_feature_fileioall.js
new file mode 100644
index 0000000000..e5ac040b98
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_feature_fileioall.js
@@ -0,0 +1,159 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async () => {
+  info(
+    "Test that off-main thread fileio is captured for a profiled thread, " +
+      "and that it will be sent to the main thread."
+  );
+  const filename = "test_marker_fileio";
+  const profile = await startProfilerAndTriggerFileIO({
+    features: ["fileioall"],
+    threadsFilter: ["GeckoMain", "BgIOThreadPool"],
+    filename,
+  });
+
+  const threads = getThreads(profile);
+  const mainThread = threads.find(thread => thread.name === "GeckoMain");
+  const mainThreadFileIO = getInflatedFileIOMarkers(mainThread, filename);
+  let backgroundThread;
+  let backgroundThreadFileIO;
+  for (const thread of threads) {
+    // Check for FileIO in any of the background threads.
+    if (thread.name.startsWith("BgIOThreadPool")) {
+      const markers = getInflatedFileIOMarkers(thread, filename);
+      if (markers.length) {
+        backgroundThread = thread;
+        backgroundThreadFileIO = markers;
+        break; // Only the first background thread with matching markers is kept.
+      }
+    }
+  }
+
+  info("Check all of the main thread FileIO markers.");
+  checkInflatedFileIOMarkers(mainThreadFileIO, filename);
+  for (const { data, name } of mainThreadFileIO) {
+    equal(
+      name,
+      "FileIO (non-main thread)",
+      "The markers from off main thread are labeled as such."
+    );
+    equal(
+      data.threadId,
+      backgroundThread.tid, // NOTE(review): backgroundThread is still undefined if the loop above matched nothing.
+      "The main thread FileIO markers were all sent from the background thread."
+    );
+  }
+
+  info("Check all of the background thread FileIO markers.");
+  checkInflatedFileIOMarkers(backgroundThreadFileIO, filename);
+  for (const { data, name } of backgroundThreadFileIO) {
+    equal(
+      name,
+      "FileIO",
+      "The markers on the thread where they were generated just say FileIO"
+    );
+    equal(
+      data.threadId,
+      undefined,
+      "The background thread FileIO correctly excludes the threadId."
+    );
+  }
+});
+
+add_task(async () => {
+  info(
+    "Test that off-main thread fileio is captured for a thread that is not profiled, " +
+      "and that it will be sent to the main thread."
+  );
+  const filename = "test_marker_fileio";
+  const profile = await startProfilerAndTriggerFileIO({
+    features: ["fileioall"],
+    threadsFilter: ["GeckoMain"], // Unlike the previous task, BgIOThreadPool is left out of the filter.
+    filename,
+  });
+
+  const threads = getThreads(profile);
+  const mainThread = threads.find(thread => thread.name === "GeckoMain");
+  const mainThreadFileIO = getInflatedFileIOMarkers(mainThread, filename);
+
+  info("Check all of the main thread FileIO markers.");
+  checkInflatedFileIOMarkers(mainThreadFileIO, filename);
+  for (const { data, name } of mainThreadFileIO) {
+    equal(
+      name,
+      "FileIO (non-profiled thread)",
+      "The markers from off main thread are labeled as such."
+    );
+    equal(typeof data.threadId, "number", "A thread ID is captured.");
+  }
+});
+
+/**
+ * @typedef {Object} TestConfig
+ * @prop {Array} features The list of profiler features
+ * @prop {string[]} threadsFilter The list of threads to profile
+ * @prop {string} filename A filename to trigger a write operation
+ */
+
+/**
+ * Start the profiler and get FileIO markers.
+ * @param {TestConfig}
+ * @returns {Profile}
+ */
+async function startProfilerAndTriggerFileIO({
+  features,
+  threadsFilter,
+  filename,
+}) {
+  const entries = 10000;
+  const interval = 10;
+  await Services.profiler.StartProfiler(
+    entries,
+    interval,
+    features,
+    threadsFilter
+  );
+
+  const path = PathUtils.join(PathUtils.tempDir, filename);
+
+  info(`Using a temporary file to test FileIO: ${path}`);
+
+  if (fileExists(path)) {
+    console.warn(
+      "This test is triggering FileIO by writing to a file. However, the test found an " +
+        "existing file at the location it was trying to write to. This could happen " +
+        "because a previous run of the test failed to clean up after itself. This test " +
+        " will now clean up that file before running the test again."
+    );
+    await removeFile(path);
+  }
+
+  info("Write to the file, but do so using a background thread.");
+
+  // IOUtils handles file operations using a background thread.
+  await IOUtils.write(path, new TextEncoder().encode("Test data."));
+  const exists = await fileExists(path);
+  ok(exists, `Created temporary file at: ${path}`);
+
+  info("Remove the file");
+  await removeFile(path);
+
+  return stopNowAndGetProfile();
+}
+
+async function fileExists(file) {
+  try {
+    let { type } = await IOUtils.stat(file);
+    return type === "regular"; // Only regular files count; any other stat type yields false.
+  } catch (_error) {
+    return false; // Any failure of stat is reported as "does not exist".
+  }
+}
+
+async function removeFile(file) {
+  await IOUtils.remove(file);
+  const exists = await fileExists(file); // Re-check so the removal itself is asserted.
+  ok(!exists, `Removed temporary file: ${file}`);
+}
diff --git a/tools/profiler/tests/xpcshell/test_feature_java.js b/tools/profiler/tests/xpcshell/test_feature_java.js
new file mode 100644
index 0000000000..e2f6879c2b
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_feature_java.js
@@ -0,0 +1,31 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that Java capturing works as expected.
+ */
+add_task(async () => {
+  info("Test that Android Java sampler works as expected.");
+  const entries = 10000;
+  const interval = 1;
+  const threads = [];
+  const features = ["java"];
+
+  Services.profiler.StartProfiler(entries, interval, features, threads);
+  Assert.ok(Services.profiler.IsActive());
+
+  await captureAtLeastOneJsSample();
+
+  info(
+    "Stop the profiler and check that we have successfully captured a profile" +
+      " with the AndroidUI thread."
+  );
+  const profile = await stopNowAndGetProfile();
+  Assert.notEqual(profile, null);
+  const androidUiThread = profile.threads.find(
+    thread => thread.name == "AndroidUI (JVM)"
+  );
+  Assert.notEqual(androidUiThread, null);
+  Assert.ok(!Services.profiler.IsActive());
+});
diff --git a/tools/profiler/tests/xpcshell/test_feature_js.js b/tools/profiler/tests/xpcshell/test_feature_js.js
new file mode 100644
index 0000000000..a5949e4a0c
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_feature_js.js
@@ -0,0 +1,63 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that JS capturing works as expected.
+ */
+add_task(async () => {
+  const entries = 10000;
+  const interval = 1;
+  const threads = [];
+  const features = ["js"];
+
+  await Services.profiler.StartProfiler(entries, interval, features, threads);
+
+  // Call the following to get a nice stack in the profiler:
+  // functionA -> functionB -> functionC -> captureAtLeastOneJsSample
+  const sampleIndex = await functionA();
+
+  const profile = await stopNowAndGetProfile();
+
+  const [thread] = profile.threads;
+  const { samples } = thread;
+
+  const inflatedStackFrames = getInflatedStackLocations(
+    thread,
+    samples.data[sampleIndex] // The index returned through functionA; presumably the captured JS sample.
+  );
+
+  expectStackToContain(
+    inflatedStackFrames,
+    [
+      "(root)",
+      "js::RunScript",
+      // The following regexes match a string similar to:
+      //
+      // "functionA (/gecko/obj/_tests/xpcshell/tools/profiler/tests/xpcshell/test_feature_js.js:47:0)"
+      // or
+      // "functionA (test_feature_js.js:47:0)"
+      //
+      //          this matches the script location
+      //          |                       match the line number
+      //          |                       |   match the column number
+      //          v                       v   v
+      /^functionA \(.*test_feature_js\.js:\d+:\d+\)$/,
+      /^functionB \(.*test_feature_js\.js:\d+:\d+\)$/,
+      /^functionC \(.*test_feature_js\.js:\d+:\d+\)$/,
+    ],
+    "The stack contains a few frame labels, as well as the JS functions that we called."
+  );
+});
+
+function functionA() {
+  return functionB(); // Outermost of the named frames the test looks for in the sampled stack.
+}
+
+function functionB() {
+  return functionC(); // Middle frame of the functionA -> functionB -> functionC chain.
+}
+
+async function functionC() {
+  return captureAtLeastOneJsSample(); // Innermost frame; its result propagates back through functionB and functionA.
+}
diff --git a/tools/profiler/tests/xpcshell/test_feature_mainthreadio.js b/tools/profiler/tests/xpcshell/test_feature_mainthreadio.js
new file mode 100644
index 0000000000..8ff5c9206d
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_feature_mainthreadio.js
@@ -0,0 +1,122 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const { FileUtils } = ChromeUtils.importESModule(
+  "resource://gre/modules/FileUtils.sys.mjs"
+);
+
+/**
+ * Test that the IOInterposer is working correctly to capture main thread IO.
+ *
+ * This test should not run on release or beta, as the IOInterposer is wrapped in
+ * an ifdef.
+ */
+add_task(async () => {
+  {
+    const filename = "profiler-mainthreadio-test-firstrun";
+    const { markers, schema } = await runProfilerWithFileIO(
+      ["mainthreadio"],
+      filename
+    );
+    info("Check the FileIO markers when using the mainthreadio feature");
+    checkInflatedFileIOMarkers(markers, filename);
+
+    checkSchema(schema, {
+      name: "FileIO",
+      display: ["marker-chart", "marker-table", "timeline-fileio"],
+      data: [
+        {
+          key: "operation",
+          label: "Operation",
+          format: "string",
+          searchable: true,
+        },
+        { key: "source", label: "Source", format: "string", searchable: true },
+        {
+          key: "filename",
+          label: "Filename",
+          format: "file-path",
+          searchable: true,
+        },
+        {
+          key: "threadId",
+          label: "Thread ID",
+          format: "string",
+          searchable: true,
+        },
+      ],
+    });
+  }
+
+  {
+    const filename = "profiler-mainthreadio-test-no-instrumentation";
+    const { markers } = await runProfilerWithFileIO([], filename); // Empty feature list: mainthreadio is off for this run.
+    equal(
+      markers.length,
+      0,
+      "No FileIO markers are found when the mainthreadio feature is not turned on " +
+        "in the profiler."
+    );
+  }
+
+  {
+    const filename = "profiler-mainthreadio-test-secondrun";
+    const { markers } = await runProfilerWithFileIO(["mainthreadio"], filename);
+    info("Check the FileIO markers when re-starting the mainthreadio feature");
+    checkInflatedFileIOMarkers(markers, filename);
+  }
+});
+
+/**
+ * Start the profiler and get FileIO markers and schema.
+ *
+ * @param {Array} features The list of profiler features
+ * @param {string} filename A filename to trigger a write operation
+ * @returns {Promise<{
+ *   markers: InflatedMarkers[];
+ *   schema: MarkerSchema;
+ * }>}
+ */
+async function runProfilerWithFileIO(features, filename) {
+  const entries = 10000;
+  const interval = 10;
+  const threads = [];
+  await Services.profiler.StartProfiler(entries, interval, features, threads);
+
+  info("Get the file");
+  const file = FileUtils.getFile("TmpD", [filename]);
+  if (file.exists()) {
+    console.warn(
+      "This test is triggering FileIO by writing to a file. However, the test found an " +
+        "existing file at the location it was trying to write to. This could happen " +
+        "because a previous run of the test failed to clean up after itself. This test " +
+        " will now clean up that file before running the test again."
+    );
+    file.remove(false);
+  }
+
+  info(
+    "Generate file IO on the main thread using FileUtils.openSafeFileOutputStream."
+  );
+  const outputStream = FileUtils.openSafeFileOutputStream(file);
+
+  const data = "Test data.";
+  info("Write to the file");
+  outputStream.write(data, data.length);
+
+  info("Close the file");
+  FileUtils.closeSafeFileOutputStream(outputStream);
+
+  info("Remove the file");
+  file.remove(false);
+
+  const profile = await stopNowAndGetProfile();
+  const mainThread = profile.threads.find(({ name }) => name === "GeckoMain");
+
+  const schema = getSchema(profile, "FileIO");
+
+  const markers = getInflatedFileIOMarkers(mainThread, filename);
+
+  return { schema, markers };
+}
diff --git a/tools/profiler/tests/xpcshell/test_feature_nativeallocations.js b/tools/profiler/tests/xpcshell/test_feature_nativeallocations.js
new file mode 100644
index 0000000000..64398d7ef9
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_feature_nativeallocations.js
@@ -0,0 +1,158 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async () => {
+  if (!Services.profiler.GetFeatures().includes("nativeallocations")) {
+    Assert.ok(
+      true,
+      "Native allocations are not supported by this build, " +
+        "skip running the rest of the test."
+    );
+    return;
+  }
+
+  Assert.ok(
+    !Services.profiler.IsActive(),
+    "The profiler is not currently active"
+  );
+
+  info(
+    "Test that the profiler can install memory hooks and collect native allocation " +
+      "information in the marker payloads."
+  );
+  {
+    info("Start the profiler.");
+    await startProfiler({
+      // Only instrument the main thread.
+      threads: ["GeckoMain"],
+      features: ["js", "nativeallocations"],
+    });
+
+    info(
+      "Do some JS work for a little bit. This will increase the amount of allocations " +
+        "that take place."
+    );
+    doWork();
+
+    info("Get the profile data and analyze it.");
+    const profile = await waitSamplingAndStopAndGetProfile();
+
+    const {
+      allocationPayloads,
+      unmatchedAllocations,
+      logAllocationsAndDeallocations,
+    } = getAllocationInformation(profile);
+
+    Assert.greater(
+      allocationPayloads.length,
+      0,
+      "Native allocation payloads were recorded for the parent process' main thread when " +
+        "the Native Allocation feature was turned on."
+    );
+
+    if (unmatchedAllocations.length !== 0) {
+      info(
+        "There were unmatched allocations. Log all of the allocations and " +
+          "deallocations in order to aid debugging."
+      );
+      logAllocationsAndDeallocations();
+      ok(
+        false,
+        "Found a deallocation that did not have a matching allocation site. " +
+          "This could happen if balanced allocations is broken, or if the " +
+          "buffer size of this test was too small, and some markers ended up " +
+          "rolling off."
+      );
+    } else {
+      ok(true, "All deallocation sites had matching allocations.");
+    }
+  }
+
+  info("Restart the profiler, to ensure that we get no more allocations.");
+  {
+    await startProfiler({ features: ["js"] });
+    info("Do some work again.");
+    doWork();
+    info("Wait for the periodic sampling.");
+    const profile = await waitSamplingAndStopAndGetProfile();
+    const allocationPayloads = getPayloadsOfType(
+      profile.threads[0],
+      "Native allocation"
+    );
+
+    Assert.equal(
+      allocationPayloads.length,
+      0,
+      "No native allocations were collected when the feature was disabled."
+    );
+  }
+});
+
+function doWork() {
+  this.n = 0; // Running total of the random numbers summed below.
+  for (let left = 1e5; left > 0; left--) {
+    this.n += Math.random();
+  }
+}
+
+/**
+ * Extract the allocation payloads, and find the unmatched allocations.
+ * Only the first thread of the profile (`profile.threads[0]`) is inspected.
+ */
+function getAllocationInformation(profile) {
+  // Get all of the allocation payloads.
+  const allocationPayloads = getPayloadsOfType(
+    profile.threads[0],
+    "Native allocation"
+  );
+
+  // A non-negative size is an allocation, a negative size is a deallocation.
+  const allocations = allocationPayloads.filter(
+    payload => ensureIsNumber(payload.size) >= 0
+  );
+  const deallocations = allocationPayloads.filter(
+    payload => ensureIsNumber(payload.size) < 0
+  );
+
+  // Collect every allocated address, to match the deallocations against.
+  const allocationSites = new Set(
+    allocations.map(({ memoryAddress }) => memoryAddress)
+  );
+
+  const unmatchedAllocations = deallocations.filter(
+    ({ memoryAddress }) => !allocationSites.has(memoryAddress)
+  );
+
+  // Provide a helper to log out the allocations and deallocations on failure.
+  function logAllocationsAndDeallocations() {
+    for (const { memoryAddress } of allocations) {
+      console.log("Allocations", formatHex(memoryAddress));
+    }
+
+    for (const { memoryAddress } of deallocations) {
+      console.log("Deallocations", formatHex(memoryAddress));
+    }
+
+    for (const { memoryAddress } of unmatchedAllocations) {
+      console.log("Deallocation with no allocation", formatHex(memoryAddress));
+    }
+  }
+
+  return {
+    allocationPayloads,
+    unmatchedAllocations,
+    logAllocationsAndDeallocations,
+  };
+}
+
+function ensureIsNumber(value) {
+  if (typeof value === "number") {
+    return value; // Already a number: hand it back unchanged.
+  }
+  throw new Error(`Expected a number: ${value}`);
+}
+
+function formatHex(number) {
+  return "0x" + number.toString(16); // Lowercase hexadecimal digits.
+}
diff --git a/tools/profiler/tests/xpcshell/test_feature_stackwalking.js b/tools/profiler/tests/xpcshell/test_feature_stackwalking.js
new file mode 100644
index 0000000000..aa0bc86547
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_feature_stackwalking.js
@@ -0,0 +1,48 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Do a basic test to see if native frames are being collected for stackwalking. This
+ * test is fairly naive, as it does not attempt to check that these are valid symbols,
+ * only that some kind of stack walking is happening. It does this by making sure at
+ * least two native frames are collected.
+ */
+add_task(async () => {
+  const entries = 10000;
+  const interval = 1;
+  const threads = [];
+  const features = ["stackwalk"];
+
+  await Services.profiler.StartProfiler(entries, interval, features, threads);
+  const sampleIndex = await captureAtLeastOneJsSample();
+
+  const profile = await stopNowAndGetProfile();
+  const [thread] = profile.threads;
+  const { samples } = thread;
+
+  const inflatedStackFrames = getInflatedStackLocations(
+    thread,
+    samples.data[sampleIndex]
+  );
+  const nativeStack = /^0x[0-9a-f]+$/;
+
+  expectStackToContain(
+    inflatedStackFrames,
+    [
+      "(root)",
+      // There are probably more native stacks here.
+      nativeStack,
+      nativeStack,
+      // Since this is an xpcshell test we know that JavaScript will run:
+      "js::RunScript",
+      // There are probably more native stacks here.
+      nativeStack,
+      nativeStack,
+    ],
+    "Expected native stacks to be interleaved between some frame labels. There should " +
+      "be more than one native stack if stack walking is working correctly. There " +
+      "is no attempt here to determine if the memory addresses point to the correct " +
+      "symbols"
+  );
+});
diff --git a/tools/profiler/tests/xpcshell/test_get_features.js b/tools/profiler/tests/xpcshell/test_get_features.js
new file mode 100644
index 0000000000..e9bf0047c8
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_get_features.js
@@ -0,0 +1,8 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function run_test() {
+  const features = Services.profiler.GetFeatures();
+  Assert.notEqual(features, null); // Neither null nor undefined.
+}
diff --git a/tools/profiler/tests/xpcshell/test_merged_stacks.js b/tools/profiler/tests/xpcshell/test_merged_stacks.js
new file mode 100644
index 0000000000..7f851e8de9
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_merged_stacks.js
@@ -0,0 +1,74 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we correctly merge the three stack types: JS, native, and frame labels.
+ */
+add_task(async () => {
+  const entries = 10000;
+  const interval = 1;
+  const threads = [];
+  const features = ["js", "stackwalk"];
+
+  await Services.profiler.StartProfiler(entries, interval, features, threads);
+
+  // Call the following to get a nice stack in the profiler:
+  // functionA -> functionB -> functionC
+  const sampleIndex = await functionA();
+
+  const profile = await stopNowAndGetProfile();
+  const [thread] = profile.threads;
+  const { samples } = thread;
+
+  const inflatedStackFrames = getInflatedStackLocations(
+    thread,
+    samples.data[sampleIndex]
+  );
+
+  const nativeStack = /^0x[0-9a-f]+$/; // Matches a hex address like "0x7f3a".
+
+  expectStackToContain(
+    inflatedStackFrames,
+    [
+      "(root)",
+      nativeStack,
+      nativeStack,
+      // There are more native stacks and frame labels here, but we know some execute
+      // and then the "js::RunScript" frame label runs.
+      "js::RunScript",
+      nativeStack,
+      nativeStack,
+      // The following regexes match a string similar to:
+      //
+      // "functionA (/gecko/obj/_tests/xpcshell/tools/profiler/tests/xpcshell/test_merged_stacks.js:47:0)"
+      // or
+      // "functionA (test_merged_stacks.js:47:0)"
+      //
+      //          this matches the script location
+      //          |                          match the line number
+      //          |                          |   match the column number
+      //          v                          v   v
+      /^functionA \(.*test_merged_stacks\.js:\d+:\d+\)$/,
+      /^functionB \(.*test_merged_stacks\.js:\d+:\d+\)$/,
+      /^functionC \(.*test_merged_stacks\.js:\d+:\d+\)$/,
+      // After the JS frames, there are a bunch of arbitrary native stack frames
+      // that run.
+      nativeStack,
+      nativeStack,
+    ],
+    "The stack contains a few frame labels, as well as the JS functions that we called."
+  );
+});
+
+async function functionA() {
+  return functionB(); // Outermost of the three expected JS frames.
+}
+
+async function functionB() {
+  return functionC(); // Middle of the three expected JS frames.
+}
+
+async function functionC() {
+  return captureAtLeastOneJsSample(); // Innermost expected JS frame.
+}
diff --git a/tools/profiler/tests/xpcshell/test_pause.js b/tools/profiler/tests/xpcshell/test_pause.js
new file mode 100644
index 0000000000..0e621fb19f
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_pause.js
@@ -0,0 +1,126 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async () => {
+  Assert.ok(!Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+
+  let startPromise = Services.profiler.StartProfiler(1000, 10, []);
+
+  // Checked before awaiting the promise: active and not paused by default.
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+
+  await startPromise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+
+  // Pause everything, which implicitly pauses sampling.
+  let pausePromise = Services.profiler.Pause();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  await pausePromise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  // While fully paused, pause and resume sampling only, no expected changes.
+  let pauseSamplingPromise = Services.profiler.PauseSampling();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  await pauseSamplingPromise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  let resumeSamplingPromise = Services.profiler.ResumeSampling();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  await resumeSamplingPromise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  // Resume everything.
+  let resumePromise = Services.profiler.Resume();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+
+  await resumePromise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+
+  // Pause sampling only.
+  let pauseSampling2Promise = Services.profiler.PauseSampling();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  await pauseSampling2Promise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  // While sampling is paused, pause everything.
+  let pause2Promise = Services.profiler.Pause();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  await pause2Promise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  // Resume, but sampling is still paused separately.
+  let resume2Promise = Services.profiler.Resume();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  await resume2Promise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(Services.profiler.IsSamplingPaused());
+
+  // Resume sampling only.
+  let resumeSampling2Promise = Services.profiler.ResumeSampling();
+
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+
+  await resumeSampling2Promise;
+  Assert.ok(Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+
+  let stopPromise = Services.profiler.StopProfiler();
+  Assert.ok(!Services.profiler.IsActive());
+  // Stopping is not pausing.
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+
+  await stopPromise;
+  Assert.ok(!Services.profiler.IsActive());
+  Assert.ok(!Services.profiler.IsPaused());
+  Assert.ok(!Services.profiler.IsSamplingPaused());
+});
diff --git a/tools/profiler/tests/xpcshell/test_responsiveness.js b/tools/profiler/tests/xpcshell/test_responsiveness.js
new file mode 100644
index 0000000000..5f57173090
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_responsiveness.js
@@ -0,0 +1,50 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Test that we can measure non-zero event delays
+ */
+
+add_task(async () => {
+  const entries = 10000;
+  const interval = 1;
+  const threads = [];
+  const features = [];
+
+  await Services.profiler.StartProfiler(entries, interval, features, threads);
+
+  await functionA();
+
+  const profile = await stopNowAndGetProfile();
+  const [thread] = profile.threads;
+  const { samples } = thread;
+  // Each sample is an array of columns, and the schema maps a column name to
+  // its index within that array.
+  const eventDelaySlot = samples.schema.eventDelay;
+
+  // A single sample with a non-zero event delay is enough for the test to
+  // pass. The message describes the expectation, so that it reads correctly
+  // in the log for both a pass and a failure.
+  const foundDelay = samples.data.some(sample => sample[eventDelaySlot] > 0);
+
+  Assert.ok(foundDelay, "Expected at least one sample with eventDelay > 0.");
+});
+
+// Keep the current thread busy until more than `milliseconds` have elapsed.
+// The clock is only read after each batch of 1e5 iterations, so the actual
+// duration can overshoot the request.
+function doSyncWork(milliseconds) {
+  const startTime = Date.now();
+  do {
+    this.n = 0;
+    for (let left = 1e5; left > 0; left--) {
+      this.n += Math.random();
+    }
+  } while (Date.now() - startTime <= milliseconds);
+}
+
+async function functionA() {
+  doSyncWork(100); // Busy-loop for more than 100ms before sampling.
+  return captureAtLeastOneJsSample();
+}
diff --git a/tools/profiler/tests/xpcshell/test_run.js b/tools/profiler/tests/xpcshell/test_run.js
new file mode 100644
index 0000000000..0e30edfd4e
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_run.js
@@ -0,0 +1,37 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function run_test() {
+  Assert.ok(!Services.profiler.IsActive());
+
+  Services.profiler.StartProfiler(1000, 10, []);
+
+  Assert.ok(Services.profiler.IsActive());
+
+  do_test_pending();
+
+  do_timeout(1000, () => {
+    // The serialized profile should be more than a trivially short string.
+    const serializedProfile = Services.profiler.GetProfile();
+    Assert.greater(serializedProfile.length, 10);
+
+    // The profile object should expose the expected top-level properties.
+    const profileData = Services.profiler.getProfileData();
+    Assert.notEqual(profileData, null);
+    Assert.notEqual(profileData.threads, null);
+    // Memory counters are only captured by default when jemalloc is turned
+    // on (and it isn't for ASAN), so unless this can be conditionalized for
+    // ASAN, we can't check that memory counter data is being captured.
+    Assert.notEqual(profileData.counters, null);
+    Assert.notEqual(profileData.memory, null);
+    Assert.ok(profileData.threads.length >= 1);
+    Assert.notEqual(profileData.threads[0].samples, null);
+    // NOTE: No samples are expected here, since there aren't any labels in
+    //       the xpcshell code.
+
+    Services.profiler.StopProfiler();
+    Assert.ok(!Services.profiler.IsActive());
+    do_test_finished();
+  });
+}
diff --git a/tools/profiler/tests/xpcshell/test_shared_library.js b/tools/profiler/tests/xpcshell/test_shared_library.js
new file mode 100644
index 0000000000..e211ca642b
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_shared_library.js
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function run_test() {
+  var libs = Services.profiler.sharedLibraries;
+
+  Assert.equal(typeof libs, "object");
+  Assert.ok(Array.isArray(libs));
+  Assert.ok(libs.length >= 1);
+  // Only the first library is checked for the expected property types.
+  Assert.equal(typeof libs[0], "object");
+  Assert.equal(typeof libs[0].name, "string");
+  Assert.equal(typeof libs[0].path, "string");
+  Assert.equal(typeof libs[0].debugName, "string");
+  Assert.equal(typeof libs[0].debugPath, "string");
+  Assert.equal(typeof libs[0].arch, "string");
+  Assert.equal(typeof libs[0].start, "number");
+  Assert.equal(typeof libs[0].end, "number");
+  Assert.ok(libs[0].start <= libs[0].end);
+}
diff --git a/tools/profiler/tests/xpcshell/test_start.js b/tools/profiler/tests/xpcshell/test_start.js
new file mode 100644
index 0000000000..c9ae135eb8
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/test_start.js
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+add_task(async () => {
+  Assert.ok(!Services.profiler.IsActive());
+
+  const started = Services.profiler.StartProfiler(10, 100, []);
+  // The state is checked once before the promise is awaited, and once after.
+  Assert.ok(Services.profiler.IsActive());
+
+  await started;
+  Assert.ok(Services.profiler.IsActive());
+
+  const stopped = Services.profiler.StopProfiler();
+  // Same here: check before awaiting the promise, then again afterwards.
+  Assert.ok(!Services.profiler.IsActive());
+
+  await stopped;
+  Assert.ok(!Services.profiler.IsActive());
+});
diff --git a/tools/profiler/tests/xpcshell/xpcshell.ini b/tools/profiler/tests/xpcshell/xpcshell.ini
new file mode 100644
index 0000000000..a7c461b4ac
--- /dev/null
+++ b/tools/profiler/tests/xpcshell/xpcshell.ini
@@ -0,0 +1,72 @@
+[DEFAULT]
+head = head.js
+support-files =
+  ../shared-head.js
+
+[test_active_configuration.js]
+skip-if = tsan # Intermittent timeouts, bug 1781449
+[test_addProfilerMarker.js]
+[test_start.js]
+skip-if = true
+[test_get_features.js]
+[test_responsiveness.js]
+skip-if = tsan # Times out on TSan, bug 1612707
+[test_shared_library.js]
+[test_run.js]
+skip-if = true
+[test_pause.js]
+[test_enterjit_osr.js]
+[test_enterjit_osr_disabling.js]
+skip-if = !debug
+[test_enterjit_osr_enabling.js]
+skip-if = !debug
+[test_asm.js]
+[test_feature_mainthreadio.js]
+skip-if =
+  release_or_beta
+  os == "win" && socketprocess_networking
+[test_feature_fileioall.js]
+skip-if =
+  release_or_beta
+
+# The sanitizer checks appear to overwrite our own memory hooks in xpcshell tests,
+# and no allocation markers are gathered. Skip this test in that configuration.
+[test_feature_nativeallocations.js]
+skip-if =
+  os == "android" && verify # bug 1757528
+  asan
+  tsan
+  socketprocess_networking
+
+# Native stackwalking is somewhat unreliable depending on the platform.
+#
+# We don't have frame pointers on macOS release and beta, so stack walking does not
+# work. See Bug 1571216 for more details.
+#
+# Linux can be very unreliable when native stackwalking through JavaScript code.
+# See Bug 1434402 for more details.
+#
+# For sanitizer builds, there were many intermittents, and we're not getting much
+# additional coverage there, so it's better to be a bit more reliable.
+[test_feature_stackwalking.js]
+skip-if =
+  os == "mac" && release_or_beta
+  os == "linux" && release_or_beta && !debug
+  asan
+  tsan
+
+[test_feature_js.js]
+skip-if = tsan # Times out on TSan, bug 1612707
+
+# See the comment on test_feature_stackwalking.js
+[test_merged_stacks.js]
+skip-if =
+  os == "mac" && release_or_beta
+  os == "linux" && release_or_beta && !debug
+  asan
+  tsan
+
+[test_assertion_helper.js]
+[test_feature_java.js]
+skip-if =
+  os != "android"
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
commit	6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree	a68f146d7fa01f0134297619fbe7e33db084e0aa /tools/profiler
parent	Initial commit. (diff)
download	thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip