25 files changed, 11388 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/public/BaseProfileJSONWriter.h b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h
new file mode 100644
index 0000000000..5dcf06f3f3
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h
@@ -0,0 +1,388 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BASEPROFILEJSONWRITER_H
+#define BASEPROFILEJSONWRITER_H
+
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/JSONWriter.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include <functional>
+#include <ostream>
+#include <string_view>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class SpliceableJSONWriter;
+
+// On average, profile JSONs are large enough such that we want to avoid
+// reallocating its buffer when expanding. Additionally, the contents of the
+// profile are not accessed until the profile is entirely written. For these
+// reasons we use a chunked writer that keeps an array of chunks, which is
+// concatenated together after writing is finished.
+class ChunkedJSONWriteFunc final : public JSONWriteFunc {
+ public:
+  friend class SpliceableJSONWriter;
+  // Starts with one empty, null-terminated chunk of kChunkSize bytes.
+  ChunkedJSONWriteFunc() : mChunkPtr{nullptr}, mChunkEnd{nullptr} {
+    AllocChunk(kChunkSize);
+  }
+  // True when there is no current chunk, e.g. after the chunks were taken.
+  bool IsEmpty() const {
+    MOZ_ASSERT_IF(!mChunkPtr, !mChunkEnd && mChunkList.length() == 0 &&
+                                  mChunkLengths.length() == 0);
+    return !mChunkPtr;
+  }
+  // Appends aStr after the data already written, keeping it null-terminated.
+  void Write(const Span<const char>& aStr) override {
+    MOZ_ASSERT(mChunkPtr >= mChunkList.back().get() && mChunkPtr <= mChunkEnd);
+    MOZ_ASSERT(mChunkEnd >= mChunkList.back().get() + mChunkLengths.back());
+    MOZ_ASSERT(*mChunkPtr == '\0');
+
+    // Most strings to be written are small, but subprocess profiles (e.g.,
+    // from the content process in e10s) may be huge. If the string is larger
+    // than a chunk, allocate its own chunk.
+    char* newPtr;
+    if (aStr.size() >= kChunkSize) {
+      AllocChunk(aStr.size() + 1);
+      newPtr = mChunkPtr + aStr.size();
+    } else {
+      newPtr = mChunkPtr + aStr.size();
+      if (newPtr >= mChunkEnd) {
+        AllocChunk(kChunkSize);
+        newPtr = mChunkPtr + aStr.size();
+      }
+    }
+    // newPtr is where the terminator goes: one past the copied characters.
+    memcpy(mChunkPtr, aStr.data(), aStr.size());
+    *newPtr = '\0';
+    mChunkPtr = newPtr;
+    mChunkLengths.back() += aStr.size();
+  }
+  void CopyDataIntoLazilyAllocatedBuffer(
+      const std::function<char*(size_t)>& aAllocator) const {
+    // Request a buffer for the full content plus a null terminator.
+    MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+    size_t totalLen = 1;
+    for (size_t i = 0; i < mChunkLengths.length(); i++) {
+      MOZ_ASSERT(strlen(mChunkList[i].get()) == mChunkLengths[i]);
+      totalLen += mChunkLengths[i];
+    }
+    char* ptr = aAllocator(totalLen);
+
+    if (!ptr) {
+      // Failed to allocate memory.
+      return;
+    }
+    // Concatenate the chunks in order, then terminate the result.
+    for (size_t i = 0; i < mChunkList.length(); i++) {
+      size_t len = mChunkLengths[i];
+      memcpy(ptr, mChunkList[i].get(), len);
+      ptr += len;
+    }
+    *ptr = '\0';
+  }
+  UniquePtr<char[]> CopyData() const {
+    UniquePtr<char[]> c;
+    CopyDataIntoLazilyAllocatedBuffer([&](size_t allocationSize) {
+      c = MakeUnique<char[]>(allocationSize);
+      return c.get();
+    });
+    return c;
+  }
+  void Take(ChunkedJSONWriteFunc&& aOther) {
+    for (size_t i = 0; i < aOther.mChunkList.length(); i++) {
+      MOZ_ALWAYS_TRUE(mChunkLengths.append(aOther.mChunkLengths[i]));
+      MOZ_ALWAYS_TRUE(mChunkList.append(std::move(aOther.mChunkList[i])));
+    }
+    mChunkPtr = mChunkList.back().get() + mChunkLengths.back();
+    mChunkEnd = mChunkPtr;  // Marks chunk as full; next Write() allocates.
+    aOther.mChunkPtr = nullptr;
+    aOther.mChunkEnd = nullptr;
+    aOther.mChunkList.clear();
+    aOther.mChunkLengths.clear();
+  }
+
+ private:
+  void AllocChunk(size_t aChunkSize) {
+    MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+    UniquePtr<char[]> newChunk = MakeUnique<char[]>(aChunkSize);
+    mChunkPtr = newChunk.get();
+    mChunkEnd = mChunkPtr + aChunkSize;
+    *mChunkPtr = '\0';  // Keep the new, empty chunk null-terminated.
+    MOZ_ALWAYS_TRUE(mChunkLengths.append(0));
+    MOZ_ALWAYS_TRUE(mChunkList.append(std::move(newChunk)));
+  }
+
+  static const size_t kChunkSize = 4096 * 512;  // 2 MiB per regular chunk.
+
+  // Pointer for writing inside the current chunk.
+  //
+  // The current chunk is always at the back of mChunkList, i.e.,
+  // mChunkList.back() <= mChunkPtr <= mChunkEnd.
+  char* mChunkPtr;
+
+  // Pointer to the end of the current chunk.
+  //
+  // The current chunk is always at the back of mChunkList, i.e.,
+  // mChunkEnd >= mChunkList.back() + mChunkLengths.back().
+  char* mChunkEnd;
+
+  // List of chunks and their lengths.
+  //
+  // For all i, the length of the string in mChunkList[i] is
+  // mChunkLengths[i].
+  Vector<UniquePtr<char[]>> mChunkList;
+  Vector<size_t> mChunkLengths;
+};
+
+struct OStreamJSONWriteFunc final : public JSONWriteFunc {
+  explicit OStreamJSONWriteFunc(std::ostream& aStream) : mStream(aStream) {}
+
+  // Streams the aStr.size() characters starting at aStr.data() into mStream.
+  void Write(const Span<const char>& aStr) override {
+    mStream << std::string_view(aStr.data(), aStr.size());
+  }
+  // Held by reference: the stream is not owned by this object.
+  std::ostream& mStream;
+};
+
+class UniqueJSONStrings;
+
+class SpliceableJSONWriter : public JSONWriter {
+ public:
+  explicit SpliceableJSONWriter(UniquePtr<JSONWriteFunc> aWriter)
+      : JSONWriter(std::move(aWriter)) {}
+
+  // "Bare" lists pass only empty strings to Start/EndCollection().
+  void StartBareList(CollectionStyle aStyle = MultiLineStyle) {
+    StartCollection(scEmptyString, scEmptyString, aStyle);
+  }
+  void EndBareList() { EndCollection(scEmptyString); }
+
+  // Timestamps in profiles must be streamed through this function: they are
+  // written in milliseconds relative to TimeStamp::ProcessCreation(), and
+  // null timestamps produce no output at all.
+  void TimeProperty(const Span<const char>& aName, const TimeStamp& aTime) {
+    if (aTime.IsNull()) {
+      return;
+    }
+    const auto sinceCreation = aTime - TimeStamp::ProcessCreation();
+    DoubleProperty(aName, sinceCreation.ToMilliseconds());
+  }
+
+  // Write aCount null elements in a row.
+  void NullElements(uint32_t aCount) {
+    for (uint32_t remaining = aCount; remaining != 0; --remaining) {
+      NullElement();
+    }
+  }
+
+  // Call Separator(), write aStr as-is, then flag that a comma is needed.
+  void Splice(const Span<const char>& aStr) {
+    Separator();
+    WriteFunc()->Write(aStr);
+    mNeedComma[mDepth] = true;
+  }
+
+  // Pointer-and-length flavor of Splice().
+  void Splice(const char* aStr, size_t aLen) {
+    Splice(Span<const char>(aStr, aLen));
+  }
+
+  // Splice the given JSON directly in, without quoting.
+  void SplicedJSONProperty(const Span<const char>& aMaybePropertyName,
+                           const Span<const char>& aJsonValue) {
+    Scalar(aMaybePropertyName, aJsonValue);
+  }
+
+  // Write all of aFunc's chunks, in order; aFunc itself is left untouched.
+  void CopyAndSplice(const ChunkedJSONWriteFunc& aFunc) {
+    Separator();
+    for (size_t chunk = 0; chunk < aFunc.mChunkList.length(); ++chunk) {
+      const Span<const char> piece(aFunc.mChunkList[chunk].get(),
+                                   aFunc.mChunkLengths[chunk]);
+      WriteFunc()->Write(piece);
+    }
+    mNeedComma[mDepth] = true;
+  }
+
+  // Write aFunc's chunks and then empty aFunc. This base implementation has
+  // to copy the data (the destination may be, e.g., an OStreamJSONWriteFunc);
+  // subclasses may override it to adopt the chunks instead.
+  virtual void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) {
+    CopyAndSplice(aFunc);
+    aFunc.mChunkPtr = nullptr;
+    aFunc.mChunkEnd = nullptr;
+    aFunc.mChunkList.clear();
+    aFunc.mChunkLengths.clear();
+  }
+
+  // Attach a UniqueJSONStrings; none may be attached already.
+  void SetUniqueStrings(UniqueJSONStrings& aUniqueStrings) {
+    MOZ_RELEASE_ASSERT(!mUniqueStrings);
+    mUniqueStrings = &aUniqueStrings;
+  }
+
+  // Detach the current UniqueJSONStrings; one must be attached.
+  void ResetUniqueStrings() {
+    MOZ_RELEASE_ASSERT(mUniqueStrings);
+    mUniqueStrings = nullptr;
+  }
+
+  // Add `aStr` to the unique-strings list (if not already there), and write its
+  // index as a named object property.
+  inline void UniqueStringProperty(const Span<const char>& aName,
+                                   const Span<const char>& aStr);
+
+  // Add `aStr` to the unique-strings list (if not already there), and write its
+  // index as an array element.
+  inline void UniqueStringElement(const Span<const char>& aStr);
+
+ private:
+  UniqueJSONStrings* mUniqueStrings = nullptr;
+};
+
+class SpliceableChunkedJSONWriter final : public SpliceableJSONWriter {
+ public:
+  explicit SpliceableChunkedJSONWriter()
+      : SpliceableJSONWriter(MakeUnique<ChunkedJSONWriteFunc>()) {}
+
+  // Read-only view of the underlying ChunkedJSONWriteFunc, typically used to
+  // copy the written data out. Must not be called after the data was taken.
+  const ChunkedJSONWriteFunc& ChunkedWriteFunc() const {
+    MOZ_ASSERT(!mTaken);
+    // The sole constructor installs a ChunkedJSONWriteFunc: safe downcast.
+    const auto* chunked = static_cast<const ChunkedJSONWriteFunc*>(WriteFunc());
+    return *chunked;
+  }
+
+  // Hand out the underlying ChunkedJSONWriteFunc as an rvalue so its data can
+  // be moved out. This writer shouldn't be used anymore after this.
+  ChunkedJSONWriteFunc&& TakeChunkedWriteFunc() {
+#ifdef DEBUG
+    MOZ_ASSERT(!mTaken);
+    mTaken = true;
+#endif  // DEBUG
+    // The sole constructor installs a ChunkedJSONWriteFunc: safe downcast.
+    auto* chunked = static_cast<ChunkedJSONWriteFunc*>(WriteFunc());
+    return std::move(*chunked);
+  }
+
+  // Adopts the chunks from aFunc without copying.
+  void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) override {
+    MOZ_ASSERT(!mTaken);
+    Separator();
+    // The sole constructor installs a ChunkedJSONWriteFunc: safe downcast.
+    auto* chunked = static_cast<ChunkedJSONWriteFunc*>(WriteFunc());
+    chunked->Take(std::move(aFunc));
+    mNeedComma[mDepth] = true;
+  }
+
+#ifdef DEBUG
+ private:
+  bool mTaken = false;
+#endif  // DEBUG
+};
+
+class JSONSchemaWriter {
+  JSONWriter& mWriter;
+  uint32_t mIndex = 0;  // Value given to the next field written.
+
+ public:
+  explicit JSONSchemaWriter(JSONWriter& aWriter) : mWriter(aWriter) {
+    mWriter.StartObjectProperty("schema",
+                                SpliceableJSONWriter::SingleLineStyle);
+  }
+  // Writes aName as an int property holding the next index (0, 1, 2, ...).
+  void WriteField(const Span<const char>& aName) {
+    const uint32_t fieldIndex = mIndex++;
+    mWriter.IntProperty(aName, fieldIndex);
+  }
+  // Overload for char arrays: the last array element is left out of the name.
+  template <size_t Np1>
+  void WriteField(const char (&aName)[Np1]) {
+    WriteField(Span<const char>(aName, Np1 - 1));
+  }
+  ~JSONSchemaWriter() { mWriter.EndObject(); }  // Ends the "schema" object.
+};
+
+// This class helps create an indexed list of unique strings, and inserts the
+// index as a JSON value. The collected list of unique strings can later be
+// inserted as a JSON array.
+// This can be useful for elements/properties with many repeated strings.
+//
+// With only JSONWriter w,
+// `w.WriteElement("a"); w.WriteElement("b"); w.WriteElement("a");`
+// when done inside a JSON array, will generate:
+// `["a", "b", "a"]`
+//
+// With UniqueStrings u,
+// `u.WriteElement(w, "a"); u.WriteElement(w, "b"); u.WriteElement(w, "a");`
+// when done inside a JSON array, will generate:
+// `[0, 1, 0]`
+// and later, `u.SpliceStringTableElements(w)` (inside a JSON array), will
+// output the corresponding indexed list of unique strings:
+// `["a", "b"]`
+class UniqueJSONStrings {
+ public:
+  // Start an empty list of unique strings.
+  MFBT_API explicit UniqueJSONStrings(
+      JSONWriter::CollectionStyle aStyle = JSONWriter::MultiLineStyle);
+
+  // Start with a copy of the strings from another list.
+  MFBT_API explicit UniqueJSONStrings(
+      const UniqueJSONStrings& aOther,
+      JSONWriter::CollectionStyle aStyle = JSONWriter::MultiLineStyle);
+
+  MFBT_API ~UniqueJSONStrings();
+
+  // Write the list index of `aStr` (added if new) as property `aName`.
+  void WriteProperty(JSONWriter& aWriter, const Span<const char>& aName,
+                     const Span<const char>& aStr) {
+    const uint32_t stringIndex = GetOrAddIndex(aStr);
+    aWriter.IntProperty(aName, stringIndex);
+  }
+
+  // Write, as an array element, the index of `aStr` in the list; `aStr` is
+  // added to the list first if it was not there yet.
+  void WriteElement(JSONWriter& aWriter, const Span<const char>& aStr) {
+    const uint32_t stringIndex = GetOrAddIndex(aStr);
+    aWriter.IntElement(stringIndex);
+  }
+
+  // Splice all collected unique strings into an array. This should only be done
+  // once, and then this UniqueStrings shouldn't be used anymore.
+  MFBT_API void SpliceStringTableElements(SpliceableJSONWriter& aWriter);
+
+ private:
+  // Index of `aStr` in the list; it is appended first if not yet listed.
+  MFBT_API uint32_t GetOrAddIndex(const Span<const char>& aStr);
+
+  SpliceableChunkedJSONWriter mStringTableWriter;
+  HashMap<HashNumber, uint32_t> mStringHashToIndexMap;
+};
+
+void SpliceableJSONWriter::UniqueStringProperty(const Span<const char>& aName,
+                                                const Span<const char>& aStr) {
+  MOZ_RELEASE_ASSERT(mUniqueStrings);  // SetUniqueStrings() must come first.
+  mUniqueStrings->WriteProperty(*this, aName, aStr);
+}
+
+// Add `aStr` to the list (if not already there), and write its index as an
+// array element. SetUniqueStrings() must have been called before.
+void SpliceableJSONWriter::UniqueStringElement(const Span<const char>& aStr) {
+  MOZ_RELEASE_ASSERT(mUniqueStrings);
+  mUniqueStrings->WriteElement(*this, aStr);
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // BASEPROFILEJSONWRITER_H
diff --git a/mozglue/baseprofiler/public/BaseProfiler.h b/mozglue/baseprofiler/public/BaseProfiler.h
new file mode 100644
index 0000000000..4bf1705041
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfiler.h
@@ -0,0 +1,964 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The Gecko Profiler is an always-on profiler that takes fast and low overhead
+// samples of the program execution using only userspace functionality for
+// portability. The goal of this module is to provide performance data in a
+// generic cross-platform way without requiring custom tools or kernel support.
+//
+// Samples are collected to form a timeline with optional timeline events
+// (markers) used for filtering. The samples include both native stacks and
+// platform-independent "label stack" frames.
+
+#ifndef BaseProfiler_h
+#define BaseProfiler_h
+
+// This file is safe to include unconditionally, and only defines
+// empty macros if MOZ_GECKO_PROFILER is not set.
+
+// These headers are also safe to include unconditionally, with empty macros if
+// MOZ_GECKO_PROFILER is not set.
+#include "mozilla/BaseProfilerCounts.h"
+
+// BaseProfilerMarkers.h is #included in the middle of this header!
+// #include "mozilla/BaseProfilerMarkers.h"
+
+#ifndef MOZ_GECKO_PROFILER
+
+#  include "mozilla/BaseProfilerMarkers.h"
+#  include "mozilla/UniquePtr.h"
+
+// This file can be #included unconditionally. However, everything within this
+// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the
+// following macros and functions, which encapsulate the most common operations
+// and thus avoid the need for many #ifdefs.
+
+#  define AUTO_BASE_PROFILER_INIT
+
+#  define BASE_PROFILER_REGISTER_THREAD(name)
+#  define BASE_PROFILER_UNREGISTER_THREAD()
+#  define AUTO_BASE_PROFILER_REGISTER_THREAD(name)
+
+#  define AUTO_BASE_PROFILER_THREAD_SLEEP
+#  define AUTO_BASE_PROFILER_THREAD_WAKE
+
+#  define AUTO_BASE_PROFILER_LABEL(label, categoryPair)
+#  define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair)
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr)
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str)
+#  define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx)
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, \
+                                                categoryPair, ctx, flags)
+
+#  define AUTO_PROFILER_STATS(name)
+
+// Function stubs for when MOZ_GECKO_PROFILER is not defined.
+
+namespace mozilla {
+// Empty placeholder type, only there so that the backtrace-capturing stubs
+// below can be declared.
+class ProfileChunkedBuffer {};
+
+namespace baseprofiler {
+// Empty placeholder type, only there so that `profiler_get_backtrace` below
+// can be declared.
+struct ProfilerBacktrace {};
+using UniqueProfilerBacktrace = UniquePtr<ProfilerBacktrace>;
+
+// Get/Capture-backtrace functions can return nullptr or false, the result
+// should be fed to another empty macro or stub anyway.
+
+static inline UniqueProfilerBacktrace profiler_get_backtrace() {
+  return nullptr;
+}
+// Stub: leaves aChunkedBuffer untouched and always returns false.
+static inline bool profiler_capture_backtrace_into(
+    ProfileChunkedBuffer& aChunkedBuffer) {
+  return false;
+}
+// Stub: always returns a null pointer.
+static inline UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
+  return nullptr;
+}
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#else  // !MOZ_GECKO_PROFILER
+
+#  include "BaseProfilingStack.h"
+
+#  include "mozilla/Assertions.h"
+#  include "mozilla/Atomics.h"
+#  include "mozilla/Attributes.h"
+#  include "mozilla/Maybe.h"
+#  include "mozilla/PowerOfTwo.h"
+#  include "mozilla/Sprintf.h"
+#  include "mozilla/ThreadLocal.h"
+#  include "mozilla/TimeStamp.h"
+#  include "mozilla/UniquePtr.h"
+
+#  include <functional>
+#  include <stdint.h>
+#  include <string>
+
+namespace mozilla {
+
+class MallocAllocPolicy;
+class ProfileChunkedBuffer;
+template <class T, size_t MinInlineCapacity, class AllocPolicy>
+class Vector;
+
+namespace baseprofiler {
+
+class ProfilerBacktrace;
+class SpliceableJSONWriter;
+
+// Builds `raiiObject<line>` names for the AUTO_BASE_PROFILER_* macros below.
+#  define BASE_PROFILER_RAII_PASTE(id, line) id##line
+#  define BASE_PROFILER_RAII_EXPAND(id, line) BASE_PROFILER_RAII_PASTE(id, line)
+#  define BASE_PROFILER_RAII BASE_PROFILER_RAII_EXPAND(raiiObject, __LINE__)
+
+//---------------------------------------------------------------------------
+// Profiler features
+//---------------------------------------------------------------------------
+
+// Higher-order macro containing all the feature info in one place. |MACRO| is
+// invoked as MACRO(bit index, "name", Identifier, "description") per feature.
+// The bit indices are used internally only and so can be changed without
+// consequence. Any changes to this list should also be applied to the feature
+// list in toolkit/components/extensions/schemas/geckoProfiler.json.
+#  define BASE_PROFILER_FOR_EACH_FEATURE(MACRO)                                \
+    MACRO(0, "java", Java, "Profile Java code, Android only")                  \
+                                                                               \
+    MACRO(1, "js", JS,                                                         \
+          "Get the JS engine to expose the JS stack to the profiler")          \
+                                                                               \
+    /* The DevTools profiler doesn't want the native addresses. */             \
+    MACRO(2, "leaf", Leaf, "Include the C++ leaf node if not stackwalking")    \
+                                                                               \
+    MACRO(3, "mainthreadio", MainThreadIO, "Add main thread file I/O")         \
+                                                                               \
+    MACRO(4, "fileio", FileIO,                                                 \
+          "Add file I/O from all profiled threads, implies mainthreadio")      \
+                                                                               \
+    MACRO(5, "fileioall", FileIOAll,                                           \
+          "Add file I/O from all threads, implies fileio")                     \
+                                                                               \
+    MACRO(6, "noiostacks", NoIOStacks,                                         \
+          "File I/O markers do not capture stacks, to reduce overhead")        \
+                                                                               \
+    MACRO(7, "screenshots", Screenshots,                                       \
+          "Take a snapshot of the window on every composition")                \
+                                                                               \
+    MACRO(8, "seqstyle", SequentialStyle,                                      \
+          "Disable parallel traversal in styling")                             \
+                                                                               \
+    MACRO(9, "stackwalk", StackWalk,                                           \
+          "Walk the C++ stack, not available on all platforms")                \
+                                                                               \
+    MACRO(10, "tasktracer", TaskTracer,                                        \
+          "Start profiling with feature TaskTracer")                           \
+                                                                               \
+    MACRO(11, "threads", Threads, "Profile the registered secondary threads")  \
+                                                                               \
+    MACRO(12, "jstracer", JSTracer, "Enable tracing of the JavaScript engine") \
+                                                                               \
+    MACRO(13, "jsallocations", JSAllocations,                                  \
+          "Have the JavaScript engine track allocations")                      \
+                                                                               \
+    MACRO(14, "nostacksampling", NoStackSampling,                              \
+          "Disable all stack sampling: Cancels \"js\", \"leaf\", "             \
+          "\"stackwalk\" and labels")                                          \
+                                                                               \
+    MACRO(15, "preferencereads", PreferenceReads,                              \
+          "Track when preferences are read")                                   \
+                                                                               \
+    MACRO(16, "nativeallocations", NativeAllocations,                          \
+          "Collect the stacks from a smaller subset of all native "            \
+          "allocations, biasing towards collecting larger allocations")        \
+                                                                               \
+    MACRO(17, "ipcmessages", IPCMessages,                                      \
+          "Have the IPC layer track cross-process messages")                   \
+                                                                               \
+    MACRO(18, "audiocallbacktracing", AudioCallbackTracing,                    \
+          "Audio callback tracing")                                            \
+                                                                               \
+    MACRO(19, "cpu", CPUUtilization, "CPU utilization")
+
+struct ProfilerFeature {
+#  define DECLARE(n_, str_, Name_, desc_)                     \
+    static constexpr uint32_t Name_ = (1u << n_);             \
+    static constexpr bool Has##Name_(uint32_t aFeatures) {    \
+      return (aFeatures & Name_) != 0;                        \
+    }                                                         \
+    static constexpr void Set##Name_(uint32_t& aFeatures) {   \
+      aFeatures = aFeatures | Name_;                          \
+    }                                                         \
+    static constexpr void Clear##Name_(uint32_t& aFeatures) { \
+      aFeatures = aFeatures & ~Name_;                         \
+    }
+
+  // For each feature: a single-bit constant, a getter, a setter and a clearer.
+  BASE_PROFILER_FOR_EACH_FEATURE(DECLARE)
+
+#  undef DECLARE
+};
+
+namespace detail {
+
+// RacyFeatures is only defined in this header file so that its methods can
+// be inlined into profiler_is_active(). Please do not use anything from the
+// detail namespace outside the profiler.
+
+// Within the profiler's code, the preferred way to check profiler activeness
+// and features is via ActivePS(). However, that requires locking gPSMutex.
+// There are some hot operations where absolute precision isn't required, so we
+// duplicate the activeness/feature state in a lock-free manner in this class.
+class RacyFeatures {
+ public:
+  MFBT_API static void SetActive(uint32_t aFeatures);
+
+  MFBT_API static void SetInactive();
+
+  MFBT_API static void SetPaused();
+
+  MFBT_API static void SetUnpaused();
+
+  MFBT_API static void SetSamplingPaused();
+
+  MFBT_API static void SetSamplingUnpaused();
+
+  MFBT_API static bool IsActive();
+
+  MFBT_API static bool IsActiveWithFeature(uint32_t aFeature);
+
+  // True if profiler is active, and not fully paused.
+  // Note that periodic sampling *could* be paused!
+  MFBT_API static bool IsActiveAndUnpaused();
+
+  // True if profiler is active, and sampling is not paused (through generic
+  // `SetPaused()` or specific `SetSamplingPaused()`).
+  MFBT_API static bool IsActiveAndSamplingUnpaused();
+
+ private:
+  static constexpr uint32_t Active = 1u << 31;
+  static constexpr uint32_t Paused = 1u << 30;
+  static constexpr uint32_t SamplingPaused = 1u << 29;
+  static constexpr uint32_t StateBits = Active | Paused | SamplingPaused;
+
+// Ensure none of the state bits overlap with any of the feature bits.
+#  define NO_OVERLAP(n_, str_, Name_, desc_)                 \
+    static_assert((ProfilerFeature::Name_ & StateBits) == 0, \
+                  "bad feature value");
+
+  BASE_PROFILER_FOR_EACH_FEATURE(NO_OVERLAP);
+#  undef NO_OVERLAP
+
+  // We combine the active bit with the feature bits so they can be read or
+  // written in a single atomic operation.
+  // TODO: Could this be MFBT_DATA for better inlining optimization?
+  static Atomic<uint32_t, MemoryOrdering::Relaxed> sActiveAndFeatures;
+};
+
+MFBT_API bool IsThreadBeingProfiled();
+
+}  // namespace detail
+
+//---------------------------------------------------------------------------
+// Start and stop the profiler
+//---------------------------------------------------------------------------
+
+static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_ENTRIES =
+#  if defined(GP_PLAT_arm_android)
+    MakePowerOfTwo32<128 * 1024>();  // 128k entries = 1MB
+#  else  // !GP_PLAT_arm_android
+    MakePowerOfTwo32<1024 * 1024>();  // 1M entries = 8MB
+#  endif
+
+// Startup profiling usually needs to capture more data, especially on slow
+// systems.
+static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_STARTUP_ENTRIES =
+#  if defined(GP_PLAT_arm_android)
+    MakePowerOfTwo32<256 * 1024>();  // 256k entries = 2MB
+#  else  // !GP_PLAT_arm_android
+    MakePowerOfTwo32<4 * 1024 * 1024>();  // 4M entries = 32MB
+#  endif
+
+#  define BASE_PROFILER_DEFAULT_DURATION 20
+#  define BASE_PROFILER_DEFAULT_INTERVAL 1
+
+// Initialize the profiler. If MOZ_PROFILER_STARTUP is set the profiler will
+// also be started. This call must happen before any other profiler calls
+// (except profiler_start(), which will call profiler_init() if it hasn't
+// already run).
+MFBT_API void profiler_init(void* stackTop);
+
+#  define AUTO_BASE_PROFILER_INIT \
+    ::mozilla::baseprofiler::AutoProfilerInit BASE_PROFILER_RAII
+
+// Clean up the profiler module, stopping it if required. This function may
+// also save a shutdown profile if requested. No profiler calls should happen
+// after this point and all profiling stack labels should have been popped.
+MFBT_API void profiler_shutdown();
+
+// Start the profiler -- initializing it first if necessary -- with the
+// selected options. Stops and restarts the profiler if it is already active.
+// After starting the profiler is "active". The samples will be recorded in a
+// circular buffer.
+//   "aCapacity" is the maximum number of 8-byte entries in the profiler's
+//               circular buffer.
+//   "aInterval" is the sampling interval, measured in milliseconds.
+//   "aFeatures" is the feature set. Features unsupported by this
+//               platform/configuration are ignored.
+//   "aFilters" is the list of thread filters. Threads that do not match any
+//              of the filters are not profiled. A filter matches a thread if
+//              (a) the thread name contains the filter as a case-insensitive
+//                  substring, or
+//              (b) the filter is of the form "pid:<n>" where n is the process
+//                  id of the process that the thread is running in.
+//   "aDuration" is the duration of entries in the profiler's circular buffer.
+MFBT_API void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
+                             uint32_t aFeatures, const char** aFilters,
+                             uint32_t aFilterCount,
+                             const Maybe<double>& aDuration = Nothing());
+
+// Stop the profiler and discard the profile without saving it. A no-op if the
+// profiler is inactive. After stopping the profiler is "inactive".
+MFBT_API void profiler_stop();
+
+// If the profiler is inactive, start it. If it's already active, restart it if
+// the requested settings differ from the current settings. Both the check and
+// the state change are performed while the profiler state is locked.
+// The only difference to profiler_start is that the current buffer contents are
+// not discarded if the profiler is already running with the requested settings.
+MFBT_API void profiler_ensure_started(
+    PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+    const char** aFilters, uint32_t aFilterCount,
+    const Maybe<double>& aDuration = Nothing());
+
+//---------------------------------------------------------------------------
+// Control the profiler
+//---------------------------------------------------------------------------
+
+// Register/unregister threads with the profiler. Both functions operate the
+// same whether the profiler is active or inactive.
+#  define BASE_PROFILER_REGISTER_THREAD(name)                             \
+    do {                                                                  \
+      char stackTop;                                                      \
+      ::mozilla::baseprofiler::profiler_register_thread(name, &stackTop); \
+    } while (0)
+#  define BASE_PROFILER_UNREGISTER_THREAD() \
+    ::mozilla::baseprofiler::profiler_unregister_thread()
+MFBT_API ProfilingStack* profiler_register_thread(const char* name,
+                                                  void* guessStackTop);
+MFBT_API void profiler_unregister_thread();
+
+// Registers a DOM Window (the JS global `window`) with the profiler. Each
+// Window _roughly_ corresponds to a single document loaded within a
+// BrowsingContext. The unique IDs for both the Window and BrowsingContext are
+// recorded to allow correlating different Windows loaded within the same tab or
+// frame element.
+//
+// We register pages for each navigation but we do not register
+// history.pushState or history.replaceState since they correspond to the same
+// Inner Window ID. When a Browsing context is first loaded, the first url
+// loaded in it will be about:blank. Because of that, this call keeps the first
+// non-about:blank registration of window and discards the previous one.
+//
+//   "aBrowsingContextID"     is the ID of the browsing context that document
+//                            belongs to. That's used to determine the tab of
+//                            that page.
+//   "aInnerWindowID"         is the ID of the `window` global object of that
+//                            document.
+//   "aUrl"                   is the URL of the page.
+//   "aEmbedderInnerWindowID" is the inner window id of embedder. It's used to
+//                            determine sub documents of a page.
+MFBT_API void profiler_register_page(uint64_t aBrowsingContextID,
+                                     uint64_t aInnerWindowID,
+                                     const std::string& aUrl,
+                                     uint64_t aEmbedderInnerWindowID);
+// Unregister page with the profiler.
+//
+// Takes an Inner Window ID and unregisters the page entry with the same ID.
+MFBT_API void profiler_unregister_page(uint64_t aRegisteredInnerWindowID);
+
+// Remove all registered and unregistered pages in the profiler.
+void profiler_clear_all_pages();
+
+class BaseProfilerCount;
+MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+// Register and unregister a thread within a scope.
+#  define AUTO_BASE_PROFILER_REGISTER_THREAD(name) \
+    ::mozilla::baseprofiler::AutoProfilerRegisterThread BASE_PROFILER_RAII(name)
+
+// Pause and resume the profiler. No-ops if the profiler is inactive. While
+// paused, the profiler will not take any samples and will not record any data
+// into its buffers. The profiler remains fully initialized in this state.
+// This feature will keep JavaScript profiling enabled, thus allowing toggling
+// the profiler without invalidating the JIT.
+MFBT_API void profiler_pause();
+MFBT_API void profiler_resume();
+
+// Only pause and resume the periodic sampling loop, including stack sampling,
+// counters, and profiling overheads.
+MFBT_API void profiler_pause_sampling();
+MFBT_API void profiler_resume_sampling();
+
+// These functions tell the profiler that a thread went to sleep so that we can
+// avoid sampling it while it's sleeping. Calling profiler_thread_sleep()
+// twice without an intervening profiler_thread_wake() is an error. All three
+// functions operate the same whether the profiler is active or inactive.
+MFBT_API void profiler_thread_sleep();
+MFBT_API void profiler_thread_wake();
+
+// Mark a thread as asleep/awake within a scope.
+#  define AUTO_BASE_PROFILER_THREAD_SLEEP \
+    ::mozilla::baseprofiler::AutoProfilerThreadSleep BASE_PROFILER_RAII
+#  define AUTO_BASE_PROFILER_THREAD_WAKE \
+    ::mozilla::baseprofiler::AutoProfilerThreadWake BASE_PROFILER_RAII
+
+//---------------------------------------------------------------------------
+// Get information from the profiler
+//---------------------------------------------------------------------------
+
+// Is the profiler active? Note: the return value of this function can become
+// immediately out-of-date. E.g. the profiler might be active but then
+// profiler_stop() is called immediately afterward. One common and reasonable
+// pattern of usage is the following:
+//
+//   if (profiler_is_active()) {
+//     ExpensiveData expensiveData = CreateExpensiveData();
+//     PROFILER_OPERATION(expensiveData);
+//   }
+//
+// where PROFILER_OPERATION is a no-op if the profiler is inactive. In this
+// case the profiler_is_active() check is just an optimization -- it prevents
+// us calling CreateExpensiveData() unnecessarily in most cases, but the
+// expensive data will end up being created but not used if another thread
+// stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION
+// calls.
+inline bool profiler_is_active() {
+  return baseprofiler::detail::RacyFeatures::IsActive();
+}
+
+// Same as profiler_is_active(), but with the same extra checks that determine
+// if the profiler would currently store markers. So this should be used before
+// doing some potentially-expensive work that's used in a marker. E.g.:
+//
+//   if (profiler_can_accept_markers()) {
+//     BASE_PROFILER_MARKER(name, OTHER, SomeMarkerType, expensivePayload);
+//   }
+inline bool profiler_can_accept_markers() {
+  return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused();
+}
+
+// Is the profiler active, and is the current thread being profiled?
+// (Same caveats and recommended usage as profiler_is_active().)
+inline bool profiler_thread_is_being_profiled() {
+  return profiler_is_active() && baseprofiler::detail::IsThreadBeingProfiled();
+}
+
+// Is the profiler active and paused? Returns false if the profiler is inactive.
+MFBT_API bool profiler_is_paused();
+
+// Is the profiler active and sampling is paused? Returns false if the profiler
+// is inactive.
+MFBT_API bool profiler_is_sampling_paused();
+
+// Is the current thread sleeping?
+MFBT_API bool profiler_thread_is_sleeping();
+
+// Get all the features supported by the profiler that are accepted by
+// profiler_start(). The result is the same whether the profiler is active or
+// not.
+MFBT_API uint32_t profiler_get_available_features();
+
+// Check if a profiler feature (specified via the ProfilerFeature type) is
+// active. Returns false if the profiler is inactive. Note: the return value
+// can become immediately out-of-date, much like the return value of
+// profiler_is_active().
+MFBT_API bool profiler_feature_active(uint32_t aFeature);
+
+// Get the params used to start the profiler. Returns 0 and an empty vector
+// (via outparams) if the profiler is inactive. It's possible that the features
+// returned may be slightly different to those requested due to required
+// adjustments.
+MFBT_API void profiler_get_start_params(
+    int* aEntrySize, Maybe<double>* aDuration, double* aInterval,
+    uint32_t* aFeatures, Vector<const char*, 0, MallocAllocPolicy>* aFilters);
+
+// The number of milliseconds since the process started. Operates the same
+// whether the profiler is active or inactive.
+MFBT_API double profiler_time();
+
+// Get the current process's ID.
+MFBT_API int profiler_current_process_id();
+
+// Get the current thread's ID.
+MFBT_API int profiler_current_thread_id();
+
+// Statically initialized to 0, then set once from profiler_init(), which should
+// be called from the main thread before any other use of the profiler.
+extern MFBT_DATA int scProfilerMainThreadId;
+
+inline int profiler_main_thread_id() { return scProfilerMainThreadId; }
+
+inline bool profiler_is_main_thread() {
+  return profiler_current_thread_id() == profiler_main_thread_id();
+}
+
+// An object of this class is passed to profiler_suspend_and_sample_thread().
+// For each stack frame, one of the Collect methods will be called.
+class ProfilerStackCollector {
+ public:
+  // Virtual, so destroying a collector through a base pointer is well-defined.
+  virtual ~ProfilerStackCollector() = default;
+
+  // Some collectors must guard against overwriting previous generations of
+  // data. These return Nothing (the default) when that is not a concern.
+  virtual Maybe<uint64_t> SamplePositionInBuffer() { return Nothing(); }
+  virtual Maybe<uint64_t> BufferRangeStart() { return Nothing(); }
+
+  // This method will be called once if the thread being suspended is the main
+  // thread. Default behaviour is to do nothing.
+  virtual void SetIsMainThread() {}
+
+  // WARNING: The target thread is suspended when the Collect methods are
+  // called. Do not try to allocate or acquire any locks, or you could
+  // deadlock. The target thread will have resumed by the time this function
+  // returns.
+  virtual void CollectNativeLeafAddr(void* aAddr) = 0;
+  virtual void CollectProfilingStackFrame(
+      const ProfilingStackFrame& aFrame) = 0;
+};
+
+// This method suspends the thread identified by aThreadId, samples its
+// profiling stack, JS stack, and (optionally) native stack, passing the
+// collected frames into aCollector. aFeatures dictates which profiler features
+// are used. |Leaf| is the only relevant one.
+MFBT_API void profiler_suspend_and_sample_thread(
+    int aThreadId, uint32_t aFeatures, ProfilerStackCollector& aCollector,
+    bool aSampleNative = true);
+
+struct ProfilerBacktraceDestructor {
+  MFBT_API void operator()(ProfilerBacktrace*);
+};
+
+using UniqueProfilerBacktrace =
+    UniquePtr<ProfilerBacktrace, ProfilerBacktraceDestructor>;
+
+// Immediately capture the current thread's call stack, store it in the provided
+// buffer (usually to avoid allocations if you can construct the buffer on the
+// stack). Returns false if unsuccessful, or if the profiler is inactive.
+MFBT_API bool profiler_capture_backtrace_into(
+    ProfileChunkedBuffer& aChunkedBuffer);
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfileChunkedBuffer (usually for later use in MarkerStack::TakeBacktrace()).
+// May be null if unsuccessful, or if the profiler is inactive.
+MFBT_API UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace();
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfilerBacktrace (usually for later use in marker functions that take a
+// ProfilerBacktrace). May be null if unsuccessful, or if the profiler is
+// inactive.
+MFBT_API UniqueProfilerBacktrace profiler_get_backtrace();
+
+struct ProfilerStats {
+  // Number of values counted so far, and their running total.
+  unsigned n = 0;
+  double sum = 0;
+  // Smallest value seen; starts at the largest finite double.
+  double min = std::numeric_limits<double>::max();
+  // Largest value seen; starts at 0, so only values above 0 can raise it.
+  double max = 0;
+  // Folds one value into the count, total, minimum and maximum.
+  void Count(double v) {
+    ++n;
+    sum += v;
+    min = (v < min) ? v : min;
+    max = (v > max) ? v : max;
+  }
+};
+
+struct ProfilerBufferInfo {  // Scalar fields default to 0.
+  // Index of the oldest entry.
+  uint64_t mRangeStart = 0;
+  // Index of the newest entry.
+  uint64_t mRangeEnd = 0;
+  // Buffer capacity in number of 8-byte entries.
+  uint32_t mEntryCount = 0;
+  // Sampling stats: Interval (us) between successive samplings.
+  ProfilerStats mIntervalsUs;
+  // Sampling stats: Total duration (us) of each sampling. (Split detail below.)
+  ProfilerStats mOverheadsUs;
+  // Sampling stats: Time (us) to acquire the lock before sampling.
+  ProfilerStats mLockingsUs;
+  // Sampling stats: Time (us) to discard expired data.
+  ProfilerStats mCleaningsUs;
+  // Sampling stats: Time (us) to collect counter data.
+  ProfilerStats mCountersUs;
+  // Sampling stats: Time (us) to sample thread stacks.
+  ProfilerStats mThreadsUs;
+};
+
+// Get information about the current buffer status.
+// Returns Nothing() if the profiler is inactive.
+//
+// This information may be useful to a user-interface displaying the current
+// status of the profiler, allowing the user to get a sense for how fast the
+// buffer is being written to, and how much data is visible.
+MFBT_API Maybe<ProfilerBufferInfo> profiler_get_buffer_info();
+
+// Uncomment the following line to display profiler runtime statistics at
+// shutdown.
+// #  define PROFILER_RUNTIME_STATS
+
+#  ifdef PROFILER_RUNTIME_STATS
+// This class gathers durations and displays some basic stats when destroyed.
+// It is intended to be used as a static variable (see `AUTO_PROFILER_STATS`
+// below), to display stats at the end of the program.
+class StaticBaseProfilerStats {
+ public:
+  explicit StaticBaseProfilerStats(const char* aName) : mName(aName) {}
+
+  // Prints the accumulated statistics for `mName` with printf.
+  ~StaticBaseProfilerStats() {
+    // All values are widened to unsigned long long to match the %llu formats.
+    using ULL = unsigned long long;
+    const ULL count = static_cast<ULL>(mNumberDurations);
+    if (count == 0) {
+      printf("[%d] Profiler stats `%s`: (nothing)\n",
+             profiler_current_process_id(), mName);
+      return;
+    }
+    const ULL totalNs = static_cast<ULL>(mSumDurationsNs);
+    const ULL longestNs = static_cast<ULL>(mLongestDurationNs);
+    printf("[%d] Profiler stats `%s`: %llu ns / %llu = %llu ns, max %llu ns\n",
+           profiler_current_process_id(), mName, totalNs, count,
+           totalNs / count, longestNs);
+  }
+
+  // Adds the time elapsed since aStart (rounded to whole nanoseconds) to the
+  // running totals, and raises the recorded maximum if it was exceeded.
+  void AddDurationFrom(TimeStamp aStart) {
+    const auto elapsed = TimeStamp::NowUnfuzzed() - aStart;
+    const DurationNs duration =
+        static_cast<DurationNs>(elapsed.ToMicroseconds() * 1000 + 0.5);
+    mSumDurationsNs += duration;
+    ++mNumberDurations;
+    // Raise mLongestDurationNs to `duration` if needed. A failed
+    // compareExchange means another thread changed the maximum in between, so
+    // reload it and compare again.
+    DurationNs longest = mLongestDurationNs;
+    while (MOZ_UNLIKELY(longest < duration) &&
+           MOZ_UNLIKELY(
+               !mLongestDurationNs.compareExchange(longest, duration))) {
+      longest = mLongestDurationNs;
+    }
+  }
+
+ private:
+  using DurationNs = uint64_t;
+  using Count = uint32_t;
+
+  Atomic<DurationNs> mSumDurationsNs{0};
+  Atomic<DurationNs> mLongestDurationNs{0};
+  Atomic<Count> mNumberDurations{0};
+  const char* mName;
+};
+
+// RAII object that measures its scoped lifetime duration and reports it to a
+// `StaticBaseProfilerStats`.
+class MOZ_RAII AutoProfilerStats {
+ public:
+  explicit AutoProfilerStats(StaticBaseProfilerStats& aStats)
+      : mStats(aStats), mStart(TimeStamp::NowUnfuzzed()) {}
+  // Hands the construction timestamp to mStats, which adds the elapsed time.
+  ~AutoProfilerStats() { mStats.AddDurationFrom(mStart); }
+
+ private:
+  StaticBaseProfilerStats& mStats;
+  TimeStamp mStart;
+};
+
+// Macro that should be used to collect basic statistics from measurements of
+// block durations, from where this macro is, until the end of its enclosing
+// scope. The name is used in the static variable name and when displaying stats
+// at the end of the program; Another location could use the same name but their
+// stats will not be combined, so use different name if these locations should
+// be distinguished.
+#    define AUTO_PROFILER_STATS(name)                                      \
+      static ::mozilla::baseprofiler::StaticBaseProfilerStats sStat##name( \
+          #name);                                                          \
+      ::mozilla::baseprofiler::AutoProfilerStats autoStat##name(sStat##name);
+
+#  else  // PROFILER_RUNTIME_STATS
+
+#    define AUTO_PROFILER_STATS(name)
+
+#  endif  // PROFILER_RUNTIME_STATS else
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+// BaseProfilerMarkers.h requires some stuff from this header.
+// TODO: Move common stuff to shared header, and move this #include to the top.
+#  include "mozilla/BaseProfilerMarkers.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+//---------------------------------------------------------------------------
+// Put profiling data into the profiler (labels and markers)
+//---------------------------------------------------------------------------
+
+// Insert an RAII object in this scope to enter a label stack frame. Any
+// samples collected in this scope will contain this label in their stack.
+// The label argument must be a static C string. It is usually of the
+// form "ClassName::FunctionName". (Ideally we'd use the compiler to provide
+// that for us, but __func__ gives us the function name without the class
+// name.) If the label applies to only part of a function, you can qualify it
+// like this: "ClassName::FunctionName:PartName".
+//
+// Use AUTO_BASE_PROFILER_LABEL_DYNAMIC_* if you want to add additional /
+// dynamic information to the label stack frame.
+#  define AUTO_BASE_PROFILER_LABEL(label, categoryPair)            \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \
+        label, nullptr,                                            \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but with only one argument: the category
+// pair. The label string is taken from the category pair. This is convenient
+// for labels like
+// AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(GRAPHICS_LayerBuilding) which would
+// otherwise just repeat the string.
+#  define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair)         \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII(     \
+        "", nullptr,                                                   \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair,  \
+        uint32_t(::mozilla::baseprofiler::ProfilingStackFrame::Flags:: \
+                     LABEL_DETERMINED_BY_CATEGORY_PAIR))
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but with an additional string. The
+// inserted RAII object stores the cStr pointer in a field; it does not copy the
+// string.
+//
+// WARNING: This means that the string you pass to this macro needs to live at
+// least until the end of the current scope. Be careful using this macro with
+// ns[C]String; the other AUTO_BASE_PROFILER_LABEL_DYNAMIC_* macros below are
+// preferred because they avoid this problem.
+//
+// If the profiler samples the current thread and walks the label stack while
+// this RAII object is on the stack, it will copy the supplied string into the
+// profile buffer. So there's one string copy operation, and it happens at
+// sample time.
+//
+// Compare this to the plain AUTO_BASE_PROFILER_LABEL macro, which only accepts
+// literal strings: When the label stack frames generated by
+// AUTO_BASE_PROFILER_LABEL are sampled, no string copy needs to be made because
+// the profile buffer can just store the raw pointers to the literal strings.
+// Consequently, AUTO_BASE_PROFILER_LABEL frames take up considerably less space
+// in the profile buffer than AUTO_BASE_PROFILER_LABEL_DYNAMIC_* frames.
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr) \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII(         \
+        label, cStr,                                                       \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR, but takes an std::string.
+// Note: The use of the Maybe<>s ensures the scopes for the dynamic string and
+// the AutoProfilerLabel are appropriate, while also not incurring the runtime
+// cost of the string assignment unless the profiler is active. Therefore,
+// unlike AUTO_BASE_PROFILER_LABEL and AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR,
+// this macro doesn't push/pop a label when the profiler is inactive.
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str) \
+    ::mozilla::Maybe<std::string> autoStr;                                  \
+    ::mozilla::Maybe<::mozilla::baseprofiler::AutoProfilerLabel>            \
+        raiiObjectString;                                                   \
+    if (::mozilla::baseprofiler::profiler_is_active()) {                    \
+      autoStr.emplace(str);                                                 \
+      raiiObjectString.emplace(                                             \
+          label, autoStr->c_str(),                                          \
+          ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair);    \
+    }
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but accepting an extra context
+// parameter. The AutoProfilerLabel class below has no constructor taking a
+// context, so `ctx` is accepted and ignored here: the ProfilingStack always
+// comes from the TLS lookup, exactly as with AUTO_BASE_PROFILER_LABEL.
+// Note that the `ctx` argument expression is therefore never evaluated.
+#  define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx)  \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \
+        label, nullptr,                                            \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL_FAST, but also takes an extra string and
+// an additional set of flags (values from the ProfilingStackFrame::Flags
+// enum). `ctx` is ignored: AutoProfilerLabel has no context-taking constructor.
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString,     \
+                                                categoryPair, ctx, flags) \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII(        \
+        label, dynamicString,                                             \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair, flags)
+
+MFBT_API void profiler_add_js_marker(const char* aMarkerName,
+                                     const char* aMarkerText);
+
+// Returns true if any of the profiler mutexes are currently locked *on the
+// current thread*. This may be used by re-entrant code that may call profiler
+// functions while the same or a different profiler mutex is locked, which could
+// deadlock.
+bool profiler_is_locked_on_current_thread();
+
+//---------------------------------------------------------------------------
+// Output profiles
+//---------------------------------------------------------------------------
+
+// Set a user-friendly process name, used in JSON stream.
+MFBT_API void profiler_set_process_name(const std::string& aProcessName,
+                                        const std::string* aETLDplus1);
+
+// Get the profile encoded as a JSON string. A no-op (returning nullptr) if the
+// profiler is inactive.
+// If aIsShuttingDown is true, the current time is included as the process
+// shutdown time in the JSON's "meta" object.
+MFBT_API UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0,
+                                                bool aIsShuttingDown = false,
+                                                bool aOnlyThreads = false);
+
+// Write the profile for this process (excluding subprocesses) into aWriter.
+// Returns false if the profiler is inactive.
+MFBT_API bool profiler_stream_json_for_this_process(
+    SpliceableJSONWriter& aWriter, double aSinceTime = 0,
+    bool aIsShuttingDown = false, bool aOnlyThreads = false);
+
+// Get the profile and write it into a file. A no-op if the profiler is
+// inactive.
+MFBT_API void profiler_save_profile_to_file(const char* aFilename);
+
+//---------------------------------------------------------------------------
+// RAII classes
+//---------------------------------------------------------------------------
+
+class MOZ_RAII AutoProfilerInit {
+ public:
+  explicit AutoProfilerInit() { profiler_init(this); }
+  // Calls profiler_shutdown() when this object is destroyed.
+  ~AutoProfilerInit() { profiler_shutdown(); }
+
+ private:
+};
+
+// Convenience class to register and unregister a thread with the profiler.
+// Needs to be the first object on the stack of the thread.
+class MOZ_RAII AutoProfilerRegisterThread final {
+ public:
+  explicit AutoProfilerRegisterThread(const char* aName) {
+    profiler_register_thread(aName, this);  // `this` is the guessStackTop.
+  }
+  // Unregisters the thread when this object is destroyed.
+  ~AutoProfilerRegisterThread() { profiler_unregister_thread(); }
+
+ private:
+  AutoProfilerRegisterThread(const AutoProfilerRegisterThread&) = delete;
+  AutoProfilerRegisterThread& operator=(const AutoProfilerRegisterThread&) =
+      delete;
+};
+
+class MOZ_RAII AutoProfilerThreadSleep {
+ public:
+  explicit AutoProfilerThreadSleep() { profiler_thread_sleep(); }
+  // Calls profiler_thread_wake() when this object is destroyed.
+  ~AutoProfilerThreadSleep() { profiler_thread_wake(); }
+
+ private:
+};
+
+// Temporarily wake up the profiling of a thread while servicing events such as
+// Asynchronous Procedure Calls (APCs).
+class MOZ_RAII AutoProfilerThreadWake {
+ public:
+  explicit AutoProfilerThreadWake()
+      : mIssuedWake(profiler_thread_is_sleeping()) {
+    if (mIssuedWake) {
+      profiler_thread_wake();
+    }
+  }
+  // Puts the thread back to sleep, but only if the constructor woke it.
+  ~AutoProfilerThreadWake() {
+    if (mIssuedWake) {
+      MOZ_ASSERT(!profiler_thread_is_sleeping());
+      profiler_thread_sleep();
+    }
+  }
+
+ private:
+  bool mIssuedWake;  // True if the thread was sleeping at construction.
+};
+
+// This class creates a non-owning ProfilingStack reference. Objects of this
+// class are stack-allocated, and so exist within a thread, and are thus bounded
+// by the lifetime of the thread, which ensures that the references held can't
+// be used after the ProfilingStack is destroyed.
+class MOZ_RAII AutoProfilerLabel {
+ public:
+  // This is the AUTO_BASE_PROFILER_LABEL and AUTO_BASE_PROFILER_LABEL_DYNAMIC
+  // variant.
+  AutoProfilerLabel(const char* aLabel, const char* aDynamicString,
+                    ProfilingCategoryPair aCategoryPair, uint32_t aFlags = 0) {
+    // Get the ProfilingStack from TLS.
+    Push(GetProfilingStack(), aLabel, aDynamicString, aCategoryPair, aFlags);
+  }
+  // Remembers aProfilingStack and, if it is non-null, pushes a label frame.
+  void Push(ProfilingStack* aProfilingStack, const char* aLabel,
+            const char* aDynamicString, ProfilingCategoryPair aCategoryPair,
+            uint32_t aFlags = 0) {
+    // This function runs both on and off the main thread.
+
+    mProfilingStack = aProfilingStack;
+    if (mProfilingStack) {
+      mProfilingStack->pushLabelFrame(aLabel, aDynamicString, this,
+                                      aCategoryPair, aFlags);
+    }
+  }
+  // Pops one frame, but only if a ProfilingStack was stored by Push().
+  ~AutoProfilerLabel() {
+    // This function runs both on and off the main thread.
+
+    if (mProfilingStack) {
+      mProfilingStack->pop();
+    }
+  }
+
+  MFBT_API static ProfilingStack* GetProfilingStack();
+
+ private:
+  // We save a ProfilingStack pointer in the ctor so we don't have to redo the
+  // TLS lookup in the dtor.
+  ProfilingStack* mProfilingStack;
+
+ public:
+  // See the comment on the definition in platform.cpp for details about this.
+  static MOZ_THREAD_LOCAL(ProfilingStack*) sProfilingStack;
+};
+
+// Get the MOZ_PROFILER_STARTUP* environment variables that should be
+// supplied to a child process that is about to be launched, in order
+// to make that child process start with the same profiler settings as
+// in the current process.  The given function is invoked once for
+// each variable to be set.
+MFBT_API void GetProfilerEnvVarsForChildProcess(
+    std::function<void(const char* key, const char* value)>&& aSetEnv);
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // BaseProfiler_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerCounts.h b/mozglue/baseprofiler/public/BaseProfilerCounts.h
new file mode 100644
index 0000000000..fbcc713744
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerCounts.h
@@ -0,0 +1,280 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerCounts_h
+#define BaseProfilerCounts_h
+
+#ifndef MOZ_GECKO_PROFILER
+
+#  define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description)
+#  define BASE_PROFILER_DEFINE_COUNT(label, category, description)
+#  define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description)
+#  define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count)
+#  define AUTO_BASE_PROFILER_COUNT(label)
+#  define AUTO_BASE_PROFILER_STATIC_COUNT(label, count)
+#  define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label)
+
+#else
+
+#  include "mozilla/Atomics.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+class BaseProfilerCount;
+MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+typedef Atomic<int64_t, MemoryOrdering::Relaxed> ProfilerAtomicSigned;
+typedef Atomic<uint64_t, MemoryOrdering::Relaxed> ProfilerAtomicUnsigned;
+
+// Counter support
+// There are two types of counters:
+// 1) a simple counter which can be added to or subtracted from.  This could
+// track the number of objects of a type, the number of calls to something
+// (reflow, JIT, etc).
+// 2) a combined counter which has the above, plus a number-of-calls counter
+// that is incremented by 1 for each call to modify the count.  This provides
+// an optional source for a 'heatmap' of access.  This can be used (for
+// example) to track the amount of memory allocated, and provide a heatmap of
+// memory operations (allocs/frees).
+//
+// Counters are sampled by the profiler once per sample-period.  At this time,
+// all counters are global to the process.  In the future, there might be more
+// versions with per-thread or other discriminators.
+//
+// Typical usage:
+// There are two ways to use counters: With heap-created counter objects,
+// or using macros.  Note: the macros use statics, and will be slightly
+// faster/smaller, and you need to care about creating them before using
+// them.  They're similar to the use-pattern for the other AUTO_PROFILER*
+// macros, but they do need the PROFILER_DEFINE* to be used to instantiate
+// the statics.
+//
+// PROFILER_DEFINE_COUNT(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() { ... AUTO_PROFILER_COUNT(mything, number_of_bytes_used); ... }
+//
+// or (to also get a heatmap)
+//
+// PROFILER_DEFINE_COUNT_TOTAL(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() {
+//   ...
+//   AUTO_PROFILER_COUNT_TOTAL(mything, number_of_bytes_generated);
+//   ...
+// }
+//
+// To use without statics/macros:
+//
+// UniquePtr<ProfilerCounter> myCounter;
+// ...
+// myCounter =
+//   MakeUnique<ProfilerCounter>("mything", "JIT", "Some JIT byte count");
+// ...
+// void foo() { ... myCounter->Add(number_of_bytes_generated); ... }
+
+class BaseProfilerCount {
+ public:
+  BaseProfilerCount(const char* aLabel, ProfilerAtomicSigned* aCounter,
+                    ProfilerAtomicUnsigned* aNumber, const char* aCategory,
+                    const char* aDescription)
+      : mLabel(aLabel),
+        mCategory(aCategory),
+        mDescription(aDescription),
+        mCounter(aCounter),
+        mNumber(aNumber) {
+#  define COUNTER_CANARY 0xDEADBEEF
+#  ifdef DEBUG
+    mCanary = COUNTER_CANARY;
+    mPrevNumber = 0;
+#  endif
+    // Can't call profiler_* here since this may be non-xul-library
+  }
+#  ifdef DEBUG
+  ~BaseProfilerCount() { mCanary = 0; }
+#  endif
+  // Reads the current count, and the number of calls (0 if mNumber is null).
+  void Sample(int64_t& aCounter, uint64_t& aNumber) {
+    MOZ_ASSERT(mCanary == COUNTER_CANARY);
+
+    aCounter = *mCounter;
+    aNumber = mNumber ? *mNumber : 0;
+#  ifdef DEBUG
+    MOZ_ASSERT(aNumber >= mPrevNumber);
+    mPrevNumber = aNumber;
+#  endif
+  }
+
+  // We don't define ++ and Add() here, since the static defines directly
+  // increment the atomic counters, and the subclasses implement ++ and
+  // Add() directly.
+
+  // These typically are static strings (for example if you use the macros
+  // below)
+  const char* mLabel;
+  const char* mCategory;
+  const char* mDescription;
+  // We're ok with these being un-ordered in race conditions.  These are
+  // pointers because we want to be able to use statics and increment them
+  // directly.  Otherwise we could just have them inline, and not need the
+  // constructor args.
+  // These can be static globals (using the macros below), though they
+  // don't have to be - their lifetime must be longer than the use of them
+  // by the profiler (see profiler_add/remove_sampled_counter()).  If you're
+  // using a lot of these, they probably should be allocated at runtime (see
+  // class ProfilerCountOnly below).
+  ProfilerAtomicSigned* mCounter;
+  ProfilerAtomicUnsigned* mNumber;  // may be null
+
+#  ifdef DEBUG
+  uint32_t mCanary;
+  uint64_t mPrevNumber;  // value of number from the last Sample()
+#  endif
+};
+
+// Designed to be allocated dynamically, and simply incremented with obj++
+// or obj->Add(n)
+class ProfilerCounter final : public BaseProfilerCount {
+ public:
+  ProfilerCounter(const char* aLabel, const char* aCategory,
+                  const char* aDescription)
+      : BaseProfilerCount(aLabel, &mCounter, nullptr, aCategory, aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+  // Removes this counter from the profiler's sampled counters.
+  virtual ~ProfilerCounter() { profiler_remove_sampled_counter(this); }
+
+  BaseProfilerCount& operator++() {
+    Add(1);
+    return *this;
+  }
+  // Adds aNumber (signed, so it may also subtract) to the counter.
+  void Add(int64_t aNumber) { mCounter += aNumber; }
+
+  ProfilerAtomicSigned mCounter;
+};
+
+// Also keeps a heatmap (number of calls to ++/Add())
+class ProfilerCounterTotal final : public BaseProfilerCount {
+ public:
+  ProfilerCounterTotal(const char* aLabel, const char* aCategory,
+                       const char* aDescription)
+      : BaseProfilerCount(aLabel, &mCounter, &mNumber, aCategory,
+                          aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+  // Removes this counter from the profiler's sampled counters.
+  virtual ~ProfilerCounterTotal() { profiler_remove_sampled_counter(this); }
+
+  BaseProfilerCount& operator++() {
+    Add(1);
+    return *this;
+  }
+  // Adds aNumber to the count and records one more call in mNumber.
+  void Add(int64_t aNumber) {
+    mCounter += aNumber;
+    mNumber++;
+  }
+
+  ProfilerAtomicSigned mCounter;
+  ProfilerAtomicUnsigned mNumber;
+};
+
+// Defines a counter that is sampled on each profiler tick, with a running
+// count (signed), and number-of-instances. Note that because these are two
+// independent Atomics, there is a possibility that count will not include
+// the last call, but number of uses will.  This is not considered worth
+// worrying about.
+#  define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description) \
+    ProfilerAtomicSigned profiler_count_##label(0);                      \
+    ProfilerAtomicUnsigned profiler_number_##label(0);                   \
+    const char profiler_category_##label[] = category;                   \
+    const char profiler_description_##label[] = description;             \
+    UniquePtr<::mozilla::baseprofiler::BaseProfilerCount> AutoCount_##label;
+
+// This counts, but doesn't keep track of the number of calls to
+// AUTO_BASE_PROFILER_COUNT()
+#  define BASE_PROFILER_DEFINE_COUNT(label, category, description) \
+    ProfilerAtomicSigned profiler_count_##label(0);                \
+    const char profiler_category_##label[] = category;             \
+    const char profiler_description_##label[] = description;       \
+    UniquePtr<::mozilla::baseprofiler::BaseProfilerCount> AutoCount_##label;
+
+// Defines the BaseProfilerCount directly as an object: this will create a
+// static initializer if used, but avoids a possible allocation.
+#  define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category,           \
+                                                  description)               \
+    ProfilerAtomicSigned profiler_count_##label(0);                          \
+    ProfilerAtomicUnsigned profiler_number_##label(0);                       \
+    ::mozilla::baseprofiler::BaseProfilerCount AutoCount_##label(            \
+        #label, &profiler_count_##label, &profiler_number_##label, category, \
+        description);
+
+// If we didn't care about static initializers, we could avoid the need for
+// a ptr to the BaseProfilerCount object.
+
+// Bumps label's call count and adds `count` to its total, lazily allocating
+// and registering its BaseProfilerCount. XXX racy if() may leak one.
+#  define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count)                      \
+    do {                                                                    \
+      profiler_number_##label++; /* do this first*/                         \
+      profiler_count_##label += count;                                      \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore that we could call this twice in theory, and that we leak \
+         * them. Fully qualified so the macro works in any namespace. */    \
+        AutoCount_##label.reset(                                            \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                 \
+                #label, &profiler_count_##label, &profiler_number_##label,  \
+                profiler_category_##label, profiler_description_##label));  \
+        ::mozilla::baseprofiler::profiler_add_sampled_counter(              \
+            AutoCount_##label.get());                                       \
+      }                                                                     \
+    } while (0)
+
+#  define AUTO_BASE_PROFILER_COUNT(label, count)                            \
+    do {                                                                    \
+      profiler_count_##label += count; /* do this first*/                   \
+      if (!AutoCount_##label) {                                             \
+        /* Racy lazy init, may leak. BASE_PROFILER_DEFINE_COUNT declares no \
+         * number atomic, so only the count atomic is passed. */            \
+        AutoCount_##label.reset(                                            \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                 \
+                #label, &profiler_count_##label, nullptr,                   \
+                profiler_category_##label, profiler_description_##label));  \
+        ::mozilla::baseprofiler::profiler_add_sampled_counter(              \
+            AutoCount_##label.get());                                       \
+      }                                                                     \
+    } while (0)
+
+#  define AUTO_BASE_PROFILER_STATIC_COUNT(label, count) \
+    do { /* only touches the two atomics of label */    \
+      profiler_number_##label++; /* do this first*/     \
+      profiler_count_##label += count;                  \
+    } while (0)
+
+// Force the allocation for label; does not call profiler_add_sampled_counter.
+#  define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label)                        \
+    do {                                                                    \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore that we could call this twice in theory, and that we leak \
+         * them                                                             \
+         */                                                                 \
+        AutoCount_##label.reset(                                            \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                 \
+                #label, &profiler_count_##label, &profiler_number_##label,  \
+                profiler_category_##label, profiler_description_##label));  \
+      }                                                                     \
+    } while (0)
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerCounts_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerDetail.h b/mozglue/baseprofiler/public/BaseProfilerDetail.h
new file mode 100644
index 0000000000..9027f32bc7
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerDetail.h
@@ -0,0 +1,189 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Internal Base Profiler utilities.
+
+#ifndef BaseProfilerDetail_h
+#define BaseProfilerDetail_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/PlatformMutex.h"
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Implemented in platform.cpp
+MFBT_API int profiler_current_thread_id();
+
+namespace detail {
+
+// Owner-tracking shell around mozglue PlatformMutex, for Base Profiler use.
+class BaseProfilerMutex : private ::mozilla::detail::MutexImpl {
+ public:
+  BaseProfilerMutex() : ::mozilla::detail::MutexImpl() {}
+  explicit BaseProfilerMutex(const char* aName)
+      : ::mozilla::detail::MutexImpl(), mName(aName) {}
+  // Neither copyable nor movable.
+  BaseProfilerMutex(const BaseProfilerMutex&) = delete;
+  BaseProfilerMutex& operator=(const BaseProfilerMutex&) = delete;
+  BaseProfilerMutex(BaseProfilerMutex&&) = delete;
+  BaseProfilerMutex& operator=(BaseProfilerMutex&&) = delete;
+
+#ifdef DEBUG
+  ~BaseProfilerMutex() { MOZ_ASSERT(mOwningThreadId == 0); }
+#endif  // DEBUG
+  // Whether the calling thread is the one recorded as owner by Lock/TryLock.
+  [[nodiscard]] bool IsLockedOnCurrentThread() const {
+    return mOwningThreadId == baseprofiler::profiler_current_thread_id();
+  }
+
+  void AssertCurrentThreadOwns() const {
+    MOZ_ASSERT(IsLockedOnCurrentThread());
+  }
+  // Acquires the underlying mutex, then records the calling thread as owner.
+  void Lock() {
+    const int tid = baseprofiler::profiler_current_thread_id();
+    MOZ_ASSERT(tid != 0);
+    MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking");
+    ::mozilla::detail::MutexImpl::lock();
+    MOZ_ASSERT(mOwningThreadId == 0, "Not unlocked properly");
+    mOwningThreadId = tid;
+  }
+  // Like Lock(), but gives up and returns false if tryLock() fails.
+  [[nodiscard]] bool TryLock() {
+    const int tid = baseprofiler::profiler_current_thread_id();
+    MOZ_ASSERT(tid != 0);
+    MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking");
+    if (!::mozilla::detail::MutexImpl::tryLock()) {
+      // Failed to lock, nothing more to do.
+      return false;
+    }
+    MOZ_ASSERT(mOwningThreadId == 0, "Not unlocked properly");
+    mOwningThreadId = tid;
+    return true;
+  }
+  // Must be called by the thread recorded as owner (asserted).
+  void Unlock() {
+    MOZ_ASSERT(IsLockedOnCurrentThread(), "Unlocking when not locked here");
+    // We're still holding the mutex here, so it's safe to just reset
+    // `mOwningThreadId`.
+    mOwningThreadId = 0;
+    ::mozilla::detail::MutexImpl::unlock();
+  }
+  // Name passed at construction, or nullptr if none was given.
+  const char* GetName() const { return mName; }
+
+ private:
+  // Thread currently owning the lock, or 0.
+  // Atomic because it may be read at any time independent of the mutex.
+  // Relaxed because threads only need to know if they own it already, so:
+  // - If it's their id, only *they* wrote that value with a locked mutex.
+  // - If it's different from their thread id it doesn't matter what other
+  //   number it is (0 or another id) and that it can change again at any time.
+  Atomic<int, MemoryOrdering::Relaxed> mOwningThreadId{0};
+
+  const char* mName = nullptr;
+};
+
+// Scoped lock: acquires the given mutex on construction, releases it on
+// destruction. Neither copyable nor movable.
+class MOZ_RAII BaseProfilerAutoLock {
+  BaseProfilerMutex& mMutex;
+
+ public:
+  explicit BaseProfilerAutoLock(BaseProfilerMutex& aMutex) : mMutex(aMutex) {
+    aMutex.Lock();
+  }
+
+  ~BaseProfilerAutoLock() { mMutex.Unlock(); }
+
+  BaseProfilerAutoLock(BaseProfilerAutoLock&&) = delete;
+  BaseProfilerAutoLock(const BaseProfilerAutoLock&) = delete;
+  BaseProfilerAutoLock& operator=(BaseProfilerAutoLock&&) = delete;
+  BaseProfilerAutoLock& operator=(const BaseProfilerAutoLock&) = delete;
+};
+
+// Optional Base Profiler mutex: owns a BaseProfilerMutex (hence a platform
+// mutex) only if activated at construction; otherwise locking is a no-op.
+class BaseProfilerMaybeMutex {
+ public:
+  explicit BaseProfilerMaybeMutex(bool aActivate) {
+    if (aActivate) {
+      mMaybeMutex.emplace();
+    }
+  }
+
+  BaseProfilerMaybeMutex(const BaseProfilerMaybeMutex&) = delete;
+  BaseProfilerMaybeMutex& operator=(const BaseProfilerMaybeMutex&) = delete;
+  BaseProfilerMaybeMutex(BaseProfilerMaybeMutex&&) = delete;
+  BaseProfilerMaybeMutex& operator=(BaseProfilerMaybeMutex&&) = delete;
+
+  ~BaseProfilerMaybeMutex() = default;
+
+  bool IsActivated() const { return mMaybeMutex.isSome(); }
+
+  [[nodiscard]] bool IsActivatedAndLockedOnCurrentThread() const {
+    if (!IsActivated()) {
+      // Not activated, so we can never be locked.
+      return false;
+    }
+    return mMaybeMutex->IsLockedOnCurrentThread();
+  }
+
+  void AssertCurrentThreadOwns() const {
+#ifdef DEBUG
+    if (IsActivated()) {
+      mMaybeMutex->AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+  }
+
+  void Lock() {
+    if (IsActivated()) {
+      mMaybeMutex->Lock();
+    }
+  }
+
+  void Unlock() {
+    if (IsActivated()) {
+      mMaybeMutex->Unlock();
+    }
+  }
+
+ private:
+  Maybe<BaseProfilerMutex> mMaybeMutex;
+};
+
+// Scoped lock for a BaseProfilerMaybeMutex: Lock() on construction, Unlock()
+// on destruction. Neither copyable nor movable.
+class MOZ_RAII BaseProfilerMaybeAutoLock {
+  BaseProfilerMaybeMutex& mMaybeMutex;
+
+ public:
+  explicit BaseProfilerMaybeAutoLock(BaseProfilerMaybeMutex& aMaybeMutex)
+      : mMaybeMutex(aMaybeMutex) {
+    aMaybeMutex.Lock();
+  }
+
+  ~BaseProfilerMaybeAutoLock() { mMaybeMutex.Unlock(); }
+
+  BaseProfilerMaybeAutoLock(BaseProfilerMaybeAutoLock&&) = delete;
+  BaseProfilerMaybeAutoLock(const BaseProfilerMaybeAutoLock&) = delete;
+  BaseProfilerMaybeAutoLock& operator=(BaseProfilerMaybeAutoLock&&) = delete;
+  BaseProfilerMaybeAutoLock& operator=(const BaseProfilerMaybeAutoLock&) =
+      delete;
+};
+
+}  // namespace detail
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // BaseProfilerDetail_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h
new file mode 100644
index 0000000000..1556b7a272
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h
@@ -0,0 +1,69 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerMarkerTypes_h
+#define BaseProfilerMarkerTypes_h
+
+// This header contains common marker type definitions.
+//
+// It #include's "mozilla/BaseProfilerMarkers.h", see that file for how to
+// define other marker types, and how to add markers to the profiler buffers.
+//
+// If you don't need to use these common types, #include
+// "mozilla/BaseProfilerMarkers.h" instead.
+//
+// Types in this file can be defined without relying on xpcom.
+// Others are defined in "ProfilerMarkerTypes.h".
+
+// !!!                       /!\ WORK IN PROGRESS /!\                       !!!
+// This file contains draft marker definitions, but most are not used yet.
+// Further work is needed to complete these definitions, and use them to convert
+// existing PROFILER_ADD_MARKER calls. See meta bug 1661394.
+
+#include "mozilla/BaseProfilerMarkers.h"
+
+#ifdef MOZ_GECKO_PROFILER
+
+namespace mozilla::baseprofiler::markers {
+
+// Marker type recording a media sample's start and end times (microseconds).
+struct MediaSampleMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("MediaSample");
+  }
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+                                   int64_t aSampleStartTimeUs,
+                                   int64_t aSampleEndTimeUs) {
+    aWriter.IntProperty("sampleStartTimeUs", aSampleStartTimeUs);
+    aWriter.IntProperty("sampleEndTimeUs", aSampleEndTimeUs);
+  }
+  static MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    const auto usFmt = MS::Format::microseconds;
+    MS display{MS::Location::markerChart, MS::Location::markerTable};
+    display.AddKeyLabelFormat("sampleStartTimeUs", "Sample start time", usFmt);
+    display.AddKeyLabelFormat("sampleEndTimeUs", "Sample end time", usFmt);
+    return display;
+  }
+};
+
+struct ContentBuildMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("CONTENT_FULL_PAINT_TIME");
+  }
+  // No payload: this marker type streams no JSON properties of its own.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter) {}
+  static MarkerSchema MarkerTypeDisplay() {
+    using Loc = MarkerSchema::Location;
+    return MarkerSchema{Loc::markerChart, Loc::markerTable};
+  }
+};
+
+}  // namespace mozilla::baseprofiler::markers
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerMarkerTypes_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkers.h b/mozglue/baseprofiler/public/BaseProfilerMarkers.h
new file mode 100644
index 0000000000..c63b018f95
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkers.h
@@ -0,0 +1,242 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Markers are useful to delimit something important happening such as the first
+// paint. Unlike labels, which are only recorded in the profile buffer if a
+// sample is collected while the label is on the label stack, markers will
+// always be recorded in the profile buffer.
+//
+// This header contains basic definitions necessary to create marker types, and
+// to add markers to the profiler buffers.
+//
+// If basic marker types are needed, #include
+// "mozilla/BaseProfilerMarkerTypes.h" instead.
+//
+// But if you want to create your own marker type locally, you can #include this
+// header only; look at mozilla/BaseProfilerMarkerTypes.h for examples of how to
+// define types, and mozilla/BaseProfilerMarkersPrerequisites.h for some
+// supporting types.
+//
+// To then record markers:
+// - Use `baseprofiler::AddMarker(...)` from mozglue or other libraries that
+//   are outside of xul, especially if they may happen outside of xpcom's
+//   lifetime (typically startup, shutdown, or tests).
+// - Otherwise #include "ProfilerMarkers.h" instead, and use
+//   `profiler_add_marker(...)`.
+// See these functions for more details.
+
+#ifndef BaseProfilerMarkers_h
+#define BaseProfilerMarkers_h
+
+#include "mozilla/BaseProfilerMarkersDetail.h"
+
+#ifndef MOZ_GECKO_PROFILER
+
+#  define BASE_PROFILER_MARKER_UNTYPED(markerName, categoryName, ...)
+#  define BASE_PROFILER_MARKER(markerName, categoryName, options, MarkerType, \
+                               ...)
+#  define BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, text)
+#  define AUTO_BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, \
+                                         text)
+
+#else  // ndef MOZ_GECKO_PROFILER
+
+#  include "mozilla/ProfileChunkedBuffer.h"
+#  include "mozilla/TimeStamp.h"
+#  include "mozilla/Unused.h"
+
+#  include <functional>
+#  include <string>
+#  include <utility>
+
+namespace mozilla::baseprofiler {
+
+// Records a marker of the given type into `aBuffer` rather than into the main
+// profiler buffer (handy for local buffers). Most code should instead go
+// through `AddMarker()` or the related macros below, which also document the
+// parameters.
+template <typename MarkerType, typename... PayloadArguments>
+ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+    MarkerType aMarkerType, const PayloadArguments&... aPayloadArguments) {
+  static_cast<void>(aMarkerType);  // Only the empty object type is useful.
+  return base_profiler_markers_detail::AddMarkerToBuffer<MarkerType>(
+      aBuffer, aName, aCategory, std::move(aOptions),
+      ::mozilla::baseprofiler::profiler_capture_backtrace_into,
+      aPayloadArguments...);
+}
+
+// Add a marker without payload (type `markers::NoPayload`) to a given buffer.
+inline ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions = {}) {
+  return AddMarkerToBuffer(aBuffer, aName, aCategory, std::move(aOptions),
+                           markers::NoPayload{});
+}
+
+// Records a marker in the Base Profiler buffer, if markers can be accepted.
+// - aName: Main name of this marker.
+// - aCategory: Category for this marker.
+// - aOptions: Optional settings (such as timing, inner window id,
+//   backtrace...), see `MarkerOptions` for details.
+// - aMarkerType: Empty object that specifies the type of marker.
+// - aPayloadArguments: Arguments expected by this marker type's
+//   `StreamJSONMarkerData` function.
+template <typename MarkerType, typename... PayloadArguments>
+ProfileBufferBlockIndex AddMarker(
+    const ProfilerString8View& aName, const MarkerCategory& aCategory,
+    MarkerOptions&& aOptions, MarkerType aMarkerType,
+    const PayloadArguments&... aPayloadArguments) {
+  if (baseprofiler::profiler_can_accept_markers()) {
+    return ::mozilla::baseprofiler::AddMarkerToBuffer(
+        base_profiler_markers_detail::CachedBaseCoreBuffer(), aName, aCategory,
+        std::move(aOptions), aMarkerType, aPayloadArguments...);
+  }
+  return {};  // Markers not accepted: default-constructed block index.
+}
+
+// Add a `markers::NoPayload` marker (no payload) to the Base Profiler buffer.
+inline ProfileBufferBlockIndex AddMarker(const ProfilerString8View& aName,
+                                         const MarkerCategory& aCategory,
+                                         MarkerOptions&& aOptions = {}) {
+  return AddMarker(aName, aCategory, std::move(aOptions), markers::NoPayload{});
+}
+
+}  // namespace mozilla::baseprofiler
+
+// Same as `AddMarker()` (without payload); `categoryName` names a member of
+// `baseprofiler::category`. Safe to use even without MOZ_GECKO_PROFILER.
+#  define BASE_PROFILER_MARKER_UNTYPED(markerName, categoryName, ...)  \
+    do {                                                               \
+      AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_UNTYPED);               \
+      ::mozilla::baseprofiler::AddMarker(                              \
+          markerName, ::mozilla::baseprofiler::category::categoryName, \
+          ##__VA_ARGS__);                                              \
+    } while (false)
+
+// Same as `AddMarker()` (with payload); `MarkerType` names a type in
+// `baseprofiler::markers`. Safe to use even without MOZ_GECKO_PROFILER.
+#  define BASE_PROFILER_MARKER(markerName, categoryName, options, MarkerType, \
+                               ...)                                           \
+    do {                                                                      \
+      AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_with_##MarkerType);            \
+      ::mozilla::baseprofiler::AddMarker(                                     \
+          markerName, ::mozilla::baseprofiler::category::categoryName,        \
+          options, ::mozilla::baseprofiler::markers::MarkerType{},            \
+          ##__VA_ARGS__);                                                     \
+    } while (false)
+
+namespace mozilla::baseprofiler::markers {
+// Most common marker type. Others are in BaseProfilerMarkerTypes.h.
+struct TextMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("Text");
+  }
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aText) {
+    aWriter.StringProperty("name", aText);
+  }
+  static MarkerSchema MarkerTypeDisplay() {
+    using Loc = MarkerSchema::Location;
+    MarkerSchema display{Loc::markerChart, Loc::markerTable};
+    display.SetChartLabel("{marker.data.name}");
+    display.SetTableLabel("{marker.name} - {marker.data.name}");
+    display.AddKeyLabelFormat("name", "Details", MarkerSchema::Format::string);
+    return display;
+  }
+};
+
+struct Tracing {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("tracing");
+  }
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aCategory) {
+    if (aCategory.Length() == 0) {
+      return;  // An empty category is not streamed at all.
+    }
+    aWriter.StringProperty("category", aCategory);
+  }
+  static MarkerSchema MarkerTypeDisplay() {
+    using Loc = MarkerSchema::Location;
+    MarkerSchema out{Loc::markerChart, Loc::markerTable, Loc::timelineOverview};
+    out.AddKeyLabelFormat("category", "Type", MarkerSchema::Format::string);
+    return out;
+  }
+};
+}  // namespace mozilla::baseprofiler::markers
+
+// Add a `markers::TextMarker` carrying `text`. This macro is safe to use even
+// if MOZ_GECKO_PROFILER is not #defined.
+#  define BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, text) \
+    do {                                                                     \
+      AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_TEXT);                        \
+      ::mozilla::baseprofiler::AddMarker(                                    \
+          markerName, ::mozilla::baseprofiler::category::categoryName,       \
+          options, ::mozilla::baseprofiler::markers::TextMarker{}, text);    \
+    } while (false)
+
+namespace mozilla::baseprofiler {
+
+// Scoped text marker: on destruction, records a `markers::TextMarker` covering
+// the interval from construction (or from the start/instant time already set
+// in the provided options) until destruction.
+class MOZ_RAII AutoProfilerTextMarker {
+ protected:
+  const char* mMarkerName;
+  MarkerCategory mCategory;
+  MarkerOptions mOptions;
+  std::string mText;
+
+ public:
+  AutoProfilerTextMarker(const char* aMarkerName,
+                         const MarkerCategory& aCategory,
+                         MarkerOptions&& aOptions, const std::string& aText)
+      : mMarkerName(aMarkerName),
+        mCategory(aCategory),
+        mOptions(std::move(aOptions)),
+        mText(aText) {
+    MOZ_ASSERT(mOptions.Timing().EndTime().IsNull(),
+               "AutoProfilerTextMarker options shouldn't have an end time");
+    if (mOptions.Timing().StartTime().IsNull()) {
+      mOptions.Set(MarkerTiming::InstantNow());
+    }
+  }
+
+  ~AutoProfilerTextMarker() {
+    mOptions.TimingRef().SetIntervalEnd();
+    AUTO_PROFILER_STATS(AUTO_BASE_PROFILER_MARKER_TEXT);
+    AddMarker(ProfilerString8View::WrapNullTerminatedString(mMarkerName),
+              mCategory, std::move(mOptions), markers::TextMarker{}, mText);
+  }
+};
+
+extern template MFBT_API ProfileBufferBlockIndex
+AddMarker(const ProfilerString8View&, const MarkerCategory&, MarkerOptions&&,
+          markers::TextMarker, const std::string&);
+
+extern template MFBT_API ProfileBufferBlockIndex
+AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&,
+                  const MarkerCategory&, MarkerOptions&&, markers::NoPayload);
+
+extern template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&,
+    MarkerOptions&&, markers::TextMarker, const std::string&);
+
+}  // namespace mozilla::baseprofiler
+
+// Creates an AutoProfilerTextMarker RAII object (its marker ends when the
+// object is destroyed). Safe to use even without MOZ_GECKO_PROFILER.
+#  define AUTO_BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options,   \
+                                         text)                                \
+    ::mozilla::baseprofiler::AutoProfilerTextMarker BASE_PROFILER_RAII(       \
+        markerName, ::mozilla::baseprofiler::category::categoryName, options, \
+        text)
+
+#endif  // ndef MOZ_GECKO_PROFILER else
+
+#endif  // BaseProfilerMarkers_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h
new file mode 100644
index 0000000000..b5dbe27343
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h
@@ -0,0 +1,674 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerMarkersDetail_h
+#define BaseProfilerMarkersDetail_h
+
+#ifndef BaseProfilerMarkers_h
+#  error "This header should only be #included by BaseProfilerMarkers.h"
+#endif
+
+#include "mozilla/BaseProfilerMarkersPrerequisites.h"
+
+#ifdef MOZ_GECKO_PROFILER
+
+//                        ~~ HERE BE DRAGONS ~~
+//
+// Everything below is internal implementation detail, you shouldn't need to
+// look at it unless working on the profiler code.
+
+#  include "mozilla/BaseProfileJSONWriter.h"
+#  include "mozilla/ProfileBufferEntryKinds.h"
+
+#  include <limits>
+#  include <tuple>
+
+namespace mozilla::baseprofiler {
+// Implemented in platform.cpp
+MFBT_API ProfileChunkedBuffer& profiler_get_core_buffer();
+}  // namespace mozilla::baseprofiler
+
+namespace mozilla::base_profiler_markers_detail {
+
+// Returns the profiler's core buffer. The reference is obtained from the
+// profiler once, and cached in a static local of this non-templated function.
+inline ProfileChunkedBuffer& CachedBaseCoreBuffer() {
+  static ProfileChunkedBuffer& sCachedCoreBuffer =
+      baseprofiler::profiler_get_core_buffer();
+  return sCachedCoreBuffer;
+}
+
+struct Streaming {
+  // A `MarkerDataDeserializer` is a free function that reads a serialized
+  // payload from the given reader and streams it as JSON object properties.
+  using MarkerDataDeserializer = void (*)(ProfileBufferEntryReader&,
+                                          baseprofiler::SpliceableJSONWriter&);
+
+  // A `MarkerTypeNameFunction` is a free function that returns the name of the
+  // marker type.
+  using MarkerTypeNameFunction = Span<const char> (*)();
+
+  // A `MarkerSchemaFunction` is a free function that returns a
+  // `MarkerSchema`, which contains all the information needed to stream
+  // the display schema associated with a marker type.
+  using MarkerSchemaFunction = MarkerSchema (*)();
+
+  struct MarkerTypeFunctions {  // Functions tied to one marker type.
+    MarkerDataDeserializer mMarkerDataDeserializer = nullptr;
+    MarkerTypeNameFunction mMarkerTypeNameFunction = nullptr;
+    MarkerSchemaFunction mMarkerSchemaFunction = nullptr;
+  };
+
+  // A `DeserializerTag` is written before the payload, to select the correct
+  // deserializer when reading it back; tag 0 is used when there is no payload.
+  using DeserializerTag = uint8_t;
+
+  // Store a deserializer (and other marker-type-specific functions) and get its
+  // `DeserializerTag`.
+  // This is intended to be only used once per deserializer when a new marker
+  // type is used for the first time, so it should be called to initialize a
+  // `static const` tag that will be re-used by all markers of the corresponding
+  // payload type -- see use below.
+  MFBT_API static DeserializerTag TagForMarkerTypeFunctions(
+      MarkerDataDeserializer aDeserializer,
+      MarkerTypeNameFunction aMarkerTypeNameFunction,
+      MarkerSchemaFunction aMarkerSchemaFunction);
+
+  // Get the `MarkerDataDeserializer` for a given `DeserializerTag`.
+  MFBT_API static MarkerDataDeserializer DeserializerForTag(
+      DeserializerTag aTag);
+
+  // Retrieve all the stored `MarkerTypeFunctions`.
+  MFBT_API static Span<const MarkerTypeFunctions> MarkerTypeFunctionsArray();
+};
+
+// This helper will examine a marker type's `StreamJSONMarkerData` function, see
+// specialization below.
+template <typename T>
+struct StreamFunctionTypeHelper;
+
+// Specialization matching the expected signature
+// `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)`; it exposes
+// the count and the cv- and reference-stripped types of the `...` parameters.
+template <typename R, typename... As>
+struct StreamFunctionTypeHelper<R(baseprofiler::SpliceableJSONWriter&, As...)> {
+  static constexpr size_t scArity = sizeof...(As);
+  using TupleType =
+      std::tuple<std::remove_cv_t<std::remove_reference_t<As>>...>;
+
+  // Serializes a marker whose payload arguments have exactly the parameter
+  // types of `StreamJSONMarkerData`, taken by reference-to-const. It lives in
+  // this helper because the raw pack `As...` is only reachable from here.
+  // Since callers' arguments bind to these exact types, permitted implicit
+  // conversions can happen.
+  static ProfileBufferBlockIndex Serialize(
+      ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+      const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+      Streaming::DeserializerTag aDeserializerTag, const As&... aAs) {
+    // The options come right after the entry kind: they contain the thread id,
+    // which is handled first to filter markers by threads.
+    return aBuffer.PutObjects(ProfileBufferEntryKind::Marker, aOptions, aName,
+                              aCategory, aDeserializerTag, aAs...);
+  }
+};
+
+// Serialization/deserialization helper for a marker type, which is a `struct`
+// with expected static member functions (`MarkerTypeName`, `MarkerTypeDisplay`,
+// `StreamJSONMarkerData`). See example in BaseProfilerMarkers.h.
+template <typename MarkerType>
+struct MarkerTypeSerialization {
+  // Definitions to access the expected
+  // `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)` function
+  // and its parameters.
+  using StreamFunctionType =
+      StreamFunctionTypeHelper<decltype(MarkerType::StreamJSONMarkerData)>;
+  constexpr static size_t scStreamFunctionParameterCount =
+      StreamFunctionType::scArity;
+  using StreamFunctionUserParametersTuple =
+      typename StreamFunctionType::TupleType;
+  template <size_t i>
+  using StreamFunctionParameter =
+      std::tuple_element_t<i, StreamFunctionUserParametersTuple>;
+
+  template <typename... Ts>
+  static ProfileBufferBlockIndex Serialize(ProfileChunkedBuffer& aBuffer,
+                                           const ProfilerString8View& aName,
+                                           const MarkerCategory& aCategory,
+                                           MarkerOptions&& aOptions,
+                                           const Ts&... aTs) {
+    static_assert(!std::is_same_v<MarkerType,
+                                  ::mozilla::baseprofiler::markers::NoPayload>,
+                  "NoPayload should have been handled in the caller.");
+    // Register marker type functions, and get the tag for this deserializer.
+    // Note that the tag is stored in a function-static object, and this
+    // function is static in a templated struct, so there should only be one tag
+    // per MarkerType.
+    // Making the tag class-static may have been more efficient (to avoid a
+    // thread-safe init check at every call), but random global static
+    // initialization order would make it more complex to coordinate with
+    // `Streaming::TagForMarkerTypeFunctions()`, and also would add a (small)
+    // cost for everybody, even the majority of users not using the profiler.
+    static const Streaming::DeserializerTag tag =
+        Streaming::TagForMarkerTypeFunctions(Deserialize,
+                                             MarkerType::MarkerTypeName,
+                                             MarkerType::MarkerTypeDisplay);
+    return StreamFunctionType::Serialize(aBuffer, aName, aCategory,
+                                         std::move(aOptions), tag, aTs...);
+  }
+
+ private:
+  // This templated function will recursively deserialize each argument expected
+  // by `MarkerType::StreamJSONMarkerData()` on the stack, and call it at the
+  // end. E.g., for `StreamJSONMarkerData(int, char)`:
+  // - DeserializeArguments<0>(aER, aWriter) reads an int and calls:
+  // - DeserializeArguments<1>(aER, aWriter, const int&) reads a char and calls:
+  // - MarkerType::StreamJSONMarkerData(aWriter, const int&, const char&).
+  // Prototyping on godbolt showed that clang and gcc can flatten these
+  // recursive calls into one function with successive reads followed by the one
+  // stream call; tested up to 40 arguments: https://godbolt.org/z/5KeeM4
+  template <size_t i = 0, typename... Args>
+  static void DeserializeArguments(ProfileBufferEntryReader& aEntryReader,
+                                   baseprofiler::SpliceableJSONWriter& aWriter,
+                                   const Args&... aArgs) {
+    static_assert(sizeof...(Args) == i,
+                  "We should have collected `i` arguments so far");
+    if constexpr (i < scStreamFunctionParameterCount) {
+      // Deserialize the i-th argument on this stack.
+      auto argument = aEntryReader.ReadObject<StreamFunctionParameter<i>>();
+      // Add our local argument to the next recursive call.
+      DeserializeArguments<i + 1>(aEntryReader, aWriter, aArgs..., argument);
+    } else {
+      // We've read all the arguments, finally call the `StreamJSONMarkerData`
+      // function, which should write the appropriate JSON elements for this
+      // marker type. Note that the MarkerType-specific "type" element was
+      // already written by `Deserialize()`.
+      MarkerType::StreamJSONMarkerData(aWriter, aArgs...);
+    }
+  }
+
+ public:
+  static void Deserialize(ProfileBufferEntryReader& aEntryReader,
+                          baseprofiler::SpliceableJSONWriter& aWriter) {
+    aWriter.StringProperty("type", MarkerType::MarkerTypeName());
+    DeserializeArguments(aEntryReader, aWriter);
+  }
+};
+
+template <>
+struct MarkerTypeSerialization<::mozilla::baseprofiler::markers::NoPayload> {
+  // Intentionally empty: NoPayload is special-cased, avoiding payload work.
+};
+
+template <typename MarkerType, typename... Ts>
+static ProfileBufferBlockIndex AddMarkerWithOptionalStackToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+    const Ts&... aTs) {
+  if constexpr (std::is_same_v<MarkerType,
+                               ::mozilla::baseprofiler::markers::NoPayload>) {
+    static_assert(sizeof...(Ts) == 0,
+                  "NoPayload does not accept any payload arguments.");
+    // Special case for NoPayload where there is a stack or inner window id:
+    // Because these options would be stored in the payload 'data' object, but
+    // there is no such object for NoPayload, we convert the marker to another
+    // type (without user fields in the 'data' object), so that the stack and/or
+    // inner window id are not lost.
+    // TODO: Remove this when bug 1646714 lands.
+    if (aOptions.Stack().GetChunkedBuffer() ||
+        !aOptions.InnerWindowId().IsUnspecified()) {
+      struct NoPayloadUserData {
+        static constexpr Span<const char> MarkerTypeName() {
+          return MakeStringSpan("NoPayloadUserData");
+        }
+        static void StreamJSONMarkerData(
+            baseprofiler::SpliceableJSONWriter& aWriter) {
+          // No user payload, so nothing is written to `aWriter` here.
+        }
+        static mozilla::MarkerSchema MarkerTypeDisplay() {
+          using MS = mozilla::MarkerSchema;
+          MS schema{MS::Location::markerChart, MS::Location::markerTable};
+          // No user data to display.
+          return schema;
+        }
+      };
+      return MarkerTypeSerialization<NoPayloadUserData>::Serialize(
+          aBuffer, aName, aCategory, std::move(aOptions));
+    }
+
+    // No payload: tag 0 is written last, with nothing after it. Options are
+    // first after the entry kind, as their thread id is used to filter markers.
+    return aBuffer.PutObjects(
+        ProfileBufferEntryKind::Marker, std::move(aOptions), aName, aCategory,
+        base_profiler_markers_detail::Streaming::DeserializerTag(0));
+  } else {
+    return MarkerTypeSerialization<MarkerType>::Serialize(
+        aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+  }
+}
+
+// Pointer to a function that can capture a backtrace into the provided
+// `ProfileChunkedBuffer`, and returns true when successful.
+using BacktraceCaptureFunction = bool (*)(ProfileChunkedBuffer&);
+
+// Add a marker with the given name, options, and arguments to the given buffer.
+// Because this may be called from either Base or Gecko Profiler functions, the
+// appropriate backtrace-capturing function must also be provided.
+template <typename MarkerType, typename... Ts>
+ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+    BacktraceCaptureFunction aBacktraceCaptureFunction, const Ts&... aTs) {
+  if (aOptions.ThreadId().IsUnspecified()) {
+    // If yet unspecified, set thread to this thread where the marker is added.
+    aOptions.Set(MarkerThreadId::CurrentThread());
+  }
+
+  if (aOptions.IsTimingUnspecified()) {
+    // If yet unspecified, set timing to this instant of adding the marker.
+    aOptions.Set(MarkerTiming::InstantNow());
+  }
+
+  if (aOptions.Stack().IsCaptureNeeded()) {
+    // A capture was requested, attempt it here and now into a local buffer.
+    // This avoids a lot of allocations that would be necessary if capturing a
+    // backtrace separately.
+    // TODO use a local on-stack byte buffer to remove last allocation.
+    // TODO reduce internal profiler stack levels, see bug 1659872.
+    ProfileBufferChunkManagerSingle chunkManager(
+        ProfileBufferChunkManager::scExpectedMaximumStackSize);
+    ProfileChunkedBuffer chunkedBuffer(
+        ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager);
+    aOptions.StackRef().UseRequestedBacktrace(
+        aBacktraceCaptureFunction(chunkedBuffer) ? &chunkedBuffer : nullptr);
+    // Must be called here: aOptions may now point at the local chunkedBuffer.
+    return AddMarkerWithOptionalStackToBuffer<MarkerType>(
+        aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+  }
+
+  return AddMarkerWithOptionalStackToBuffer<MarkerType>(
+      aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+}
+
+template <typename StackCallback>
+[[nodiscard]] bool DeserializeAfterKindAndStream(
+    ProfileBufferEntryReader& aEntryReader,
+    baseprofiler::SpliceableJSONWriter& aWriter, int aThreadIdOrZero,
+    StackCallback&& aStackCallback) {
+  // Each entry is made up of the following:
+  //   ProfileBufferEntry::Kind::Marker, <- already read by caller
+  //   options,                          <- next location in entries
+  //   name, category, deserializer tag,
+  //   payload                           <- only read if the tag is not 0
+  const MarkerOptions options = aEntryReader.ReadObject<MarkerOptions>();
+  if (aThreadIdOrZero != 0 &&
+      options.ThreadId().ThreadId() != aThreadIdOrZero) {
+    // A specific thread is being read, we're not in it.
+    return false;
+  }
+  // Write the information to JSON with the following schema:
+  // [name, startTime, endTime, phase, category, data (only if tag != 0)]
+  aWriter.StartArrayElement();
+  {
+    aWriter.UniqueStringElement(aEntryReader.ReadObject<ProfilerString8View>());
+
+    const double startTime = options.Timing().GetStartTime();
+    aWriter.DoubleElement(startTime);
+
+    const double endTime = options.Timing().GetEndTime();
+    aWriter.DoubleElement(endTime);
+
+    aWriter.IntElement(static_cast<int64_t>(options.Timing().MarkerPhase()));
+
+    MarkerCategory category = aEntryReader.ReadObject<MarkerCategory>();
+    aWriter.IntElement(static_cast<int64_t>(category.GetCategory()));
+
+    if (const auto tag =
+            aEntryReader.ReadObject<mozilla::base_profiler_markers_detail::
+                                        Streaming::DeserializerTag>();
+        tag != 0) {
+      aWriter.StartObjectElement(JSONWriter::SingleLineStyle);
+      {
+        // Stream "common props".
+
+        // TODO: Move this to top-level tuple, when frontend supports it.
+        if (!options.InnerWindowId().IsUnspecified()) {
+          // Here, we are converting uint64_t to double. Both Browsing Context
+          // and Inner Window IDs are created using
+          // `nsContentUtils::GenerateProcessSpecificId`, which is specifically
+          // designed to only use 53 of the 64 bits to be lossless when passed
+          // into and out of JS as a double.
+          aWriter.DoubleProperty(
+              "innerWindowID",
+              static_cast<double>(options.InnerWindowId().Id()));
+        }
+
+        // TODO: Move this to top-level tuple, when frontend supports it.
+        if (ProfileChunkedBuffer* chunkedBuffer =
+                options.Stack().GetChunkedBuffer();
+            chunkedBuffer) {
+          aWriter.StartObjectProperty("stack");
+          { std::forward<StackCallback>(aStackCallback)(*chunkedBuffer); }
+          aWriter.EndObject();
+        }
+
+        // Stream the payload, including the type.
+        mozilla::base_profiler_markers_detail::Streaming::MarkerDataDeserializer
+            deserializer = mozilla::base_profiler_markers_detail::Streaming::
+                DeserializerForTag(tag);
+        MOZ_RELEASE_ASSERT(deserializer);
+        deserializer(aEntryReader, aWriter);
+      }
+      aWriter.EndObject();
+    }
+  }
+  aWriter.EndArray();
+  return true;
+}
+
+}  // namespace mozilla::base_profiler_markers_detail
+
+namespace mozilla {
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: ProfilerStringView<CHAR>
+
+// The serialization starts with a ULEB128 number that encodes both whether the
+// ProfilerStringView is literal (Least Significant Bit = 0) or not (LSB = 1),
+// plus the string length (excluding null terminator) in bytes, shifted left by
+// 1 bit. Following that number:
+// - If literal, the string pointer value.
+// - If non-literal, the contents as bytes (excluding null terminator if any).
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<ProfilerStringView<CHAR>> {
+  static Length Bytes(const ProfilerStringView<CHAR>& aString) {
+    MOZ_RELEASE_ASSERT(
+        aString.Length() < std::numeric_limits<Length>::max() / 2,
+        "Double the string length doesn't fit in Length type");
+    const Length length = static_cast<Length>(aString.Length());
+    if (!aString.IsLiteral()) {
+      // Non-literal -> ULEB128 of (length << 1 | 1), then the string bytes.
+      return ULEB128Size((length << 1) | 1u) + length * sizeof(CHAR);
+    }
+    // Literal -> ULEB128 of (length << 1 | 0), then the pointer value.
+    return ULEB128Size((length << 1) | 0u) +
+           static_cast<ProfileChunkedBuffer::Length>(sizeof(const CHAR*));
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfilerStringView<CHAR>& aString) {
+    MOZ_RELEASE_ASSERT(
+        aString.Length() < std::numeric_limits<Length>::max() / 2,
+        "Double the string length doesn't fit in Length type");
+    const Length length = static_cast<Length>(aString.Length());
+    if (!aString.IsLiteral()) {
+      // Non-literal -> Length shifted left with LSB=1, then the string bytes.
+      aEW.WriteULEB128((length << 1) | 1u);
+      aEW.WriteBytes(aString.Data(), length * sizeof(CHAR));
+      return;
+    }
+    // Literal -> Length shifted left with LSB=0, then only the pointer.
+    aEW.WriteULEB128((length << 1) | 0u);
+    aEW.WriteObject(WrapProfileBufferRawPointer(aString.Data()));
+  }
+};
+
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<ProfilerStringView<CHAR>> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfilerStringView<CHAR>& aString) {
+    const Length header = aER.ReadULEB128<Length>();
+    const Length length = header >> 1;
+    if ((header & 1u) != 0u) {
+      // LSB==1 -> Not a literal string: its characters follow. Copy them into
+      // a new buffer (plus terminal, for safety) that is given to the
+      // ProfilerStringView. Note that this is a secret use of
+      // ProfilerStringView, only intended between deserialization and JSON
+      // streaming.
+      CHAR* chars = new CHAR[length + 1];
+      aER.ReadBytes(chars, length * sizeof(CHAR));
+      chars[length] = CHAR(0);
+      aString.mStringView = std::basic_string_view<CHAR>(chars, length);
+      aString.mOwnership =
+          ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView;
+      return;
+    }
+    // LSB==0 -> Literal string: only its pointer was stored, read it back.
+    aString.mStringView =
+        std::basic_string_view<CHAR>(aER.ReadObject<const CHAR*>(), length);
+    aString.mOwnership = ProfilerStringView<CHAR>::Ownership::Literal;
+  }
+
+  static ProfilerStringView<CHAR> Read(ProfileBufferEntryReader& aER) {
+    const Length header = aER.ReadULEB128<Length>();
+    const Length length = header >> 1;
+    if ((header & 1u) != 0u) {
+      // LSB==1 -> Not a literal string: copy the characters that follow into
+      // a new buffer (plus terminal, for safety), and give it to the returned
+      // ProfilerStringView. This is a secret use of ProfilerStringView, only
+      // intended between deserialization and JSON streaming.
+      CHAR* chars = new CHAR[length + 1];
+      aER.ReadBytes(chars, length * sizeof(CHAR));
+      chars[length] = CHAR(0);
+      return ProfilerStringView<CHAR>(
+          chars, length,
+          ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView);
+    }
+    // LSB==0 -> Literal string: only its pointer was stored, read it back.
+    return ProfilerStringView<CHAR>(
+        aER.ReadObject<const CHAR*>(), length,
+        ProfilerStringView<CHAR>::Ownership::Literal);
+  }
+};
+
+// Serializer, Deserializer: MarkerCategory
+
+// The serialization contains the category pair value, encoded as ULEB128.
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerCategory> {
+  static Length Bytes(const MarkerCategory& aCategory) {
+    return ULEB128Size(static_cast<uint32_t>(aCategory.CategoryPair()));
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const MarkerCategory& aCategory) {
+    aEW.WriteULEB128(static_cast<uint32_t>(aCategory.CategoryPair()));
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerCategory> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       MarkerCategory& aCategory) {
+    aCategory = Read(aER);  // Same decoding as `Read`, assigned in place.
+  }
+
+  static MarkerCategory Read(ProfileBufferEntryReader& aER) {
+    const uint32_t id = aER.ReadULEB128<uint32_t>();  // Category pair number.
+    return MarkerCategory(static_cast<baseprofiler::ProfilingCategoryPair>(id));
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerTiming
+
+// The serialization starts with the marker phase, followed by one or two
+// timestamps as needed.
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerTiming> {
+  static Length Bytes(const MarkerTiming& aTiming) {
+    MOZ_ASSERT(!aTiming.IsUnspecified());
+    const auto phase = aTiming.MarkerPhase();
+    switch (phase) {
+      // Instant and IntervalStart are both stored with a start time only.
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        return SumBytes(phase, aTiming.StartTime());
+      case MarkerTiming::Phase::IntervalEnd:
+        return SumBytes(phase, aTiming.EndTime());
+      case MarkerTiming::Phase::Interval:
+        return SumBytes(phase, aTiming.StartTime(), aTiming.EndTime());
+      default:
+        MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+                           phase == MarkerTiming::Phase::Interval ||
+                           phase == MarkerTiming::Phase::IntervalStart ||
+                           phase == MarkerTiming::Phase::IntervalEnd);
+        return 0;  // Only to avoid build errors.
+    }
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const MarkerTiming& aTiming) {
+    MOZ_ASSERT(!aTiming.IsUnspecified());
+    const auto phase = aTiming.MarkerPhase();
+    switch (phase) {
+      // Instant and IntervalStart are both stored with a start time only.
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        aEW.WriteObjects(phase, aTiming.StartTime());
+        break;
+      // IntervalEnd is stored with an end time only.
+      case MarkerTiming::Phase::IntervalEnd:
+        aEW.WriteObjects(phase, aTiming.EndTime());
+        break;
+      case MarkerTiming::Phase::Interval:
+        aEW.WriteObjects(phase, aTiming.StartTime(), aTiming.EndTime());
+        break;
+      default:
+        MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+                           phase == MarkerTiming::Phase::Interval ||
+                           phase == MarkerTiming::Phase::IntervalStart ||
+                           phase == MarkerTiming::Phase::IntervalEnd);
+        break;
+    }
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerTiming> {
+  static void ReadInto(ProfileBufferEntryReader& aER, MarkerTiming& aTiming) {
+    aTiming.mPhase = aER.ReadObject<MarkerTiming::Phase>();
+    switch (aTiming.mPhase) {
+      // Instant and IntervalStart were stored with a start time only; the end
+      // time is reset to a default-constructed TimeStamp.
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        aTiming.mStartTime = aER.ReadObject<TimeStamp>();
+        aTiming.mEndTime = TimeStamp{};
+        break;
+      // IntervalEnd was stored with an end time only.
+      case MarkerTiming::Phase::IntervalEnd:
+        aTiming.mStartTime = TimeStamp{};
+        aTiming.mEndTime = aER.ReadObject<TimeStamp>();
+        break;
+      case MarkerTiming::Phase::Interval:
+        aTiming.mStartTime = aER.ReadObject<TimeStamp>();
+        aTiming.mEndTime = aER.ReadObject<TimeStamp>();
+        break;
+      default:
+        MOZ_RELEASE_ASSERT(aTiming.mPhase == MarkerTiming::Phase::Instant ||
+                           aTiming.mPhase == MarkerTiming::Phase::Interval ||
+                           aTiming.mPhase ==
+                               MarkerTiming::Phase::IntervalStart ||
+                           aTiming.mPhase == MarkerTiming::Phase::IntervalEnd);
+        break;
+    }
+  }
+
+  static MarkerTiming Read(ProfileBufferEntryReader& aER) {
+    TimeStamp startTime;
+    TimeStamp endTime;
+    const auto phase = aER.ReadObject<MarkerTiming::Phase>();
+    switch (phase) {
+      // Instant and IntervalStart: start time only, `endTime` stays default.
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        startTime = aER.ReadObject<TimeStamp>();
+        break;
+      // IntervalEnd: end time only, `startTime` stays default.
+      case MarkerTiming::Phase::IntervalEnd:
+        endTime = aER.ReadObject<TimeStamp>();
+        break;
+      case MarkerTiming::Phase::Interval:
+        startTime = aER.ReadObject<TimeStamp>();
+        endTime = aER.ReadObject<TimeStamp>();
+        break;
+      default:
+        MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+                           phase == MarkerTiming::Phase::Interval ||
+                           phase == MarkerTiming::Phase::IntervalStart ||
+                           phase == MarkerTiming::Phase::IntervalEnd);
+        break;
+    }
+    return MarkerTiming(startTime, endTime, phase);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerStack
+
+// The serialization only contains the `ProfileChunkedBuffer` from the
+// backtrace; if there is no backtrace or if it's empty, this will implicitly
+// store a nullptr (see
+// `ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer*>`).
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerStack> {
+  static Length Bytes(const MarkerStack& aStack) {
+    return SumBytes(aStack.GetChunkedBuffer());
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW, const MarkerStack& aStack) {
+    aEW.WriteObject(aStack.GetChunkedBuffer());
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerStack> {
+  static void ReadInto(ProfileBufferEntryReader& aER, MarkerStack& aStack) {
+    aStack = Read(aER);  // Replace the stack with the deserialized one.
+  }
+
+  static MarkerStack Read(ProfileBufferEntryReader& aER) {
+    // The buffer is read back as a `UniquePtr`, given to the new MarkerStack.
+    return MarkerStack(aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>());
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerOptions
+
+// The serialization contains all members: thread id, timing, stack, and inner
+// window id (either trivially-copyable, or using a specialization above).
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerOptions> {
+  static Length Bytes(const MarkerOptions& aOptions) {
+    return SumBytes(aOptions.ThreadId(), aOptions.Timing(), aOptions.Stack(),
+                    aOptions.InnerWindowId());
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const MarkerOptions& aOptions) {
+    aEW.WriteObjects(aOptions.ThreadId(), aOptions.Timing(), aOptions.Stack(),
+                     aOptions.InnerWindowId());
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerOptions> {
+  static void ReadInto(ProfileBufferEntryReader& aER, MarkerOptions& aOptions) {
+    aER.ReadIntoObjects(aOptions.mThreadId, aOptions.mTiming, aOptions.mStack,
+                        aOptions.mInnerWindowId);  // Same order as written.
+  }
+
+  static MarkerOptions Read(ProfileBufferEntryReader& aER) {
+    MarkerOptions deserialized;  // Default options, filled in by `ReadInto`.
+    ReadInto(aER, deserialized);
+    return deserialized;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerMarkersDetail_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h
new file mode 100644
index 0000000000..aa85b41896
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h
@@ -0,0 +1,866 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains basic definitions required to create marker types, and
+// to add markers to the profiler buffers.
+//
+// In most cases, #include "mozilla/BaseProfilerMarkers.h" instead, or
+// #include "mozilla/BaseProfilerMarkerTypes.h" for common marker types.
+
+#ifndef BaseProfilerMarkersPrerequisites_h
+#define BaseProfilerMarkersPrerequisites_h
+
+#ifdef MOZ_GECKO_PROFILER
+
+#  include "BaseProfilingCategory.h"
+#  include "mozilla/Maybe.h"
+#  include "mozilla/ProfileChunkedBuffer.h"
+#  include "mozilla/TimeStamp.h"
+#  include "mozilla/UniquePtr.h"
+#  include "mozilla/Variant.h"
+
+#  include <initializer_list>
+#  include <string_view>
+#  include <string>
+#  include <type_traits>
+#  include <utility>
+#  include <vector>
+
+// TODO: Move common stuff to shared header instead.
+#  include "BaseProfiler.h"
+
+namespace mozilla {
+
+// Return a (never null) `const CHAR*` pointing at the literal empty string:
+// `""` for `char`, `u""` for `char16_t`. Other character types are rejected at
+// compile time.
+template <typename CHAR>
+constexpr const CHAR* LiteralEmptyStringPointer() {
+  static_assert(std::is_same_v<CHAR, char> || std::is_same_v<CHAR, char16_t>,
+                "Only char and char16_t are supported in Firefox");
+  // Thanks to the static_assert above, "not char16_t" can only mean `char`.
+  if constexpr (std::is_same_v<CHAR, char16_t>) {
+    return u"";
+  } else {
+    return "";
+  }
+}
+
+// Return a zero-length `std::basic_string_view<CHAR>` over the literal empty
+// string (`""` for `char`, `u""` for `char16_t`), so `data()` is never null.
+// Other character types are rejected at compile time.
+template <typename CHAR>
+constexpr std::basic_string_view<CHAR> LiteralEmptyStringView() {
+  static_assert(std::is_same_v<CHAR, char> || std::is_same_v<CHAR, char16_t>,
+                "Only char and char16_t are supported in Firefox");
+  // Bring in `operator""sv()` from <string_view>.
+  using namespace std::literals::string_view_literals;
+  // Thanks to the static_assert above, "not char16_t" can only mean `char`.
+  if constexpr (std::is_same_v<CHAR, char16_t>) {
+    return u""sv;
+  } else {
+    return ""sv;
+  }
+}
+
+// General string view, optimized for short on-stack life before serialization,
+// and between deserialization and JSON-streaming.
+template <typename CHAR>
+class MOZ_STACK_CLASS ProfilerStringView {
+ public:
+  // Default constructor points at "" (literal empty string).
+  constexpr ProfilerStringView() = default;
+
+  // Don't allow copy.
+  ProfilerStringView(const ProfilerStringView&) = delete;
+  ProfilerStringView& operator=(const ProfilerStringView&) = delete;
+
+  // Allow move.
+  // If the moved-from view owned its buffer, ownership is transferred and the
+  // moved-from view is reset to the literal "". Otherwise (literal or
+  // reference) both views end up pointing at the same outside string.
+  constexpr ProfilerStringView(ProfilerStringView&& aOther)
+      : mStringView(std::move(aOther.mStringView)),
+        mOwnership(aOther.mOwnership) {
+    if (mOwnership == Ownership::OwnedThroughStringView) {
+      // We now own the buffer, make the other point at the literal "".
+      aOther.mStringView = LiteralEmptyStringView<CHAR>();
+      aOther.mOwnership = Ownership::Literal;
+    }
+  }
+
+  // Move assignment, same transfer rules as the move constructor.
+  // A buffer owned by `*this` before the assignment is released first, and
+  // self-move is a no-op.
+  constexpr ProfilerStringView& operator=(ProfilerStringView&& aOther) {
+    if (this == &aOther) {
+      // Nothing to transfer. Going through the code below would reset an
+      // owning view to "" without freeing its buffer.
+      return *this;
+    }
+    if (mOwnership == Ownership::OwnedThroughStringView) {
+      // Release the buffer we currently own before overwriting the only
+      // pointer to it. Must use the same form of `delete` as the destructor.
+      delete mStringView.data();
+    }
+    mStringView = std::move(aOther.mStringView);
+    mOwnership = aOther.mOwnership;
+    if (mOwnership == Ownership::OwnedThroughStringView) {
+      // We now own the buffer, make the other point at the literal "".
+      aOther.mStringView = LiteralEmptyStringView<CHAR>();
+      aOther.mOwnership = Ownership::Literal;
+    }
+    return *this;
+  }
+
+  ~ProfilerStringView() {
+    if (MOZ_UNLIKELY(mOwnership == Ownership::OwnedThroughStringView)) {
+      // We own the buffer pointed at by mStringView, destroy it.
+      // This is only used between deserialization and streaming.
+      // NOTE(review): the allocation is done by the deserializer, which is not
+      // visible from this header. If it allocates with `new CHAR[...]`, this
+      // (and the matching release in the move assignment) must be `delete[]`
+      // -- TODO confirm.
+      delete mStringView.data();
+    }
+  }
+
+  // Implicit construction from nullptr, points at "" (literal empty string).
+  constexpr MOZ_IMPLICIT ProfilerStringView(decltype(nullptr)) {}
+
+  // Implicit constructor from a literal string.
+  // `Np1` is the array size, i.e., the string length plus 1 for the null
+  // terminator.
+  template <size_t Np1>
+  constexpr MOZ_IMPLICIT ProfilerStringView(const CHAR (&aLiteralString)[Np1])
+      : ProfilerStringView(aLiteralString, Np1 - 1, Ownership::Literal) {}
+
+  // Constructor from a non-literal string. The string is only referenced, it
+  // must outlive this ProfilerStringView. A null `aString` results in "".
+  constexpr ProfilerStringView(const CHAR* aString, size_t aLength)
+      : ProfilerStringView(aString, aLength, Ownership::Reference) {}
+
+  // Implicit constructor from a string_view. The pointed-at string is only
+  // referenced, it must outlive this ProfilerStringView.
+  constexpr MOZ_IMPLICIT ProfilerStringView(
+      const std::basic_string_view<CHAR>& aStringView)
+      : ProfilerStringView(aStringView.data(), aStringView.length(),
+                           Ownership::Reference) {}
+
+  // Implicit constructor from an expiring string_view. We assume that the
+  // pointed-at string will outlive this ProfilerStringView.
+  constexpr MOZ_IMPLICIT ProfilerStringView(
+      std::basic_string_view<CHAR>&& aStringView)
+      : ProfilerStringView(aStringView.data(), aStringView.length(),
+                           Ownership::Reference) {}
+
+  // Implicit constructor from std::string. The string is only referenced, it
+  // must outlive this ProfilerStringView.
+  constexpr MOZ_IMPLICIT ProfilerStringView(
+      const std::basic_string<CHAR>& aString)
+      : ProfilerStringView(aString.data(), aString.length(),
+                           Ownership::Reference) {}
+
+  // Construction from a raw pointer to a null-terminated string.
+  // This is a named class-static function to make it more obvious where work is
+  // being done (to determine the string length), and encourage users to instead
+  // provide a length, if already known.
+  // A null `aString` results in "".
+  // TODO: Find callers and convert them to constructor instead if possible.
+  static constexpr ProfilerStringView WrapNullTerminatedString(
+      const CHAR* aString) {
+    return ProfilerStringView(
+        aString, aString ? std::char_traits<CHAR>::length(aString) : 0,
+        Ownership::Reference);
+  }
+
+  // Implicit constructor for an object with member functions `Data()`,
+  // `Length()`, and `IsLiteral()`, common in xpcom strings.
+  // Only participates in overload resolution if `Data()` is convertible to
+  // `const CHAR*`, `Length()` returns an integral type, and `IsLiteral()`
+  // returns exactly `bool`.
+  template <
+      typename String,
+      typename DataReturnType = decltype(std::declval<const String>().Data()),
+      typename LengthReturnType =
+          decltype(std::declval<const String>().Length()),
+      typename IsLiteralReturnType =
+          decltype(std::declval<const String>().IsLiteral()),
+      typename =
+          std::enable_if_t<std::is_convertible_v<DataReturnType, const CHAR*> &&
+                           std::is_integral_v<LengthReturnType> &&
+                           std::is_same_v<IsLiteralReturnType, bool>>>
+  constexpr MOZ_IMPLICIT ProfilerStringView(const String& aString)
+      : ProfilerStringView(
+            static_cast<const CHAR*>(aString.Data()), aString.Length(),
+            aString.IsLiteral() ? Ownership::Literal : Ownership::Reference) {}
+
+  // Access to the underlying string_view. Its `data()` is never null.
+  [[nodiscard]] constexpr const std::basic_string_view<CHAR>& StringView()
+      const {
+    return mStringView;
+  }
+
+  // Pointer to the first character (never null), and number of characters.
+  [[nodiscard]] constexpr const CHAR* Data() const {
+    return mStringView.data();
+  }
+
+  [[nodiscard]] constexpr size_t Length() const { return mStringView.length(); }
+
+  [[nodiscard]] constexpr bool IsLiteral() const {
+    return mOwnership == Ownership::Literal;
+  }
+  [[nodiscard]] constexpr bool IsReference() const {
+    return mOwnership == Ownership::Reference;
+  }
+  // No `IsOwned...()` because it's a secret, only used internally!
+
+  // Implicit conversion to a Span over the same characters.
+  [[nodiscard]] operator Span<const CHAR>() const {
+    return Span<const CHAR>(Data(), Length());
+  }
+
+ private:
+  // - Literal: points at a string literal.
+  // - Reference: points at an outside string that we don't own.
+  // - OwnedThroughStringView: we own the pointed-at buffer, and must free it.
+  enum class Ownership { Literal, Reference, OwnedThroughStringView };
+
+  // Allow deserializer to store anything here.
+  friend ProfileBufferEntryReader::Deserializer<ProfilerStringView>;
+
+  // Full constructor, all other constructors delegate to it.
+  // A null `aString` is replaced with the literal "" (and `aLength` and
+  // `aOwnership` are then ignored), so that `mStringView.data()` is never null.
+  constexpr ProfilerStringView(const CHAR* aString, size_t aLength,
+                               Ownership aOwnership)
+      : mStringView(aString ? std::basic_string_view<CHAR>(aString, aLength)
+                            : LiteralEmptyStringView<CHAR>()),
+        mOwnership(aString ? aOwnership : Ownership::Literal) {}
+
+  // String view to an outside string (literal or reference).
+  // We may actually own the pointed-at buffer, but it is only used internally
+  // between deserialization and JSON streaming.
+  std::basic_string_view<CHAR> mStringView = LiteralEmptyStringView<CHAR>();
+
+  Ownership mOwnership = Ownership::Literal;
+};
+
+using ProfilerString8View = ProfilerStringView<char>;
+using ProfilerString16View = ProfilerStringView<char16_t>;
+
+// This compulsory marker parameter contains the required category information.
+class MarkerCategory {
+ public:
+  // Wrap a category pair, which identifies both the super-category and the
+  // sub-category of a marker.
+  constexpr explicit MarkerCategory(
+      baseprofiler::ProfilingCategoryPair aCategoryPair)
+      : mCategoryPair{aCategoryPair} {}
+
+  // The category pair given at construction.
+  constexpr baseprofiler::ProfilingCategoryPair CategoryPair() const {
+    return mCategoryPair;
+  }
+
+  // The super-category, looked up from the information associated with the
+  // stored category pair.
+  baseprofiler::ProfilingCategory GetCategory() const {
+    const auto& pairInfo = GetProfilingCategoryPairInfo(mCategoryPair);
+    return pairInfo.mCategory;
+  }
+
+ private:
+  baseprofiler::ProfilingCategoryPair mCategoryPair =
+      baseprofiler::ProfilingCategoryPair::OTHER;
+};
+
+namespace baseprofiler::category {
+
+// Each category pair name constructs a MarkerCategory.
+// E.g.: mozilla::baseprofiler::category::OTHER_Profiling
+// Profiler macros will take the category name alone without namespace.
+// E.g.: `PROFILER_MARKER_UNTYPED("name", OTHER_Profiling)`
+//
+// The constants are generated by passing the three helper macros below to
+// `MOZ_PROFILING_CATEGORY_LIST`: the BEGIN and END macros expand to nothing,
+// and the SUBCATEGORY macro defines one `MarkerCategory` constant named after
+// the corresponding `ProfilingCategoryPair` enumerator.
+#  define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color)
+#  define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) \
+    static constexpr MarkerCategory name{ProfilingCategoryPair::name};
+#  define CATEGORY_ENUM_END_CATEGORY
+MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY,
+                            CATEGORY_ENUM_SUBCATEGORY,
+                            CATEGORY_ENUM_END_CATEGORY)
+// The helper macros are only needed for the expansion above.
+#  undef CATEGORY_ENUM_BEGIN_CATEGORY
+#  undef CATEGORY_ENUM_SUBCATEGORY
+#  undef CATEGORY_ENUM_END_CATEGORY
+
+// Import `MarkerCategory` into this namespace. This will allow using this type
+// dynamically in macros that prepend `::mozilla::baseprofiler::category::` to
+// the given category, e.g.:
+// `PROFILER_MARKER_UNTYPED("name", MarkerCategory(...))`
+using MarkerCategory = ::mozilla::MarkerCategory;
+
+}  // namespace baseprofiler::category
+
+// The classes below are all embedded in a `MarkerOptions` object.
+class MarkerOptions;
+
+// This marker option captures a given thread id.
+// If left unspecified (by default construction) during the add-marker call, the
+// current thread id will be used then.
+class MarkerThreadId {
+ public:
+  // By default the thread id is left unspecified (stored as 0).
+  constexpr MarkerThreadId() = default;
+
+  // Record the given thread id. Passing 0 is the same as leaving the thread id
+  // unspecified.
+  constexpr explicit MarkerThreadId(int aThreadId) : mThreadId{aThreadId} {}
+
+  // Option holding the id of the thread that calls this function.
+  static MarkerThreadId CurrentThread() {
+    const int currentThreadId = baseprofiler::profiler_current_thread_id();
+    return MarkerThreadId(currentThreadId);
+  }
+
+  // Option holding the id of the main thread. This can be useful to record a
+  // marker from a possibly-unregistered thread, and display it in the main
+  // thread track.
+  static MarkerThreadId MainThread() {
+    const int mainThreadId = baseprofiler::profiler_main_thread_id();
+    return MarkerThreadId(mainThreadId);
+  }
+
+  // The stored thread id, 0 if unspecified.
+  [[nodiscard]] constexpr int ThreadId() const { return mThreadId; }
+
+  // True if no thread id has been provided.
+  [[nodiscard]] constexpr bool IsUnspecified() const { return mThreadId == 0; }
+
+ private:
+  // 0 means "unspecified".
+  int mThreadId = 0;
+};
+
+// This marker option contains marker timing information.
+// This class encapsulates the logic for correctly storing a marker based on its
+// timing phase (instant, interval, interval start, or interval end).
+// Use the static methods to create the MarkerTiming. This is a transient object
+// that is used to enforce the constraints on the valid combinations of the
+// data.
+class MarkerTiming {
+ public:
+  // Objects are created through the static factories below. Each factory sets
+  // the phase, and which of the start and end times are meaningful.
+
+  // Instant marker at the given (non-null) time, stored as the start time.
+  static MarkerTiming InstantAt(const TimeStamp& aTime) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an instant marker.");
+    return MarkerTiming(aTime, TimeStamp{}, Phase::Instant);
+  }
+
+  // Instant marker at the time of the call.
+  static MarkerTiming InstantNow() {
+    const TimeStamp now = TimeStamp::NowUnfuzzed();
+    return InstantAt(now);
+  }
+
+  // Complete interval between two (non-null) times.
+  static MarkerTiming Interval(const TimeStamp& aStartTime,
+                               const TimeStamp& aEndTime) {
+    MOZ_ASSERT(!aStartTime.IsNull(),
+               "Start time is null for an interval marker.");
+    MOZ_ASSERT(!aEndTime.IsNull(), "End time is null for an interval marker.");
+    return MarkerTiming(aStartTime, aEndTime, Phase::Interval);
+  }
+
+  // Complete interval from the given start time until the time of the call.
+  static MarkerTiming IntervalUntilNowFrom(const TimeStamp& aStartTime) {
+    const TimeStamp now = TimeStamp::NowUnfuzzed();
+    return Interval(aStartTime, now);
+  }
+
+  // Start of an interval (by default: now); the end time is left null.
+  static MarkerTiming IntervalStart(
+      const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval start marker.");
+    return MarkerTiming(aTime, TimeStamp{}, Phase::IntervalStart);
+  }
+
+  // End of an interval (by default: now); the start time is left null.
+  static MarkerTiming IntervalEnd(
+      const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker.");
+    return MarkerTiming(TimeStamp{}, aTime, Phase::IntervalEnd);
+  }
+
+  // Record the end time (by default: now) in this timing.
+  // The phase becomes a full interval if a start time was already present,
+  // otherwise it becomes an interval end.
+  void SetIntervalEnd(const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker.");
+    mEndTime = aTime;
+    if (mStartTime.IsNull()) {
+      mPhase = Phase::IntervalEnd;
+    } else {
+      mPhase = Phase::Interval;
+    }
+  }
+
+  // Raw timestamps; either may be null, depending on the phase.
+  [[nodiscard]] const TimeStamp& StartTime() const { return mStartTime; }
+  [[nodiscard]] const TimeStamp& EndTime() const { return mEndTime; }
+
+  // The numeric values are what `GetPhase()` returns.
+  enum class Phase : uint8_t {
+    Instant = 0,
+    Interval = 1,
+    IntervalStart = 2,
+    IntervalEnd = 3,
+  };
+
+  // Phase of this timing, which must have been specified.
+  [[nodiscard]] Phase MarkerPhase() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return mPhase;
+  }
+
+  // The getters below produce the values that get stored in the buffer. They
+  // must only be called on a timing that has been specified.
+
+  // Start time in milliseconds since process creation, or 0.0 if the start
+  // time is null (e.g., for IntervalEnd).
+  [[nodiscard]] double GetStartTime() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return MarkerTiming::timeStampToDouble(mStartTime);
+  }
+
+  // End time in milliseconds since process creation, or 0.0 if the end time is
+  // null (e.g., for Instant or IntervalStart).
+  [[nodiscard]] double GetEndTime() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return MarkerTiming::timeStampToDouble(mEndTime);
+  }
+
+  // Phase as its underlying integer value.
+  [[nodiscard]] uint8_t GetPhase() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return static_cast<uint8_t>(mPhase);
+  }
+
+ private:
+  friend ProfileBufferEntryWriter::Serializer<MarkerTiming>;
+  friend ProfileBufferEntryReader::Deserializer<MarkerTiming>;
+  friend MarkerOptions;
+
+  // A default-constructed timing is internally "unspecified" (both times are
+  // null). The code that uses it is expected to substitute `InstantNow()`;
+  // that substitution is not done in this class.
+  constexpr MarkerTiming() = default;
+
+  // This should only be used by internal profiler code.
+  // A timing is unspecified when neither the start nor the end time is set.
+  [[nodiscard]] bool IsUnspecified() const {
+    return mStartTime.IsNull() && mEndTime.IsNull();
+  }
+
+  // Member-wise constructor, used by the static factory functions.
+  constexpr MarkerTiming(const TimeStamp& aStartTime, const TimeStamp& aEndTime,
+                         Phase aPhase)
+      : mStartTime(aStartTime), mEndTime(aEndTime), mPhase(aPhase) {}
+
+  // Convert a timestamp to milliseconds since process creation.
+  // A null timestamp gives 0; the phase tells readers not to use that value.
+  static double timeStampToDouble(const TimeStamp& time) {
+    if (!time.IsNull()) {
+      return (time - TimeStamp::ProcessCreation()).ToMilliseconds();
+    }
+    return 0;
+  }
+
+  TimeStamp mStartTime;
+  TimeStamp mEndTime;
+  Phase mPhase = Phase::Instant;
+};
+
+// This marker option allows three cases:
+// - By default, no stacks are captured.
+// - The caller can request a stack capture, and the add-marker code will take
+//   care of it in the most efficient way.
+// - The caller can still provide an existing backtrace, for cases where a
+//   marker reports something that happened elsewhere.
+class MarkerStack {
+ public:
+  // By default: no capture requested, and no backtrace.
+  constexpr MarkerStack() = default;
+
+  // Not copyable.
+  MarkerStack(const MarkerStack&) = delete;
+  MarkerStack& operator=(const MarkerStack&) = delete;
+
+  // Movable: the request flag, the owned storage (if any) and the buffer
+  // pointer are all transferred, and the moved-from object is cleared.
+  MarkerStack(MarkerStack&& aOther)
+      : mIsCaptureRequested(aOther.mIsCaptureRequested),
+        mOptionalChunkedBufferStorage(
+            std::move(aOther.mOptionalChunkedBufferStorage)),
+        mChunkedBuffer(aOther.mChunkedBuffer) {
+    AssertInvariants();
+    aOther.Clear();
+  }
+  MarkerStack& operator=(MarkerStack&& aOther) {
+    mIsCaptureRequested = aOther.mIsCaptureRequested;
+    mOptionalChunkedBufferStorage =
+        std::move(aOther.mOptionalChunkedBufferStorage);
+    mChunkedBuffer = aOther.mChunkedBuffer;
+    AssertInvariants();
+    aOther.Clear();
+    return *this;
+  }
+
+  // Take ownership of a backtrace. If null or empty, equivalent to NoStack()
+  // (and in that case the given pointer is not moved-from).
+  // Note that mChunkedBuffer is initialized from the storage member, which is
+  // declared (and therefore initialized) before it.
+  explicit MarkerStack(UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer)
+      : mIsCaptureRequested(false),
+        mOptionalChunkedBufferStorage(
+            (aExternalChunkedBuffer && !aExternalChunkedBuffer->IsEmpty())
+                ? std::move(aExternalChunkedBuffer)
+                : nullptr),
+        mChunkedBuffer(mOptionalChunkedBufferStorage.get()) {
+    AssertInvariants();
+  }
+
+  // Point at an existing backtrace stored elsewhere, which the user must
+  // guarantee is alive during the add-marker call. If empty, equivalent to
+  // NoStack().
+  explicit MarkerStack(ProfileChunkedBuffer& aExternalChunkedBuffer)
+      : mIsCaptureRequested(false),
+        mChunkedBuffer(!aExternalChunkedBuffer.IsEmpty()
+                           ? &aExternalChunkedBuffer
+                           : nullptr) {
+    AssertInvariants();
+  }
+
+  // Don't capture a stack in this marker.
+  static MarkerStack NoStack() { return MarkerStack(false); }
+
+  // Request a stack capture when adding this marker.
+  static MarkerStack Capture() {
+    // Only the request is recorded here; the actual capture will be handled
+    // inside profiler_add_marker.
+    return MarkerStack(true);
+  }
+
+  // Request a stack capture only if `aDoCapture` is true, useful for avoiding
+  // long-winded ternaries.
+  static MarkerStack MaybeCapture(bool aDoCapture) {
+    return MarkerStack(aDoCapture);
+  }
+
+  // Point at an existing backtrace stored elsewhere, which the user must
+  // guarantee is alive during the add-marker call. If empty, equivalent to
+  // NoStack().
+  static MarkerStack UseBacktrace(
+      ProfileChunkedBuffer& aExternalChunkedBuffer) {
+    return MarkerStack(aExternalChunkedBuffer);
+  }
+
+  // Take ownership of a backtrace previously captured with
+  // `profiler_capture_backtrace()`. If null or empty, equivalent to NoStack().
+  static MarkerStack TakeBacktrace(
+      UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer) {
+    return MarkerStack(std::move(aExternalChunkedBuffer));
+  }
+
+  [[nodiscard]] bool IsCaptureNeeded() const {
+    // A pending request never coexists with a backtrace (see
+    // AssertInvariants()), so the request flag alone gives the answer.
+    return mIsCaptureRequested;
+  }
+
+  // The backtrace to use, or null if there is none.
+  ProfileChunkedBuffer* GetChunkedBuffer() const { return mChunkedBuffer; }
+
+  // Fulfill a pending capture request with the given backtrace, which is only
+  // pointed at, not owned. If null or empty, equivalent to NoStack().
+  // It is a fatal error to call this when no capture was requested.
+  void UseRequestedBacktrace(ProfileChunkedBuffer* aExternalChunkedBuffer) {
+    MOZ_RELEASE_ASSERT(IsCaptureNeeded());
+    mIsCaptureRequested = false;
+    // We only need to point at the provided buffer if it is not empty.
+    const bool hasBacktrace =
+        aExternalChunkedBuffer && !aExternalChunkedBuffer->IsEmpty();
+    if (hasBacktrace) {
+      mChunkedBuffer = aExternalChunkedBuffer;
+    }
+    AssertInvariants();
+  }
+
+  // Go back to the default state: no capture requested, no backtrace. An owned
+  // backtrace is destroyed.
+  void Clear() {
+    mIsCaptureRequested = false;
+    mOptionalChunkedBufferStorage.reset();
+    mChunkedBuffer = nullptr;
+    AssertInvariants();
+  }
+
+ private:
+  // Used by the static factories: only records whether a capture is requested.
+  explicit MarkerStack(bool aIsCaptureRequested)
+      : mIsCaptureRequested(aIsCaptureRequested) {
+    AssertInvariants();
+  }
+
+  // This should be called after every constructor and non-const function.
+  // The checks are only compiled in DEBUG builds.
+  void AssertInvariants() const {
+#  ifdef DEBUG
+    if (mIsCaptureRequested) {
+      // A pending request excludes any backtrace, owned or pointed-at.
+      MOZ_ASSERT(!mOptionalChunkedBufferStorage,
+                 "We should not hold a buffer when capture is requested");
+      MOZ_ASSERT(!mChunkedBuffer,
+                 "We should not point at a buffer when capture is requested");
+      return;
+    }
+    if (mOptionalChunkedBufferStorage) {
+      MOZ_ASSERT(mChunkedBuffer == mOptionalChunkedBufferStorage.get(),
+                 "Non-null mOptionalChunkedBufferStorage must be pointed-at "
+                 "by mChunkedBuffer");
+    }
+    if (mChunkedBuffer) {
+      MOZ_ASSERT(!mChunkedBuffer->IsEmpty(),
+                 "Non-null mChunkedBuffer must not be empty");
+    }
+#  endif  // DEBUG
+  }
+
+  // True if a capture is requested when marker is added to the profile buffer.
+  bool mIsCaptureRequested = false;
+
+  // Optional storage for the backtrace, in case it was captured before the
+  // add-marker call.
+  // This must stay declared before mChunkedBuffer, which one constructor
+  // initializes from it.
+  UniquePtr<ProfileChunkedBuffer> mOptionalChunkedBufferStorage;
+
+  // If not null, this points to the backtrace. It may point to a backtrace
+  // temporarily stored on the stack, or to mOptionalChunkedBufferStorage.
+  ProfileChunkedBuffer* mChunkedBuffer = nullptr;
+};
+
+// This marker option captures a given inner window id.
+class MarkerInnerWindowId {
+ public:
+  // Default constructor, it leaves the id unspecified.
+  constexpr MarkerInnerWindowId() = default;
+
+  // Constructor with a specified inner window id.
+  // Passing 0 is the same as leaving the id unspecified.
+  constexpr explicit MarkerInnerWindowId(uint64_t i) : mInnerWindowId(i) {}
+
+  // Constructor with either specified inner window id or Nothing.
+  // Nothing leaves the id unspecified.
+  constexpr explicit MarkerInnerWindowId(const Maybe<uint64_t>& i)
+      : mInnerWindowId(i.valueOr(scNoId)) {}
+
+  // Explicit option with unspecified id.
+  constexpr static MarkerInnerWindowId NoId() { return MarkerInnerWindowId{}; }
+
+  // True if no id has been provided. `constexpr` like the rest of this class,
+  // so that it can also be used in constant expressions.
+  [[nodiscard]] constexpr bool IsUnspecified() const {
+    return mInnerWindowId == scNoId;
+  }
+
+  // The stored inner window id, 0 if unspecified.
+  [[nodiscard]] constexpr uint64_t Id() const { return mInnerWindowId; }
+
+ private:
+  // Value used to represent an unspecified id.
+  static constexpr uint64_t scNoId = 0;
+  uint64_t mInnerWindowId = scNoId;
+};
+
+// This class combines each of the possible marker options above.
+class MarkerOptions {
+ public:
+  // Constructor from individual options (including none).
+  // Implicit to allow `{}` and one option type as-is.
+  // Options that are not provided here are defaulted. In particular, timing
+  // defaults to `MarkerTiming::InstantNow()` when the marker is recorded.
+  // Each argument is forwarded, from left to right, to the `Set()` overload
+  // for its type. `Set()` only accepts rvalues, so each option must be a
+  // temporary or be `std::move`d.
+  template <typename... Options>
+  MOZ_IMPLICIT MarkerOptions(Options&&... aOptions) {
+    (Set(std::forward<Options>(aOptions)), ...);
+  }
+
+  // Disallow copy.
+  MarkerOptions(const MarkerOptions&) = delete;
+  MarkerOptions& operator=(const MarkerOptions&) = delete;
+
+  // Allow move.
+  MarkerOptions(MarkerOptions&&) = default;
+  MarkerOptions& operator=(MarkerOptions&&) = default;
+
+  // The embedded `MarkerTiming` hasn't been specified yet.
+  [[nodiscard]] bool IsTimingUnspecified() const {
+    return mTiming.IsUnspecified();
+  }
+
+  // Each option may be added in a chain by e.g.:
+  // `options.Set(MarkerThreadId(123)).Set(MarkerTiming::IntervalEnd())`.
+  // When passed to an add-marker function, it must be an rvalue, either created
+  // on the spot, or `std::move`d from storage, e.g.:
+  // `PROFILER_MARKER_UNTYPED("...", std::move(options).Set(...))`;
+  //
+  // Options can be read by their name (without "Marker"), e.g.: `o.ThreadId()`.
+  // Add "Ref" for a non-const reference, e.g.: `o.ThreadIdRef() = ...;`
+  //
+  // For a given NAME, the macro below generates:
+  // - `Set(MarkerNAME&&) &`: move the option in, return an lvalue reference to
+  //   this object, for chaining on a named object.
+  // - `Set(MarkerNAME&&) &&`: same, but return an rvalue reference, so that a
+  //   chain started on an rvalue stays an rvalue.
+  // - `NAME() const`: read-only access to the option.
+  // - `NAMERef()`: mutable access to the option.
+#  define FUNCTIONS_ON_MEMBER(NAME)                      \
+    MarkerOptions& Set(Marker##NAME&& a##NAME)& {        \
+      m##NAME = std::move(a##NAME);                      \
+      return *this;                                      \
+    }                                                    \
+                                                         \
+    MarkerOptions&& Set(Marker##NAME&& a##NAME)&& {      \
+      m##NAME = std::move(a##NAME);                      \
+      return std::move(*this);                           \
+    }                                                    \
+                                                         \
+    const Marker##NAME& NAME() const { return m##NAME; } \
+                                                         \
+    Marker##NAME& NAME##Ref() { return m##NAME; }
+
+  FUNCTIONS_ON_MEMBER(ThreadId);
+  FUNCTIONS_ON_MEMBER(Timing);
+  FUNCTIONS_ON_MEMBER(Stack);
+  FUNCTIONS_ON_MEMBER(InnerWindowId);
+#  undef FUNCTIONS_ON_MEMBER
+
+ private:
+  // The deserializer reads directly into the private members below.
+  friend ProfileBufferEntryReader::Deserializer<MarkerOptions>;
+
+  // One member per option type, all default-constructed unless set.
+  MarkerThreadId mThreadId;
+  MarkerTiming mTiming;
+  MarkerStack mStack;
+  MarkerInnerWindowId mInnerWindowId;
+};
+
+}  // namespace mozilla
+
+namespace mozilla::baseprofiler::markers {
+
+// Default marker payload type, carrying no extra information, not even a
+// marker type or payload. This is intended for label-only markers.
+struct NoPayload final {};
+
+}  // namespace mozilla::baseprofiler::markers
+
+namespace mozilla {
+
+class JSONWriter;
+
+// This class collects all the information necessary to stream the JSON schema
+// that informs the front-end how to display a type of markers.
+// It will be created and populated in `MarkerTypeDisplay()` functions in each
+// marker type definition, see Add/Set functions.
+class MarkerSchema {
+ public:
+  // Places in the front-end where markers of a given type may be displayed.
+  enum class Location : unsigned {
+    markerChart,
+    markerTable,
+    // This adds markers to the main marker timeline in the header.
+    timelineOverview,
+    // In the timeline, this is a section that breaks out markers that are
+    // related to memory. When memory counters are enabled, this is its own
+    // track, otherwise it is displayed with the main thread.
+    timelineMemory,
+    // This adds markers to the IPC timeline area in the header.
+    timelineIPC,
+    // This adds markers to the FileIO timeline area in the header.
+    timelineFileIO,
+    // TODO - This is not supported yet.
+    stackChart
+  };
+
+  // Tag type used as constructor parameter, to explicitly specify that the
+  // location (and other display options) are handled as a special case in the
+  // front-end. In this case, *no* schema will be output for this type.
+  struct SpecialFrontendLocation {};
+
+  // How the front-end should format a data element value.
+  enum class Format {
+    // ----------------------------------------------------
+    // String types.
+
+    // Show the URL, and handle PII sanitization.
+    url,
+    // Show the file path, and handle PII sanitization.
+    filePath,
+    // Important, do not put URL or file path information here, as it will not
+    // be sanitized. Please be careful with including other types of PII here as
+    // well.
+    // e.g. "Label: Some String"
+    string,
+
+    // ----------------------------------------------------
+    // Numeric types
+
+    // For time data that represents a duration of time.
+    // e.g. "Label: 5s, 5ms, 5μs"
+    duration,
+    // Data that happened at a specific time, relative to the start of the
+    // profile. e.g. "Label: 15.5s, 20.5ms, 30.5μs"
+    time,
+    // The following are alternatives to display a time only in a specific unit
+    // of time.
+    seconds,       // "Label: 5s"
+    milliseconds,  // "Label: 5ms"
+    microseconds,  // "Label: 5μs"
+    nanoseconds,   // "Label: 5ns"
+    // e.g. "Label: 5.55mb, 5 bytes, 312.5kb"
+    bytes,
+    // This should be a value between 0 and 1.
+    // "Label: 50%"
+    percentage,
+    // The integer should be used for generic representations of numbers.
+    // Do not use it for time information.
+    // "Label: 52, 5,323, 1,234,567"
+    integer,
+    // The decimal should be used for generic representations of numbers.
+    // Do not use it for time information.
+    // "Label: 52.23, 0.0054, 123,456.78"
+    decimal
+  };
+
+  // Whether a data element value is used in searches.
+  enum class Searchable { notSearchable, searchable };
+
+  // Marker schema, with a non-empty list of locations where markers should be
+  // shown.
+  // Tech note: Even though `aLocations` are templated arguments, they are
+  // used to initialize `Location` elements, so they can only be of that enum
+  // type.
+  template <typename... Locations>
+  explicit MarkerSchema(Location aLocation, Locations... aLocations)
+      : mLocations{aLocation, aLocations...} {}
+
+  // Marker schema for types that have special frontend handling.
+  // Nothing else should be set in this case.
+  // Implicit to allow quick return from MarkerTypeDisplay functions.
+  MOZ_IMPLICIT MarkerSchema(SpecialFrontendLocation) {}
+
+  // Caller must specify location(s) or SpecialFrontendLocation above.
+  MarkerSchema() = delete;
+
+  // Optional labels in the marker chart, the chart tooltip, and the marker
+  // table. If not provided, the marker "name" will be used. The given string
+  // can contain element keys in braces to include data elements streamed by
+  // `StreamJSONMarkerData()`. E.g.: "This is {text}"
+  // Each setter returns this object, to allow chaining.
+
+  MarkerSchema& SetChartLabel(std::string aChartLabel) {
+    mChartLabel = std::move(aChartLabel);
+    return *this;
+  }
+
+  MarkerSchema& SetTooltipLabel(std::string aTooltipLabel) {
+    mTooltipLabel = std::move(aTooltipLabel);
+    return *this;
+  }
+
+  MarkerSchema& SetTableLabel(std::string aTableLabel) {
+    mTableLabel = std::move(aTableLabel);
+    return *this;
+  }
+
+  // Set the same text in all three labels.
+  // TODO: Move to a single "label" field once the front-end allows it.
+  MarkerSchema& SetAllLabels(std::string aText) {
+    mChartLabel = aText;
+    mTooltipLabel = aText;
+    // Last use of `aText`, so it can be moved from.
+    mTableLabel = std::move(aText);
+    return *this;
+  }
+
+  // Each data element that is streamed by `StreamJSONMarkerData()` can be
+  // displayed as indicated by using one of the `Add...` function below.
+  // Each `Add...` will add a line in the full marker description, and returns
+  // this object to allow chaining. Parameters:
+  // - `aKey`: Element property name as streamed by `StreamJSONMarkerData()`.
+  // - `aLabel`: Optional prefix. Defaults to the key name.
+  // - `aFormat`: How to format the data element value, see `Format` above.
+  // - `aSearchable`: Optional, indicates if the value is used in searches,
+  //   defaults to false.
+
+  MarkerSchema& AddKeyFormat(std::string aKey, Format aFormat) {
+    DynamicData row{std::move(aKey), mozilla::Nothing{}, aFormat,
+                    mozilla::Nothing{}};
+    mData.emplace_back(mozilla::VariantType<DynamicData>{}, std::move(row));
+    return *this;
+  }
+
+  MarkerSchema& AddKeyLabelFormat(std::string aKey, std::string aLabel,
+                                  Format aFormat) {
+    DynamicData row{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat,
+                    mozilla::Nothing{}};
+    mData.emplace_back(mozilla::VariantType<DynamicData>{}, std::move(row));
+    return *this;
+  }
+
+  MarkerSchema& AddKeyFormatSearchable(std::string aKey, Format aFormat,
+                                       Searchable aSearchable) {
+    DynamicData row{std::move(aKey), mozilla::Nothing{}, aFormat,
+                    mozilla::Some(aSearchable)};
+    mData.emplace_back(mozilla::VariantType<DynamicData>{}, std::move(row));
+    return *this;
+  }
+
+  MarkerSchema& AddKeyLabelFormatSearchable(std::string aKey,
+                                            std::string aLabel, Format aFormat,
+                                            Searchable aSearchable) {
+    DynamicData row{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat,
+                    mozilla::Some(aSearchable)};
+    mData.emplace_back(mozilla::VariantType<DynamicData>{}, std::move(row));
+    return *this;
+  }
+
+  // The display may also include static rows, made of a fixed label and a
+  // fixed value.
+
+  MarkerSchema& AddStaticLabelValue(std::string aLabel, std::string aValue) {
+    StaticData row{std::move(aLabel), std::move(aValue)};
+    mData.emplace_back(mozilla::VariantType<StaticData>{}, std::move(row));
+    return *this;
+  }
+
+  // Internal streaming function. It can only be called on an rvalue.
+  MFBT_API void Stream(JSONWriter& aWriter, const Span<const char>& aName) &&;
+
+ private:
+  MFBT_API static Span<const char> LocationToStringSpan(Location aLocation);
+  MFBT_API static Span<const char> FormatToStringSpan(Format aFormat);
+
+  // List of marker display locations. Empty for SpecialFrontendLocation.
+  std::vector<Location> mLocations;
+  // Labels for different places.
+  std::string mChartLabel;
+  std::string mTooltipLabel;
+  std::string mTableLabel;
+
+  // Main display, made of zero or more rows of key+label+format or label+value.
+
+  // Row for a streamed data element; the label and the searchable flag are
+  // optional.
+  struct DynamicData {
+    std::string mKey;
+    mozilla::Maybe<std::string> mLabel;
+    Format mFormat;
+    mozilla::Maybe<Searchable> mSearchable;
+  };
+  // Row with a fixed label and a fixed value.
+  struct StaticData {
+    std::string mLabel;
+    std::string mValue;
+  };
+  using DataRow = mozilla::Variant<DynamicData, StaticData>;
+  using DataRowVector = std::vector<DataRow>;
+
+  // Rows, in the order in which they were added.
+  DataRowVector mData;
+};
+
+}  // namespace mozilla
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerMarkersPrerequisites_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h
new file mode 100644
index 0000000000..0a104193c3
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h
@@ -0,0 +1,146 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BASE_PROFILER_SHARED_LIBRARIES_H_
+#define BASE_PROFILER_SHARED_LIBRARIES_H_
+
+#include "BaseProfiler.h"
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include <algorithm>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <vector>
+
+class SharedLibrary {
+  // Value type describing one shared library: three address-related integers
+  // plus seven strings. Copyable, movable and comparable with operator==.
+ public:
+  // Stores the address information and identification strings of one shared
+  // library. Every string argument is copied into the new object. `aArch` is
+  // a C string; a null pointer is stored as an empty architecture name,
+  // because constructing a std::string from nullptr is undefined behaviour.
+  SharedLibrary(uintptr_t aStart, uintptr_t aEnd, uintptr_t aOffset,
+                const std::string& aBreakpadId, const std::string& aModuleName,
+                const std::string& aModulePath, const std::string& aDebugName,
+                const std::string& aDebugPath, const std::string& aVersion,
+                const char* aArch)
+      : mStart(aStart),
+        mEnd(aEnd),
+        mOffset(aOffset),
+        mBreakpadId(aBreakpadId),
+        mModuleName(aModuleName),
+        mModulePath(aModulePath),
+        mDebugName(aDebugName),
+        mDebugPath(aDebugPath),
+        mVersion(aVersion),
+        mArch(aArch ? aArch : "") {}
+
+  // Copying and moving are member-wise. Every member is an integer or a
+  // std::string, so the defaulted copies do exactly what hand-written
+  // member-by-member versions would do (including on self-assignment).
+  // The move operations have to be declared explicitly: user-declared copy
+  // operations suppress the implicit ones, and without them std::sort() and
+  // std::vector growth deep-copy all seven strings of every element instead
+  // of transferring their buffers.
+  // A moved-from SharedLibrary is valid but its strings are unspecified; only
+  // assign to it or destroy it.
+  SharedLibrary(const SharedLibrary& aEntry) = default;
+  SharedLibrary& operator=(const SharedLibrary& aEntry) = default;
+  SharedLibrary(SharedLibrary&& aEntry) = default;
+  SharedLibrary& operator=(SharedLibrary&& aEntry) = default;
+
+  // Two entries are equal only if all ten fields match. The integer fields
+  // are compared first, so most mismatches never reach a string comparison.
+  bool operator==(const SharedLibrary& other) const {
+    return (mStart == other.mStart) && (mEnd == other.mEnd) &&
+           (mOffset == other.mOffset) && (mModuleName == other.mModuleName) &&
+           (mModulePath == other.mModulePath) &&
+           (mDebugName == other.mDebugName) &&
+           (mDebugPath == other.mDebugPath) &&
+           (mBreakpadId == other.mBreakpadId) && (mVersion == other.mVersion) &&
+           (mArch == other.mArch);
+  }
+
+  // Read-only accessors. The string getters return references into this
+  // object, which stay valid only as long as the object is neither modified
+  // nor destroyed.
+  uintptr_t GetStart() const { return mStart; }
+  uintptr_t GetEnd() const { return mEnd; }
+  uintptr_t GetOffset() const { return mOffset; }
+  const std::string& GetBreakpadId() const { return mBreakpadId; }
+  const std::string& GetModuleName() const { return mModuleName; }
+  const std::string& GetModulePath() const { return mModulePath; }
+  const std::string& GetDebugName() const { return mDebugName; }
+  const std::string& GetDebugPath() const { return mDebugPath; }
+  const std::string& GetVersion() const { return mVersion; }
+  const std::string& GetArch() const { return mArch; }
+
+ private:
+  // Default construction is private: it yields zeroed addresses and empty
+  // strings, a state that code outside the class cannot create directly.
+  SharedLibrary() : mStart{0}, mEnd{0}, mOffset{0} {}
+
+  // Address information, copied from the constructor arguments.
+  uintptr_t mStart;
+  uintptr_t mEnd;
+  uintptr_t mOffset;
+  // Identification strings, copied from the constructor arguments.
+  std::string mBreakpadId;
+  std::string mModuleName;
+  std::string mModulePath;
+  std::string mDebugName;
+  std::string mDebugPath;
+  std::string mVersion;
+  std::string mArch;
+};
+
+// Orders two libraries by start address; the predicate for SortByAddress().
+static bool CompareAddresses(const SharedLibrary& a, const SharedLibrary& b) {
+  return b.GetStart() > a.GetStart();
+}
+
+class SharedLibraryInfo {
+ public:
+  static SharedLibraryInfo GetInfoForSelf();
+  static void Initialize();
+
+  SharedLibraryInfo() = default;
+
+  void AddSharedLibrary(SharedLibrary entry) { mEntries.emplace_back(entry); }
+  void Clear() { mEntries.clear(); }
+  size_t GetSize() const { return mEntries.size(); }
+
+  // Unchecked element access: `i` must be smaller than GetSize().
+  const SharedLibrary& GetEntry(size_t i) const { return mEntries[i]; }
+  SharedLibrary& GetMutableEntry(size_t i) { return mEntries[i]; }
+
+  // Erases the index range [first, last); the entry at `last` is kept.
+  void RemoveEntries(size_t first, size_t last) {
+    const auto base = mEntries.begin();
+    mEntries.erase(base + first, base + last);
+  }
+
+  // Linear search for an entry that compares equal to `searchItem`.
+  bool Contains(const SharedLibrary& searchItem) const {
+    const auto hit = std::find(mEntries.begin(), mEntries.end(), searchItem);
+    return hit != mEntries.end();
+  }
+
+  // Reorders the entries using CompareAddresses() as the sort predicate.
+  void SortByAddress() {
+    std::sort(mEntries.begin(), mEntries.end(), CompareAddresses);
+  }
+
+ private:
+  std::vector<SharedLibrary> mEntries;
+};
+
+#endif  // BASE_PROFILER_SHARED_LIBRARIES_H_
diff --git a/mozglue/baseprofiler/public/BaseProfilingCategory.h b/mozglue/baseprofiler/public/BaseProfilingCategory.h
new file mode 100644
index 0000000000..6892ec40f4
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingCategory.h
@@ -0,0 +1,72 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingCategory_h
+#define BaseProfilingCategory_h
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include "mozilla/Types.h"
+
+#include <cstdint>
+
+#include "ProfilingCategoryList.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// clang-format off
+
+// Flat enum of every category pair. Of the three helper macros handed to
+// MOZ_PROFILING_CATEGORY_LIST, only the subcategory one emits anything (the
+// subcategory name); the category begin/end hooks expand to nothing. A single
+// number is therefore enough to assign a category pair to a stack label.
+#define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color)
+#define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) name,
+#define CATEGORY_ENUM_END_CATEGORY
+enum class ProfilingCategoryPair : uint32_t {
+  MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY,
+                              CATEGORY_ENUM_SUBCATEGORY,
+                              CATEGORY_ENUM_END_CATEGORY)
+  COUNT,  // Follows the generated enumerators, so it equals their number.
+  LAST = COUNT - 1,  // Value of the last generated enumerator.
+};
+#undef CATEGORY_ENUM_BEGIN_CATEGORY
+#undef CATEGORY_ENUM_SUBCATEGORY
+#undef CATEGORY_ENUM_END_CATEGORY
+
+// Categories only: just the begin-category hook emits a name, subcategories do not.
+#define SUPERCATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color) name,
+#define SUPERCATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString)
+#define SUPERCATEGORY_ENUM_END_CATEGORY
+enum class ProfilingCategory : uint32_t {
+  MOZ_PROFILING_CATEGORY_LIST(SUPERCATEGORY_ENUM_BEGIN_CATEGORY,
+                              SUPERCATEGORY_ENUM_SUBCATEGORY,
+                              SUPERCATEGORY_ENUM_END_CATEGORY)
+  COUNT,  // Follows the generated enumerators, so it equals their number.
+  LAST = COUNT - 1,  // Value of the last generated enumerator.
+};
+#undef SUPERCATEGORY_ENUM_BEGIN_CATEGORY
+#undef SUPERCATEGORY_ENUM_SUBCATEGORY
+#undef SUPERCATEGORY_ENUM_END_CATEGORY
+
+// clang-format on
+
+struct ProfilingCategoryPairInfo {  // Returned by GetProfilingCategoryPairInfo().
+  ProfilingCategory mCategory;  // Presumably the pair's category -- confirm.
+  uint32_t mSubcategoryIndex;   // Presumably its index in that category.
+  const char* mLabel;  // May be returned by ProfilingStackFrame::label().
+};
+
+MFBT_API const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo(
+    ProfilingCategoryPair aCategoryPair);
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* BaseProfilingCategory_h */
diff --git a/mozglue/baseprofiler/public/BaseProfilingStack.h b/mozglue/baseprofiler/public/BaseProfilingStack.h
new file mode 100644
index 0000000000..214fc1ebbf
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingStack.h
@@ -0,0 +1,520 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingStack_h
+#define BaseProfilingStack_h
+
+#include "BaseProfilingCategory.h"
+
+#include "mozilla/Atomics.h"
+
+#include "BaseProfiler.h"
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include <algorithm>
+#include <stdint.h>
+
+// This file defines the classes ProfilingStack and ProfilingStackFrame.
+// The ProfilingStack manages an array of ProfilingStackFrames.
+// It keeps track of the "label stack" and the JS interpreter stack.
+// The two stack types are interleaved.
+//
+// Usage:
+//
+//  ProfilingStack* profilingStack = ...;
+//
+//  // For label frames:
+//  profilingStack->pushLabelFrame(...);
+//  // Execute some code. When finished, pop the frame:
+//  profilingStack->pop();
+//
+//  // For JS stack frames:
+//  profilingStack->pushJSFrame(...);
+//  // Execute some code. When finished, pop the frame:
+//  profilingStack->pop();
+//
+//
+// Concurrency considerations
+//
+// A thread's profiling stack (and the frames inside it) is only modified by
+// that thread. However, the profiling stack can be *read* by a different
+// thread, the sampler thread: Whenever the profiler wants to sample a given
+// thread A, the following happens:
+//  (1) Thread A is suspended.
+//  (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
+//      including all ProfilingStackFrames that are currently in that stack
+//      (profilingStack->frames[0..profilingStack->stackSize()]).
+//  (3) Thread A is resumed.
+//
+// Thread suspension is achieved using platform-specific APIs; refer to each
+// platform's Sampler::SuspendAndSampleAndResumeThread implementation in
+// platform-*.cpp for details.
+//
+// When the thread is suspended, the values in profilingStack->stackPointer and
+// in the stack frame range
+// profilingStack->frames[0..profilingStack->stackPointer] need to be in a
+// consistent state, so that thread S does not read partially-constructed stack
+// frames. More specifically, we have two requirements:
+//  (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
+//      data needs to be put in place *before* the stackPointer is incremented,
+//      and the compiler + CPU need to know that this order matters.
+//  (2) When popping a frame from the stack and then preparing the
+//      ProfilingStackFrame data for the next frame that is about to be pushed,
+//      the decrement of the stackPointer in pop() needs to happen *before* the
+//      ProfilingStackFrame for the new frame is being populated, and the
+//      compiler + CPU need to know that this order matters.
+//
+// We can express the relevance of these orderings in multiple ways.
+// Option A is to make stackPointer an atomic with SequentiallyConsistent
+// memory ordering. This would ensure that no writes in thread A would be
+// reordered across any writes to stackPointer, which satisfies requirements
+// (1) and (2) at the same time. Option A is the simplest.
+// Option B is to use ReleaseAcquire memory ordering both for writes to
+// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
+// ensure that all writes that happened *before this write in program order* are
+// not reordered to happen after this write. ReleaseAcquire ordering places no
+// requirements on the ordering of writes that happen *after* this write in
+// program order.
+// Using release-stores for writes to stackPointer expresses requirement (1),
+// and using release-stores for writes to the ProfilingStackFrame fields
+// expresses requirement (2).
+//
+// Option B is more complicated than option A, but has much better performance
+// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
+// from option A to option B reduced the overhead of pushing+popping a
+// ProfilingStackFrame by 10 nanoseconds.
+// On x86/64, release-stores require no explicit hardware barriers or lock
+// instructions.
+// On ARM/64, option B may be slower than option A, because the compiler will
+// generate hardware barriers for every single release-store instead of just
+// for the writes to stackPointer. However, the actual performance impact of
+// this has not yet been measured on ARM, so we're currently using option B
+// everywhere. This is something that we may want to change in the future once
+// we've done measurements.
+
+namespace mozilla {
+namespace baseprofiler {
+
+// A call stack can be specified to the JS engine such that all JS entry/exits
+// to functions push/pop a stack frame to/from the specified stack.
+//
+// For more detailed information, see vm/GeckoProfiler.h.
+//
+class ProfilingStackFrame {
+  // A ProfilingStackFrame represents either a label frame or a JS frame.
+
+  // WARNING WARNING WARNING
+  //
+  // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
+  // that writes to these fields are release-writes, which ensures that
+  // earlier writes in this thread don't get reordered after the writes to
+  // these fields. In particular, the decrement of the stack pointer in
+  // ProfilingStack::pop() is a write that *must* happen before the values in
+  // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
+  // see an inconsistent state where the stack pointer still points to a
+  // ProfilingStackFrame which has already been popped off the stack and whose
+  // fields have now been partially repopulated with new values.
+  // See the "Concurrency considerations" paragraph at the top of this file
+  // for more details.
+
+  // Descriptive label for this stack frame. Must be a static string! Can be
+  // an empty string, but not a null pointer.
+  Atomic<const char*, ReleaseAcquire> label_;
+
+  // An additional descriptive string of this frame which is combined with
+  // |label_| in profiler output. Need not be (and usually isn't) static. Can
+  // be null.
+  Atomic<const char*, ReleaseAcquire> dynamicString_;
+
+  // Stack pointer for non-JS stack frames, the script pointer otherwise.
+  Atomic<void*, ReleaseAcquire> spOrScript;
+
+  // ID of the JS Realm for JS stack frames.
+  // Must not be used on non-JS frames; it'll contain either the default 0,
+  // or a leftover value from a previous JS stack frame that was using this
+  // ProfilingStackFrame object.
+  mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_;
+
+  // The bytecode offset for JS stack frames.
+  // Must not be used on non-JS frames; it'll contain either the default 0,
+  // or a leftover value from a previous JS stack frame that was using this
+  // ProfilingStackFrame object.
+  Atomic<int32_t, ReleaseAcquire> pcOffsetIfJS_;
+
+  // Bits 0...15 hold the Flags. Bits 16...31 hold the category pair.
+  Atomic<uint32_t, ReleaseAcquire> flagsAndCategoryPair_;
+
+ public:
+  ProfilingStackFrame() = default;
+  ProfilingStackFrame& operator=(const ProfilingStackFrame& other) {
+    label_ = other.label();
+    dynamicString_ = other.dynamicString();
+    void* spScript = other.spOrScript;
+    spOrScript = spScript;
+    int32_t offsetIfJS = other.pcOffsetIfJS_;
+    pcOffsetIfJS_ = offsetIfJS;
+    uint64_t realmID = other.realmID_;  // Same type as realmID_: no sign trip.
+    realmID_ = realmID;
+    uint32_t flagsAndCategory = other.flagsAndCategoryPair_;
+    flagsAndCategoryPair_ = flagsAndCategory;
+    return *this;
+  }
+
+  // Reserve up to 16 bits for flags, and 16 for category pair.
+  enum class Flags : uint32_t {
+    // The first three flags describe the kind of the frame and are
+    // mutually exclusive. (We still give them individual bits for
+    // simplicity.)
+
+    // A regular label frame. These usually come from AutoProfilerLabel.
+    IS_LABEL_FRAME = 1 << 0,
+
+    // A special frame indicating the start of a run of JS profiling stack
+    // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp
+    // field. These frames are needed to get correct ordering between JS
+    // and LABEL frames because JS frames don't carry sp information.
+    // SP is short for "stack pointer".
+    IS_SP_MARKER_FRAME = 1 << 1,
+
+    // A JS frame.
+    IS_JS_FRAME = 1 << 2,
+
+    // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME
+    // frames can have this flag set and unset during their lifetime.
+    // JS_OSR frames are ignored.
+    JS_OSR = 1 << 3,
+
+    // The next three are mutually exclusive.
+    // By default, for profiling stack frames that have both a label and a
+    // dynamic string, the two strings are combined into one string of the
+    // form "<label> <dynamicString>" during JSON serialization. The
+    // following flags can be used to change this preset.
+    STRING_TEMPLATE_METHOD = 1 << 4,  // "<label>.<dynamicString>"
+    STRING_TEMPLATE_GETTER = 1 << 5,  // "get <label>.<dynamicString>"
+    STRING_TEMPLATE_SETTER = 1 << 6,  // "set <label>.<dynamicString>"
+
+    // If set, causes this stack frame to be marked as "relevantForJS" in
+    // the profile JSON, which will make it show up in the "JS only" call
+    // tree view.
+    RELEVANT_FOR_JS = 1 << 7,
+
+    // If set, causes the label on this ProfilingStackFrame to be ignored
+    // and to be replaced by the subcategory's label.
+    LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8,
+
+    // Frame dynamic string does not contain user data.
+    NONSENSITIVE = 1 << 9,
+
+    // A JS Baseline Interpreter frame.
+    IS_BLINTERP_FRAME = 1 << 10,
+
+    FLAGS_BITCOUNT = 16,
+    FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1
+  };
+
+  static_assert(
+      uint32_t(ProfilingCategoryPair::LAST) <=
+          (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)),
+      "Too many category pairs to fit into u32 with together with the "
+      "reserved bits for the flags");
+
+  bool isLabelFrame() const {
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME);
+  }
+
+  bool isSpMarkerFrame() const {
+    return uint32_t(flagsAndCategoryPair_) &
+           uint32_t(Flags::IS_SP_MARKER_FRAME);
+  }
+
+  bool isJsFrame() const {
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME);
+  }
+
+  bool isOSRFrame() const {
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR);
+  }
+
+  void setIsOSRFrame(bool isOSR) {
+    if (isOSR) {
+      flagsAndCategoryPair_ =
+          uint32_t(flagsAndCategoryPair_) | uint32_t(Flags::JS_OSR);
+    } else {
+      flagsAndCategoryPair_ =
+          uint32_t(flagsAndCategoryPair_) & ~uint32_t(Flags::JS_OSR);
+    }
+  }
+
+  const char* label() const {
+    uint32_t flagsAndCategoryPair = flagsAndCategoryPair_;
+    if (flagsAndCategoryPair &
+        uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) {
+      auto categoryPair = ProfilingCategoryPair(
+          flagsAndCategoryPair >> uint32_t(Flags::FLAGS_BITCOUNT));
+      return GetProfilingCategoryPairInfo(categoryPair).mLabel;
+    }
+    return label_;
+  }
+
+  const char* dynamicString() const { return dynamicString_; }
+
+  void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
+                      ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {
+    label_ = aLabel;
+    dynamicString_ = aDynamicString;
+    spOrScript = sp;
+    // pcOffsetIfJS_ is not set and must not be used on label frames.
+    flagsAndCategoryPair_ =
+        uint32_t(Flags::IS_LABEL_FRAME) |
+        (uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | aFlags;
+    MOZ_ASSERT(isLabelFrame());
+  }
+
+  void initSpMarkerFrame(void* sp) {
+    label_ = "";
+    dynamicString_ = nullptr;
+    spOrScript = sp;
+    // pcOffsetIfJS_ is not set and must not be used on sp marker frames.
+    flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) |
+                            (uint32_t(ProfilingCategoryPair::OTHER)
+                             << uint32_t(Flags::FLAGS_BITCOUNT));
+    MOZ_ASSERT(isSpMarkerFrame());
+  }
+
+  void initJsFrame(const char* aLabel, const char* aDynamicString,
+                   void* /* JSScript* */ aScript, int32_t aOffset,
+                   uint64_t aRealmID) {
+    label_ = aLabel;
+    dynamicString_ = aDynamicString;
+    spOrScript = aScript;
+    pcOffsetIfJS_ = aOffset;
+    realmID_ = aRealmID;
+    flagsAndCategoryPair_ =
+        uint32_t(Flags::IS_JS_FRAME) | (uint32_t(ProfilingCategoryPair::JS)
+                                        << uint32_t(Flags::FLAGS_BITCOUNT));
+    MOZ_ASSERT(isJsFrame());
+  }
+
+  uint32_t flags() const {
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK);
+  }
+
+  ProfilingCategoryPair categoryPair() const {
+    return ProfilingCategoryPair(flagsAndCategoryPair_ >>
+                                 uint32_t(Flags::FLAGS_BITCOUNT));
+  }
+
+  uint64_t realmID() const { return realmID_; }
+
+  void* stackAddress() const {
+    MOZ_ASSERT(!isJsFrame());
+    return spOrScript;
+  }
+
+  // Note that the pointer returned might be invalid.
+  void* rawScript() const {
+    MOZ_ASSERT(isJsFrame());
+    return spOrScript;
+  }
+  void setRawScript(void* aScript) {
+    MOZ_ASSERT(isJsFrame());
+    spOrScript = aScript;
+  }
+
+  int32_t pcOffset() const {
+    MOZ_ASSERT(isJsFrame());
+    return pcOffsetIfJS_;
+  }
+
+  void setPCOffset(int32_t aOffset) {
+    MOZ_ASSERT(isJsFrame());
+    pcOffsetIfJS_ = aOffset;
+  }
+
+  // The offset of a pc into a script's code can actually be 0, so to
+  // signify a nullptr pc, use a -1 index. This is checked against in
+  // pc() and setPC() to set/get the right pc.
+  static const int32_t NullPCOffset = -1;
+};
+
+// Each thread has its own ProfilingStack. That thread modifies the
+// ProfilingStack, pushing and popping elements as necessary.
+//
+// The ProfilingStack is also read periodically by the profiler's sampler
+// thread. This happens only when the thread that owns the ProfilingStack is
+// suspended. So there are no genuine parallel accesses.
+//
+// However, it is possible for pushing/popping to be interrupted by a periodic
+// sample. Because of this, we need pushing/popping to be effectively atomic.
+//
+// - When pushing a new frame, we increment the stack pointer -- making the new
+//   frame visible to the sampler thread -- only after the new frame has been
+//   fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
+//   the increment is a release-store, which ensures that this store is not
+//   reordered before the writes of the frame.
+//
+// - When popping an old frame, the only operation is the decrementing of the
+//   stack pointer, which is obviously atomic.
+//
+class ProfilingStack final {
+ public:
+  ProfilingStack() = default;
+
+  MFBT_API ~ProfilingStack();
+
+  void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
+                      ProfilingCategoryPair categoryPair, uint32_t flags = 0) {
+    // This thread is the only one that ever changes the value of
+    // stackPointer.
+    // Store the value of the atomic in a non-atomic local variable so that
+    // the compiler won't generate two separate loads from the atomic for
+    // the size check and the frames[] array indexing operation.
+    uint32_t stackPointerVal = stackPointer;
+
+    if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
+      ensureCapacitySlow();
+    }
+    frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
+                                           categoryPair, flags);
+
+    // This must happen at the end! The compiler will not reorder this
+    // update because stackPointer is Atomic<..., ReleaseAcquire>, so the
+    // writes above will not be reordered below the stackPointer store.
+    // Do the read and the write as two separate statements, in order to
+    // make it clear that we don't need an atomic increment, which would be
+    // more expensive on x86 than the separate operations done here.
+    // However, don't use stackPointerVal here; instead, allow the compiler
+    // to turn this store into a non-atomic increment instruction which
+    // takes up less code size.
+    stackPointer = stackPointer + 1;
+  }
+
+  void pushSpMarkerFrame(void* sp) {
+    uint32_t oldStackPointer = stackPointer;
+
+    if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
+      ensureCapacitySlow();
+    }
+    frames[oldStackPointer].initSpMarkerFrame(sp);
+
+    // This must happen at the end, see the comment in pushLabelFrame.
+    stackPointer = oldStackPointer + 1;
+  }
+
+  void pushJsOffsetFrame(const char* label, const char* dynamicString,
+                         void* script, int32_t offset, uint64_t aRealmID) {
+    // This thread is the only one that ever changes the value of
+    // stackPointer. Only load the atomic once.
+    uint32_t oldStackPointer = stackPointer;
+
+    if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
+      ensureCapacitySlow();
+    }
+    frames[oldStackPointer].initJsFrame(label, dynamicString, script, offset,
+                                        aRealmID);
+
+    // This must happen at the end, see the comment in pushLabelFrame.
+    stackPointer = stackPointer + 1;
+  }
+
+  void pop() {
+    MOZ_ASSERT(stackPointer > 0);
+    // Do the read and the write as two separate statements, in order to
+    // make it clear that we don't need an atomic decrement, which would be
+    // more expensive on x86 than the separate operations done here.
+    // This thread is the only one that ever changes the value of
+    // stackPointer.
+    uint32_t oldStackPointer = stackPointer;
+    stackPointer = oldStackPointer - 1;
+  }
+
+  uint32_t stackSize() const { return stackPointer; }
+  uint32_t stackCapacity() const { return capacity; }
+
+ private:
+  // Out of line path for expanding the buffer, since otherwise this would get
+  // inlined in every DOM WebIDL call.
+  MFBT_API MOZ_COLD void ensureCapacitySlow();
+
+  // No copying.
+  ProfilingStack(const ProfilingStack&) = delete;
+  void operator=(const ProfilingStack&) = delete;
+
+  // No moving either.
+  ProfilingStack(ProfilingStack&&) = delete;
+  void operator=(ProfilingStack&&) = delete;
+
+  uint32_t capacity = 0;  // Checked against stackPointer before every push.
+
+ public:
+  // The pointer to the stack frames, this is read from the profiler thread and
+  // written from the current thread.
+  //
+  // This is effectively a unique pointer.
+  Atomic<ProfilingStackFrame*, SequentiallyConsistent> frames{nullptr};
+
+  // Index of the first free frame past the top of the in-use stack, i.e.
+  // frames[stackPointer - 1] is the top stack frame. Read it through
+  // stackSize(), which currently returns this value unchanged.
+  // NOTE(review): an earlier wording said this "may exceed the capacity";
+  // every push here calls ensureCapacitySlow() first -- confirm it cannot.
+  //
+  // WARNING WARNING WARNING
+  //
+  // This is an atomic variable that uses ReleaseAcquire memory ordering.
+  // See the "Concurrency considerations" paragraph at the top of this file
+  // for more details.
+  Atomic<uint32_t, ReleaseAcquire> stackPointer{0};
+};
+
+class AutoGeckoProfilerEntry;
+class GeckoProfilerEntryMarker;
+class GeckoProfilerBaselineOSRMarker;
+
+class GeckoProfilerThread {
+  friend class AutoGeckoProfilerEntry;
+  friend class GeckoProfilerEntryMarker;
+  friend class GeckoProfilerBaselineOSRMarker;
+
+  ProfilingStack* profilingStack_;
+  // Mirrors profilingStack_ while enabled, null otherwise; see enable().
+  ProfilingStack* profilingStackIfEnabled_;
+
+ public:
+  MFBT_API GeckoProfilerThread();
+  MFBT_API void setProfilingStack(ProfilingStack* profilingStack, bool enabled);
+
+  // True if the profiler infrastructure is set up. Should be true in builds
+  // that include profiler support except during early startup or late
+  // shutdown. Unrelated to the presence of the Gecko Profiler addon.
+  bool infraInstalled() { return nullptr != profilingStack_; }
+
+  uint32_t stackPointer() {
+    MOZ_ASSERT(infraInstalled());
+    return profilingStack_->stackPointer;
+  }
+  ProfilingStackFrame* stack() { return profilingStack_->frames; }
+  ProfilingStack* getProfilingStack() { return profilingStack_; }
+  ProfilingStack* getProfilingStackIfEnabled() {
+    return profilingStackIfEnabled_;
+  }
+
+  void enable(bool enable) {
+    if (enable) {
+      profilingStackIfEnabled_ = profilingStack_;
+    } else {
+      profilingStackIfEnabled_ = nullptr;
+    }
+  }
+};
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* BaseProfilingStack_h */
diff --git a/mozglue/baseprofiler/public/BlocksRingBuffer.h b/mozglue/baseprofiler/public/BlocksRingBuffer.h
new file mode 100644
index 0000000000..6948ab8cf4
--- /dev/null
+++ b/mozglue/baseprofiler/public/BlocksRingBuffer.h
@@ -0,0 +1,1000 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BlocksRingBuffer_h
+#define BlocksRingBuffer_h
+
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ModuloBuffer.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/ScopeExit.h"
+
+#include <functional>
+#include <string>
+#include <tuple>
+#include <utility>
+
+namespace mozilla {
+
+// Thread-safe Ring buffer that can store blocks of different sizes during
+// defined sessions.
+// Each *block* contains an *entry* and the entry size:
+// [ entry_size | entry ] [ entry_size | entry ] ...
+// *In-session* is a period of time during which `BlocksRingBuffer` allows
+// reading and writing. *Out-of-session*, the `BlocksRingBuffer` object is
+// still valid, but contains no data, and gracefully denies accesses.
+//
+// To write an entry, the buffer reserves a block of sufficient size (to contain
+// user data of predetermined size), writes the entry size, and lets the caller
+// fill the entry contents using ModuloBuffer::Iterator APIs and a few entry-
+// specific APIs. E.g.:
+// ```
+// BlocksRingBuffer brb(BlocksRingBuffer::ThreadSafety::WithMutex,
+//                      PowerOfTwo<BlocksRingBuffer::Length>(1024));
+// brb.ReserveAndPut([]() { return sizeof(123); },
+//                   [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+//                     if (aEW.isSome()) {
+//                       aEW->WriteObject(123);
+//                     }
+//                   });
+// ```
+// Other `Put...` functions may be used as shortcuts for simple entries.
+// The objects given to the caller's callbacks should only be used inside the
+// callbacks and not stored elsewhere, because they keep their own references to
+// the BlocksRingBuffer and therefore should not live longer.
+// Different types of objects may be serialized into an entry, see `Serializer`
+// for more information.
+//
+// When reading data, the buffer iterates over blocks (it knows how to read the
+// entry size, and therefore move to the next block), and lets the caller read
+// the entry inside of each block. E.g.:
+// ```
+// brb.Read([](BlocksRingBuffer::Reader* aR) {
+//   if (!aR) {
+//     /* Out-of-session, nothing to read. */
+//     return;
+//   }
+//   for (ProfileBufferEntryReader aER : *aR) {
+//     /* Use ProfileBufferEntryReader functions to read serialized objects. */
+//     int n = aER.ReadObject<int>();
+//   }
+// });
+// ```
+// Different types of objects may be deserialized from an entry, see
+// `Deserializer` for more information.
+//
+// The caller may retrieve the `ProfileBufferBlockIndex` corresponding to an
+// entry (`ProfileBufferBlockIndex` is an opaque type preventing the user from
+// modifying it). That index may later be used to get back to that particular
+// entry if it still exists.
+class BlocksRingBuffer {
+ public:
+  // Using ModuloBuffer as underlying circular byte buffer.
+  using Buffer = ModuloBuffer<uint32_t, ProfileBufferIndex>;
+  // Byte type of the underlying buffer.
+  using Byte = Buffer::Byte;
+
+  // Length type for total buffer (as PowerOfTwo<Length>) and each entry.
+  using Length = uint32_t;
+
+  // Given to constructors to select whether this buffer's mutex is activated
+  // (`WithMutex`) or not (`WithoutMutex`); see `IsThreadSafe()`.
+  enum class ThreadSafety { WithoutMutex, WithMutex };
+
+  // Construct without an underlying buffer: the object starts out-of-session
+  // (nothing to read or write). `aThreadSafety` selects whether the mutex is
+  // activated.
+  explicit BlocksRingBuffer(ThreadSafety aThreadSafety)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {}
+
+  // Create a buffer of the given length; the object starts in-session.
+  // `aThreadSafety` selects whether the mutex is activated.
+  explicit BlocksRingBuffer(ThreadSafety aThreadSafety,
+                            PowerOfTwo<Length> aLength)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex),
+        mMaybeUnderlyingBuffer(Some(UnderlyingBuffer(aLength))) {}
+
+  // Take ownership of an existing buffer of the given length; the object
+  // starts in-session. `aThreadSafety` selects whether the mutex is activated.
+  BlocksRingBuffer(ThreadSafety aThreadSafety,
+                   UniquePtr<Buffer::Byte[]> aExistingBuffer,
+                   PowerOfTwo<Length> aLength)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex),
+        mMaybeUnderlyingBuffer(
+            Some(UnderlyingBuffer(std::move(aExistingBuffer), aLength))) {}
+
+  // Use an externally-owned buffer of the given length; the object starts
+  // in-session. `aThreadSafety` selects whether the mutex is activated.
+  // NOTE(review): presumably `aExternalBuffer` must stay alive for as long as
+  // it is in use here -- confirm against ModuloBuffer.
+  BlocksRingBuffer(ThreadSafety aThreadSafety, Buffer::Byte* aExternalBuffer,
+                   PowerOfTwo<Length> aLength)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex),
+        mMaybeUnderlyingBuffer(
+            Some(UnderlyingBuffer(aExternalBuffer, aLength))) {}
+
+  // Defaulted destructor: nothing special is needed. (Clearing entries would
+  // only update indices and stats, which won't be accessible after the object
+  // is destroyed anyway.)
+  ~BlocksRingBuffer() = default;
+
+  // Discard the underlying buffer, if any; this object is out-of-session
+  // afterwards.
+  void Reset() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    ResetUnderlyingBuffer();
+  }
+
+  // Discard the current underlying buffer (if any), then create a new one of
+  // the given length.
+  void Set(PowerOfTwo<Length> aLength) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    ResetUnderlyingBuffer();
+    mMaybeUnderlyingBuffer.emplace(aLength);
+  }
+
+  // Discard the current underlying buffer (if any), then take ownership of an
+  // existing buffer of the given length.
+  void Set(UniquePtr<Buffer::Byte[]> aExistingBuffer,
+           PowerOfTwo<Length> aLength) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    ResetUnderlyingBuffer();
+    mMaybeUnderlyingBuffer.emplace(std::move(aExistingBuffer), aLength);
+  }
+
+  // Discard the current underlying buffer (if any), then use an
+  // externally-owned buffer of the given length.
+  void Set(Buffer::Byte* aExternalBuffer, PowerOfTwo<Length> aLength) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    ResetUnderlyingBuffer();
+    mMaybeUnderlyingBuffer.emplace(aExternalBuffer, aLength);
+  }
+
+  // Whether the mutex is activated. This cannot change during the lifetime of
+  // this buffer, so there's no need to lock.
+  bool IsThreadSafe() const {
+    const bool mutexActivated = mMutex.IsActivated();
+    return mutexActivated;
+  }
+
+  // True while there is an underlying buffer.
+  [[nodiscard]] bool IsInSession() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    return mMaybeUnderlyingBuffer.isSome();
+  }
+
+  // Run `aCallback()` while holding the buffer mutex, and return whatever it
+  // returns. Useful when the caller needs to explicitly lock down this buffer
+  // without doing anything else with it.
+  template <typename Callback>
+  auto LockAndRun(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    return std::forward<Callback>(aCallback)();
+  }
+
+  // Buffer length in bytes, or `Nothing` when out-of-session.
+  Maybe<PowerOfTwo<Length>> BufferLength() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    // `map` only invokes the lambda when there is an underlying buffer.
+    return mMaybeUnderlyingBuffer.map([](const UnderlyingBuffer& aBuffer) {
+      return aBuffer.mBuffer.BufferLength();
+    });
+  }
+
+  // Size of external resources: what the underlying buffer reports, or 0 when
+  // out-of-session.
+  // NOTE(review): unlike the other accessors of this class, this reads
+  // `mMaybeUnderlyingBuffer` without taking `mMutex` -- confirm that callers
+  // guarantee there is no concurrent `Set()`/`Reset()`.
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    return mMaybeUnderlyingBuffer
+               ? mMaybeUnderlyingBuffer->mBuffer.SizeOfExcludingThis(
+                     aMallocSizeOf)
+               : 0;
+  }
+
+  // Size of this object itself plus its external resources.
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    const size_t selfSize = aMallocSizeOf(this);
+    return selfSize + SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  // Snapshot of the buffer state, as returned by `GetState()`.
+  struct State {
+    // Index to the first block.
+    ProfileBufferBlockIndex mRangeStart;
+
+    // Index past the last block. Equals mRangeStart if empty.
+    ProfileBufferBlockIndex mRangeEnd;
+
+    // Number of blocks that have been pushed into this buffer.
+    uint64_t mPushedBlockCount = 0;
+
+    // Number of blocks that have been removed from this buffer.
+    // Note: Live entries = pushed - cleared.
+    uint64_t mClearedBlockCount = 0;
+  };
+
+  // Get a snapshot of the current state.
+  // When out-of-session, mRangeStart==mRangeEnd, and
+  // mPushedBlockCount==mClearedBlockCount==0.
+  // Note that these may change right after this thread-safe call, so they
+  // should only be used for statistical purposes.
+  State GetState() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    // Block counts only exist while there is an underlying buffer.
+    uint64_t pushedBlocks = 0;
+    uint64_t clearedBlocks = 0;
+    if (mMaybeUnderlyingBuffer) {
+      pushedBlocks = mMaybeUnderlyingBuffer->mPushedBlockCount;
+      clearedBlocks = mMaybeUnderlyingBuffer->mClearedBlockCount;
+    }
+    return {mFirstReadIndex, mNextWriteIndex, pushedBlocks, clearedBlocks};
+  }
+
+  class Reader;
+
+  // Iterator over the blocks of a `BlocksRingBuffer`; dereferencing it gives a
+  // `ProfileBufferEntryReader` for the entry inside the current block.
+  // Created through `Reader`, lives within a lock guard lifetime.
+  class BlockIterator {
+   public:
+#ifdef DEBUG
+    ~BlockIterator() {
+      // A BlockIterator must not outlive the mutexed call it was created in.
+      mRing->mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Equality and inequality with another iterator over the same buffer,
+    // mostly used in range-for loops.
+    bool operator==(const BlockIterator aRhs) const {
+      MOZ_ASSERT(mRing == aRhs.mRing);
+      const bool sameBlock = (mBlockIndex == aRhs.mBlockIndex);
+      return sameBlock;
+    }
+    bool operator!=(const BlockIterator aRhs) const {
+      MOZ_ASSERT(mRing == aRhs.mRing);
+      const bool differentBlock = (mBlockIndex != aRhs.mBlockIndex);
+      return differentBlock;
+    }
+
+    // Move on to the next block.
+    BlockIterator& operator++() {
+      mBlockIndex = NextBlockIndex();
+      return *this;
+    }
+
+    // Dereferencing creates a `ProfileBufferEntryReader` for the entry inside
+    // this block.
+    ProfileBufferEntryReader operator*() const {
+      return mRing->ReaderInBlockAt(mBlockIndex);
+    }
+
+    // True if this iterator is just past the last entry.
+    bool IsAtEnd() const {
+      const ProfileBufferBlockIndex rangeEnd = BufferRangeEnd();
+      MOZ_ASSERT(mBlockIndex <= rangeEnd);
+      return mBlockIndex == rangeEnd;
+    }
+
+    // Can be used as reference to come back to this entry with `ReadAt()`.
+    ProfileBufferBlockIndex CurrentBlockIndex() const { return mBlockIndex; }
+
+    // Index past the end of this block, which is the start of the next block:
+    // the current index, plus the ULEB128-encoded entry size, plus the entry.
+    ProfileBufferBlockIndex NextBlockIndex() const {
+      MOZ_ASSERT(!IsAtEnd());
+      const Length entryBytes =
+          mRing->ReaderInBlockAt(mBlockIndex).RemainingBytes();
+      const ProfileBufferIndex blockEnd =
+          mBlockIndex.ConvertToProfileBufferIndex() + ULEB128Size(entryBytes) +
+          entryBytes;
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(blockEnd);
+    }
+
+    // Index of the first block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeStart() const {
+      return mRing->mFirstReadIndex;
+    }
+
+    // Index past the last block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeEnd() const {
+      return mRing->mNextWriteIndex;
+    }
+
+   private:
+    // Only a Reader can instantiate a BlockIterator.
+    friend class Reader;
+
+    BlockIterator(const BlocksRingBuffer& aRing,
+                  ProfileBufferBlockIndex aBlockIndex)
+        : mRing(WrapNotNull(&aRing)), mBlockIndex(aBlockIndex) {
+      // A BlockIterator must only be created inside a mutexed call.
+      mRing->mMutex.AssertCurrentThreadOwns();
+    }
+
+    // Non-null pointer rather than a reference, so that the iterator stays
+    // copyable. This BlockIterator should only live inside one of the
+    // thread-safe BlocksRingBuffer functions, for this pointer to stay valid.
+    NotNull<const BlocksRingBuffer*> mRing;
+    ProfileBufferBlockIndex mBlockIndex;
+  };
+
+  // Factory of `BlockIterator`s (e.g., for range-for), which can also walk
+  // through all entries by itself; lives within a lock guard lifetime.
+  class MOZ_RAII Reader {
+   public:
+    // Neither copyable nor movable.
+    Reader(const Reader&) = delete;
+    Reader& operator=(const Reader&) = delete;
+    Reader(Reader&&) = delete;
+    Reader& operator=(Reader&&) = delete;
+
+#ifdef DEBUG
+    ~Reader() {
+      // A Reader must not outlive the mutexed call it was created in.
+      mRing.mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Index of the first block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeStart() const {
+      return mRing.mFirstReadIndex;
+    }
+
+    // Index past the last block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeEnd() const {
+      return mRing.mNextWriteIndex;
+    }
+
+    // Iterators to the first and past-the-last blocks.
+    // Compatible with range-for.
+    BlockIterator begin() const {
+      const ProfileBufferBlockIndex first = BufferRangeStart();
+      return BlockIterator(mRing, first);
+    }
+    // Note that a `BlockIterator` at the `end()` should not be dereferenced, as
+    // there is no actual block there!
+    BlockIterator end() const {
+      const ProfileBufferBlockIndex pastLast = BufferRangeEnd();
+      return BlockIterator(mRing, pastLast);
+    }
+
+    // Get a `BlockIterator` at the given `ProfileBufferBlockIndex`, clamped to
+    // the stored range. Note that a `BlockIterator` at the `end()` should not
+    // be dereferenced, as there is no actual block there!
+    BlockIterator At(ProfileBufferBlockIndex aBlockIndex) const {
+      if (!(aBlockIndex < BufferRangeStart())) {
+        // At or after the range start: we at least expect the index to be
+        // valid (pointing exactly at a live block, or just past the end.)
+        mRing.AssertBlockIndexIsValidOrEnd(aBlockIndex);
+        return BlockIterator(mRing, aBlockIndex);
+      }
+      // Anything before the range (including null ProfileBufferBlockIndex) is
+      // clamped at the beginning.
+      return begin();
+    }
+
+    // Run `aCallback(ProfileBufferEntryReader&)` on each entry from first to
+    // last. Callback should not store `ProfileBufferEntryReader`, as it may
+    // become invalid after this thread-safe call.
+    template <typename Callback>
+    void ForEach(Callback&& aCallback) const {
+      BlockIterator it = begin();
+      const BlockIterator last = end();
+      for (; it != last; ++it) {
+        ProfileBufferEntryReader entryReader = *it;
+        aCallback(entryReader);
+      }
+    }
+
+   private:
+    friend class BlocksRingBuffer;
+
+    explicit Reader(const BlocksRingBuffer& aRing) : mRing(aRing) {
+      // A Reader must only be created inside a mutexed call.
+      mRing.mMutex.AssertCurrentThreadOwns();
+    }
+
+    // This Reader should only live inside one of the thread-safe
+    // BlocksRingBuffer functions, for this reference to stay valid.
+    const BlocksRingBuffer& mRing;
+  };
+
+  // Call `aCallback(BlocksRingBuffer::Reader*)` (nullptr when out-of-session),
+  // and return whatever `aCallback` returns. Callback should not store
+  // `Reader`, because it may become invalid after this call.
+  // When in-session, the callback runs while the mutex is held; when
+  // out-of-session, the mutex is released before the callback is invoked.
+  template <typename Callback>
+  auto Read(Callback&& aCallback) const {
+    {
+      // This scope limits the lifetime of the lock (and of the Reader) to the
+      // in-session path.
+      baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+      if (MOZ_LIKELY(mMaybeUnderlyingBuffer)) {
+        Reader reader(*this);
+        return std::forward<Callback>(aCallback)(&reader);
+      }
+    }
+    // Out-of-session: no Reader to give.
+    return std::forward<Callback>(aCallback)(nullptr);
+  }
+
+  // Run `aCallback(ProfileBufferEntryReader&)` on every entry; does nothing
+  // when out-of-session. Callback should not store `ProfileBufferEntryReader`,
+  // because it may become invalid after this call.
+  template <typename Callback>
+  void ReadEach(Callback&& aCallback) const {
+    Read([&](Reader* aReader) {
+      if (MOZ_UNLIKELY(!aReader)) {
+        // Out-of-session, nothing to iterate over.
+        return;
+      }
+      aReader->ForEach(aCallback);
+    });
+  }
+
+  // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at
+  // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if
+  // out-of-session, or if that entry doesn't exist anymore, or if we've reached
+  // just past the last entry. Return whatever `aCallback` returns. Callback
+  // should not store `ProfileBufferEntryReader`, because it may become invalid
+  // after this call.
+  template <typename Callback>
+  auto ReadAt(ProfileBufferBlockIndex aBlockIndex, Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    MOZ_ASSERT(aBlockIndex <= mNextWriteIndex);
+    // Only an index inside the live range [first read, next write) can be
+    // read.
+    const bool pointsAtLiveBlock = MOZ_LIKELY(mMaybeUnderlyingBuffer) &&
+                                   aBlockIndex >= mFirstReadIndex &&
+                                   aBlockIndex < mNextWriteIndex;
+    Maybe<ProfileBufferEntryReader> entryReader;
+    if (pointsAtLiveBlock) {
+      AssertBlockIndexIsValid(aBlockIndex);
+      entryReader.emplace(ReaderInBlockAt(aBlockIndex));
+    }
+    return std::forward<Callback>(aCallback)(std::move(entryReader));
+  }
+
+  // Main function to write entries.
+  // Reserve `aCallbackBytes()` bytes, call `aCallback()` with a reference to
+  // an on-stack `Maybe<ProfileBufferEntryWriter>` (`Nothing` when
+  // out-of-session), and return whatever `aCallback` returns. Callback should
+  // not store the `ProfileBufferEntryWriter`, because it may become invalid
+  // after this thread-safe call. Note: `aCallbackBytes` is a callback instead
+  // of a simple value, to delay this potentially-expensive computation until
+  // after we've checked that we're in-session; use `Put(Length, Callback)`
+  // below if you know the size already.
+  // Old blocks that the new block would overwrite are cleared first.
+  // In DEBUG builds, it is asserted that the callback has written exactly the
+  // reserved number of bytes.
+  template <typename CallbackBytes, typename Callback>
+  auto ReserveAndPut(CallbackBytes aCallbackBytes, Callback&& aCallback) {
+    Maybe<ProfileBufferEntryWriter> maybeEntryWriter;
+
+    // Held until the end of this function, so `aCallback` runs under the lock.
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+
+    if (MOZ_LIKELY(mMaybeUnderlyingBuffer)) {
+      const Length entryBytes = std::forward<CallbackBytes>(aCallbackBytes)();
+      MOZ_RELEASE_ASSERT(entryBytes > 0);
+      const Length bufferBytes =
+          mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value();
+      // The whole block (size prefix + entry) must fit in the buffer.
+      MOZ_RELEASE_ASSERT(entryBytes <= bufferBytes - ULEB128Size(entryBytes),
+                         "Entry would wrap and overwrite itself");
+      // Compute block size from the requested entry size.
+      const Length blockBytes = ULEB128Size(entryBytes) + entryBytes;
+      // We will put this new block at the end of the current buffer.
+      const ProfileBufferIndex blockIndex =
+          mNextWriteIndex.ConvertToProfileBufferIndex();
+      // Compute the end of this new block.
+      const ProfileBufferIndex blockEnd = blockIndex + blockBytes;
+      while (blockEnd >
+             mFirstReadIndex.ConvertToProfileBufferIndex() + bufferBytes) {
+        // About to trample on an old block.
+        ProfileBufferEntryReader reader = ReaderInBlockAt(mFirstReadIndex);
+        mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
+        // Move the buffer reading start past this cleared block.
+        mFirstReadIndex = ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            mFirstReadIndex.ConvertToProfileBufferIndex() +
+            ULEB128Size(reader.RemainingBytes()) + reader.RemainingBytes());
+      }
+      // Store the new end of buffer.
+      mNextWriteIndex =
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(blockEnd);
+      mMaybeUnderlyingBuffer->mPushedBlockCount += 1;
+      // Create the writer over the whole block, and write the entry size at
+      // the start of the block; what remains is the entry itself.
+      mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(maybeEntryWriter,
+                                                        blockIndex, blockEnd);
+      MOZ_ASSERT(maybeEntryWriter.isSome(),
+                 "Non-empty entry should always create an EntryWriter");
+      maybeEntryWriter->WriteULEB128(entryBytes);
+      MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == entryBytes);
+    }
+
+#ifdef DEBUG
+    // Runs when leaving this function, i.e., after `aCallback` has returned.
+    auto checkAllWritten = MakeScopeExit([&]() {
+      MOZ_ASSERT(!maybeEntryWriter || maybeEntryWriter->RemainingBytes() == 0);
+    });
+#endif  // DEBUG
+    // Finally, let aCallback write into the entry (or handle `Nothing` when
+    // out-of-session).
+    return std::forward<Callback>(aCallback)(maybeEntryWriter);
+  }
+
+  // Add a new entry of known size: call `aCallback` with a reference to a
+  // `Maybe<ProfileBufferEntryWriter>` (`Nothing` when out-of-session), and
+  // return whatever `aCallback` returns. Callback should not store the
+  // `ProfileBufferEntryWriter`, as it may become invalid after this thread-safe
+  // call.
+  template <typename Callback>
+  auto Put(Length aBytes, Callback&& aCallback) {
+    // The size is already known, so the size callback just hands it back.
+    auto knownSize = [aBytes]() { return aBytes; };
+    return ReserveAndPut(std::move(knownSize),
+                         std::forward<Callback>(aCallback));
+  }
+
+  // Add a new entry made of `aBytes` bytes copied from `aSrc`; return its
+  // block index, or an "empty" index when out-of-session.
+  ProfileBufferBlockIndex PutFrom(const void* aSrc, Length aBytes) {
+    return ReserveAndPut(
+        [aBytes]() { return aBytes; },
+        [&](Maybe<ProfileBufferEntryWriter>& aEntryWriter) {
+          if (MOZ_LIKELY(aEntryWriter.isSome())) {
+            aEntryWriter->WriteBytes(aSrc, aBytes);
+            return aEntryWriter->CurrentBlockIndex();
+          }
+          // Out-of-session, return "empty" index.
+          return ProfileBufferBlockIndex{};
+        });
+  }
+
+  // Add one new entry containing *all* the given objects (each written
+  // through its Serializer); return its block index, or an "empty" index when
+  // out-of-session.
+  template <typename... Ts>
+  ProfileBufferBlockIndex PutObjects(const Ts&... aTs) {
+    static_assert(sizeof...(Ts) > 0,
+                  "PutObjects must be given at least one object.");
+    return ReserveAndPut(
+        [&]() { return ProfileBufferEntryWriter::SumBytes(aTs...); },
+        [&](Maybe<ProfileBufferEntryWriter>& aEntryWriter) {
+          if (MOZ_LIKELY(aEntryWriter.isSome())) {
+            aEntryWriter->WriteObjects(aTs...);
+            return aEntryWriter->CurrentBlockIndex();
+          }
+          // Out-of-session, return "empty" index.
+          return ProfileBufferBlockIndex{};
+        });
+  }
+
+  // Add a new entry containing the given object; return its block index.
+  // Single-object shortcut for `PutObjects()`.
+  template <typename T>
+  ProfileBufferBlockIndex PutObject(const T& aOb) {
+    const ProfileBufferBlockIndex blockIndex = PutObjects(aOb);
+    return blockIndex;
+  }
+
+  // Append the contents of another BlocksRingBuffer to this one.
+  // Returns the index at which the copied blocks start in this buffer, or an
+  // "empty" index if nothing was copied (either buffer is out-of-session, or
+  // `aSrc` contains no blocks). Old blocks of this buffer are cleared as needed
+  // to make room.
+  // This buffer's mutex is taken first, then `aSrc`'s.
+  // NOTE(review): presumably `aSrc` must not be this same object, as its mutex
+  // would then be locked twice -- confirm.
+  ProfileBufferBlockIndex AppendContents(const BlocksRingBuffer& aSrc) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+
+    if (MOZ_UNLIKELY(!mMaybeUnderlyingBuffer)) {
+      // We are out-of-session, could not append contents.
+      return ProfileBufferBlockIndex{};
+    }
+
+    baseprofiler::detail::BaseProfilerMaybeAutoLock srcLock(aSrc.mMutex);
+
+    if (MOZ_UNLIKELY(!aSrc.mMaybeUnderlyingBuffer)) {
+      // The other BRB is out-of-session, nothing to copy, we're done.
+      return ProfileBufferBlockIndex{};
+    }
+
+    // All live blocks of the source are copied as one contiguous byte range.
+    const ProfileBufferIndex srcStartIndex =
+        aSrc.mFirstReadIndex.ConvertToProfileBufferIndex();
+    const ProfileBufferIndex srcEndIndex =
+        aSrc.mNextWriteIndex.ConvertToProfileBufferIndex();
+    const Length bytesToCopy = static_cast<Length>(srcEndIndex - srcStartIndex);
+
+    if (MOZ_UNLIKELY(bytesToCopy == 0)) {
+      // The other BRB is empty, nothing to copy, we're done.
+      return ProfileBufferBlockIndex{};
+    }
+
+    const Length bufferBytes =
+        mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value();
+
+    // The copied range must fit in this buffer.
+    MOZ_RELEASE_ASSERT(bytesToCopy <= bufferBytes,
+                       "Entry would wrap and overwrite itself");
+
+    // We will put all copied blocks at the end of the current buffer.
+    const ProfileBufferIndex dstStartIndex =
+        mNextWriteIndex.ConvertToProfileBufferIndex();
+    // Compute where the copy will end...
+    const ProfileBufferIndex dstEndIndex = dstStartIndex + bytesToCopy;
+
+    while (dstEndIndex >
+           mFirstReadIndex.ConvertToProfileBufferIndex() + bufferBytes) {
+      // About to trample on an old block.
+      ProfileBufferEntryReader reader = ReaderInBlockAt(mFirstReadIndex);
+      mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
+      // Move the buffer reading start past this cleared block.
+      mFirstReadIndex = ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mFirstReadIndex.ConvertToProfileBufferIndex() +
+          ULEB128Size(reader.RemainingBytes()) + reader.RemainingBytes());
+    }
+
+    // Store the new end of buffer.
+    mNextWriteIndex =
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(dstEndIndex);
+    // Update our pushed count with the number of live blocks we are copying.
+    mMaybeUnderlyingBuffer->mPushedBlockCount +=
+        aSrc.mMaybeUnderlyingBuffer->mPushedBlockCount -
+        aSrc.mMaybeUnderlyingBuffer->mClearedBlockCount;
+
+    // Copy the source bytes (size prefixes included) into the reserved range.
+    auto reader = aSrc.mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
+        srcStartIndex, srcEndIndex, nullptr, nullptr);
+    auto writer = mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(
+        dstStartIndex, dstEndIndex);
+    writer.WriteFromReader(reader, bytesToCopy);
+
+    return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(dstStartIndex);
+  }
+
+  // Clear all entries, by moving the read index to the end so that these
+  // entries cannot be read anymore.
+  void Clear() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    ClearAllEntries();
+  }
+
+  // Clear all entries strictly before aBlockIndex, by moving the read index to
+  // aBlockIndex so that these entries cannot be read anymore.
+  void ClearBefore(ProfileBufferBlockIndex aBlockIndex) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    if (!mMaybeUnderlyingBuffer) {
+      // Out-of-session, nothing to clear.
+      return;
+    }
+    // A not-yet-written index is not acceptable; one-past-the-end is fine.
+    MOZ_ASSERT(aBlockIndex <= mNextWriteIndex);
+    if (aBlockIndex <= mFirstReadIndex) {
+      // Everything before that index is already cleared.
+      return;
+    }
+    if (aBlockIndex == mNextWriteIndex) {
+      // Right past the end, just clear everything.
+      ClearAllEntries();
+      return;
+    }
+    // Otherwise only a subset of the entries goes away.
+    AssertBlockIndexIsValid(aBlockIndex);
+    // Walk the blocks up to the given index, just to count them as cleared.
+    Reader reader(*this);
+    BlockIterator it = reader.begin();
+    while (it.CurrentBlockIndex() < aBlockIndex) {
+      MOZ_ASSERT(it.CurrentBlockIndex() < reader.end().CurrentBlockIndex());
+      mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
+      ++it;
+    }
+    MOZ_ASSERT(it.CurrentBlockIndex() == aBlockIndex);
+    // Move read index to given index, so there's effectively no more entries
+    // before.
+    mFirstReadIndex = aBlockIndex;
+  }
+
+#ifdef DEBUG
+  // Print the read and write indices (each followed by its value masked with
+  // the buffer length minus one), then let the underlying buffer dump itself.
+  void Dump() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock guard(mMutex);
+    if (!mMaybeUnderlyingBuffer) {
+      printf("empty BlocksRingBuffer\n");
+      return;
+    }
+    using ULL = unsigned long long;
+    const auto wrapMask =
+        mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value() - 1;
+    const auto start = mFirstReadIndex.ConvertToProfileBufferIndex();
+    const auto end = mNextWriteIndex.ConvertToProfileBufferIndex();
+    printf("start=%llu (%llu) end=%llu (%llu) - ", ULL(start),
+           ULL(start & wrapMask), ULL(end), ULL(end & wrapMask));
+    mMaybeUnderlyingBuffer->mBuffer.Dump();
+  }
+#endif  // DEBUG
+
+ private:
+  // In DEBUG mode, assert that `aBlockIndex` is a valid index for a live block.
+  // (Not just in range, but points exactly at the start of a block.)
+  // Slow, so avoid it for internal checks; this is more to check what callers
+  // provide us.
+  // Does nothing in non-DEBUG builds. The mutex must be held by the caller.
+  void AssertBlockIndexIsValid(ProfileBufferBlockIndex aBlockIndex) const {
+#ifdef DEBUG
+    mMutex.AssertCurrentThreadOwns();
+    MOZ_ASSERT(aBlockIndex >= mFirstReadIndex);
+    MOZ_ASSERT(aBlockIndex < mNextWriteIndex);
+    // Quick check (default), or slow check (change '1' to '0') below:
+#  if 1
+    // Quick check that this looks like a valid block start.
+    // Read the entry size at the start of the block.
+    const Length entryBytes = ReaderInBlockAt(aBlockIndex).RemainingBytes();
+    MOZ_ASSERT(entryBytes > 0, "Empty entries are not allowed");
+    // Same bound as enforced when writing in `ReserveAndPut()`: a block (size
+    // prefix + entry) may fill the buffer exactly, hence `<=` and not `<`.
+    MOZ_ASSERT(
+        entryBytes <= mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value() -
+                          ULEB128Size(entryBytes),
+        "Entry would wrap and overwrite itself");
+    // The end of the block should be inside the live buffer range.
+    MOZ_ASSERT(aBlockIndex.ConvertToProfileBufferIndex() +
+                   ULEB128Size(entryBytes) + entryBytes <=
+               mNextWriteIndex.ConvertToProfileBufferIndex());
+#  else
+    // Slow check that the index is really the start of the block.
+    // This kills performances, as it reads from the first index until
+    // aBlockIndex. Only use to debug issues locally.
+    Reader reader(*this);
+    BlockIterator it = reader.begin();
+    for (; it.CurrentBlockIndex() < aBlockIndex; ++it) {
+      MOZ_ASSERT(it.CurrentBlockIndex() < reader.end().CurrentBlockIndex());
+    }
+    MOZ_ASSERT(it.CurrentBlockIndex() == aBlockIndex);
+#  endif
+#endif  // DEBUG
+  }
+
+  // In DEBUG mode, assert that `aBlockIndex` either is just past-the-end, or
+  // is a valid index for a live block (not just in range, but pointing exactly
+  // at the start of a block). Slow, so avoid it for internal checks; this is
+  // more to check what callers provide us.
+  void AssertBlockIndexIsValidOrEnd(ProfileBufferBlockIndex aBlockIndex) const {
+#ifdef DEBUG
+    mMutex.AssertCurrentThreadOwns();
+    if (aBlockIndex != mNextWriteIndex) {
+      // Not the past-the-end index, so it must point at a live block.
+      AssertBlockIndexIsValid(aBlockIndex);
+    }
+#endif  // DEBUG
+  }
+
+  // Create a reader for the block starting at aBlockIndex.
+  // The returned reader is limited to the entry area, i.e., it starts right
+  // after the ULEB128-encoded entry size and covers exactly the entry bytes.
+  // The mutex must be held, there must be an underlying buffer, and
+  // aBlockIndex must be inside the live range.
+  ProfileBufferEntryReader ReaderInBlockAt(
+      ProfileBufferBlockIndex aBlockIndex) const {
+    mMutex.AssertCurrentThreadOwns();
+    MOZ_ASSERT(mMaybeUnderlyingBuffer.isSome());
+    MOZ_ASSERT(aBlockIndex >= mFirstReadIndex);
+    MOZ_ASSERT(aBlockIndex < mNextWriteIndex);
+    // Create a reader from the given index until the end of the buffer.
+    // This first reader is only used to decode the entry size.
+    ProfileBufferEntryReader reader =
+        mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
+            aBlockIndex.ConvertToProfileBufferIndex(),
+            mNextWriteIndex.ConvertToProfileBufferIndex(), nullptr, nullptr);
+    // Read the block size at the beginning.
+    const Length entryBytes = reader.ReadULEB128<Length>();
+    // Make sure we don't overshoot the buffer.
+    MOZ_RELEASE_ASSERT(entryBytes <= reader.RemainingBytes());
+    // Start of the following block: this block's start, plus the size prefix,
+    // plus the entry.
+    ProfileBufferIndex nextBlockIndex =
+        aBlockIndex.ConvertToProfileBufferIndex() + ULEB128Size(entryBytes) +
+        entryBytes;
+    // And reduce the reader to the entry area. Only provide a next-block-index
+    // if it's not at the end of the buffer (i.e., there's an actual block
+    // there).
+    reader = mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
+        aBlockIndex.ConvertToProfileBufferIndex() + ULEB128Size(entryBytes),
+        nextBlockIndex, aBlockIndex,
+        (nextBlockIndex < mNextWriteIndex.ConvertToProfileBufferIndex())
+            ? ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                  nextBlockIndex)
+            : ProfileBufferBlockIndex{});
+    return reader;
+  }
+
+  // Create a reader over the whole live range, from the first read index to
+  // the next write index; default-constructed reader when out-of-session.
+  // The mutex must be held.
+  ProfileBufferEntryReader FullBufferReader() const {
+    mMutex.AssertCurrentThreadOwns();
+    if (mMaybeUnderlyingBuffer) {
+      const ProfileBufferIndex rangeStart =
+          mFirstReadIndex.ConvertToProfileBufferIndex();
+      const ProfileBufferIndex rangeEnd =
+          mNextWriteIndex.ConvertToProfileBufferIndex();
+      return mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
+          rangeStart, rangeEnd, nullptr, nullptr);
+    }
+    return {};
+  }
+
+  // Clear all entries, by moving the read index to the end so that these
+  // entries cannot be read anymore. Does nothing when out-of-session.
+  // The mutex must be held.
+  void ClearAllEntries() {
+    mMutex.AssertCurrentThreadOwns();
+    if (mMaybeUnderlyingBuffer) {
+      // Every entry pushed so far now counts as cleared.
+      mMaybeUnderlyingBuffer->mClearedBlockCount =
+          mMaybeUnderlyingBuffer->mPushedBlockCount;
+      // Move read index to write index, so there's effectively no more
+      // entries that can be read. (Not setting both to 0, in case user is
+      // keeping `ProfileBufferBlockIndex`'es to old entries.)
+      mFirstReadIndex = mNextWriteIndex;
+    }
+  }
+
+  // If there is an underlying buffer, clear all entries, and discard the
+  // buffer. This BlocksRingBuffer will now gracefully reject all API calls, and
+  // is in a state where a new underlying buffer may be set.
+  // The mutex must be held.
+  void ResetUnderlyingBuffer() {
+    mMutex.AssertCurrentThreadOwns();
+    if (mMaybeUnderlyingBuffer) {
+      // Update indices and stats first, then drop the buffer.
+      ClearAllEntries();
+      mMaybeUnderlyingBuffer.reset();
+    }
+  }
+
+  // Used to de/serialize a BlocksRingBuffer (e.g., containing a backtrace).
+  friend ProfileBufferEntryWriter::Serializer<BlocksRingBuffer>;
+  friend ProfileBufferEntryReader::Deserializer<BlocksRingBuffer>;
+  friend ProfileBufferEntryWriter::Serializer<UniquePtr<BlocksRingBuffer>>;
+  friend ProfileBufferEntryReader::Deserializer<UniquePtr<BlocksRingBuffer>>;
+
+  // Mutex guarding the following members.
+  mutable baseprofiler::detail::BaseProfilerMaybeMutex mMutex;
+
+  struct UnderlyingBuffer {
+    // Create a buffer of the given length (must exceed a max-size ULEB128).
+    explicit UnderlyingBuffer(PowerOfTwo<Length> aLength) : mBuffer(aLength) {
+      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
+                 "Buffer should be able to contain more than a block size");
+    }
+
+    // Take ownership of an existing buffer (same length requirement).
+    UnderlyingBuffer(UniquePtr<Buffer::Byte[]> aExistingBuffer,
+                     PowerOfTwo<Length> aLength)
+        : mBuffer(std::move(aExistingBuffer), aLength) {
+      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
+                 "Buffer should be able to contain more than a block size");
+    }
+
+    // Use an externally-owned buffer (same length requirement).
+    UnderlyingBuffer(Buffer::Byte* aExternalBuffer, PowerOfTwo<Length> aLength)
+        : mBuffer(aExternalBuffer, aLength) {
+      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
+                 "Buffer should be able to contain more than a block size");
+    }
+
+    // Only allow move-construction.
+    UnderlyingBuffer(UnderlyingBuffer&&) = default;
+
+    // Copies and move-assignment are explicitly disallowed.
+    UnderlyingBuffer(const UnderlyingBuffer&) = delete;
+    UnderlyingBuffer& operator=(const UnderlyingBuffer&) = delete;
+    UnderlyingBuffer& operator=(UnderlyingBuffer&&) = delete;
+
+    // Underlying circular byte buffer.
+    Buffer mBuffer;
+
+    // Statistics: numbers of blocks pushed and cleared so far.
+    uint64_t mPushedBlockCount = 0;
+    uint64_t mClearedBlockCount = 0;
+  };
+
+  // Underlying buffer, with stats.
+  // Only valid during in-session period.
+  Maybe<UnderlyingBuffer> mMaybeUnderlyingBuffer;
+
+  // Index to the first block to be read (or cleared). Initialized to 1 because
+  // 0 is reserved for the "empty" ProfileBufferBlockIndex value. Kept between
+  // sessions, so that stored indices from one session will be gracefully denied
+  // in future sessions.
+  ProfileBufferBlockIndex mFirstReadIndex =
+      ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          ProfileBufferIndex(1));
+  // Index where the next new block should be allocated. Initialized to 1
+  // because 0 is reserved for the "empty" ProfileBufferBlockIndex value. Kept
+  // between sessions, so that stored indices from one session will be
+  // gracefully denied in future sessions.
+  ProfileBufferBlockIndex mNextWriteIndex =
+      ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          ProfileBufferIndex(1));
+};
+
+// ----------------------------------------------------------------------------
+// BlocksRingBuffer serialization
+
+// A BlocksRingBuffer can hide another one!
+// This will be used to store marker backtraces; They can be read back into a
+// UniquePtr<BlocksRingBuffer>.
+// Format: len (ULEB128) | start | end | buffer (len bytes) | pushed | cleared
+// len==0 marks an out-of-session buffer, or empty buffer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<BlocksRingBuffer> {
+  static Length Bytes(const BlocksRingBuffer& aBuffer) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(aBuffer.mMutex);
+    if (aBuffer.mMaybeUnderlyingBuffer.isNothing()) {
+      // Out-of-session, we only need 1 byte to store a length of 0.
+      return ULEB128Size<Length>(0);
+    }
+    const auto start = aBuffer.mFirstReadIndex.ConvertToProfileBufferIndex();
+    const auto end = aBuffer.mNextWriteIndex.ConvertToProfileBufferIndex();
+    const auto len = end - start;  // Byte count of the stored range.
+    if (len == 0) {
+      // In-session but empty, also store a length of 0.
+      return ULEB128Size<Length>(0);
+    }
+    return ULEB128Size(len) + sizeof(start) + sizeof(end) + len +
+           sizeof(aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount) +
+           sizeof(aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount);
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const BlocksRingBuffer& aBuffer) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(aBuffer.mMutex);
+    if (aBuffer.mMaybeUnderlyingBuffer.isNothing()) {
+      // Out-of-session, only store a length of 0.
+      aEW.WriteULEB128<Length>(0);
+      return;
+    }
+    const auto start = aBuffer.mFirstReadIndex.ConvertToProfileBufferIndex();
+    const auto end = aBuffer.mNextWriteIndex.ConvertToProfileBufferIndex();
+    MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
+    const auto len = static_cast<Length>(end - start);
+    if (len == 0) {
+      // In-session but empty, only store a length of 0.
+      aEW.WriteULEB128<Length>(0);
+      return;
+    }
+    // In-session and non-empty; the layout must match what Bytes() counted.
+    // Store buffer length, start and end indices.
+    aEW.WriteULEB128<Length>(len);
+    aEW.WriteObject(start);
+    aEW.WriteObject(end);
+    // Write all the bytes between the read and write indices, as-is.
+    auto reader = aBuffer.FullBufferReader();
+    aEW.WriteFromReader(reader, reader.RemainingBytes());
+    // And write stats (pushed, then cleared block counts).
+    aEW.WriteObject(aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount);
+    aEW.WriteObject(aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount);
+  }
+};
+
+// A serialized BlocksRingBuffer can be read into an empty buffer (either
+// out-of-session, or in-session with enough room).
+template <>
+struct ProfileBufferEntryReader::Deserializer<BlocksRingBuffer> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       BlocksRingBuffer& aBuffer) {
+    // Expect an empty buffer, as we're going to overwrite it.
+    MOZ_ASSERT(aBuffer.GetState().mRangeStart == aBuffer.GetState().mRangeEnd);
+    // Read the stored buffer length.
+    const auto len = aER.ReadULEB128<Length>();
+    if (len == 0) {
+      // 0-length means an "uninteresting" buffer, leave `aBuffer` untouched.
+      return;
+    }
+    // We have a non-empty buffer to read.
+    if (aBuffer.BufferLength().isSome()) {
+      // Output buffer is in-session (i.e., it already has a memory buffer
+      // attached). Make sure the caller allocated enough space.
+      MOZ_RELEASE_ASSERT(aBuffer.BufferLength()->Value() >= len);
+    } else {
+      // Output buffer is out-of-session, attach a new memory buffer.
+      aBuffer.Set(PowerOfTwo<Length>(len));
+      MOZ_ASSERT(aBuffer.BufferLength()->Value() >= len);
+    }
+    // Read start and end indices, and restore them in the output buffer.
+    const auto start = aER.ReadObject<ProfileBufferIndex>();
+    aBuffer.mFirstReadIndex =
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(start);
+    const auto end = aER.ReadObject<ProfileBufferIndex>();
+    aBuffer.mNextWriteIndex =
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(end);
+    MOZ_ASSERT(end - start == len);  // Stored length must match the range.
+    // Copy the bytes into the buffer, at the same [start, end) indices.
+    auto writer =
+        aBuffer.mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(start, end);
+    writer.WriteFromReader(aER, end - start);
+    MOZ_ASSERT(writer.RemainingBytes() == 0);
+    // Finally copy stats, in the order they were written: pushed, cleared.
+    aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount = aER.ReadObject<decltype(
+        aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount)>();
+    aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount =
+        aER.ReadObject<decltype(
+            aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount)>();
+  }
+
+  // We cannot output a BlocksRingBuffer object (not copyable), use `ReadInto()`
+  // or `aER.ReadObject<UniquePtr<BlocksRingBuffer>>()` instead.
+  static BlocksRingBuffer Read(ProfileBufferEntryReader& aER) = delete;
+};
+
+// A BlocksRingBuffer is usually referenced through a UniquePtr, for
+// convenience we support (de)serializing that UniquePtr directly.
+// This is compatible with the non-UniquePtr serialization above, with a null
+// pointer being treated like an out-of-session or empty buffer; and any of
+// these would be deserialized into a null pointer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<UniquePtr<BlocksRingBuffer>> {
+  static Length Bytes(const UniquePtr<BlocksRingBuffer>& aBufferUPtr) {
+    if (!aBufferUPtr) {
+      // Null pointer, treat it like an empty buffer, i.e., count a length of 0.
+      return ULEB128Size<Length>(0);
+    }
+    // Otherwise count the pointed-at BlocksRingBuffer (which could be
+    // out-of-session or empty).
+    return SumBytes(*aBufferUPtr);
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniquePtr<BlocksRingBuffer>& aBufferUPtr) {
+    if (!aBufferUPtr) {
+      // Null pointer, treat it like an empty buffer, i.e., write length of 0.
+      aEW.WriteULEB128<Length>(0);
+      return;
+    }
+    // Otherwise write the pointed-at BlocksRingBuffer (which could be
+    // out-of-session or empty).
+    aEW.WriteObject(*aBufferUPtr);
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<UniquePtr<BlocksRingBuffer>> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       UniquePtr<BlocksRingBuffer>& aBuffer) {
+    aBuffer = Read(aER);  // Replaces any previously-owned buffer.
+  }
+
+  static UniquePtr<BlocksRingBuffer> Read(ProfileBufferEntryReader& aER) {
+    UniquePtr<BlocksRingBuffer> bufferUPtr;
+    // Keep a copy of the reader before reading the length, so we can restart
+    // from here below.
+    ProfileBufferEntryReader readerBeforeLen = aER;
+    // Read the stored buffer length, to detect the null/empty case first.
+    const auto len = aER.ReadULEB128<Length>();
+    if (len == 0) {
+      // 0-length means an "uninteresting" buffer, just return nullptr.
+      return bufferUPtr;
+    }
+    // We have a non-empty buffer.
+    // Allocate an empty BlocksRingBuffer without mutex.
+    bufferUPtr = MakeUnique<BlocksRingBuffer>(
+        BlocksRingBuffer::ThreadSafety::WithoutMutex);
+    // Rewind the reader before the length and deserialize the contents, using
+    // the non-UniquePtr Deserializer (which reads the length itself).
+    aER = readerBeforeLen;
+    aER.ReadIntoObject(*bufferUPtr);
+    return bufferUPtr;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // BlocksRingBuffer_h
diff --git a/mozglue/baseprofiler/public/ModuloBuffer.h b/mozglue/baseprofiler/public/ModuloBuffer.h
new file mode 100644
index 0000000000..80e765279e
--- /dev/null
+++ b/mozglue/baseprofiler/public/ModuloBuffer.h
@@ -0,0 +1,618 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ModuloBuffer_h
+#define ModuloBuffer_h
+
+#include "mozilla/leb128iterator.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/PowerOfTwo.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/UniquePtr.h"
+
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+
+namespace mozilla {
+
+// The ModuloBuffer class is a circular buffer that holds raw byte values, with
+// data-read/write helpers.
+//
+// OffsetT: Type of the internal offset into the buffer of bytes, it should be
+// large enough to access all bytes of the buffer. It will also be used as
+// Length (in bytes) of the buffer and of any subset. Default: uint32_t.
+// IndexT: Type of the external index, it should be large enough that overflows
+// should not happen during the lifetime of the ModuloBuffer. Default: uint64_t.
+//
+// The basic usage is to create an iterator-like object with `ReaderAt(Index)`
+// or `WriterAt(Index)`, and use it to read/write data blobs. Iterators
+// automatically manage the wrap-around (through "Modulo", which is effectively
+// an AND-masking with the PowerOfTwo buffer size.)
+//
+// There is zero safety: No thread safety, no checks that iterators may be
+// overwriting data that's still to be read, etc. It's up to the caller to add
+// adequate checks.
+// The intended use is as an underlying buffer for a safer container.
+template <typename OffsetT = uint32_t, typename IndexT = uint64_t>
+class ModuloBuffer {
+ public:
+  using Byte = uint8_t;
+  static_assert(sizeof(Byte) == 1, "ModuloBuffer::Byte must be 1 byte");
+  using Offset = OffsetT;
+  static_assert(!std::numeric_limits<Offset>::is_signed,
+                "ModuloBuffer::Offset must be an unsigned integral type");
+  using Length = Offset;
+  using Index = IndexT;
+  static_assert(!std::numeric_limits<Index>::is_signed,
+                "ModuloBuffer::Index must be an unsigned integral type");
+  static_assert(sizeof(Index) >= sizeof(Offset),
+                "ModuloBuffer::Index size must >= Offset");
+
+  // Allocate and own a new buffer of the given length; freed with `delete[]`.
+  explicit ModuloBuffer(PowerOfTwo<Length> aLength)
+      : mMask(aLength.Mask()),
+        mBuffer(WrapNotNull(new Byte[aLength.Value()])),
+        mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {}
+
+  // Take ownership of an existing buffer. Existing contents are ignored.
+  // Done by extracting the raw pointer from UniquePtr<Byte[]>, and adding
+  // an equivalent `delete[]` in `mBufferDeleter`.
+  ModuloBuffer(UniquePtr<Byte[]> aExistingBuffer, PowerOfTwo<Length> aLength)
+      : mMask(aLength.Mask()),
+        mBuffer(WrapNotNull(aExistingBuffer.release())),
+        mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {}
+
+  // Use an externally-owned buffer. Existing contents are ignored.
+  ModuloBuffer(Byte* aExternalBuffer, PowerOfTwo<Length> aLength)
+      : mMask(aLength.Mask()), mBuffer(WrapNotNull(aExternalBuffer)) {}
+
+  // Disallow copying, as we may uniquely own the resource.
+  ModuloBuffer(const ModuloBuffer& aOther) = delete;
+  ModuloBuffer& operator=(const ModuloBuffer& aOther) = delete;
+
+  // Allow move-construction, stealing ownership if the original had it.
+  // This effectively prevents copy construction, and all assignments; needed so
+  // that a ModuloBuffer may be initialized from a separate construction.
+  // The moved-from ModuloBuffer still points at the resource but doesn't own
+  // it, so it won't try to free it; but accesses are not guaranteed, so it
+  // should not be used anymore.
+  ModuloBuffer(ModuloBuffer&& aOther)
+      : mMask(std::move(aOther.mMask)),
+        mBuffer(std::move(aOther.mBuffer)),
+        mBufferDeleter(std::move(aOther.mBufferDeleter)) {
+    // The above move leaves `aOther.mBufferDeleter` in a valid state but with
+    // an unspecified value, so it could theoretically still contain the
+    // original function, which would be bad because we don't want aOther to
+    // delete the resource that `this` now owns.
+    if (aOther.mBufferDeleter) {
+      // `aOther` still had a non-empty deleter, reset it so only `this` frees.
+      aOther.mBufferDeleter = nullptr;
+    }
+  }
+
+  // Disallow assignment, as we have some `const` members.
+  ModuloBuffer& operator=(ModuloBuffer&& aOther) = delete;
+
+  // Destructor, deletes the resource if we uniquely own it.
+  ~ModuloBuffer() {
+    if (mBufferDeleter) {  // Empty when moved-from (see move constructor).
+      mBufferDeleter(mBuffer);
+    }
+  }
+
+  PowerOfTwo<Length> BufferLength() const {
+    return PowerOfTwo<Length>(mMask.MaskValue() + 1);  // Rebuilt from the mask.
+  }
+
+  // Size of external resources, i.e., of the byte buffer if we own it.
+  // Note: `mBufferDeleter`'s potential external data (for its captures) is not
+  // included, as it's hidden in the `std::function` implementation.
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    if (!mBufferDeleter) {
+      // If we don't have a buffer deleter, assume we don't own the data, so
+      // it's probably on the stack, or should be reported by its owner.
+      return 0;
+    }
+    return aMallocSizeOf(mBuffer);  // Owned buffer, measured by the callback.
+  }
+
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    // Size of this object itself, plus its external resources.
+    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  ProfileBufferEntryReader EntryReaderFromTo(
+      Index aStart, Index aEnd, ProfileBufferBlockIndex aBlockIndex,
+      ProfileBufferBlockIndex aNextBlockIndex) const {
+    using EntrySpan = Span<const ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {  // Empty range: return an empty reader.
+      return ProfileBufferEntryReader{};
+    }
+    // Don't allow over-wrapping: the range must fit in the buffer.
+    MOZ_ASSERT(aEnd - aStart <= mMask.MaskValue() + 1);
+    // Start offset in 0 .. (buffer size - 1)
+    Offset start = static_cast<Offset>(aStart) & mMask;
+    // End offset in 1 .. (buffer size), i.e., one past the last byte.
+    Offset end = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (start < end) {
+      // Segment doesn't cross buffer threshold, one span is enough.
+      return ProfileBufferEntryReader{EntrySpan(&mBuffer[start], end - start),
+                                      aBlockIndex, aNextBlockIndex};
+    }
+    // Segment crosses buffer threshold, we need one span until the end and one
+    // span restarting at the beginning of the buffer.
+    return ProfileBufferEntryReader{
+        EntrySpan(&mBuffer[start], mMask.MaskValue() + 1 - start),
+        EntrySpan(&mBuffer[0], end), aBlockIndex, aNextBlockIndex};
+  }
+
+  // Return an entry writer for the given range (an empty one if it is empty).
+  ProfileBufferEntryWriter EntryWriterFromTo(Index aStart, Index aEnd) const {
+    using EntrySpan = Span<ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      return ProfileBufferEntryWriter{};
+    }
+    MOZ_ASSERT(aEnd - aStart <= mMask.MaskValue() + 1);  // No over-wrapping.
+    // Start offset in 0 .. (buffer size - 1)
+    Offset start = static_cast<Offset>(aStart) & mMask;
+    // End offset in 1 .. (buffer size), i.e., one past the last byte.
+    Offset end = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (start < end) {
+      // Segment doesn't cross buffer threshold, one span is enough.
+      return ProfileBufferEntryWriter{
+          EntrySpan(&mBuffer[start], end - start),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)};
+    }
+    // Segment crosses buffer threshold, we need one span until the end and one
+    // span restarting at the beginning of the buffer.
+    return ProfileBufferEntryWriter{
+        EntrySpan(&mBuffer[start], mMask.MaskValue() + 1 - start),
+        EntrySpan(&mBuffer[0], end),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)};
+  }
+
+  // Emplace an entry writer into `aMaybeEntryWriter` for the given range.
+  void EntryWriterFromTo(Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter,
+                         Index aStart, Index aEnd) const {
+    MOZ_ASSERT(aMaybeEntryWriter.isNothing(),
+               "Reference entry writer should be Nothing.");
+    using EntrySpan = Span<ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      return;  // Empty range: `aMaybeEntryWriter` is left as Nothing.
+    }
+    MOZ_ASSERT(aEnd - aStart <= mMask.MaskValue() + 1);  // No over-wrapping.
+    // Start offset in 0 .. (buffer size - 1)
+    Offset start = static_cast<Offset>(aStart) & mMask;
+    // End offset in 1 .. (buffer size), i.e., one past the last byte.
+    Offset end = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (start < end) {
+      // Segment doesn't cross buffer threshold, one span is enough.
+      aMaybeEntryWriter.emplace(
+          EntrySpan(&mBuffer[start], end - start),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd));
+    } else {
+      // Segment crosses buffer threshold, we need one span until the end and
+      // one span restarting at the beginning of the buffer.
+      aMaybeEntryWriter.emplace(
+          EntrySpan(&mBuffer[start], mMask.MaskValue() + 1 - start),
+          EntrySpan(&mBuffer[0], end),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd));
+    }
+  }
+
+  // All ModuloBuffer operations should be done through this iterator, which has
+  // an effectively infinite range. The underlying wrapping-around is hidden.
+  // Use `ReaderAt(Index)` or `WriterAt(Index)` to create it.
+  //
+  // `const Iterator<...>` means the iterator itself cannot change, i.e., it
+  // cannot move, and only its const methods are available. Note that these
+  // const methods may still be used to modify the buffer contents (e.g.:
+  // `operator*()`, `Poke()`).
+  //
+  // `Iterator</*IsBufferConst=*/true>` means the buffer contents cannot be
+  // modified, i.e., write operations are forbidden, but the iterator may still
+  // move if non-const itself.
+  template <bool IsBufferConst>
+  class Iterator {
+    // Alias to const- or mutable-`ModuloBuffer` depending on `IsBufferConst`.
+    using ConstOrMutableBuffer =
+        std::conditional_t<IsBufferConst, const ModuloBuffer, ModuloBuffer>;
+
+    // Implementation note about the strange enable-if's below:
+    //   `template <bool NotIBC = !IsBufferConst> enable_if_t<NotIBC>`
+    // which intuitively could be simplified to:
+    //   `enable_if_t<!IsBufferConst>`
+    // The former extra-templated syntax is in fact necessary to delay
+    // instantiation of these functions until they are actually needed.
+    //
+    // If we were just doing `enable_if_t<!IsBufferConst>`, this would only
+    // depend on the *class* (`ModuloBuffer<...>::Iterator`), which gets
+    // instantiated when a `ModuloBuffer` is created with some template
+    // arguments; at that point, all non-templated methods get instantiated, so
+    // there's no "SFINAE" happening, and `enable_if_t<...>` is actually doing
+    // `typename enable_if<...>::type` on the spot, but there is no `type` if
+    // `IsBufferConst` is true, so it just fails right away. E.g.:
+    // error: no type named 'type' in 'std::enable_if<false, void>';
+    //        'enable_if' cannot be used to disable this declaration
+    // note: in instantiation of template type alias 'enable_if_t'
+    // > std::enable_if_t<!IsBufferConst> WriteObject(const T& aObject) {
+    //       in instantiation of template class
+    //       'mozilla::ModuloBuffer<...>::Iterator<true>'
+    // > auto it = mb.ReaderAt(1);
+    //
+    // By adding another template level `template <bool NotIsBufferConst =
+    // !IsBufferConst>`, the instantiation is delayed until the function is
+    // actually invoked somewhere, e.g. `it.Poke(...);`.
+    // So at that invocation point, the compiler looks for a "Poke" name in it,
+    // and considers potential template instantiations that could work. The
+    // `enable_if_t` is *now* attempted, with `NotIsBufferConst` taking its
+    // value from `!IsBufferConst`:
+    // - If `IsBufferConst` is false, `NotIsBufferConst` is true,
+    // `enable_if<NotIsBufferConst>` does define a `type` (`void` by default),
+    // so `enable_if_t` happily becomes `void`, the function exists and may be
+    // called.
+    // - Otherwise if `IsBufferConst` is true, `NotIsBufferConst` is false,
+    // `enable_if<NotIsBufferConst>` does *not* define a `type`, therefore
+    // `enable_if_t` produces an error because there is no `type`. Now "SFINAE"
+    // happens: This "Substitution Failure Is Not An Error" (by itself)... But
+    // then, there are no other functions named "Poke" as requested in the
+    // `it.Poke(...);` call, so we are now getting an error (can't find
+    // function), as expected because `it` had `IsBufferConst`==true. (But at
+    // least the compiler waited until this invocation attempt before outputting
+    // an error.)
+    //
+    // C++ is fun!
+
+   public:
+    // These definitions are expected by std functions, to recognize this as an
+    // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+    using difference_type = Index;
+    using value_type = Byte;
+    using pointer = std::conditional_t<IsBufferConst, const Byte*, Byte*>;
+    using reference = std::conditional_t<IsBufferConst, const Byte&, Byte&>;
+    using iterator_category = std::random_access_iterator_tag;
+
+    // Can always copy/assign from the same kind of iterator.
+    Iterator(const Iterator& aRhs) = default;
+    Iterator& operator=(const Iterator& aRhs) = default;
+
+    // Implicit conversion from an Iterator-to-mutable (reader+writer) to an
+    // Iterator-to-const (reader-only); the `enable_if` rejects the reverse.
+    template <bool IsRhsBufferConst,
+              typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>>
+    MOZ_IMPLICIT Iterator(const Iterator<IsRhsBufferConst>& aRhs)
+        : mModuloBuffer(aRhs.mModuloBuffer), mIndex(aRhs.mIndex) {}
+
+    // Assignment from an Iterator-to-mutable (reader+writer) to an
+    // Iterator-to-const (reader-only); the `enable_if` rejects the reverse.
+    template <bool IsRhsBufferConst,
+              typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>>
+    Iterator& operator=(const Iterator<IsRhsBufferConst>& aRhs) {
+      mModuloBuffer = aRhs.mModuloBuffer;
+      mIndex = aRhs.mIndex;
+      return *this;
+    }
+
+    // Current location of the iterator in the (unwrapped) `Index` range.
+    // Note that due to wrapping, multiple indices may effectively point at the
+    // same byte in the buffer.
+    Index CurrentIndex() const { return mIndex; }
+
+    // Location comparison in the `Index` range. I.e., two `Iterator`s may
+    // compare unequal, yet refer to the same (wrapped) buffer location.
+    // Both must be on the same buffer (this is asserted).
+    bool operator==(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex == aRhs.mIndex;
+    }
+    bool operator!=(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex != aRhs.mIndex;
+    }
+    bool operator<(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex < aRhs.mIndex;
+    }
+    bool operator<=(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex <= aRhs.mIndex;
+    }
+    bool operator>(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex > aRhs.mIndex;
+    }
+    bool operator>=(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex >= aRhs.mIndex;
+    }
+
+    // Movement in the `Index` range; no bounds are checked here.
+    Iterator& operator++() {
+      ++mIndex;
+      return *this;
+    }
+    Iterator operator++(int) {
+      Iterator here(*mModuloBuffer, mIndex);  // Position before the increment.
+      ++mIndex;
+      return here;
+    }
+    Iterator& operator--() {
+      --mIndex;
+      return *this;
+    }
+    Iterator operator--(int) {
+      Iterator here(*mModuloBuffer, mIndex);  // Position before the decrement.
+      --mIndex;
+      return here;
+    }
+    Iterator& operator+=(Length aLength) {
+      mIndex += aLength;
+      return *this;
+    }
+    Iterator operator+(Length aLength) const {
+      return Iterator(*mModuloBuffer, mIndex + aLength);
+    }
+    friend Iterator operator+(Length aLength, const Iterator& aIt) {
+      return aIt + aLength;  // Commutative form of the member `operator+`.
+    }
+    Iterator& operator-=(Length aLength) {
+      mIndex -= aLength;
+      return *this;
+    }
+    Iterator operator-(Length aLength) const {
+      return Iterator(*mModuloBuffer, mIndex - aLength);
+    }
+
+    // Distance from `aRef` (on the same buffer) to here in the `Index` range.
+    // `Index` is unsigned, so this wraps (2's complement) if `aRef > *this`.
+    Index operator-(const Iterator& aRef) const {
+      MOZ_ASSERT(mModuloBuffer == aRef.mModuloBuffer);
+      return mIndex - aRef.mIndex;
+    }
+
+    // Dereference the single byte at the current location (read-only if
+    // `IsBufferConst` is true).
+    reference operator*() const {
+      return mModuloBuffer->mBuffer[OffsetInBuffer()];
+
+    // Random-access dereference of the byte `aLength` past this iterator.
+    reference operator[](Length aLength) const { return *(*this + aLength); }
+
+    // Write data (if `IsBufferConst` is false) but don't move iterator.
+    template <bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> Poke(const void* aSrc,
+                                            Length aLength) const {
+      // Don't allow data larger than the buffer.
+      MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value());
+      // Offset inside the buffer (corresponding to our Index).
+      Offset offset = OffsetInBuffer();
+      // Compute remaining bytes between this offset and the end of the buffer.
+      Length remaining = mModuloBuffer->BufferLength().Value() - offset;
+      if (MOZ_LIKELY(remaining >= aLength)) {
+        // Enough space to write everything before the end, in one copy.
+        memcpy(&mModuloBuffer->mBuffer[offset], aSrc, aLength);
+      } else {
+        // Wrap-around: write as much as possible before the end of the buffer.
+        memcpy(&mModuloBuffer->mBuffer[offset], aSrc, remaining);
+        // And then continue from the beginning of the buffer with the rest.
+        memcpy(&mModuloBuffer->mBuffer[0],
+               static_cast<const Byte*>(aSrc) + remaining,
+               (aLength - remaining));
+      }
+    }
+
+    // Write object data (if `IsBufferConst` is false) but don't move iterator.
+    // Note that this copies bytes from the object, with the intent to read them
+    // back later. Restricted to trivially-copyable types, which support this
+    // without Undefined Behavior!
+    template <typename T, bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> PokeObject(const T& aObject) const {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "PokeObject<T> - T must be trivially copyable");
+      return Poke(&aObject, sizeof(T));  // Raw copy of the object's bytes.
+    }
+
+    // Write data (if `IsBufferConst` is false) and move iterator ahead.
+    template <bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> Write(const void* aSrc, Length aLength) {
+      Poke(aSrc, aLength);  // Copies the bytes, handling buffer wrap-around.
+      mIndex += aLength;
+    }
+
+    // Write object data (if `IsBufferConst` is false) and move iterator ahead.
+    // Note that this copies bytes from the object, with the intent to read them
+    // back later. Restricted to trivially-copyable types, which support this
+    // without Undefined Behavior!
+    template <typename T, bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> WriteObject(const T& aObject) {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "WriteObject<T> - T must be trivially copyable");
+      return Write(&aObject, sizeof(T));  // Advances by `sizeof(T)` bytes.
+    }
+
+    // Number of bytes needed to represent `aValue` in unsigned LEB128.
+    template <typename T>
+    static unsigned ULEB128Size(T aValue) {
+      return ::mozilla::ULEB128Size(aValue);  // Forwards to the free function.
+    }
+
+    // Write number as unsigned LEB128 (if `IsBufferConst` is false) and move
+    // iterator ahead.
+    template <typename T, bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> WriteULEB128(T aValue) {
+      ::mozilla::WriteULEB128(aValue, *this);  // Writes through this iterator.
+    }
+
+    // Copy `aLength` bytes from here into `aDst`, but don't move iterator.
+    void Peek(void* aDst, Length aLength) const {
+      // Don't allow data larger than the buffer.
+      MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value());
+      // Offset inside the buffer (corresponding to our Index).
+      Offset offset = OffsetInBuffer();
+      // Compute remaining bytes between this offset and the end of the buffer.
+      Length remaining = mModuloBuffer->BufferLength().Value() - offset;
+      if (MOZ_LIKELY(remaining >= aLength)) {
+        // Can read everything we need before the end of the buffer.
+        memcpy(aDst, &mModuloBuffer->mBuffer[offset], aLength);
+      } else {
+        // Wrap-around: read as much as possible before the end of the buffer.
+        memcpy(aDst, &mModuloBuffer->mBuffer[offset], remaining);
+        // And then continue from the beginning of the buffer with the rest.
+        memcpy(static_cast<Byte*>(aDst) + remaining, &mModuloBuffer->mBuffer[0],
+               (aLength - remaining));
+      }
+    }
+
+    // Read data into an object but don't move iterator.
+    // Note that this overwrites `aObject` with `sizeof(T)` bytes from the
+    // buffer. Restricted to trivially-copyable types, which support this
+    // without Undefined Behavior!
+    template <typename T>
+    void PeekIntoObject(T& aObject) const {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "PeekIntoObject<T> - T must be trivially copyable");
+      Peek(&aObject, sizeof(T));
+    }
+
+    // Return a `T` whose bytes are taken from the buffer at the iterator's
+    // position, without moving the iterator.
+    // A default-initialized `T` is created first and then overwritten, which
+    // is only free of Undefined Behavior for trivially-copyable types.
+    template <typename T>
+    T PeekObject() const {
+      static_assert(std::is_trivially_copyable_v<T>,
+                    "PeekObject<T> - T must be trivially copyable");
+      T result;
+      Peek(&result, sizeof(T));
+      return result;
+    }
+
+    // Read data and move iterator ahead.
+    // Copies `aLength` bytes into `aDst` through `Peek()` (so the same
+    // wrap-around handling and size assertion apply), then advances `mIndex`
+    // by `aLength`.
+    void Read(void* aDst, Length aLength) {
+      Peek(aDst, aLength);
+      mIndex += aLength;
+    }
+
+    // Transfer `aLength` bytes from this iterator's position to the mutable
+    // iterator `aDst`. Both iterators end up advanced: `aDst` through its own
+    // `Write()` calls, this one by bumping `mIndex`.
+    void ReadInto(Iterator</* IsBufferConst */ false>& aDst, Length aLength) {
+      const Length sourceLength = mModuloBuffer->BufferLength().Value();
+      // The transfer must fit in both the source and destination buffers.
+      MOZ_ASSERT(aLength <= sourceLength);
+      MOZ_ASSERT(aLength <= aDst.mModuloBuffer->BufferLength().Value());
+      // Where our Index lands inside the source storage, and how many bytes
+      // separate that position from the end of the storage.
+      const Offset start = OffsetInBuffer();
+      const Length bytesToEnd = sourceLength - start;
+      if (MOZ_LIKELY(aLength <= bytesToEnd)) {
+        // Common case: the source range is contiguous.
+        aDst.Write(&mModuloBuffer->mBuffer[start], aLength);
+      } else {
+        // Source range wraps: write the part before the end of the storage,
+        // followed by the part at the beginning of the storage.
+        aDst.Write(&mModuloBuffer->mBuffer[start], bytesToEnd);
+        aDst.Write(&mModuloBuffer->mBuffer[0], (aLength - bytesToEnd));
+      }
+      mIndex += aLength;
+    }
+
+    // Overwrite `aObject` with `sizeof(T)` bytes taken from the buffer at the
+    // iterator's position, and move the iterator past those bytes.
+    // Only trivially-copyable types may be filled this way without Undefined
+    // Behavior, hence the static check.
+    template <typename T>
+    void ReadIntoObject(T& aObject) {
+      static_assert(std::is_trivially_copyable_v<T>,
+                    "ReadIntoObject<T> - T must be trivially copyable");
+      Read(&aObject, sizeof(aObject));
+    }
+
+    // Return a `T` whose bytes are taken from the buffer at the iterator's
+    // position, and move the iterator past those bytes.
+    // A default-initialized `T` is created first and then overwritten, which
+    // is only free of Undefined Behavior for trivially-copyable types.
+    template <typename T>
+    T ReadObject() {
+      static_assert(std::is_trivially_copyable_v<T>,
+                    "ReadObject<T> - T must be trivially copyable");
+      T result;
+      Read(&result, sizeof(T));
+      return result;
+    }
+
+    // Read an unsigned LEB128 number and move iterator ahead.
+    // Forwards to `::mozilla::ReadULEB128<T>()`, passing this iterator as the
+    // source of encoded bytes.
+    template <typename T>
+    T ReadULEB128() {
+      return ::mozilla::ReadULEB128<T>(*this);
+    }
+
+   private:
+    // Only a ModuloBuffer can instantiate its iterator.
+    friend class ModuloBuffer;
+
+    // Build an iterator over `aBuffer`, positioned at `aIndex`.
+    // Private: only the befriended ModuloBuffer is expected to call this.
+    // The buffer is stored as a non-null pointer to its address.
+    Iterator(ConstOrMutableBuffer& aBuffer, Index aIndex)
+        : mModuloBuffer(WrapNotNull(&aBuffer)), mIndex(aIndex) {}
+
+    // Map this iterator's `mIndex` (which lives in the wider `Index` range) to
+    // a byte offset inside the buffer storage, by applying the buffer's mask.
+    Offset OffsetInBuffer() const {
+      const Offset narrowedIndex = static_cast<Offset>(mIndex);
+      return narrowedIndex & mModuloBuffer->mMask;
+    }
+
+    // ModuloBuffer that this Iterator operates on.
+    // Using a non-null pointer instead of a reference, to allow re-assignment
+    // of an Iterator variable.
+    NotNull<ConstOrMutableBuffer*> mModuloBuffer;
+
+    // Position of this iterator in the wider `Index` range. (Will be wrapped
+    // around as needed when actually accessing bytes from the buffer.)
+    Index mIndex;
+  };
+
+  // Shortcut to iterator to const (read-only) data.
+  using Reader = Iterator<true>;
+  // Shortcut to iterator to non-const (read/write) data.
+  using Writer = Iterator<false>;
+
+  // Create an iterator to const data at the given index.
+  // `aIndex` is stored as-is in the iterator; it is only wrapped to an
+  // in-buffer offset when bytes are actually accessed.
+  Reader ReaderAt(Index aIndex) const { return Reader(*this, aIndex); }
+
+  // Create an iterator to non-const data at the given index.
+  // `aIndex` is stored as-is in the iterator; it is only wrapped to an
+  // in-buffer offset when bytes are actually accessed.
+  Writer WriterAt(Index aIndex) { return Writer(*this, aIndex); }
+
+#ifdef DEBUG
+  // Debug helper: print the bytes at the start of the storage (capped at 128
+  // bytes) as space-separated two-digit hex values, followed by a newline.
+  void Dump() const {
+    constexpr Length kMaxDumpedBytes = 128;
+    const Length total = BufferLength().Value();
+    const Length shown = (total > kMaxDumpedBytes) ? kMaxDumpedBytes : total;
+    for (Length offset = 0; offset != shown; ++offset) {
+      printf("%02x ", mBuffer[offset]);
+    }
+    printf("\n");
+  }
+#endif  // DEBUG
+
+ private:
+  // Mask used to convert an index to an offset in `mBuffer`
+  const PowerOfTwoMask<Offset> mMask;
+
+  // Buffer data. `const NotNull<...>` shows that `mBuffer` is `const`, and
+  // `Byte* const` shows that the pointer cannot be changed to point at
+  // something else, but the pointed-at `Byte`s are writable.
+  const NotNull<Byte* const> mBuffer;
+
+  // Function used to release the buffer resource (if needed).
+  std::function<void(Byte*)> mBufferDeleter;
+};
+
+}  // namespace mozilla
+
+#endif  // ModuloBuffer_h
diff --git a/mozglue/baseprofiler/public/PowerOfTwo.h b/mozglue/baseprofiler/public/PowerOfTwo.h
new file mode 100644
index 0000000000..7d396c15e6
--- /dev/null
+++ b/mozglue/baseprofiler/public/PowerOfTwo.h
@@ -0,0 +1,322 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// PowerOfTwo is a value type that always holds a power of 2.
+// It has the same size as its underlying unsigned type, but offers the
+// guarantee of being a power of 2, which permits some optimizations when
+// involved in modulo operations (using masking instead of actual modulo).
+//
+// PowerOfTwoMask contains a mask corresponding to a power of 2.
+// E.g., 2^8 is 256 or 0x100, the corresponding mask is 2^8-1 or 255 or 0xFF.
+// It should be used instead of PowerOfTwo in situations where most operations
+// would be modulo, this saves having to recompute the mask from the stored
+// power of 2.
+//
+// One common use would be for ring-buffer containers with a power-of-2 size,
+// where an index is usually converted to an in-buffer offset by `i % size`.
+// Instead, the container could store a PowerOfTwo or PowerOfTwoMask, and do
+// `i % p2` or `i & p2m`, which is more efficient than for arbitrary sizes.
+//
+// Shortcuts for common 32- and 64-bit values: PowerOfTwo32, etc.
+//
+// To create constexpr constants, use MakePowerOfTwo<Type, Value>(), etc.
+
+#ifndef PowerOfTwo_h
+#define PowerOfTwo_h
+
+#include "mozilla/MathAlgorithms.h"
+
+#include <limits>
+
+namespace mozilla {
+
+// Round `aInput` up to the nearest power of 2 (a value that already is a power
+// of 2 is returned unchanged). If that would overflow `T`, the highest power
+// of 2 that fits in `T` is returned instead.
+// 0->1, 1->1, 2->2, 3->4, ... 2^31->2^31, 2^31+1->2^31 (for uint32_t), etc.
+template <typename T>
+T FriendlyRoundUpPow2(T aInput) {
+  // Same computation as `RoundUpPow2()`, but usable with any type that
+  // `CeilingLog2()` supports, and saturating instead of overflowing.
+  constexpr T kHighestPow2 = T(1) << (sizeof(T) * CHAR_BIT - 1);
+  if (aInput < kHighestPow2) {
+    return T(1) << CeilingLog2(aInput);
+  }
+  // Anything at or above the highest power of 2 saturates to it.
+  return kHighestPow2;
+}
+
+namespace detail {
+// Same function name `CountLeadingZeroes` with uint32_t and uint64_t overloads.
+// Each overload forwards to the fixed-width helper of matching size, and
+// asserts (in builds where MOZ_ASSERT is active) that the input is not 0.
+inline uint_fast8_t CountLeadingZeroes(uint32_t aValue) {
+  MOZ_ASSERT(aValue != 0);
+  return detail::CountLeadingZeroes32(aValue);
+}
+inline uint_fast8_t CountLeadingZeroes(uint64_t aValue) {
+  MOZ_ASSERT(aValue != 0);
+  return detail::CountLeadingZeroes64(aValue);
+}
+// Refuse anything else.
+// The deleted template is an exact match for every argument type other than
+// uint32_t and uint64_t, so such calls fail to compile instead of silently
+// converting to one of the overloads above.
+template <typename T>
+inline uint_fast8_t CountLeadingZeroes(T aValue) = delete;
+}  // namespace detail
+
+// Return the smallest mask of the form 2^N-1 that covers every set bit of
+// aInput, i.e., `(x & mask) == x`, but `(x & (mask >> 1)) != x`.
+// In binary terms, the mask has exactly as many leading zeroes as the input,
+// which is obtained by right-shifting a full mask: (8-bit examples)
+// input:          00000000    00000001   00000010  00010110  01111111 10000000
+// N leading 0s:   ^^^^^^^^ 8  ^^^^^^^ 7  ^^^^^^ 6  ^^^ 3     ^ 1      0
+// full mask:      11111111    11111111   11111111  11111111  11111111 11111111
+// full mask >> N: 00000000    00000001   00000011  00011111  01111111 11111111
+template <typename T>
+T RoundUpPow2Mask(T aInput) {
+  if (aInput != 0) {
+    return T(-1) >> detail::CountLeadingZeroes(aInput);
+  }
+  // Zero is handled separately: CountLeadingZeroes(0) is undefined, and even
+  // if it was defined, shifting by the full type size is undefined too.
+  return 0;
+}
+
+template <typename T>
+class PowerOfTwoMask;
+
+template <typename T, T Mask>
+constexpr PowerOfTwoMask<T> MakePowerOfTwoMask();
+
+template <typename T>
+class PowerOfTwo;
+
+template <typename T, T Value>
+constexpr PowerOfTwo<T> MakePowerOfTwo();
+
+// PowerOfTwoMask will always contain a mask for a power of 2, which is useful
+// for power-of-2 modulo operations (e.g., to keep an index inside a power-of-2
+// container).
+// Use this instead of PowerOfTwo if masking is the primary use of the value.
+//
+// Note that this class can store a "full" mask where all bits are set, so it
+// works for mask corresponding to the power of 2 that would overflow `T`
+// (e.g., 2^32 for uint32_t gives a mask of 2^32-1, which fits in a uint32_t).
+// For this reason there is no API that computes the power of 2 corresponding to
+// the mask; But this can be done explicitly with `MaskValue() + 1`, which may
+// be useful for computing things like distance-to-the-end by doing
+// `MaskValue() + 1 - offset`, which works fine with unsigned number types.
+template <typename T>
+class PowerOfTwoMask {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "PowerOfTwoMask must use an unsigned type");
+
+ public:
+  // Construct a power of 2 mask where the given value can fit.
+  // Cannot be constexpr because of `RoundUpPow2Mask()`.
+  explicit PowerOfTwoMask(T aInput) : mMask(RoundUpPow2Mask(aInput)) {}
+
+  // Compute the mask corresponding to a PowerOfTwo.
+  // This saves having to compute the nearest 2^N-1.
+  // Not a conversion constructor, as that could be ambiguous whether we'd want
+  // the mask corresponding to the power of 2 (2^N -> 2^N-1), or the mask that
+  // can *contain* the PowerOfTwo value (2^N -> 2^(N+1)-1).
+  // Note: Not offering reverse PowerOfTwoMask-to-PowerOfTwo conversion, because
+  // that could result in an unexpected 0 result for the largest possible mask.
+  template <typename U>
+  static constexpr PowerOfTwoMask<U> MaskForPowerOfTwo(
+      const PowerOfTwo<U>& aP2) {
+    // Delegate to `PowerOfTwo::Mask()`, which builds the 2^N-1 mask through
+    // the private Trusted constructor. Constructing `PowerOfTwoMask(aP2)`
+    // directly would instead pick the templated value constructor below, and
+    // fail to compile when trying to `static_cast` a PowerOfTwo to `T`.
+    return aP2.Mask();
+  }
+
+  // Allow smaller unsigned types as input.
+  // Bigger or signed types must be explicitly converted by the caller.
+  // NOTE(review): the first static_assert below tests `T` (already checked at
+  // class level) rather than `U`, so signed `U` inputs are not actually
+  // rejected here. Confirm existing callers before tightening this.
+  // NOTE(review): `RoundUpPow2Mask()` is not constexpr, so this constructor
+  // presumably cannot be used in constant expressions despite the specifier.
+  template <typename U>
+  explicit constexpr PowerOfTwoMask(U aInput)
+      : mMask(RoundUpPow2Mask(static_cast<T>(aInput))) {
+    static_assert(!std::numeric_limits<T>::is_signed,
+                  "PowerOfTwoMask does not accept signed types");
+    static_assert(sizeof(U) <= sizeof(T),
+                  "PowerOfTwoMask does not accept bigger types");
+  }
+
+  // Raw mask value (2^N-1).
+  constexpr T MaskValue() const { return mMask; }
+
+  // `x & aPowerOfTwoMask` just works.
+  // The result is converted back to the type of `x`.
+  template <typename U>
+  friend constexpr U operator&(U aNumber, PowerOfTwoMask aP2M) {
+    return static_cast<U>(aNumber & aP2M.MaskValue());
+  }
+
+  // `aPowerOfTwoMask & x` just works.
+  // The result is converted back to the type of `x`.
+  template <typename U>
+  friend constexpr U operator&(PowerOfTwoMask aP2M, U aNumber) {
+    return static_cast<U>(aP2M.MaskValue() & aNumber);
+  }
+
+  // `x % aPowerOfTwoMask(2^N-1)` is equivalent to `x % 2^N` but is more
+  // optimal by doing `x & (2^N-1)`.
+  // Useful for templated code doing modulo with a template argument type.
+  template <typename U>
+  friend constexpr U operator%(U aNumerator, PowerOfTwoMask aDenominator) {
+    return aNumerator & aDenominator.MaskValue();
+  }
+
+  constexpr bool operator==(const PowerOfTwoMask& aRhs) const {
+    return mMask == aRhs.mMask;
+  }
+  constexpr bool operator!=(const PowerOfTwoMask& aRhs) const {
+    return mMask != aRhs.mMask;
+  }
+
+ private:
+  // Trust `PowerOfTwo` to call the private Trusted constructor below.
+  friend class PowerOfTwo<T>;
+
+  // Trust `MakePowerOfTwoMask()` to call the private Trusted constructor below.
+  template <typename U, U Mask>
+  friend constexpr PowerOfTwoMask<U> MakePowerOfTwoMask();
+
+  // Wrapper marking a mask value that is already known to be 2^N-1, used to
+  // select the non-computing constructor below.
+  struct Trusted {
+    T mMask;
+  };
+  // Construct the mask corresponding to a PowerOfTwo.
+  // This saves having to compute the nearest 2^N-1.
+  // Note: Not a public PowerOfTwo->PowerOfTwoMask conversion constructor, as
+  // that could be ambiguous whether we'd want the mask corresponding to the
+  // power of 2 (2^N -> 2^N-1), or the mask that can *contain* the PowerOfTwo
+  // value (2^N -> 2^(N+1)-1).
+  explicit constexpr PowerOfTwoMask(const Trusted& aP2) : mMask(aP2.mMask) {}
+
+  T mMask = 0;
+};
+
+// Build a PowerOfTwoMask from the compile-time constant `Mask`.
+// `Mask` is checked statically: it must either be the full mask (all bits
+// set), or be one less than a power of 2.
+template <typename T, T Mask>
+constexpr PowerOfTwoMask<T> MakePowerOfTwoMask() {
+  static_assert(Mask == T(-1) || IsPowerOfTwo(Mask + 1),
+                "MakePowerOfTwoMask<T, Mask>: Mask must be 2^N-1");
+  // The value was validated above, so use the private Trusted constructor,
+  // which stores it as-is.
+  return PowerOfTwoMask<T>(typename PowerOfTwoMask<T>::Trusted{Mask});
+}
+
+// PowerOfTwo will always contain a power of 2.
+// Values are either rounded up from arbitrary inputs by the public
+// constructors, or statically checked by `MakePowerOfTwo()`.
+template <typename T>
+class PowerOfTwo {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "PowerOfTwo must use an unsigned type");
+
+ public:
+  // Construct a power of 2 that can fit the given value, or the highest power
+  // of 2 possible.
+  // Because of that saturation, the stored value may be smaller than the
+  // input: Caller should explicitly check/assert `aInput <= Value()` if they
+  // want to.
+  // Cannot be constexpr because of `FriendlyRoundUpPow2()`.
+  explicit PowerOfTwo(T aInput) : mValue(FriendlyRoundUpPow2(aInput)) {}
+
+  // Allow smaller unsigned types as input.
+  // Bigger or signed types must be explicitly converted by the caller.
+  // NOTE(review): the first static_assert below tests `T` (already checked at
+  // class level) rather than `U`, so signed `U` inputs are not actually
+  // rejected here. Confirm existing callers before tightening this.
+  template <typename U>
+  explicit PowerOfTwo(U aInput)
+      : mValue(FriendlyRoundUpPow2(static_cast<T>(aInput))) {
+    static_assert(!std::numeric_limits<T>::is_signed,
+                  "PowerOfTwo does not accept signed types");
+    static_assert(sizeof(U) <= sizeof(T),
+                  "PowerOfTwo does not accept bigger types");
+  }
+
+  // The stored power of 2.
+  constexpr T Value() const { return mValue; }
+
+  // Binary mask corresponding to the power of 2, useful for modulo.
+  // E.g., `x & powerOfTwo(y).MaskValue()` == `x % powerOfTwo(y)`.
+  // Consider PowerOfTwoMask class instead of PowerOfTwo if masking is the
+  // primary use case.
+  constexpr T MaskValue() const { return mValue - 1; }
+
+  // PowerOfTwoMask corresponding to this power of 2, useful for modulo.
+  // Goes through PowerOfTwoMask's Trusted constructor, so the mask is not
+  // recomputed.
+  constexpr PowerOfTwoMask<T> Mask() const {
+    using Trusted = typename PowerOfTwoMask<T>::Trusted;
+    return PowerOfTwoMask<T>(Trusted{MaskValue()});
+  }
+
+  // `x % aPowerOfTwo` works optimally.
+  // Useful for templated code doing modulo with a template argument type.
+  // Use PowerOfTwoMask class instead if masking is the primary use case.
+  template <typename U>
+  friend constexpr U operator%(U aNumerator, PowerOfTwo aDenominator) {
+    return aNumerator & aDenominator.MaskValue();
+  }
+
+  // Comparisons, all based on the stored power-of-2 value.
+  constexpr bool operator==(const PowerOfTwo& aRhs) const {
+    return mValue == aRhs.mValue;
+  }
+  constexpr bool operator!=(const PowerOfTwo& aRhs) const {
+    return mValue != aRhs.mValue;
+  }
+  constexpr bool operator<(const PowerOfTwo& aRhs) const {
+    return mValue < aRhs.mValue;
+  }
+  constexpr bool operator<=(const PowerOfTwo& aRhs) const {
+    return mValue <= aRhs.mValue;
+  }
+  constexpr bool operator>(const PowerOfTwo& aRhs) const {
+    return mValue > aRhs.mValue;
+  }
+  constexpr bool operator>=(const PowerOfTwo& aRhs) const {
+    return mValue >= aRhs.mValue;
+  }
+
+ private:
+  // Trust `MakePowerOfTwo()` to call the private Trusted constructor below.
+  template <typename U, U Value>
+  friend constexpr PowerOfTwo<U> MakePowerOfTwo();
+
+  // Wrapper marking a value that is already known to be a power of 2, used to
+  // select the non-computing constructor below.
+  struct Trusted {
+    T mValue;
+  };
+  // Construct a PowerOfTwo with the given trusted value.
+  // This saves having to compute the nearest 2^N.
+  // Note: Not offering PowerOfTwoMask-to-PowerOfTwo conversion, because that
+  // could result in an unexpected 0 result for the largest possible mask.
+  explicit constexpr PowerOfTwo(const Trusted& aP2) : mValue(aP2.mValue) {}
+
+  // The smallest power of 2 is 2^0 == 1.
+  T mValue = 1;
+};
+
+// Build a PowerOfTwo from the compile-time constant `Value`, which is
+// statically checked to be a power of 2.
+template <typename T, T Value>
+constexpr PowerOfTwo<T> MakePowerOfTwo() {
+  static_assert(IsPowerOfTwo(Value),
+                "MakePowerOfTwo<T, Value>: Value must be 2^N");
+  // The value was validated above, so use the private Trusted constructor,
+  // which stores it as-is.
+  return PowerOfTwo<T>(typename PowerOfTwo<T>::Trusted{Value});
+}
+
+// Shortcuts for the most common types and functions.
+
+using PowerOfTwoMask32 = PowerOfTwoMask<uint32_t>;
+using PowerOfTwo32 = PowerOfTwo<uint32_t>;
+using PowerOfTwoMask64 = PowerOfTwoMask<uint64_t>;
+using PowerOfTwo64 = PowerOfTwo<uint64_t>;
+
+// 32-bit shortcut for `MakePowerOfTwoMask<uint32_t, Mask>()`; `Mask` is
+// statically checked there.
+template <uint32_t Mask>
+constexpr PowerOfTwoMask32 MakePowerOfTwoMask32() {
+  return MakePowerOfTwoMask<uint32_t, Mask>();
+}
+
+// 32-bit shortcut for `MakePowerOfTwo<uint32_t, Value>()`; `Value` is
+// statically checked there.
+template <uint32_t Value>
+constexpr PowerOfTwo32 MakePowerOfTwo32() {
+  return MakePowerOfTwo<uint32_t, Value>();
+}
+
+// 64-bit shortcut for `MakePowerOfTwoMask<uint64_t, Mask>()`; `Mask` is
+// statically checked there.
+template <uint64_t Mask>
+constexpr PowerOfTwoMask64 MakePowerOfTwoMask64() {
+  return MakePowerOfTwoMask<uint64_t, Mask>();
+}
+
+// 64-bit shortcut for `MakePowerOfTwo<uint64_t, Value>()`; `Value` is
+// statically checked there.
+template <uint64_t Value>
+constexpr PowerOfTwo64 MakePowerOfTwo64() {
+  return MakePowerOfTwo<uint64_t, Value>();
+}
+
+}  // namespace mozilla
+
+#endif  // PowerOfTwo_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunk.h b/mozglue/baseprofiler/public/ProfileBufferChunk.h
new file mode 100644
index 0000000000..24a516bcaf
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunk.h
@@ -0,0 +1,543 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunk_h
+#define ProfileBufferChunk_h
+
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/Span.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#if defined(MOZ_MEMORY)
+#  include "mozmemory.h"
+#endif
+
+#include <algorithm>
+#include <limits>
+#include <type_traits>
+
+#ifdef DEBUG
+#  include <cstdio>
+#endif
+
+namespace mozilla {
+
+// Represents a single chunk of memory, with a link to the next chunk (or null).
+//
+// A chunk is made of an internal header (which contains a public part) followed
+// by user-accessible bytes.
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private |         memory containing user blocks        |
+// +---------------+---------+----------------------------------------------+
+//                           <---------------BufferBytes()------------------>
+// <------------------------------ChunkBytes()------------------------------>
+//
+// The chunk can reserve "blocks", but doesn't know the internal contents of
+// each block, it only knows where the first one starts, and where the last one
+// ends (which is where the next one will begin, if not already out of range).
+// It is up to the user to add structure to each block so that they can be
+// distinguished when later read.
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private |      [1st block]...[last full block]         |
+// +---------------+---------+----------------------------------------------+
+//  ChunkHeader().mOffsetFirstBlock ^                             ^
+//                           ChunkHeader().mOffsetPastLastBlock --'
+//
+// It is possible to attempt to reserve more than the remaining space, in which
+// case only what is available is returned. The caller is responsible for using
+// another chunk, reserving a block "tail" in it, and using both parts to
+// constitute a full block. (This initial tail may be empty in some chunks.)
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private | tail][1st block]...[last full block][head... |
+// +---------------+---------+----------------------------------------------+
+//  ChunkHeader().mOffsetFirstBlock ^                                       ^
+//                                     ChunkHeader().mOffsetPastLastBlock --'
+//
+// Each Chunk has an internal state (checked in DEBUG builds) that directs how
+// to use it during creation, initialization, use, end of life, recycling, and
+// destruction. See `State` below for details.
+// In particular:
+// - `ReserveInitialBlockAsTail()` must be called before the first `Reserve()`
+//   after construction or recycling, even with a size of 0 (no actual tail),
+// - `MarkDone()` and `MarkRecycled()` must be called as appropriate.
+class ProfileBufferChunk {
+ public:
+  using Byte = uint8_t;
+  using Length = uint32_t;
+
+  using SpanOfBytes = Span<Byte>;
+
+  // Hint about the size of the metadata (public and private headers).
+  // `Create()` below takes the minimum *buffer* size, so the minimum total
+  // Chunk size is at least `SizeofChunkMetadata() + aMinBufferBytes`.
+  // Returns `sizeof(InternalHeader)`, converted to `Length`.
+  [[nodiscard]] static constexpr Length SizeofChunkMetadata() {
+    return static_cast<Length>(sizeof(InternalHeader));
+  }
+
+  // Allocate space for a chunk with a given minimum size, and construct it.
+  // The actual size may be higher, to match the actual space taken in the
+  // memory pool.
+  //
+  // Steps:
+  // 1. Compute how many alignment-sized units are needed for the header plus
+  //    at least `aMinBufferBytes` of buffer (at least 1 byte).
+  // 2. With MOZ_MEMORY, grow that count according to `malloc_good_size()`.
+  // 3. Allocate raw storage, placement-construct the chunk at its start, and
+  //    transfer ownership to a `UniquePtr<ProfileBufferChunk>`.
+  [[nodiscard]] static UniquePtr<ProfileBufferChunk> Create(
+      Length aMinBufferBytes) {
+    // We need at least one byte, to cover the always-present `mBuffer` byte.
+    aMinBufferBytes = std::max(aMinBufferBytes, Length(1));
+    // Trivial struct with the same alignment as `InternalHeader`, and size
+    // equal to that alignment, because typically the sizeof of an object is
+    // a multiple of its alignment.
+    struct alignas(alignof(InternalHeader)) ChunkStruct {
+      Byte c[alignof(InternalHeader)];
+    };
+    static_assert(std::is_trivial_v<ChunkStruct>,
+                  "ChunkStruct must be trivial to avoid any construction");
+    // Allocate an array of that struct, enough to contain the expected
+    // `ProfileBufferChunk` (with its header+buffer).
+    // This is a ceiling division of the needed bytes by the unit size.
+    size_t count = (sizeof(InternalHeader) + aMinBufferBytes +
+                    (alignof(InternalHeader) - 1)) /
+                   alignof(InternalHeader);
+#if defined(MOZ_MEMORY)
+    // Potentially expand the array to use more of the effective allocation.
+    count = (malloc_good_size(count * sizeof(ChunkStruct)) +
+             (sizeof(ChunkStruct) - 1)) /
+            sizeof(ChunkStruct);
+#endif
+    auto chunkStorage = MakeUnique<ChunkStruct[]>(count);
+    MOZ_ASSERT(reinterpret_cast<uintptr_t>(chunkStorage.get()) %
+                   alignof(InternalHeader) ==
+               0);
+    // After the allocation, compute the actual chunk size (including header).
+    const size_t chunkBytes = count * sizeof(ChunkStruct);
+    MOZ_ASSERT(chunkBytes >= sizeof(ProfileBufferChunk),
+               "Not enough space to construct a ProfileBufferChunk");
+    // The size must be representable as a `Length`, see the cast below.
+    MOZ_ASSERT(chunkBytes <=
+               static_cast<size_t>(std::numeric_limits<Length>::max()));
+    // Compute the size of the user-accessible buffer inside the chunk.
+    const Length bufferBytes =
+        static_cast<Length>(chunkBytes - sizeof(InternalHeader));
+    MOZ_ASSERT(bufferBytes >= aMinBufferBytes,
+               "Not enough space for minimum buffer size");
+    // Construct the header at the beginning of the allocated array, with the
+    // known buffer size.
+    new (chunkStorage.get()) ProfileBufferChunk(bufferBytes);
+    // We now have a proper `ProfileBufferChunk` object, create the appropriate
+    // UniquePtr for it.
+    // `release()` hands the storage over, so it is no longer owned by
+    // `chunkStorage`.
+    UniquePtr<ProfileBufferChunk> chunk{
+        reinterpret_cast<ProfileBufferChunk*>(chunkStorage.release())};
+    // Sanity check: the last buffer byte must be the last allocated byte.
+    MOZ_ASSERT(
+        size_t(reinterpret_cast<const char*>(
+                   &chunk.get()->BufferSpan()[bufferBytes - 1]) -
+               reinterpret_cast<const char*>(chunk.get())) == chunkBytes - 1,
+        "Buffer span spills out of chunk allocation");
+    return chunk;
+  }
+
+#ifdef DEBUG
+  // DEBUG-only destructor: checks that the chunk is not destroyed while it is
+  // InUse or Full; the only accepted states are Created, Done, and Recycled.
+  ~ProfileBufferChunk() {
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Done ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+  }
+#endif
+
+  // Must be called with the first block tail (may be empty), which will be
+  // skipped if the reader starts with this ProfileBufferChunk.
+  // `aTailSize` must not exceed `BufferBytes()`.
+  // Both block offsets are set to `aTailSize`, so the first real block starts
+  // right after the tail. Returns a span covering the tail bytes, at the start
+  // of the buffer.
+  [[nodiscard]] SpanOfBytes ReserveInitialBlockAsTail(Length aTailSize) {
+    // A tail bigger than the buffer would produce an out-of-bounds span, and
+    // make `RemainingBytes()` underflow.
+    MOZ_ASSERT(aTailSize <= BufferBytes(),
+               "Initial tail does not fit in the chunk buffer");
+#ifdef DEBUG
+    // Only a freshly-created or recycled chunk may receive its initial tail.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+    mInternalHeader.mState = InternalHeader::State::InUse;
+#endif
+    mInternalHeader.mHeader.mOffsetFirstBlock = aTailSize;
+    mInternalHeader.mHeader.mOffsetPastLastBlock = aTailSize;
+    return SpanOfBytes(&mBuffer, aTailSize);
+  }
+
+  // Result of `ReserveBlock()`.
+  struct ReserveReturn {
+    // Bytes reserved in this chunk (may be fewer than requested).
+    SpanOfBytes mSpan;
+    // Index of the start of the reserved block, computed from the chunk's
+    // range start plus the block's offset in the buffer.
+    ProfileBufferBlockIndex mBlockRangeIndex;
+  };
+
+  // Reserve the next block, of at most `aBlockSize` bytes, right after the
+  // previously-reserved one. Returns a Span over the reserved bytes together
+  // with the block's starting index. When the request does not fit, only the
+  // bytes remaining in this chunk are reserved.
+  [[nodiscard]] ReserveReturn ReserveBlock(Length aBlockSize) {
+    // The chunk must be InUse, with a valid (non-0) range start.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse);
+    MOZ_ASSERT(RangeStart() != 0,
+               "Expected valid range start before first Reserve()");
+    Header& header = mInternalHeader.mHeader;
+    // The new block starts where the previous one ended.
+    const Length startOffset = header.mOffsetPastLastBlock;
+    Length grantedSize = aBlockSize;
+    if (const Length available = RemainingBytes();
+        MOZ_UNLIKELY(aBlockSize >= available)) {
+      // This request uses up all the remaining space.
+      grantedSize = available;
+#ifdef DEBUG
+      mInternalHeader.mState = InternalHeader::State::Full;
+#endif
+    }
+    header.mOffsetPastLastBlock += grantedSize;
+    header.mBlockCount += 1;
+    return {SpanOfBytes(&mBuffer + startOffset, grantedSize),
+            ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                header.mRangeStart + startOffset)};
+  }
+
+  // When a chunk will not be used to store more blocks (because it is full, or
+  // because the profiler will not add more data), it should be marked "done".
+  // Access to its content is still allowed.
+  // Records the current time in the header's `mDoneTimeStamp`.
+  void MarkDone() {
+#ifdef DEBUG
+    // Only an InUse or Full chunk may be marked Done.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse ||
+               mInternalHeader.mState == InternalHeader::State::Full);
+    mInternalHeader.mState = InternalHeader::State::Done;
+#endif
+    mInternalHeader.mHeader.mDoneTimeStamp = TimeStamp::NowUnfuzzed();
+  }
+
+  // A "Done" chunk may be recycled, to avoid allocating a new one.
+  // All header fields except the buffer size are reset to their initial
+  // values.
+  void MarkRecycled() {
+#ifdef DEBUG
+    // We also allow Created and already-Recycled chunks to be recycled, this
+    // way it's easier to recycle chunks when their state is not easily
+    // trackable.
+    // InUse and Full chunks must be marked Done first.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Done ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+    mInternalHeader.mState = InternalHeader::State::Recycled;
+#endif
+    // Reset all header fields, in case this recycled chunk gets read.
+    mInternalHeader.mHeader.Reset();
+  }
+
+  // Public header, meant to uniquely identify a chunk, it may be shared with
+  // other processes to coordinate global memory handling.
+  struct Header {
+    // `aBufferBytes` is the size of the chunk's buffer; it is stored in the
+    // const member `mBufferBytes`, all other members start at their defaults.
+    explicit Header(Length aBufferBytes) : mBufferBytes(aBufferBytes) {}
+
+    // Reset all members to their as-new values (apart from the buffer size,
+    // which cannot change), ready for re-use.
+    void Reset() {
+      mOffsetFirstBlock = 0;
+      mOffsetPastLastBlock = 0;
+      mDoneTimeStamp = TimeStamp{};
+      mBlockCount = 0;
+      mRangeStart = 0;
+      mProcessId = 0;
+    }
+
+    // Note: Part of the ordering of members below is to avoid unnecessary
+    // padding.
+
+    // Members managed by the ProfileBufferChunk.
+
+    // Offset of the first block (past the initial tail block, which may be 0).
+    Length mOffsetFirstBlock = 0;
+    // Offset past the last byte of the last reserved block
+    // It may be past mBufferBytes when last block continues in the next
+    // ProfileBufferChunk. It may be before mBufferBytes if ProfileBufferChunk
+    // is marked "Done" before the end is reached.
+    Length mOffsetPastLastBlock = 0;
+    // Timestamp when buffer is "Done" (which happens when the last block is
+    // written). This will be used to find and discard the oldest
+    // ProfileBufferChunk.
+    TimeStamp mDoneTimeStamp;
+    // Number of bytes in the buffer, set once at construction time.
+    const Length mBufferBytes;
+    // Number of reserved blocks (including final one even if partial, but
+    // excluding initial tail).
+    Length mBlockCount = 0;
+
+    // Meta-data set by the user.
+
+    // Index of the first byte of this ProfileBufferChunk, relative to all
+    // Chunks for this process. Index 0 is reserved as nullptr-like index,
+    // mRangeStart should be set to a non-0 value before the first `Reserve()`.
+    ProfileBufferIndex mRangeStart = 0;
+    // Process writing to this ProfileBufferChunk.
+    int mProcessId = 0;
+
+    // A bit of spare space (necessary here because of the alignment due to
+    // other members), may be later repurposed for extra data.
+    // Not touched by `Reset()`.
+    const int mPADDING = 0;
+  };
+
+  // Read-only access to the public header stored in this chunk.
+  [[nodiscard]] const Header& ChunkHeader() const {
+    return mInternalHeader.mHeader;
+  }
+
+  // Size of the buffer (excluding the header), as set at construction time.
+  [[nodiscard]] Length BufferBytes() const {
+    return mInternalHeader.mHeader.mBufferBytes;
+  }
+
+  // Size of the whole chunk: the metadata (header) followed by the buffer.
+  [[nodiscard]] Length ChunkBytes() const {
+    return SizeofChunkMetadata() + BufferBytes();
+  }
+
+  // Memory used by everything this chunk links to, i.e., the chain of chunks
+  // that follow it; 0 when there is no next chunk.
+  [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    if (const ProfileBufferChunk* const following = GetNext()) {
+      return following->SizeOfIncludingThis(aMallocSizeOf);
+    }
+    return 0;
+  }
+
+  // Memory used by this chunk plus all the chunks chained after it.
+  [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    // If `aMallocSizeOf` only reports `sizeof`, still count at least the
+    // allocation size that was actually requested for this chunk.
+    const size_t ownBytes = std::max<size_t>(aMallocSizeOf(this), ChunkBytes());
+    return ownBytes + SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  [[nodiscard]] Length RemainingBytes() const {  // Clamped: 0 when full.
+    return BufferBytes() - std::min(BufferBytes(), OffsetPastLastBlock());
+  }
+
+  [[nodiscard]] Length OffsetFirstBlock() const {
+    return mInternalHeader.mHeader.mOffsetFirstBlock;  // Past initial tail.
+  }
+
+  [[nodiscard]] Length OffsetPastLastBlock() const {
+    return mInternalHeader.mHeader.mOffsetPastLastBlock;  // May exceed buffer.
+  }
+
+  [[nodiscard]] Length BlockCount() const { return ChunkHeader().mBlockCount; }
+
+  [[nodiscard]] int ProcessId() const { return ChunkHeader().mProcessId; }
+
+  void SetProcessId(int aProcessId) {
+    mInternalHeader.mHeader.mProcessId = aProcessId;  // User meta-data.
+  }
+
+  // Global index of this chunk's first byte (0 is the reserved null index).
+  [[nodiscard]] ProfileBufferIndex RangeStart() const {
+    return mInternalHeader.mHeader.mRangeStart;
+  }
+
+  void SetRangeStart(ProfileBufferIndex aRangeStart) {
+    mInternalHeader.mHeader.mRangeStart = aRangeStart;  // Expected non-0.
+  }
+
+  // Get a read-only Span to the buffer. It is up to the caller to decipher the
+  // contents, based on known offsets and the internal block structure.
+  [[nodiscard]] Span<const Byte> BufferSpan() const {
+    return Span<const Byte>(&mBuffer, BufferBytes());
+  }
+
+  [[nodiscard]] Byte ByteAt(Length aOffset) const {
+    MOZ_ASSERT(aOffset < OffsetPastLastBlock());  // Only reserved bytes.
+    return (&mBuffer)[aOffset];
+  }
+
+  [[nodiscard]] ProfileBufferChunk* GetNext() {
+    return mInternalHeader.mNext.get();  // Non-owning; null if last chunk.
+  }
+  [[nodiscard]] const ProfileBufferChunk* GetNext() const {
+    return mInternalHeader.mNext.get();  // Non-owning; null if last chunk.
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> ReleaseNext() {
+    return std::move(mInternalHeader.mNext);  // Gives away all that follows.
+  }
+
+  void InsertNext(UniquePtr<ProfileBufferChunk>&& aChunk) {
+    if (aChunk) {
+      // Old successors go after the inserted list, which becomes our next.
+      aChunk->SetLast(ReleaseNext());
+      mInternalHeader.mNext = std::move(aChunk);
+    }
+  }
+
+  // Walk the chain to its final chunk (which is `this` when nothing follows).
+  // The cost is linear in the number of chunks after this one.
+  [[nodiscard]] ProfileBufferChunk* Last() {
+    ProfileBufferChunk* tail = this;
+    // Advance for as long as the current chunk has a successor.
+    while (ProfileBufferChunk* following = tail->GetNext()) {
+      tail = following;
+    }
+    // `tail` has no successor, so it is the last chunk.
+    return tail;
+  }
+  // Same walk as above, for read-only access to the last chunk.
+  [[nodiscard]] const ProfileBufferChunk* Last() const {
+    const ProfileBufferChunk* tail = this;
+    // Advance for as long as the current chunk has a successor.
+    while (const ProfileBufferChunk* following = tail->GetNext()) {
+      tail = following;
+    }
+    // `tail` has no successor, so it is the last chunk.
+    return tail;
+  }
+
+  void SetLast(UniquePtr<ProfileBufferChunk>&& aChunk) {
+    if (aChunk) {
+      // Append the given (non-null) list after the final chunk of this chain.
+      Last()->mInternalHeader.mNext = std::move(aChunk);
+    }
+  }
+
+  // Concatenate two chunk lists, either (or both) of which may be null.
+  [[nodiscard]] static UniquePtr<ProfileBufferChunk> Join(
+      UniquePtr<ProfileBufferChunk>&& aFirst,
+      UniquePtr<ProfileBufferChunk>&& aLast) {
+    if (!aFirst) {
+      return std::move(aLast);
+    }
+    aFirst->SetLast(std::move(aLast));
+    return std::move(aFirst);
+  }
+
+#ifdef DEBUG
+  // Print the header fields, then a hex+ASCII dump (16 bytes per row) of the
+  // written bytes; `[` marks the first block start, `]` the last block end.
+  void Dump(std::FILE* aFile = stdout) const {
+    fprintf(aFile,
+            "Chunk[%p] chunkSize=%u bufferSize=%u state=%s rangeStart=%u "
+            "firstBlockOffset=%u offsetPastLastBlock=%u blockCount=%u",
+            static_cast<const void*>(this), unsigned(ChunkBytes()),
+            unsigned(BufferBytes()), mInternalHeader.StateString(),
+            unsigned(RangeStart()), unsigned(OffsetFirstBlock()),
+            unsigned(OffsetPastLastBlock()), unsigned(BlockCount()));
+    // The last block may continue in the next chunk, so its end offset can be
+    // past this buffer: never read beyond the bytes owned by this chunk.
+    const Length len = std::min(BufferBytes(), OffsetPastLastBlock());
+    constexpr unsigned columns = 16;
+    unsigned char ascii[columns + 1];
+    ascii[columns] = '\0';
+    for (Length i = 0; i < len; ++i) {
+      if (i % columns == 0) {
+        // New row: print its offset (decimal and hex), blank the ASCII column.
+        fprintf(aFile, "\n  %4u=0x%03x:", unsigned(i), unsigned(i));
+        for (unsigned a = 0; a < columns; ++a) {
+          ascii[a] = ' ';
+        }
+      }
+      // `i < len <= OffsetPastLastBlock()`, so only the start marker can
+      // apply before a byte; the end marker is handled after the last byte.
+      const unsigned char sep = (i == OffsetFirstBlock()) ? '[' : ' ';
+      const unsigned char c = *(&mBuffer + i);
+      fprintf(aFile, "%c%02x", sep, c);
+
+      if (i == len - 1) {
+        if (i + 1 == OffsetPastLastBlock()) {
+          // Special case when last block ends right at the end.
+          fprintf(aFile, "]");
+        } else {
+          // The last block continues in the next chunk.
+          fprintf(aFile, " ");
+        }
+      } else if (i % columns == columns - 1) {
+        fprintf(aFile, " ");
+      }
+
+      ascii[i % columns] = (c >= ' ' && c <= '~') ? c : '.';
+
+      if (i % columns == columns - 1) {
+        fprintf(aFile, " %s", ascii);
+      }
+    }
+
+    // Complete a partial last row (full rows were completed in the loop).
+    if (len % columns != 0) {
+      for (Length i = len % columns; i < columns; ++i) {
+        fprintf(aFile, "   ");
+      }
+      fprintf(aFile, " %s", ascii);
+    }
+
+    fprintf(aFile, "\n");
+  }
+#endif  // DEBUG
+
+ private:
+  // Private constructor, which forwards the buffer size to the internal
+  // header. Use static `Create()` to allocate and construct a chunk.
+  explicit ProfileBufferChunk(Length aBufferBytes)
+      : mInternalHeader(aBufferBytes) {}
+
+  // This internal header starts with the public `Header`, and adds some data
+  // only necessary for local handling (link to the next chunk, DEBUG state).
+  // This encapsulation is also necessary to perform placement-new in
+  // `Create()`.
+  struct InternalHeader {
+    explicit InternalHeader(Length aBufferBytes) : mHeader(aBufferBytes) {}
+
+    Header mHeader;  // Public part, exposed through `ChunkHeader()`.
+    UniquePtr<ProfileBufferChunk> mNext;  // Owned next chunk, null if last.
+
+#ifdef DEBUG
+    enum class State {
+      Created,  // Self-set. Just constructed, waiting for initial block tail.
+      InUse,    // Ready to accept blocks.
+      Full,     // Self-set. Blocks reach the end (or further).
+      Done,     // Blocks won't be added anymore.
+      Recycled  // Still full of data, but expecting an initial block tail.
+    };
+
+    State mState = State::Created;
+    // Transition table: (X=unexpected)
+    // Method          \  State   Created  InUse    Full     Done     Recycled
+    // ReserveInitialBlockAsTail   InUse     X       X        X        InUse
+    // Reserve                       X   InUse/Full  X        X          X
+    // MarkDone                      X     Done     Done      X          X
+    // MarkRecycled                  X       X       X      Recycled     X
+    // destructor                    ok      X       X        ok         ok
+
+    const char* StateString() const {  // Name of `mState`, for `Dump()`.
+      switch (mState) {
+        case State::Created:
+          return "Created";
+        case State::InUse:
+          return "InUse";
+        case State::Full:
+          return "Full";
+        case State::Done:
+          return "Done";
+        case State::Recycled:
+          return "Recycled";
+        default:
+          return "?";  // Not one of the enumerators above.
+      }
+    }
+#else  // DEBUG
+    const char* StateString() const { return "(non-DEBUG)"; }  // No mState.
+#endif
+  };
+
+  InternalHeader mInternalHeader;
+
+  // KEEP THIS LAST!
+  // First byte of the buffer. Note that ProfileBufferChunk::Create allocates a
+  // bigger block, such that `mBuffer` is the first of `mBufferBytes` available
+  // bytes.
+  // The initialization is not strictly needed, because bytes should only be
+  // read after they have been written and `mOffsetPastLastBlock` has been
+  // updated. However:
+  // - Reviewbot complains that it's not initialized.
+  // - It's cheap to initialize one byte.
+  // - In the worst case (reading does happen), zero is not a valid entry size
+  //   and should get caught in entry readers.
+  Byte mBuffer = '\0';
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunk_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
new file mode 100644
index 0000000000..e7f12bf21f
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
@@ -0,0 +1,134 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManager_h
+#define ProfileBufferChunkManager_h
+
+#include "mozilla/ProfileBufferChunk.h"
+#include "mozilla/ScopeExit.h"
+
+#include <functional>
+
+namespace mozilla {
+
+// Manages the ProfileBufferChunks for this process.
+// The main user of this class is the buffer that needs chunks to store its
+// data.
+// The main ProfileBufferChunkManager responsibilities are:
+// - It can create new chunks, they are called "unreleased".
+// - Later these chunks are returned here, and become "released".
+// - The manager is free to destroy or recycle the oldest released chunks
+//   (usually to reclaim memory), and will inform the user through a provided
+//   callback.
+// - The user may access still-alive released chunks.
+class ProfileBufferChunkManager {
+ public:
+  virtual ~ProfileBufferChunkManager()
+#ifdef DEBUG
+  {
+    MOZ_ASSERT(!mUser, "Still registered when being destroyed");
+  }
+#else
+      = default;
+#endif
+
+  // Expected maximum size needed to store one stack sample.
+  // Most ChunkManager sub-classes will require chunk sizes, this can serve as
+  // a minimum recommendation to hold most backtraces.
+  constexpr static ProfileBufferChunk::Length scExpectedMaximumStackSize =
+      128 * 1024;
+
+  // Estimated maximum buffer size.
+  [[nodiscard]] virtual size_t MaxTotalSize() const = 0;
+
+  // Create or recycle a chunk right now. May return null in case of allocation
+  // failure.
+  // Note that the chunk-destroyed callback may be invoked during this call;
+  // user should be careful with reentrancy issues.
+  [[nodiscard]] virtual UniquePtr<ProfileBufferChunk> GetChunk() = 0;
+
+  // `aChunkReceiver` may be called with a new or recycled chunk, or nullptr.
+  // (See `FulfillChunkRequests()` regarding when the callback may happen.)
+  virtual void RequestChunk(
+      std::function<void(UniquePtr<ProfileBufferChunk>)>&& aChunkReceiver) = 0;
+
+  // This method may be invoked at any time on any thread (and not necessarily
+  // by the main user of this class), to do the work necessary to respond to a
+  // previous `RequestChunk()`.
+  // It is optional: If it is never called, or called too late, the user is
+  // responsible for directly calling `GetChunk()` when a chunk is really
+  // needed (or it should at least fail gracefully).
+  // The idea is to fulfill chunk requests on a separate thread, and most
+  // importantly outside of profiler calls, to avoid doing expensive memory
+  // allocations during these calls.
+  virtual void FulfillChunkRequests() = 0;
+
+  // One chunk is released by the user, the ProfileBufferChunkManager should
+  // keep it as long as possible (depending on local or global memory/time
+  // limits). Note that the chunk-destroyed callback may be invoked during this
+  // call; user should be careful with reentrancy issues.
+  virtual void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) = 0;
+
+  // `aChunkDestroyedCallback` will be called whenever the contents of a
+  // previously-released chunk are about to be destroyed or recycled.
+  // Note that it may be called during other functions above, or at other times
+  // from the same or other threads; user should be careful with reentrancy
+  // issues.
+  virtual void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&&
+          aChunkDestroyedCallback) = 0;
+
+  // Give away all released chunks that have not yet been destroyed.
+  [[nodiscard]] virtual UniquePtr<ProfileBufferChunk>
+  GetExtantReleasedChunks() = 0;
+
+  // Let a callback see all released chunks that have not yet been destroyed, if
+  // any. Return whatever the callback returns.
+  template <typename Callback>
+  [[nodiscard]] auto PeekExtantReleasedChunks(Callback&& aCallback) {
+    const ProfileBufferChunk* chunks = PeekExtantReleasedChunksAndLock();
+    auto unlock =  // Runs when leaving this scope, after the callback.
+        MakeScopeExit([&]() { UnlockAfterPeekExtantReleasedChunks(); });
+    return std::forward<Callback>(aCallback)(chunks);
+  }
+
+  // Chunks that were still unreleased will never be released.
+  virtual void ForgetUnreleasedChunks() = 0;
+
+  [[nodiscard]] virtual size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const = 0;
+  [[nodiscard]] virtual size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const = 0;
+
+ protected:
+  // Derived classes to implement `PeekExtantReleasedChunks` through these:
+  virtual const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() = 0;
+  virtual void UnlockAfterPeekExtantReleasedChunks() = 0;
+
+#ifdef DEBUG
+ public:
+  // DEBUG checks ensuring that this manager and its users avoid UAFs.
+  // Derived classes should assert that mUser is not null in their functions.
+
+  void RegisteredWith(const void* aUser) {
+    MOZ_ASSERT(!mUser);  // Only one registered user at a time.
+    MOZ_ASSERT(aUser);
+    mUser = aUser;
+  }
+
+  void DeregisteredFrom(const void* aUser) {
+    MOZ_ASSERT(mUser == aUser);  // Must be the currently-registered user.
+    mUser = nullptr;
+  }
+
+ protected:
+  const void* mUser = nullptr;  // Opaque identity of the registered user.
+#endif  // DEBUG
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunkManager_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
new file mode 100644
index 0000000000..c91b38cbdb
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
@@ -0,0 +1,172 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManagerSingle_h
+#define ProfileBufferChunkManagerSingle_h
+
+#include "mozilla/ProfileBufferChunkManager.h"
+
+#ifdef DEBUG
+#  include "mozilla/Atomics.h"
+#endif  // DEBUG
+
+namespace mozilla {
+
+// Manager of one single chunk.
+// That chunk is handed out by the first `GetChunk()`/`RequestChunk()` call,
+// every later call gets nullptr. The chunk may be released back here, but it
+// is never destroyed nor recycled behind the user's back.
+// Unlike other managers, this one can be `Reset()` in order to start another
+// round of small-data gathering.
+// It is mainly meant for short-lived ProfileChunkedBuffers collecting little
+// data that fits in one chunk, e.g., when capturing one stack.
+// It is not thread-safe.
+class ProfileBufferChunkManagerSingle final : public ProfileBufferChunkManager {
+ public:
+  using Length = ProfileBufferChunk::Length;
+
+  // Adopt an already-allocated chunk. (Null is accepted, so that OOMs upstream
+  // are handled gracefully.)
+  explicit ProfileBufferChunkManagerSingle(UniquePtr<ProfileBufferChunk> aChunk)
+      : mInitialChunk(std::move(aChunk)),
+        mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {
+    MOZ_ASSERT(!mInitialChunk || !mInitialChunk->GetNext(),
+               "Expected at most one chunk");
+  }
+
+  // aChunkMinBufferBytes: Minimum number of user-available bytes in the Chunk.
+  // Note that Chunks use a bit more memory for their header.
+  explicit ProfileBufferChunkManagerSingle(Length aChunkMinBufferBytes)
+      : mInitialChunk(ProfileBufferChunk::Create(aChunkMinBufferBytes)),
+        mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {}
+
+#ifdef DEBUG
+  ~ProfileBufferChunkManagerSingle() { MOZ_ASSERT(mVirtuallyLocked == false); }
+#endif  // DEBUG
+
+  // Prepare for another round. The given chunk (typically handed back by the
+  // ProfileChunkedBuffer that used it) is preferred; when null, the chunk
+  // still held here (initial one, else released one) is reused.
+  void Reset(UniquePtr<ProfileBufferChunk> aPossibleChunk) {
+    if (aPossibleChunk) {
+      mInitialChunk = std::move(aPossibleChunk);
+      mReleasedChunk = nullptr;
+    } else if (!mInitialChunk) {
+      MOZ_ASSERT(!!mReleasedChunk, "Can't reset properly!");
+      mInitialChunk = std::move(mReleasedChunk);
+    }
+
+    if (!mInitialChunk) {
+      mBufferBytes = 0;
+      return;
+    }
+    mInitialChunk->MarkRecycled();
+    mBufferBytes = mInitialChunk->BufferBytes();
+  }
+
+  [[nodiscard]] size_t MaxTotalSize() const final { return mBufferBytes; }
+
+  // `GetChunk` and `RequestChunk` share the one chunk: whichever is called
+  // first gets it (if there is one), all later calls get nullptr.
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return std::move(mInitialChunk);
+  }
+
+  void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&&
+                        aChunkReceiver) final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // No deferral here, the receiver gets its answer right away.
+    std::move(aChunkReceiver)(GetChunk());
+  }
+
+  void FulfillChunkRequests() final {
+    // Requests are fulfilled immediately in `RequestChunk()`.
+  }
+
+  void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    if (aChunk) {
+      MOZ_ASSERT(!mReleasedChunk, "Unexpected 2nd released chunk");
+      MOZ_ASSERT(!aChunk->GetNext(), "Only expected one released chunk");
+      // Keep it, so that it can be peeked at, retrieved, or reused by Reset.
+      mReleasedChunk = std::move(aChunk);
+    }
+  }
+
+  void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback)
+      final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // This manager never destroys its chunk, so the callback is never
+    // invoked. It is stored anyway, in case the caller expects it to live as
+    // long as this manager.
+    mChunkDestroyedCallback = std::move(aChunkDestroyedCallback);
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return std::move(mReleasedChunk);
+  }
+
+  void ForgetUnreleasedChunks() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+  }
+
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // Measure whichever of the two chunk slots is currently occupied:
+    // The chunk that has not been given away yet...
+    const size_t initialBytes =
+        mInitialChunk ? mInitialChunk->SizeOfIncludingThis(aMallocSizeOf) : 0;
+    // ... and the chunk that was released back to this manager.
+    const size_t releasedBytes =
+        mReleasedChunk ? mReleasedChunk->SizeOfIncludingThis(aMallocSizeOf) : 0;
+    // Note: Missing size of std::function external resources (if any).
+    return initialBytes + releasedBytes;
+  }
+
+  [[nodiscard]] size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return SizeOfExcludingThis(aMallocSizeOf) + aMallocSizeOf(this);
+  }
+
+ protected:
+  // No real lock (this manager is not thread-safe), only a DEBUG pairing check.
+  const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final {
+    MOZ_ASSERT(mVirtuallyLocked.compareExchange(false, true));
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return mReleasedChunk.get();
+  }
+  void UnlockAfterPeekExtantReleasedChunks() final {
+    MOZ_ASSERT(mVirtuallyLocked.compareExchange(true, false));
+  }
+
+ private:
+  // The one chunk, until it is given away by the first Get/Request.
+  UniquePtr<ProfileBufferChunk> mInitialChunk;
+
+  // Holds the chunk once released (which should probably not happen, as it
+  // means the chunk is full).
+  UniquePtr<ProfileBufferChunk> mReleasedChunk;
+
+  // Buffer size of the managed chunk. Kept separately, because the chunk may
+  // have been moved out, and would then be inaccessible from here.
+  Length mBufferBytes;
+
+  // Never invoked (see `SetChunkDestroyedCallback`), but kept alive in case
+  // the caller expects it to live as long as this manager.
+  std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback;
+
+#ifdef DEBUG
+  mutable Atomic<bool> mVirtuallyLocked{false};
+#endif  // DEBUG
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunkManagerSingle_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h
new file mode 100644
index 0000000000..5b1af6d66c
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h
@@ -0,0 +1,428 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManagerWithLocalLimit_h
+#define ProfileBufferChunkManagerWithLocalLimit_h
+
+#include "BaseProfiler.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ProfileBufferChunkManager.h"
+#include "mozilla/ProfileBufferControlledChunkManager.h"
+
+#include <utility>
+
+namespace mozilla {
+
+// Manages the Chunks for this process in a thread-safe manner, with a maximum
+// size per process.
+//
+// "Unreleased" chunks are not owned here, only "released" chunks can be
+// destroyed or recycled when reaching the memory limit, so it is theoretically
+// possible to break that limit, if:
+// - The user of this class doesn't release their chunks, AND/OR
+// - The limit is too small (e.g., smaller than 2 or 3 chunks, which should be
+//   the usual number of unreleased chunks in flight).
+// In this case, it just means that we will use more memory than allowed,
+// potentially risking OOMs. Hopefully this shouldn't happen in real code,
+// assuming that the user is doing the right thing and releasing chunks ASAP,
+// and that the memory limit is reasonably large.
+class ProfileBufferChunkManagerWithLocalLimit final
+    : public ProfileBufferChunkManager,
+      public ProfileBufferControlledChunkManager {
+ public:
+  using Length = ProfileBufferChunk::Length;
+
+  // aMaxTotalBytes: Maximum number of bytes allocated in all local Chunks.
+  // aChunkMinBufferBytes: Minimum number of user-available bytes in each
+  // Chunk. Note that Chunks use a bit more memory for their header.
+  explicit ProfileBufferChunkManagerWithLocalLimit(size_t aMaxTotalBytes,
+                                                   Length aChunkMinBufferBytes)
+      : mMaxTotalBytes(aMaxTotalBytes),
+        mChunkMinBufferBytes(aChunkMinBufferBytes) {}
+
+  ~ProfileBufferChunkManagerWithLocalLimit() {
+    if (mUpdateCallback) {
+      // Last call to the callback: a null update signals its end.
+      std::move(mUpdateCallback)(Update(nullptr));
+    }
+  }
+
+  [[nodiscard]] size_t MaxTotalSize() const final {
+    // No locking needed to read `mMaxTotalBytes`, as it is `const`.
+    return mMaxTotalBytes;
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final {
+    AUTO_PROFILER_STATS(Local_GetChunk);
+    // The main mutex is only held while obtaining the chunk.
+    ChunkAndUpdate result = [&]() {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+      return GetChunk(lock);
+    }();
+
+    // The resulting update (if any) is reported under the callback mutex.
+    baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+    if (mUpdateCallback && !result.second.IsNotUpdate()) {
+      mUpdateCallback(std::move(result.second));
+    }
+    return std::move(result.first);
+  }
+
+  void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&&
+                        aChunkReceiver) final {
+    AUTO_PROFILER_STATS(Local_RequestChunk);
+    baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+    if (!mChunkReceiver) {
+      // No request is pending: remember the receiver, which both marks a
+      // request as pending and gives the next `FulfillChunkRequests()` call
+      // something to invoke.
+      mChunkReceiver = std::move(aChunkReceiver);
+    }
+    // Otherwise a request is already pending, and this one is ignored.
+  }
+
+  void FulfillChunkRequests() final {
+    AUTO_PROFILER_STATS(Local_FulfillChunkRequests);
+    std::function<void(UniquePtr<ProfileBufferChunk>)> pendingReceiver;
+    ChunkAndUpdate result = [&]() -> ChunkAndUpdate {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+      if (!mChunkReceiver) {
+        // Nobody is waiting for a chunk.
+        return {};
+      }
+      // Take the receiver out of the member, which also clears the pending
+      // request.
+      std::swap(pendingReceiver, mChunkReceiver);
+      MOZ_ASSERT(!mChunkReceiver, "mChunkReceiver should have been emptied");
+      // Try to get the requested chunk. A failure is not handled here, the
+      // receiver is simply given whatever `GetChunk` returned.
+      AUTO_PROFILER_STATS(Local_FulfillChunkRequests_GetChunk);
+      return GetChunk(lock);
+    }();
+
+    if (!pendingReceiver) {
+      return;
+    }
+
+    {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+      if (mUpdateCallback && !result.second.IsNotUpdate()) {
+        mUpdateCallback(std::move(result.second));
+      }
+    }
+
+    // The receiver is invoked outside of any lock, so it is free to use other
+    // chunk manager functions. A concurrent request could therefore be issued
+    // and even fulfilled before this one; this should be rare and harmless,
+    // as new and recycled chunks are interchangeable.
+    std::move(pendingReceiver)(std::move(result.first));
+  }
+
+  void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final {
+    if (!aChunk) {
+      return;
+    }
+
+    MOZ_RELEASE_ASSERT(!aChunk->GetNext(), "ReleaseChunk only accepts 1 chunk");
+    MOZ_RELEASE_ASSERT(!aChunk->ChunkHeader().mDoneTimeStamp.IsNull(),
+                       "Released chunk should have a 'Done' timestamp");
+
+    // Under the main lock: move the chunk's bytes from the unreleased to the
+    // released count, and link the chunk into the released list.
+    Update update = [&]() {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+      MOZ_ASSERT(mUser, "Not registered yet");
+      // Raw pointer to the chunk being released, still needed to build the
+      // update once `aChunk` has been moved into the released list.
+      const ProfileBufferChunk* const newlyReleasedChunk = aChunk.get();
+      const TimeStamp& doneTimeStamp = aChunk->ChunkHeader().mDoneTimeStamp;
+      // These bytes are not "unreleased" anymore, they are now "released".
+      const Length releasedBytes = aChunk->BufferBytes();
+      mUnreleasedBufferBytes -= releasedBytes;
+      mReleasedBufferBytes += releasedBytes;
+
+      // The released list is kept in 'Done' timestamp order, oldest first.
+      if (!mReleasedChunks) {
+        // Empty list, this chunk becomes its only element.
+        MOZ_ASSERT(mReleasedBufferBytes == releasedBytes);
+        mReleasedChunks = std::move(aChunk);
+      } else if (doneTimeStamp <
+                 mReleasedChunks->ChunkHeader().mDoneTimeStamp) {
+        // Older than the current head: the chunk becomes the new head.
+        aChunk->SetLast(std::move(mReleasedChunks));
+        mReleasedChunks = std::move(aChunk);
+      } else {
+        // Skip every chunk that is not younger than the released one; the
+        // released chunk goes right after the last of those (possibly at the
+        // very end of the list).
+        ProfileBufferChunk* insertAfter = mReleasedChunks.get();
+        for (ProfileBufferChunk* candidate = insertAfter->GetNext();
+             candidate &&
+             !(doneTimeStamp < candidate->ChunkHeader().mDoneTimeStamp);
+             candidate = candidate->GetNext()) {
+          insertAfter = candidate;
+        }
+        insertAfter->InsertNext(std::move(aChunk));
+      }
+
+      return Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+                    mReleasedChunks.get(), newlyReleasedChunk);
+    }();
+
+    // Outside of the main lock: pass the update to the update callback, if
+    // one is installed and the update carries information.
+    baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+    if (mUpdateCallback && !update.IsNotUpdate()) {
+      mUpdateCallback(std::move(update));
+    }
+  }
+
+  void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback)
+      final {
+    baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+    MOZ_ASSERT(mUser, "Not registered yet");
+    mChunkDestroyedCallback = std::move(aChunkDestroyedCallback);  // Replaces.
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final {
+    UniquePtr<ProfileBufferChunk> extantChunks;
+    size_t stillUnreleasedBytes = 0;
+    {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+      MOZ_ASSERT(mUser, "Not registered yet");
+      extantChunks = std::move(mReleasedChunks);
+      mReleasedBufferBytes = 0;
+      stillUnreleasedBytes = mUnreleasedBufferBytes;
+    }
+    // Tell the update callback (if any) that no released chunks remain here.
+    baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+    if (mUpdateCallback) {
+      mUpdateCallback(Update(stillUnreleasedBytes, 0, nullptr, nullptr));
+    }
+    return extantChunks;
+  }
+
+  void ForgetUnreleasedChunks() final {
+    Update forgottenUpdate = [&]() {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+      MOZ_ASSERT(mUser, "Not registered yet");
+      mUnreleasedBufferBytes = 0;  // Unreleased chunks won't come back.
+      return Update(0, mReleasedBufferBytes, mReleasedChunks.get(), nullptr);
+    }();
+    baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+    if (mUpdateCallback) {
+      mUpdateCallback(std::move(forgottenUpdate));
+    }
+  }
+
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+    return SizeOfExcludingThis(aMallocSizeOf, lock);  // Locked overload.
+  }
+
+  [[nodiscard]] size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+    MOZ_ASSERT(mUser, "Not registered yet");  // DEBUG-only user check.
+    return SizeOfExcludingThis(aMallocSizeOf, lock) + aMallocSizeOf(this);
+  }
+
+  void SetUpdateCallback(UpdateCallback&& aUpdateCallback) final {
+    {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+      if (mUpdateCallback) {
+        std::move(mUpdateCallback)(Update(nullptr));  // Final, null, update.
+        mUpdateCallback = nullptr;
+      }
+    }
+
+    if (!aUpdateCallback) {
+      return;  // Only removing the previous callback.
+    }
+
+    Update initialUpdate = [&]() {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+      return Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+                    mReleasedChunks.get(), nullptr);
+    }();
+    baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+    MOZ_ASSERT(!mUpdateCallback, "Only one update callback allowed");
+    mUpdateCallback = std::move(aUpdateCallback);
+    mUpdateCallback(std::move(initialUpdate));
+  }
+
+  void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) final {
+    MOZ_ASSERT(!aDoneTimeStamp.IsNull());
+    baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+    // The released list is ordered by 'Done' timestamp, oldest first: keep
+    // discarding its head until the list is empty or the head is too recent.
+    while (mReleasedChunks) {
+      const TimeStamp& oldestDoneTimeStamp =
+          mReleasedChunks->ChunkHeader().mDoneTimeStamp;
+      if (oldestDoneTimeStamp > aDoneTimeStamp) {
+        // Strictly after the given timestamp: this chunk and all the
+        // following ones are kept.
+        break;
+      }
+      DiscardOldestReleasedChunk(lock);
+    }
+  }
+
+ protected:
+  const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final {
+    mMutex.Lock();
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return mReleasedChunks.get();
+  }
+  void UnlockAfterPeekExtantReleasedChunks() final { mMutex.Unlock(); }
+
+ private:
+  void MaybeRecycleChunk(
+      UniquePtr<ProfileBufferChunk>&& chunk,
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    // Try to recycle big-enough chunks. (All chunks should have the same size,
+    // but it's a cheap test and may allow future adjustments based on actual
+    // data rate.)
+    if (chunk->BufferBytes() >= mChunkMinBufferBytes) {
+      // We keep up to two recycled chunks at any time.
+      if (!mRecycledChunks) {
+        mRecycledChunks = std::move(chunk);
+      } else if (!mRecycledChunks->GetNext()) {
+        mRecycledChunks->InsertNext(std::move(chunk));
+      }
+    }
+  }
+
+  UniquePtr<ProfileBufferChunk> TakeRecycledChunk(
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    UniquePtr<ProfileBufferChunk> recycled;
+    if (mRecycledChunks) {
+      recycled = std::exchange(mRecycledChunks, mRecycledChunks->ReleaseNext());
+      recycled->MarkRecycled();
+    }
+    return recycled;
+  }
+
+  void DiscardOldestReleasedChunk(
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    MOZ_ASSERT(!!mReleasedChunks);
+    UniquePtr<ProfileBufferChunk> oldest =
+        std::exchange(mReleasedChunks, mReleasedChunks->ReleaseNext());
+    mReleasedBufferBytes -= oldest->BufferBytes();
+    if (mChunkDestroyedCallback) {
+      // Inform the user that we're going to destroy this chunk.
+      mChunkDestroyedCallback(*oldest);
+    }
+    MaybeRecycleChunk(std::move(oldest), aLock);
+  }
+
+  using ChunkAndUpdate = std::pair<UniquePtr<ProfileBufferChunk>, Update>;
+  [[nodiscard]] ChunkAndUpdate GetChunk(
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // After this function, the total memory consumption will be the sum of:
+    // - Bytes from released (i.e., full) chunks,
+    // - Bytes from unreleased (still in use) chunks,
+    // - Bytes from the chunk we want to create/recycle. (Note that we don't
+    //   count the extra bytes of chunk header, and of extra allocation ability,
+    //   for the new chunk, as it's assumed to be negligible compared to the
+    //   total memory limit.)
+    // If this total reaches or exceeds the local limit, we'll want to destroy
+    // the oldest released chunks until we're under the limit; if any, we may
+    // recycle one of them to avoid a deallocation followed by an allocation.
+    while (mReleasedBufferBytes + mUnreleasedBufferBytes +
+                   mChunkMinBufferBytes >=
+               mMaxTotalBytes &&
+           !!mReleasedChunks) {
+      // We have reached the local limit, discard the oldest released chunk.
+      DiscardOldestReleasedChunk(aLock);
+    }
+
+    // Extract the recycled chunk, if any.
+    ChunkAndUpdate chunkAndUpdate{TakeRecycledChunk(aLock), Update()};
+    UniquePtr<ProfileBufferChunk>& chunk = chunkAndUpdate.first;
+
+    if (!chunk) {
+      // No recycled chunk -> Create a chunk now. (This could still fail.)
+      chunk = ProfileBufferChunk::Create(mChunkMinBufferBytes);
+    }
+
+    if (chunk) {
+      // We do have a chunk (recycled or new), record its size as "unreleased".
+      mUnreleasedBufferBytes += chunk->BufferBytes();
+
+      chunkAndUpdate.second =
+          Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+                 mReleasedChunks.get(), nullptr);
+    }
+
+    return chunkAndUpdate;
+  }
+
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf,
+      const baseprofiler::detail::BaseProfilerAutoLock&) const {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    size_t size = 0;
+    if (mReleasedChunks) {
+      size += mReleasedChunks->SizeOfIncludingThis(aMallocSizeOf);
+    }
+    if (mRecycledChunks) {
+      size += mRecycledChunks->SizeOfIncludingThis(aMallocSizeOf);
+    }
+    // Note: Missing size of std::function external resources (if any).
+    return size;
+  }
+
+  // Maximum number of bytes that should be used by all unreleased and released
+  // chunks. Note that only released chunks can be destroyed here, so it is the
+  // responsibility of the user to properly release their chunks when possible.
+  const size_t mMaxTotalBytes;
+
+  // Minimum number of bytes that new chunks should be able to store.
+  // Used when calling `ProfileBufferChunk::Create()`.
+  const Length mChunkMinBufferBytes;
+
+  // Mutex guarding the following members.
+  mutable baseprofiler::detail::BaseProfilerMutex mMutex;
+
+  // Number of bytes currently held in chunks that have been given away (through
+  // `GetChunk` or `RequestChunk`) and not released yet.
+  size_t mUnreleasedBufferBytes = 0;
+
+  // Number of bytes currently held in chunks that have been released and stored
+  // in `mReleasedChunks` below.
+  size_t mReleasedBufferBytes = 0;
+
+  // List of all released chunks. The oldest one should be at the start of the
+  // list, and may be destroyed or recycled when the memory limit is reached.
+  UniquePtr<ProfileBufferChunk> mReleasedChunks;
+
+  // This may hold chunks that were released then slated for destruction, they
+  // will be reused next time an allocation would have been needed.
+  UniquePtr<ProfileBufferChunk> mRecycledChunks;
+
+  // Optional callback used to notify the user when a chunk is about to be
+  // destroyed or recycled. (The data content is always destroyed, but the chunk
+  // container may be reused.)
+  std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback;
+
+  // Callback set from `RequestChunk()`, until it is serviced in
+  // `FulfillChunkRequests()`. There can only be one request in flight.
+  std::function<void(UniquePtr<ProfileBufferChunk>)> mChunkReceiver;
+
+  // Separate mutex guarding mUpdateCallback, so that it may be invoked outside
+  // of the main buffer `mMutex`.
+  mutable baseprofiler::detail::BaseProfilerMutex mUpdateCallbackMutex;
+
+  UpdateCallback mUpdateCallback;
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunkManagerWithLocalLimit_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h
new file mode 100644
index 0000000000..45b39b163c
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h
@@ -0,0 +1,203 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferControlledChunkManager_h
+#define ProfileBufferControlledChunkManager_h
+
+#include "mozilla/ProfileBufferChunk.h"
+
+#include <functional>
+#include <vector>
+
+namespace mozilla {
+
+// A "Controlled" chunk manager will provide updates about chunks that it
+// creates, releases, and destroys; and it can destroy released chunks as
+// requested.
+class ProfileBufferControlledChunkManager {
+ public:
+  using Length = ProfileBufferChunk::Length;
+
+  virtual ~ProfileBufferControlledChunkManager() = default;
+
+  // Minimum amount of chunk metadata to be transferred between processes.
+  struct ChunkMetadata {
+    // Timestamp when chunk was marked "done", which is used to:
+    // - determine its age, so the oldest one will be destroyed first,
+    // - uniquely identify this chunk in this process. (The parent process is
+    //   responsible for associating this timestamp to its process id.)
+    TimeStamp mDoneTimeStamp;
+    // Size of this chunk's buffer.
+    Length mBufferBytes;
+
+    ChunkMetadata(TimeStamp aDoneTimeStamp, Length aBufferBytes)
+        : mDoneTimeStamp(aDoneTimeStamp), mBufferBytes(aBufferBytes) {}
+  };
+
+  // Class collecting all information necessary to describe updates that
+  // happened in a chunk manager.
+  // An update can be folded into a previous update.
+  class Update {
+   public:
+    // Construct a "not-an-Update" object, which should only be used after a
+    // real update is folded into it.
+    Update() = default;
+
+    // Construct a "final" Update, which marks the end of all updates from a
+    // chunk manager.
+    explicit Update(decltype(nullptr)) : mUnreleasedBytes(FINAL) {}
+
+    // Construct an Update from the given data and released chunks.
+    // The chunk pointers may be null, and it doesn't matter if
+    // `aNewlyReleasedChunks` is already linked to `aExistingReleasedChunks` or
+    // not.
+    Update(size_t aUnreleasedBytes, size_t aReleasedBytes,
+           const ProfileBufferChunk* aExistingReleasedChunks,
+           const ProfileBufferChunk* aNewlyReleasedChunks)
+        : mUnreleasedBytes(aUnreleasedBytes),
+          mReleasedBytes(aReleasedBytes),
+          mOldestDoneTimeStamp(
+              aExistingReleasedChunks
+                  ? aExistingReleasedChunks->ChunkHeader().mDoneTimeStamp
+                  : TimeStamp{}) {
+      MOZ_RELEASE_ASSERT(
+          !IsNotUpdate(),
+          "Empty update should only be constructed with default constructor");
+      MOZ_RELEASE_ASSERT(
+          !IsFinal(),
+          "Final update should only be constructed with nullptr constructor");
+      for (const ProfileBufferChunk* chunk = aNewlyReleasedChunks; chunk;
+           chunk = chunk->GetNext()) {
+        mNewlyReleasedChunks.emplace_back(ChunkMetadata{
+            chunk->ChunkHeader().mDoneTimeStamp, chunk->BufferBytes()});
+      }
+    }
+
+    // Construct an Update from raw data.
+    // This may be used to re-construct an Update that was previously
+    // serialized.
+    Update(size_t aUnreleasedBytes, size_t aReleasedBytes,
+           TimeStamp aOldestDoneTimeStamp,
+           std::vector<ChunkMetadata>&& aNewlyReleasedChunks)
+        : mUnreleasedBytes(aUnreleasedBytes),
+          mReleasedBytes(aReleasedBytes),
+          mOldestDoneTimeStamp(aOldestDoneTimeStamp),
+          mNewlyReleasedChunks(std::move(aNewlyReleasedChunks)) {}
+
+    // Clear the Update completely and return it to a "not-an-Update" state.
+    void Clear() {
+      mUnreleasedBytes = NO_UPDATE;
+      mReleasedBytes = 0;
+      mOldestDoneTimeStamp = TimeStamp{};
+      mNewlyReleasedChunks.clear();
+    }
+
+    bool IsNotUpdate() const { return mUnreleasedBytes == NO_UPDATE; }
+
+    bool IsFinal() const { return mUnreleasedBytes == FINAL; }
+
+    size_t UnreleasedBytes() const {
+      MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+                         "Cannot access UnreleasedBytes from empty update");
+      MOZ_RELEASE_ASSERT(!IsFinal(),
+                         "Cannot access UnreleasedBytes from final update");
+      return mUnreleasedBytes;
+    }
+
+    size_t ReleasedBytes() const {
+      MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+                         "Cannot access ReleasedBytes from empty update");
+      MOZ_RELEASE_ASSERT(!IsFinal(),
+                         "Cannot access ReleasedBytes from final update");
+      return mReleasedBytes;
+    }
+
+    TimeStamp OldestDoneTimeStamp() const {
+      MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+                         "Cannot access OldestDoneTimeStamp from empty update");
+      MOZ_RELEASE_ASSERT(!IsFinal(),
+                         "Cannot access OldestDoneTimeStamp from final update");
+      return mOldestDoneTimeStamp;
+    }
+
+    const std::vector<ChunkMetadata>& NewlyReleasedChunksRef() const {
+      MOZ_RELEASE_ASSERT(
+          !IsNotUpdate(),
+          "Cannot access NewlyReleasedChunksRef from empty update");
+      MOZ_RELEASE_ASSERT(
+          !IsFinal(), "Cannot access NewlyReleasedChunksRef from final update");
+      return mNewlyReleasedChunks;
+    }
+
+    // Fold a later update into this one.
+    void Fold(Update&& aNewUpdate) {
+      MOZ_ASSERT(
+          !IsFinal() || aNewUpdate.IsFinal(),
+          "There shouldn't be another non-final update after the final update");
+
+      if (IsNotUpdate() || aNewUpdate.IsFinal()) {
+        // We were empty, or the new update is the final update, we just switch
+        // to that new update.
+        *this = std::move(aNewUpdate);
+        return;
+      }
+
+      mUnreleasedBytes = aNewUpdate.mUnreleasedBytes;
+      mReleasedBytes = aNewUpdate.mReleasedBytes;
+      if (!aNewUpdate.mOldestDoneTimeStamp.IsNull()) {
+        MOZ_ASSERT(mOldestDoneTimeStamp.IsNull() ||
+                   mOldestDoneTimeStamp <= aNewUpdate.mOldestDoneTimeStamp);
+        mOldestDoneTimeStamp = aNewUpdate.mOldestDoneTimeStamp;
+        auto it = mNewlyReleasedChunks.begin();
+        while (it != mNewlyReleasedChunks.end() &&
+               it->mDoneTimeStamp < mOldestDoneTimeStamp) {
+          it = mNewlyReleasedChunks.erase(it);
+        }
+      }
+      if (!aNewUpdate.mNewlyReleasedChunks.empty()) {
+        mNewlyReleasedChunks.reserve(mNewlyReleasedChunks.size() +
+                                     aNewUpdate.mNewlyReleasedChunks.size());
+        mNewlyReleasedChunks.insert(mNewlyReleasedChunks.end(),
+                                    aNewUpdate.mNewlyReleasedChunks.begin(),
+                                    aNewUpdate.mNewlyReleasedChunks.end());
+      }
+    }
+
+   private:
+    static const size_t NO_UPDATE = size_t(-1);
+    static const size_t FINAL = size_t(-2);
+
+    size_t mUnreleasedBytes = NO_UPDATE;
+    size_t mReleasedBytes = 0;
+    TimeStamp mOldestDoneTimeStamp;
+    std::vector<ChunkMetadata> mNewlyReleasedChunks;
+  };
+
+  using UpdateCallback = std::function<void(Update&&)>;
+
+  // This *may* be set (or reset) by an object that needs to know about all
+  // chunk updates that happen in this manager. The main use will be to
+  // coordinate the global memory usage of Firefox.
+  // If a non-empty callback is given, it will be immediately invoked with the
+  // current state.
+  // When the callback is about to be destroyed (by overwriting it here, or in
+  // the class destructor), it will be invoked one last time with a final
+  // update (i.e., one for which `Update::IsFinal()` is true).
+  // Note that the callback (even the first current-state callback) will be
+  // invoked from inside a locked scope, so it should *not* call other functions
+  // of the chunk manager. A side benefit of this locking is that it guarantees
+  // that no two invocations can overlap.
+  virtual void SetUpdateCallback(UpdateCallback&& aUpdateCallback) = 0;
+
+  // This is a request to destroy all chunks at or before the given timestamp.
+  // This timestamp should be one that was given in a previous UpdateCallback
+  // call. Obviously, only released chunks can be destroyed.
+  virtual void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) = 0;
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferControlledChunkManager_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h
new file mode 100644
index 0000000000..c8280a92d7
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h
@@ -0,0 +1,94 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntryKinds_h
+#define ProfileBufferEntryKinds_h
+
+#include <cstdint>
+
+namespace mozilla {
+
+// This is equal to sizeof(double), which is the largest non-char variant in
+// |u|.
+static constexpr size_t ProfileBufferEntryNumChars = 8;
+
+// NOTE!  If you add entries, you need to verify if they need to be added to the
+// switch statement in DuplicateLastSample!
+// This will evaluate the MACRO with (KIND, TYPE, SIZE)
+#define FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(MACRO)                 \
+  MACRO(CategoryPair, int, sizeof(int))                           \
+  MACRO(CollectionStart, double, sizeof(double))                  \
+  MACRO(CollectionEnd, double, sizeof(double))                    \
+  MACRO(Label, const char*, sizeof(const char*))                  \
+  MACRO(FrameFlags, uint64_t, sizeof(uint64_t))                   \
+  MACRO(DynamicStringFragment, char*, ProfileBufferEntryNumChars) \
+  MACRO(JitReturnAddr, void*, sizeof(void*))                      \
+  MACRO(InnerWindowID, uint64_t, sizeof(uint64_t))                \
+  MACRO(LineNumber, int, sizeof(int))                             \
+  MACRO(ColumnNumber, int, sizeof(int))                           \
+  MACRO(NativeLeafAddr, void*, sizeof(void*))                     \
+  MACRO(Pause, double, sizeof(double))                            \
+  MACRO(Resume, double, sizeof(double))                           \
+  MACRO(PauseSampling, double, sizeof(double))                    \
+  MACRO(ResumeSampling, double, sizeof(double))                   \
+  MACRO(Responsiveness, double, sizeof(double))                   \
+  MACRO(ThreadId, int, sizeof(int))                               \
+  MACRO(Time, double, sizeof(double))                             \
+  MACRO(TimeBeforeCompactStack, double, sizeof(double))           \
+  MACRO(CounterId, void*, sizeof(void*))                          \
+  MACRO(CounterKey, uint64_t, sizeof(uint64_t))                   \
+  MACRO(Number, uint64_t, sizeof(uint64_t))                       \
+  MACRO(Count, int64_t, sizeof(int64_t))                          \
+  MACRO(ProfilerOverheadTime, double, sizeof(double))             \
+  MACRO(ProfilerOverheadDuration, double, sizeof(double))
+
+// The `Kind` is a single byte identifying the type of data that is actually
+// stored in a `ProfileBufferEntry`, as per the list in
+// `FOR_EACH_PROFILE_BUFFER_ENTRY_KIND`.
+//
+// This byte is also used to identify entries in ProfileChunkedBuffer blocks,
+// for both "legacy" entries that do contain a `ProfileBufferEntry`, and for
+// new types of entries that may carry more data of different types.
+// TODO: Eventually each type of "legacy" entry should be replaced with newer,
+// more efficient kinds of entries (e.g., stack frames could be stored in one
+// bigger entry, instead of multiple `ProfileBufferEntry`s); then we could
+// discard `ProfileBufferEntry` and move this enum to a more appropriate spot.
+using ProfileBufferEntryKindUnderlyingType = uint8_t;
+
+enum class ProfileBufferEntryKind : ProfileBufferEntryKindUnderlyingType {
+  INVALID = 0,
+#define KIND(KIND, TYPE, SIZE) KIND,
+  FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(KIND)
+#undef KIND
+
+  // Any value under `LEGACY_LIMIT` represents a `ProfileBufferEntry`.
+  LEGACY_LIMIT,
+
+  // Any value starting here does *not* represent a `ProfileBufferEntry` and
+  // requires separate decoding and handling.
+
+  // Markers and their data.
+  Marker = LEGACY_LIMIT,
+
+  // Entry with "running times", such as CPU usage measurements.
+  // Optional between TimeBeforeCompactStack and CompactStack.
+  RunningTimes,
+
+  // Optional between TimeBeforeCompactStack and CompactStack.
+  UnresponsiveDurationMs,
+
+  // Collection of legacy stack entries, must follow a ThreadId and
+  // TimeBeforeCompactStack (which are not included in the CompactStack;
+  // TimeBeforeCompactStack is equivalent to Time, but indicates that a
+  // CompactStack follows shortly afterwards).
+  CompactStack,
+
+  MODERN_LIMIT
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferEntryKinds_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h
new file mode 100644
index 0000000000..267b99f10d
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h
@@ -0,0 +1,1166 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntrySerialization_h
+#define ProfileBufferEntrySerialization_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/leb128iterator.h"
+#include "mozilla/Likely.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/Span.h"
+#include "mozilla/Tuple.h"
+#include "mozilla/UniquePtrExtensions.h"
+#include "mozilla/Unused.h"
+#include "mozilla/Variant.h"
+
+#include <string>
+#include <tuple>
+
+namespace mozilla {
+
+class ProfileBufferEntryWriter;
+
+// Iterator-like class used to read from an entry.
+// An entry may be split in two memory segments (e.g., the ends of a ring
+// buffer, or two chunks of a chunked buffer); it doesn't deal with this
+// underlying buffer, but only with one or two spans pointing at the space
+// where the entry lives.
+class ProfileBufferEntryReader {
+ public:
+  using Byte = uint8_t;
+  using Length = uint32_t;
+
+  using SpanOfConstBytes = Span<const Byte>;
+
+  // Class to be specialized for types to be read from a profile buffer entry.
+  // See common specializations at the bottom of this header.
+  // The following static functions must be provided:
+  //   static void ReadInto(EntryReader& aER, T& aT)
+  //   {
+  //     /* Call `aER.ReadX(...)` function to deserialize into aT, be sure to
+  //        read exactly `Bytes(aT)`! */
+  //   }
+  //   static T Read(EntryReader& aER) {
+  //     /* Call `aER.ReadX(...)` function to deserialize and return a `T`, be
+  //        sure to read exactly `Bytes(returned value)`! */
+  //   }
+  template <typename T>
+  struct Deserializer;
+
+  ProfileBufferEntryReader() = default;
+
+  // Reader over one Span.
+  ProfileBufferEntryReader(SpanOfConstBytes aSpan,
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpan),
+        mNextSpanOrEmpty(aSpan.Last(0)),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {
+    // 2nd internal Span points at the end of the 1st internal Span, to enforce
+    // invariants.
+    CheckInvariants();
+  }
+
+  // Reader over two Spans, the second one must not be empty.
+  ProfileBufferEntryReader(SpanOfConstBytes aSpanHead,
+                           SpanOfConstBytes aSpanTail,
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpanHead),
+        mNextSpanOrEmpty(aSpanTail),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {
+    MOZ_RELEASE_ASSERT(!mNextSpanOrEmpty.IsEmpty());
+    if (MOZ_UNLIKELY(mCurrentSpan.IsEmpty())) {
+      // First span is already empty, skip it.
+      mCurrentSpan = mNextSpanOrEmpty;
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+  }
+
+  // Allow copying, which is needed when used as an iterator in some std
+  // functions (e.g., string assignment), and to occasionally backtrack.
+  // Be aware that the main profile buffer APIs give a reference to an entry
+  // reader, and expect that reader to advance to the end of the entry, so don't
+  // just advance copies!
+  ProfileBufferEntryReader(const ProfileBufferEntryReader&) = default;
+  ProfileBufferEntryReader& operator=(const ProfileBufferEntryReader&) =
+      default;
+
+  // Don't =default moving, as it doesn't bring any benefit in this class.
+
+  [[nodiscard]] Length RemainingBytes() const {
+    return mCurrentSpan.LengthBytes() + mNextSpanOrEmpty.LengthBytes();
+  }
+
+  void SetRemainingBytes(Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (aBytes <= mCurrentSpan.LengthBytes()) {
+      mCurrentSpan = mCurrentSpan.First(aBytes);
+      mNextSpanOrEmpty = mCurrentSpan.Last(0);
+    } else {
+      mNextSpanOrEmpty =
+          mNextSpanOrEmpty.First(aBytes - mCurrentSpan.LengthBytes());
+    }
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+    return mCurrentBlockIndex;
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+    return mNextBlockIndex;
+  }
+
+  // Create a reader of size zero, pointing at aOffset past the current position
+  // of this Reader, so it can be used as end iterator.
+  [[nodiscard]] ProfileBufferEntryReader EmptyIteratorAtOffset(
+      Length aOffset) const {
+    MOZ_RELEASE_ASSERT(aOffset <= RemainingBytes());
+    if (MOZ_LIKELY(aOffset < mCurrentSpan.LengthBytes())) {
+      // aOffset is before the end of mCurrentSpan.
+      return ProfileBufferEntryReader(mCurrentSpan.Subspan(aOffset, 0),
+                                      mCurrentBlockIndex, mNextBlockIndex);
+    }
+    // aOffset is right at the end of mCurrentSpan, or inside mNextSpanOrEmpty.
+    return ProfileBufferEntryReader(
+        mNextSpanOrEmpty.Subspan(aOffset - mCurrentSpan.LengthBytes(), 0),
+        mCurrentBlockIndex, mNextBlockIndex);
+  }
+
+  // Be like a limited input iterator, with only `*`, prefix-`++`, `==`, `!=`.
+  // These definitions are expected by std functions, to recognize this as an
+  // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+  using difference_type = std::make_signed_t<Length>;
+  using value_type = Byte;
+  using pointer = const Byte*;
+  using reference = const Byte&;
+  using iterator_category = std::input_iterator_tag;
+
+  [[nodiscard]] const Byte& operator*() {
+    // Assume the caller will read from the returned reference (and not just
+    // take the address).
+    MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1);
+    return *(mCurrentSpan.Elements());
+  }
+
+  ProfileBufferEntryReader& operator++() {
+    MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1);
+    if (MOZ_LIKELY(mCurrentSpan.LengthBytes() > 1)) {
+      // More than 1 byte left in mCurrentSpan, just eat it.
+      mCurrentSpan = mCurrentSpan.From(1);
+    } else {
+      // mCurrentSpan will be empty, move mNextSpanOrEmpty to mCurrentSpan.
+      mCurrentSpan = mNextSpanOrEmpty;
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+    return *this;
+  }
+
+  ProfileBufferEntryReader& operator+=(Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // All bytes are in mCurrentSpan.
+      // Update mCurrentSpan past the read bytes.
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+      if (mCurrentSpan.IsEmpty() && !mNextSpanOrEmpty.IsEmpty()) {
+        // Don't leave mCurrentSpan empty, move non-empty mNextSpanOrEmpty into
+        // mCurrentSpan.
+        mCurrentSpan = mNextSpanOrEmpty;
+        mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+      }
+    } else {
+      // mCurrentSpan does not hold enough bytes.
+      // This should only happen at most once: Only for double spans, and when
+      // data crosses the gap.
+      const Length tail =
+          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+      // will go back to the true case above.
+      mCurrentSpan = mNextSpanOrEmpty.From(tail);
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+    return *this;
+  }
+
+  [[nodiscard]] bool operator==(const ProfileBufferEntryReader& aOther) const {
+    return mCurrentSpan.Elements() == aOther.mCurrentSpan.Elements();
+  }
+  [[nodiscard]] bool operator!=(const ProfileBufferEntryReader& aOther) const {
+    return mCurrentSpan.Elements() != aOther.mCurrentSpan.Elements();
+  }
+
+  // Read an unsigned LEB128 number and move iterator ahead.
+  template <typename T>
+  [[nodiscard]] T ReadULEB128() {
+    return ::mozilla::ReadULEB128<T>(*this);
+  }
+
+  // Read a sequence of bytes, like memcpy.
+  void ReadBytes(void* aDest, Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // All bytes are in mCurrentSpan.
+      memcpy(aDest, mCurrentSpan.Elements(), aBytes);
+      // Update mCurrentSpan past the read bytes.
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+      if (mCurrentSpan.IsEmpty() && !mNextSpanOrEmpty.IsEmpty()) {
+        // Don't leave mCurrentSpan empty, move non-empty mNextSpanOrEmpty into
+        // mCurrentSpan.
+        mCurrentSpan = mNextSpanOrEmpty;
+        mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+      }
+    } else {
+      // mCurrentSpan does not hold enough bytes.
+      // This should only happen at most once: Only for double spans, and when
+      // data crosses the gap.
+      // Split data between the end of mCurrentSpan and the beginning of
+      // mNextSpanOrEmpty.
+      memcpy(aDest, mCurrentSpan.Elements(), mCurrentSpan.LengthBytes());
+      const Length tail =
+          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+      memcpy(reinterpret_cast<Byte*>(aDest) + mCurrentSpan.LengthBytes(),
+             mNextSpanOrEmpty.Elements(), tail);
+      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+      // will go back to the true case above.
+      mCurrentSpan = mNextSpanOrEmpty.From(tail);
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+  }
+
+  template <typename T>
+  void ReadIntoObject(T& aObject) {
+    Deserializer<T>::ReadInto(*this, aObject);
+  }
+
+  // Read into one or more objects, sequentially.
+  // `EntryReader::ReadIntoObjects()` with nothing is implicitly allowed, this
+  // could be useful for generic programming.
+  template <typename... Ts>
+  void ReadIntoObjects(Ts&... aTs) {
+    (ReadIntoObject(aTs), ...);
+  }
+
+  // Read data as an object and move iterator ahead.
+  template <typename T>
+  [[nodiscard]] T ReadObject() {
+    T ob = Deserializer<T>::Read(*this);
+    return ob;
+  }
+
+ private:
+  friend class ProfileBufferEntryWriter;
+
+  // Invariants:
+  // - mCurrentSpan cannot be empty unless mNextSpanOrEmpty is also empty. So
+  //   mCurrentSpan always points at the next byte to read or the end.
+  // - If mNextSpanOrEmpty is empty, it points at the end of mCurrentSpan. So
+  //   when reaching the end of mCurrentSpan, we can blindly move
+  //   mNextSpanOrEmpty to mCurrentSpan and keep the invariants.
+  SpanOfConstBytes mCurrentSpan;
+  SpanOfConstBytes mNextSpanOrEmpty;
+  ProfileBufferBlockIndex mCurrentBlockIndex;
+  ProfileBufferBlockIndex mNextBlockIndex;
+
+  void CheckInvariants() const {
+    MOZ_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+    MOZ_ASSERT(!mNextSpanOrEmpty.IsEmpty() ||
+               (mNextSpanOrEmpty == mCurrentSpan.Last(0)));
+  }
+};
+
+// Iterator-like class used to write into an entry.
+// An entry may be split in two memory segments (e.g., the ends of a ring
+// buffer, or two chunks of a chunked buffer); it doesn't deal with this
+// underlying buffer, but only with one or two spans pointing at the space
+// reserved for the entry.
+class ProfileBufferEntryWriter {
+ public:
+  using Byte = uint8_t;
+  using Length = uint32_t;
+
+  using SpanOfBytes = Span<Byte>;
+
+  // Customization point: specialize for each type that can be written into an
+  // entry (ready-to-use specializations are at the bottom of this header).
+  // A specialization must provide these two static functions:
+  //   static Length Bytes(const T& aT) {
+  //     /* Number of bytes that `Write` will output for aT. */
+  //   }
+  //   static void Write(ProfileBufferEntryWriter& aEW,
+  //                     const T& aT) {
+  //     /* Serialize aT through `aEW.WriteX(...)` calls, outputting exactly
+  //        `Bytes(aT)` bytes! */
+  //   }
+  template <typename T>
+  struct Serializer;
+
+  // Writer with default-constructed spans and block indices; use one of the
+  // `Set()` overloads to point it at some memory later.
+  ProfileBufferEntryWriter() = default;
+
+  // Writer over a single span.
+  ProfileBufferEntryWriter(SpanOfBytes aSpan,
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpan),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {}
+
+  // Writer over two spans, filled in order: `aSpanHead`, then `aSpanTail`.
+  ProfileBufferEntryWriter(SpanOfBytes aSpanHead, SpanOfBytes aSpanTail,
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpanHead),
+        mNextSpanOrEmpty(aSpanTail),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {
+    // The head span may only be empty when the tail span is empty as well.
+    MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+  }
+
+  // Disable copying and moving, so we can't have multiple writing heads.
+  ProfileBufferEntryWriter(const ProfileBufferEntryWriter&) = delete;
+  ProfileBufferEntryWriter& operator=(const ProfileBufferEntryWriter&) = delete;
+  ProfileBufferEntryWriter(ProfileBufferEntryWriter&&) = delete;
+  ProfileBufferEntryWriter& operator=(ProfileBufferEntryWriter&&) = delete;
+
+  // Reset to empty spans and null block indices.
+  void Set() {
+    mCurrentBlockIndex = nullptr;
+    mNextBlockIndex = nullptr;
+    mCurrentSpan = SpanOfBytes{};
+    mNextSpanOrEmpty = SpanOfBytes{};
+  }
+
+  // Re-target this writer at a single span.
+  void Set(SpanOfBytes aSpan, ProfileBufferBlockIndex aCurrentBlockIndex,
+           ProfileBufferBlockIndex aNextBlockIndex) {
+    mCurrentBlockIndex = aCurrentBlockIndex;
+    mNextBlockIndex = aNextBlockIndex;
+    mCurrentSpan = aSpan;
+    mNextSpanOrEmpty = SpanOfBytes{};
+  }
+
+  // Re-target this writer at two spans, filled in order: `aSpan0`, `aSpan1`.
+  void Set(SpanOfBytes aSpan0, SpanOfBytes aSpan1,
+           ProfileBufferBlockIndex aCurrentBlockIndex,
+           ProfileBufferBlockIndex aNextBlockIndex) {
+    mCurrentBlockIndex = aCurrentBlockIndex;
+    mNextBlockIndex = aNextBlockIndex;
+    mCurrentSpan = aSpan0;
+    mNextSpanOrEmpty = aSpan1;
+    // The first span may only be empty when the second span is empty as well.
+    MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+  }
+
+  // Number of bytes that can still be written, over both spans.
+  [[nodiscard]] Length RemainingBytes() const {
+    const auto bytesInCurrent = mCurrentSpan.LengthBytes();
+    const auto bytesInNext = mNextSpanOrEmpty.LengthBytes();
+    return bytesInCurrent + bytesInNext;
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+    return mCurrentBlockIndex;
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+    return mNextBlockIndex;
+  }
+
+  // Behave like a limited output iterator, offering only `*` and prefix-`++`.
+  // The aliases below are what std functions look for to recognize an
+  // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+  using value_type = Byte;
+  using pointer = Byte*;
+  using reference = Byte&;
+  using iterator_category = std::output_iterator_tag;
+
+  // Reference to the next byte to be written; at least one byte must remain.
+  [[nodiscard]] Byte& operator*() {
+    MOZ_RELEASE_ASSERT(RemainingBytes() >= 1);
+    if (MOZ_LIKELY(!mCurrentSpan.IsEmpty())) {
+      return *mCurrentSpan.Elements();
+    }
+    // mCurrentSpan is exhausted, the next byte is the first one of
+    // mNextSpanOrEmpty.
+    return *mNextSpanOrEmpty.Elements();
+  }
+
+  // Skip one byte.
+  ProfileBufferEntryWriter& operator++() {
+    if (MOZ_LIKELY(mCurrentSpan.LengthBytes() >= 1)) {
+      // Common case: consume one byte from mCurrentSpan.
+      mCurrentSpan = mCurrentSpan.From(1);
+      return *this;
+    }
+    // mCurrentSpan is empty: mNextSpanOrEmpty, minus its first byte, becomes
+    // the new mCurrentSpan.
+    MOZ_RELEASE_ASSERT(mNextSpanOrEmpty.LengthBytes() >= 1);
+    mCurrentSpan = mNextSpanOrEmpty.From(1);
+    mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+    return *this;
+  }
+
+  // Skip `aBytes` bytes.
+  ProfileBufferEntryWriter& operator+=(Length aBytes) {
+    // Note: This is a rare operation. It follows the same steps as
+    // `WriteBytes`, minus the `memcpy`s.
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // The skipped range lies within mCurrentSpan, which may end up empty;
+      // with a double span, the next call then takes the path below.
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+      return *this;
+    }
+    // The skipped range extends past mCurrentSpan. This should happen at most
+    // once, only with double spans, when the range crosses the gap or starts
+    // there.
+    const Length bytesIntoNextSpan =
+        aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+    // What remains of mNextSpanOrEmpty becomes mCurrentSpan, so that later
+    // calls take the common path above.
+    mCurrentSpan = mNextSpanOrEmpty.From(bytesIntoNextSpan);
+    mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+    return *this;
+  }
+
+  // Number of bytes needed to represent `aValue` in unsigned LEB128.
+  template <typename T>
+  [[nodiscard]] static unsigned ULEB128Size(T aValue) {
+    return ::mozilla::ULEB128Size(aValue);
+  }
+
+  // Write number as unsigned LEB128 and move iterator ahead.
+  template <typename T>
+  void WriteULEB128(T aValue) {
+    ::mozilla::WriteULEB128(aValue, *this);
+  }
+
+  // Total number of bytes needed to serialize all the given objects, as
+  // reported by their respective Serializers.
+  template <typename... Ts>
+  [[nodiscard]] static Length SumBytes(const Ts&... aTs) {
+    return (0 + ... + Serializer<Ts>::Bytes(aTs));
+  }
+
+  // Copy `aBytes` bytes from `aSrc` into the entry (like memcpy) and move
+  // ahead by as many bytes.
+  void WriteBytes(const void* aSrc, Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // Everything fits in mCurrentSpan, which may end up empty; with a double
+      // span, the next call then takes the path below.
+      memcpy(mCurrentSpan.Elements(), aSrc, aBytes);
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+      return;
+    }
+    // The data does not fully fit in mCurrentSpan. This should happen at most
+    // once, only with double spans, when the data crosses the gap or starts
+    // there. The data is split between the end of mCurrentSpan and the
+    // beginning of mNextSpanOrEmpty. (mCurrentSpan could be empty, it's ok to
+    // do a memcpy because Span::Elements() is never null.)
+    const auto headBytes = mCurrentSpan.LengthBytes();
+    memcpy(mCurrentSpan.Elements(), aSrc, headBytes);
+    const Length tailBytes = aBytes - static_cast<Length>(headBytes);
+    memcpy(mNextSpanOrEmpty.Elements(),
+           reinterpret_cast<const Byte*>(aSrc) + headBytes, tailBytes);
+    // What remains of mNextSpanOrEmpty becomes mCurrentSpan, so that later
+    // calls take the common path above.
+    mCurrentSpan = mNextSpanOrEmpty.From(tailBytes);
+    mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+  }
+
+  // Copy `aBytes` bytes from `aReader` into the entry, and advance both this
+  // writer and `aReader` by as many bytes.
+  void WriteFromReader(ProfileBufferEntryReader& aReader, Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    MOZ_RELEASE_ASSERT(aBytes <= aReader.RemainingBytes());
+    // First take as much as possible from the reader's current span...
+    const Length fromCurrentSpan = std::min(
+        aBytes, static_cast<Length>(aReader.mCurrentSpan.LengthBytes()));
+    if (fromCurrentSpan != 0) {
+      WriteBytes(aReader.mCurrentSpan.Elements(), fromCurrentSpan);
+    }
+    // ... and then the rest, if any, from the reader's next span.
+    const Length fromNextSpan = aBytes - fromCurrentSpan;
+    if (fromNextSpan != 0) {
+      WriteBytes(aReader.mNextSpanOrEmpty.Elements(), fromNextSpan);
+    }
+    aReader += aBytes;
+  }
+
+  // Write a single object by using the appropriate Serializer.
+  template <typename T>
+  void WriteObject(const T& aObject) {
+    Serializer<T>::Write(*this, aObject);
+  }
+
+  // Write any number of objects, sequentially, in argument order.
+  // `ProfileBufferEntryWriter::WriteObjects()` with nothing is allowed, this
+  // could be useful for generic programming.
+  template <typename... Ts>
+  void WriteObjects(const Ts&... aTs) {
+    (WriteObject(aTs), ...);
+  }
+
+ private:
+  // The two spans covering the memory still to be written.
+  SpanOfBytes mCurrentSpan;
+  SpanOfBytes mNextSpanOrEmpty;
+  // Block indices given at construction or through `Set()`, only handed back
+  // by `CurrentBlockIndex()` and `NextBlockIndex()`.
+  ProfileBufferBlockIndex mCurrentBlockIndex;
+  ProfileBufferBlockIndex mNextBlockIndex;
+};
+
+// ============================================================================
+// Serializer and Deserializer ready-to-use specializations.
+
+// ----------------------------------------------------------------------------
+// Trivially-copyable types (default)
+
+// The default implementation works for all trivially-copyable types (e.g.,
+// PODs).
+//
+// Usage: `aEW.WriteObject(123);`.
+//
+// Raw pointers, though trivially-copyable, are explicitly forbidden when writing
+// (to avoid unexpected leaks/UAFs), instead use one of
+// `WrapProfileBufferLiteralCStringPointer`, `WrapProfileBufferUnownedCString`,
+// or `WrapProfileBufferRawPointer` as needed.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer {
+  static_assert(std::is_trivially_copyable_v<T>,
+                "Serializer only works with trivially-copyable types by "
+                "default, use/add specialization for other types.");
+
+  // The object representation is copied as-is, so the size only depends on
+  // the type.
+  static constexpr Length Bytes(const T&) { return sizeof(T); }
+
+  // Copy the bytes of `aT` into the entry.
+  static void Write(ProfileBufferEntryWriter& aEW, const T& aT) {
+    static_assert(!std::is_pointer_v<T>,
+                  "Serializer won't write raw pointers by default, use "
+                  "WrapProfileBufferRawPointer or other.");
+    aEW.WriteBytes(&aT, Bytes(aT));
+  }
+};
+
+// Usage: `aER.ReadObject<int>();` or `int x; aER.ReadIntoObject(x);`.
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer {
+  static_assert(std::is_trivially_copyable_v<T>,
+                "Deserializer only works with trivially-copyable types by "
+                "default, use/add specialization for other types.");
+
+  // Overwrite the bytes of `aT` with the next sizeof(T) bytes from the entry.
+  static void ReadInto(ProfileBufferEntryReader& aER, T& aT) {
+    aER.ReadBytes(&aT, sizeof(aT));
+  }
+
+  // Build a `T` from the next sizeof(T) bytes from the entry.
+  static T Read(ProfileBufferEntryReader& aER) {
+    // A default `T` is created first, and its bytes are then overwritten from
+    // the buffer. Trivially-copyable types support this without UB.
+    T result;
+    aER.ReadBytes(&result, sizeof(result));
+    return result;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Strip const/volatile/reference from types.
+
+// `const T` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<const T>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<const T>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// `volatile T` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<volatile T>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<volatile T>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// Lvalue-reference `T&` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<T&>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<T&>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// Rvalue-reference `T&&` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<T&&>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<T&&>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// ----------------------------------------------------------------------------
+// ProfileBufferBlockIndex
+
+// ProfileBufferBlockIndex, serialized as the underlying value.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileBufferBlockIndex> {
+  // The index object is copied as-is, its size does not depend on its value.
+  static constexpr Length Bytes(const ProfileBufferBlockIndex& aBlockIndex) {
+    return sizeof(aBlockIndex);
+  }
+
+  // Copy the bytes of the index object into the entry.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileBufferBlockIndex& aBlockIndex) {
+    aEW.WriteBytes(&aBlockIndex, sizeof(ProfileBufferBlockIndex));
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<ProfileBufferBlockIndex> {
+  // Overwrite the bytes of `aBlockIndex` with bytes from the entry.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfileBufferBlockIndex& aBlockIndex) {
+    aER.ReadBytes(&aBlockIndex, sizeof(ProfileBufferBlockIndex));
+  }
+
+  // Start from a default index, then overwrite its bytes from the entry.
+  static ProfileBufferBlockIndex Read(ProfileBufferEntryReader& aER) {
+    ProfileBufferBlockIndex result;
+    aER.ReadBytes(&result, sizeof(result));
+    return result;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Literal C string pointer
+
+// Wrapper around a pointer to a literal C string.
+template <size_t NonTerminalCharacters>  // Literal array size, minus one.
+struct ProfileBufferLiteralCStringPointer {
+  const char* mCString;  // Points at the literal, characters are not copied.
+};
+
+// Wrap a pointer to a literal C string.
+template <size_t CharactersIncludingTerminal>
+ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal - 1>
+WrapProfileBufferLiteralCStringPointer(
+    const char (&aCString)[CharactersIncludingTerminal]) {
+  // Taking an array reference lets the array size be deduced; the wrapper
+  // type is parameterized with that size minus one.
+  using Wrapper =
+      ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal - 1>;
+  return Wrapper{aCString};
+}
+
+// Literal C strings, serialized as the raw pointer because it is unique and
+// valid for the whole program lifetime.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferLiteralCStringPointer("hi"));`.
+//
+// No deserializer is provided for this type, instead it must be deserialized as
+// a raw pointer: `aER.ReadObject<const char*>();`
+// Note: This must be a `ProfileBufferEntryWriter::Serializer` specialization
+// (it provides `Bytes` and `Write`), so that `WriteObject` can find it.
+// `NonTerminalCharacters` matches the wrapper's template parameter.
+template <size_t NonTerminalCharacters>
+struct ProfileBufferEntryWriter::Serializer<
+    ProfileBufferLiteralCStringPointer<NonTerminalCharacters>> {
+  static constexpr Length Bytes(
+      const ProfileBufferLiteralCStringPointer<NonTerminalCharacters>&) {
+    // We're only storing a pointer, its size is independent from the pointer
+    // value.
+    return sizeof(const char*);
+  }
+
+  static void Write(
+      ProfileBufferEntryWriter& aEW,
+      const ProfileBufferLiteralCStringPointer<NonTerminalCharacters>&
+          aWrapper) {
+    // Write the pointer *value*, not the string contents: The source address
+    // must be the address of the pointer member itself. (Passing `mCString`
+    // would copy sizeof(pointer) characters from the literal instead, reading
+    // past the end of short literals.)
+    aEW.WriteBytes(&aWrapper.mCString, sizeof(aWrapper.mCString));
+  }
+};
+
+// ----------------------------------------------------------------------------
+// C string contents
+
+// Wrapper around a pointer to a C string whose contents will be serialized.
+struct ProfileBufferUnownedCString {
+  const char* mCString;  // Not owned; its Serializer calls strlen() on it.
+};
+
+// Wrap a pointer to a C string whose contents will be serialized.
+inline ProfileBufferUnownedCString WrapProfileBufferUnownedCString(
+    const char* aCString) {
+  // Only the pointer is stored in the wrapper, nothing is copied here.
+  return ProfileBufferUnownedCString{aCString};
+}
+
+// The contents of a (probably) unowned C string are serialized as the number of
+// characters (encoded as ULEB128) and all the characters in the string. The
+// terminal '\0' is omitted.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferUnownedCString(str.c_str()))`.
+//
+// No deserializer is provided for this pointer type, instead it must be
+// deserialized as one of the other string types that manages its contents,
+// e.g.: `aER.ReadObject<std::string>();`
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileBufferUnownedCString> {
+  // ULEB128-encoded character count, followed by that many characters.
+  static Length Bytes(const ProfileBufferUnownedCString& aS) {
+    const size_t characters = strlen(aS.mCString);
+    return ULEB128Size(characters) + characters;
+  }
+
+  // Write the character count, then the characters (no terminal '\0').
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileBufferUnownedCString& aS) {
+    const char* const string = aS.mCString;
+    const size_t characters = strlen(string);
+    aEW.WriteULEB128(characters);
+    aEW.WriteBytes(string, characters);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Raw pointers
+
+// Wrapper around a pointer to be serialized as the raw pointer value.
+template <typename T>
+struct ProfileBufferRawPointer {
+  T* mRawPointer;  // Only this pointer value is (de)serialized, not `*T`.
+};
+
+// Wrap a pointer to be serialized as the raw pointer value.
+template <typename T>
+ProfileBufferRawPointer<T> WrapProfileBufferRawPointer(T* aRawPointer) {
+  // Only the pointer value is stored in the wrapper.
+  return ProfileBufferRawPointer<T>{aRawPointer};
+}
+
+// Raw pointers are serialized as the raw pointer value.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferRawPointer(ptr));`
+//
+// The wrapper is compulsory when writing pointers (to avoid unexpected
+// leaks/UAFs), but reading can be done straight into a raw pointer object,
+// e.g.: `aER.ReadObject<Foo*>();`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<ProfileBufferRawPointer<T>> {
+  // Only a pointer is stored, its size is independent from the argument.
+  template <typename U>
+  static constexpr Length Bytes(const U&) {
+    return sizeof(T*);
+  }
+
+  // Copy the bytes of the wrapped pointer value into the entry.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileBufferRawPointer<T>& aWrapper) {
+    T* const& pointer = aWrapper.mRawPointer;
+    aEW.WriteBytes(&pointer, sizeof(pointer));
+  }
+};
+
+// Usage: `aER.ReadObject<Foo*>;` or `Foo* p; aER.ReadIntoObject(p);`, no
+// wrapper necessary.
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<ProfileBufferRawPointer<T>> {
+  // Overwrite the wrapped pointer with a pointer value read from the entry.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfileBufferRawPointer<T>& aPtr) {
+    // The number of bytes read must be the size of the destination member
+    // (matching what the Serializer wrote), not the size of the wrapper.
+    aER.ReadBytes(&aPtr.mRawPointer, sizeof(aPtr.mRawPointer));
+  }
+
+  // Build a wrapper around a pointer value read from the entry.
+  static ProfileBufferRawPointer<T> Read(ProfileBufferEntryReader& aER) {
+    ProfileBufferRawPointer<T> rawPointer;
+    ReadInto(aER, rawPointer);
+    return rawPointer;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// std::string contents
+
+// std::string contents are serialized as the number of characters (encoded as
+// ULEB128) and all the characters in the string. The terminal '\0' is omitted.
+//
+// Usage: `std::string s = ...; aEW.WriteObject(s);`
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<std::basic_string<CHAR>> {
+  // Size of the ULEB128-encoded number of characters, plus the size of the
+  // characters themselves. This must be exactly what `Write` outputs, so the
+  // payload is counted in bytes (`sizeof(CHAR)` per character).
+  static Length Bytes(const std::basic_string<CHAR>& aS) {
+    const Length len = static_cast<Length>(aS.length());
+    return ULEB128Size(len) + static_cast<Length>(len * sizeof(CHAR));
+  }
+
+  // Write the number of characters as ULEB128, followed by all characters
+  // (without the terminal null character).
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const std::basic_string<CHAR>& aS) {
+    const Length len = static_cast<Length>(aS.length());
+    aEW.WriteULEB128(len);
+    aEW.WriteBytes(aS.c_str(), len * sizeof(CHAR));
+  }
+};
+
+// Usage: `std::string s = aER.ReadObject<std::string>();` or
+// `std::string s; aER.ReadIntoObject(s);`
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<std::basic_string<CHAR>> {
+  // Replace the contents of `aS` with `aLength` items read from the entry
+  // (the length prefix must have been consumed already).
+  static void ReadCharsInto(ProfileBufferEntryReader& aER,
+                            std::basic_string<CHAR>& aS, size_t aLength) {
+    // The reader is used as the start iterator, and
+    // `EmptyIteratorAtOffset(aLength)` as the matching end iterator.
+    // `assign` works on a copy of the reader, so the reader itself is moved
+    // ahead afterwards.
+    aS.assign(aER, aER.EmptyIteratorAtOffset(aLength));
+    aER += aLength;
+  }
+
+  // Read the ULEB128 length prefix, then the characters into `aS`.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       std::basic_string<CHAR>& aS) {
+    using SizeType = typename std::basic_string<CHAR>::size_type;
+    const SizeType length = aER.ReadULEB128<SizeType>();
+    ReadCharsInto(aER, aS, length);
+  }
+
+  // Construct a string from `aLength` items read from the entry (the length
+  // prefix must have been consumed already).
+  static std::basic_string<CHAR> ReadChars(ProfileBufferEntryReader& aER,
+                                           size_t aLength) {
+    // Same iterator pair as in `ReadCharsInto`, given to the constructor.
+    std::basic_string<CHAR> result(aER, aER.EmptyIteratorAtOffset(aLength));
+    aER += aLength;
+    return result;
+  }
+
+  // Read the ULEB128 length prefix, then construct the string.
+  static std::basic_string<CHAR> Read(ProfileBufferEntryReader& aER) {
+    using SizeType = typename std::basic_string<CHAR>::size_type;
+    const SizeType length = aER.ReadULEB128<SizeType>();
+    return ReadChars(aER, length);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::UniqueFreePtr<CHAR>
+
+// UniqueFreePtr<CHAR>, which points at a string allocated with `malloc`
+// (typically generated by `strdup()`), is serialized as the number of
+// *bytes* (encoded as ULEB128) and all the characters in the string. The
+// null terminator is omitted.
+// `CHAR` can be any type that has a specialization for
+// `std::char_traits<CHAR>::length(const CHAR*)`.
+//
+// Note: A nullptr pointer will be serialized like an empty string, so when
+// deserializing it will result in an allocated buffer only containing a
+// single null terminator.
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<UniqueFreePtr<CHAR>> {
+ private:
+  // Size in *bytes* (not in number of characters) of a non-null string,
+  // excluding the null terminator.
+  static auto StringBytes(const UniqueFreePtr<CHAR>& aS) {
+    return std::char_traits<CHAR>::length(aS.get()) * sizeof(CHAR);
+  }
+
+ public:
+  static Length Bytes(const UniqueFreePtr<CHAR>& aS) {
+    if (aS) {
+      // ULEB128-encoded size in bytes, followed by the characters.
+      const auto bytes = StringBytes(aS);
+      return ULEB128Size(bytes) + bytes;
+    }
+    // Null pointer, stored as if it was an empty string: Only the size 0.
+    return ULEB128Size(0u);
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniqueFreePtr<CHAR>& aS) {
+    if (aS) {
+      // Size in bytes first, then the characters.
+      const auto bytes = StringBytes(aS);
+      aEW.WriteULEB128(bytes);
+      aEW.WriteBytes(aS.get(), bytes);
+      return;
+    }
+    // Null pointer, stored as if it was an empty string: Only the size 0.
+    aEW.WriteULEB128(0u);
+  }
+};
+
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<UniqueFreePtr<CHAR>> {
+  // Replace `aS` with a newly-allocated string read from the entry.
+  static void ReadInto(ProfileBufferEntryReader& aER, UniqueFreePtr<CHAR>& aS) {
+    aS = Read(aER);
+  }
+
+  // Read a string into a `malloc`ed buffer owned by the returned pointer.
+  static UniqueFreePtr<CHAR> Read(ProfileBufferEntryReader& aER) {
+    // `CHAR` may be const, but the buffer being filled must be writable.
+    using NC_CHAR = std::remove_const_t<CHAR>;
+    // The stored size is a number of *bytes*.
+    const auto bytes = aER.ReadULEB128<size_t>();
+    // Room for the characters, plus one more character for the terminator.
+    const auto allocationBytes = bytes + sizeof(NC_CHAR);
+    NC_CHAR* const chars = static_cast<NC_CHAR*>(malloc(allocationBytes));
+    // Bring the characters in from the entry...
+    aER.ReadBytes(chars, bytes);
+    // ... and null-terminate right after the last one.
+    const auto terminatorIndex = bytes / sizeof(NC_CHAR);
+    chars[terminatorIndex] = NC_CHAR(0);
+    return UniqueFreePtr<CHAR>(chars);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// std::tuple
+
+// std::tuple is serialized as a sequence of each recursively-serialized item.
+//
+// This is equivalent to manually serializing each item, so reading/writing
+// tuples is equivalent to reading/writing their elements in order, e.g.:
+// ```
+// std::tuple<int, std::string> is = ...;
+// aEW.WriteObject(is); // Write the tuple, equivalent to:
+// aEW.WriteObjects(/* int */ std::get<0>(is),
+//                  /* string */ std::get<1>(is));
+// ...
+// // Reading back can be done directly into a tuple:
+// auto is = aER.ReadObject<std::tuple<int, std::string>>();
+// // Or each item could be read separately:
+// auto i = aER.ReadObject<int>(); auto s = aER.ReadObject<std::string>();
+// ```
+template <typename... Ts>
+struct ProfileBufferEntryWriter::Serializer<std::tuple<Ts...>> {
+ private:
+  // Expand the tuple into its items, and add up their serialized sizes.
+  template <size_t... Indices>
+  static Length ItemsBytes(const std::tuple<Ts...>& aTuple,
+                           std::index_sequence<Indices...>) {
+    return SumBytes(std::get<Indices>(aTuple)...);
+  }
+
+  // Expand the tuple into its items, and write them in order.
+  template <size_t... Indices>
+  static void ItemsWrite(ProfileBufferEntryWriter& aEW,
+                         const std::tuple<Ts...>& aTuple,
+                         std::index_sequence<Indices...>) {
+    aEW.WriteObjects(std::get<Indices>(aTuple)...);
+  }
+
+  // 0..N-1 index pack, used to address each tuple item.
+  using ItemIndices = std::index_sequence_for<Ts...>;
+
+ public:
+  // Sum of the serialized sizes of all items.
+  static Length Bytes(const std::tuple<Ts...>& aTuple) {
+    return ItemsBytes(aTuple, ItemIndices());
+  }
+
+  // Serialize all items, from first to last.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const std::tuple<Ts...>& aTuple) {
+    ItemsWrite(aEW, aTuple, ItemIndices());
+  }
+};
+
+template <typename... Ts>
+struct ProfileBufferEntryReader::Deserializer<std::tuple<Ts...>> {
+ private:
+  // Deserialize each item in order, through its own Deserializer. This
+  // mirrors how the Serializer writes tuples item by item; a raw byte copy
+  // over the tuple object would not match the serialized format, and would
+  // be invalid for items that are not trivially-copyable.
+  template <size_t... Is>
+  static void TupleReadInto(ProfileBufferEntryReader& aER,
+                            std::tuple<Ts...>& aTuple,
+                            std::index_sequence<Is...>) {
+    (aER.ReadIntoObject(std::get<Is>(aTuple)), ...);
+  }
+
+ public:
+  // Overwrite all items of `aTuple` with items read from the entry.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       std::tuple<Ts...>& aTuple) {
+    // Generate a 0..N-1 index pack, we'll read each item.
+    TupleReadInto(aER, aTuple, std::index_sequence_for<Ts...>());
+  }
+
+  // Build a tuple from items read from the entry.
+  static std::tuple<Ts...> Read(ProfileBufferEntryReader& aER) {
+    // Note that this creates default `Ts` first, and then overwrites them.
+    std::tuple<Ts...> ob;
+    ReadInto(aER, ob);
+    return ob;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Tuple
+
+// Tuple is serialized as a sequence of each recursively-serialized
+// item.
+//
+// This is equivalent to manually serializing each item, so reading/writing
+// tuples is equivalent to reading/writing their elements in order, e.g.:
+// ```
+// Tuple<int, std::string> is = ...;
+// aEW.WriteObject(is); // Write the Tuple, equivalent to:
+// aEW.WriteObjects(/* int */ Get<0>(is), /* string */ Get<1>(is));
+// ...
+// // Reading back can be done directly into a Tuple:
+// auto is = aER.ReadObject<Tuple<int, std::string>>();
+// // Or each item could be read separately:
+// auto i = aER.ReadObject<int>(); auto s = aER.ReadObject<std::string>();
+// ```
+template <typename... Ts>
+struct ProfileBufferEntryWriter::Serializer<Tuple<Ts...>> {
+ private:
+  // Expand the Tuple into its items, and add up their serialized sizes.
+  template <size_t... Indices>
+  static Length ItemsBytes(const Tuple<Ts...>& aTuple,
+                           std::index_sequence<Indices...>) {
+    return SumBytes(Get<Indices>(aTuple)...);
+  }
+
+  // Expand the Tuple into its items, and write them in order.
+  template <size_t... Indices>
+  static void ItemsWrite(ProfileBufferEntryWriter& aEW,
+                         const Tuple<Ts...>& aTuple,
+                         std::index_sequence<Indices...>) {
+    aEW.WriteObjects(Get<Indices>(aTuple)...);
+  }
+
+  // 0..N-1 index pack, used to address each Tuple item.
+  using ItemIndices = std::index_sequence_for<Ts...>;
+
+ public:
+  // Sum of the serialized sizes of all items.
+  static Length Bytes(const Tuple<Ts...>& aTuple) {
+    return ItemsBytes(aTuple, ItemIndices());
+  }
+
+  // Serialize all items, from first to last.
+  static void Write(ProfileBufferEntryWriter& aEW, const Tuple<Ts...>& aTuple) {
+    ItemsWrite(aEW, aTuple, ItemIndices());
+  }
+};
+
+template <typename... Ts>
+struct ProfileBufferEntryReader::Deserializer<Tuple<Ts...>> {
+ private:
+  // Deserialize each item in order, through its own Deserializer. This
+  // mirrors how the Serializer writes Tuples item by item; a raw byte copy
+  // over the Tuple object would not match the serialized format, and would
+  // be invalid for items that are not trivially-copyable.
+  template <size_t... Is>
+  static void TupleReadInto(ProfileBufferEntryReader& aER,
+                            Tuple<Ts...>& aTuple,
+                            std::index_sequence<Is...>) {
+    (aER.ReadIntoObject(Get<Is>(aTuple)), ...);
+  }
+
+ public:
+  // Overwrite all items of `aTuple` with items read from the entry.
+  static void ReadInto(ProfileBufferEntryReader& aER, Tuple<Ts...>& aTuple) {
+    // Generate a 0..N-1 index pack, we'll read each item.
+    TupleReadInto(aER, aTuple, std::index_sequence_for<Ts...>());
+  }
+
+  // Build a Tuple from items read from the entry.
+  static Tuple<Ts...> Read(ProfileBufferEntryReader& aER) {
+    // Note that this creates default `Ts` first, and then overwrites them.
+    Tuple<Ts...> ob;
+    ReadInto(aER, ob);
+    return ob;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Span
+
+// Span. All elements are serialized in sequence.
+// The caller is assumed to know the number of elements (they may manually
+// write&read it before the span if needed).
+// Similar to tuples, reading/writing spans is equivalent to reading/writing
+// their elements in order.
+template <class T, size_t N>
+struct ProfileBufferEntryWriter::Serializer<Span<T, N>> {
+  // Sum of the serialized sizes of all items; the number of items itself is
+  // not part of the serialized data.
+  static Length Bytes(const Span<T, N>& aSpan) {
+    Length total = 0;
+    for (const T& item : aSpan) {
+      total += SumBytes(item);
+    }
+    return total;
+  }
+
+  // Serialize all items, from first to last.
+  static void Write(ProfileBufferEntryWriter& aEW, const Span<T, N>& aSpan) {
+    for (const T& item : aSpan) {
+      aEW.WriteObject(item);
+    }
+  }
+};
+
+template <class T, size_t N>
+struct ProfileBufferEntryReader::Deserializer<Span<T, N>> {
+  // Deserialize into the existing items of `aSpan`, from first to last. The
+  // span length determines how many items are read.
+  static void ReadInto(ProfileBufferEntryReader& aER, Span<T, N>& aSpan) {
+    for (T& item : aSpan) {
+      aER.ReadIntoObject(item);
+    }
+  }
+
+  // Forbidden: A Span does not own its data, so returning one would probably
+  // leak the storage behind it.
+  static Span<T, N> Read(ProfileBufferEntryReader& aER) = delete;
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Maybe
+
+// Maybe<T> is serialized as one byte containing either 'm' (Nothing),
+// or 'M' followed by the recursively-serialized `T` object.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<Maybe<T>> {
+  // There is always 1 flag byte, followed by the object if there is one.
+  static Length Bytes(const Maybe<T>& aMaybe) {
+    if (aMaybe.isNothing()) {
+      return 1;
+    }
+    return 1 + SumBytes(aMaybe.ref());
+  }
+
+  // The flag bytes 'm' (nothing) and 'M' (something) are arbitrary values,
+  // only used to distinguish the two states.
+  static void Write(ProfileBufferEntryWriter& aEW, const Maybe<T>& aMaybe) {
+    if (!aMaybe.isNothing()) {
+      aEW.WriteObject<char>('M');
+      // The contained object goes through its own Serializer.
+      aEW.WriteObject(aMaybe.ref());
+      return;
+    }
+    aEW.WriteObject<char>('m');
+  }
+};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<Maybe<T>> {
+  // Read the flag byte; either empty `aMaybe`, or deserialize into its
+  // (possibly newly-created) contents.
+  static void ReadInto(ProfileBufferEntryReader& aER, Maybe<T>& aMaybe) {
+    char c = aER.ReadObject<char>();
+    if (c == 'm') {
+      // Nothing was stored.
+      aMaybe.reset();
+      return;
+    }
+    MOZ_ASSERT(c == 'M');
+    // An empty aMaybe first gets a default `T`, to be overwritten below.
+    // Otherwise whatever was already there is overwritten.
+    if (aMaybe.isNothing()) {
+      aMaybe.emplace();
+    }
+    // The contained object goes through its own Deserializer.
+    aER.ReadIntoObject(aMaybe.ref());
+  }
+
+  // Read the flag byte, and the object if there is one.
+  static Maybe<T> Read(ProfileBufferEntryReader& aER) {
+    char c = aER.ReadObject<char>();
+    MOZ_ASSERT(c == 'M' || c == 'm');
+    Maybe<T> result;
+    if (c != 'M') {
+      // Nothing was stored, the result stays empty.
+      return result;
+    }
+    // A default `T` is first created inside the Maybe, and then overwritten
+    // by the Deserializer of the contained type.
+    result = Some(T{});
+    aER.ReadIntoObject(result.ref());
+    return result;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Variant
+
+// Variant is serialized as the tag (0-based index of the stored type, encoded
+// as ULEB128), and the recursively-serialized object.
+template <typename... Ts>
+struct ProfileBufferEntryWriter::Serializer<Variant<Ts...>> {
+ public:
+  // Size of the ULEB128-encoded tag, plus the serialized size of the object
+  // currently stored in the variant.
+  static Length Bytes(const Variant<Ts...>& aVariantTs) {
+    const auto taggedBytes = [](auto aTag, const auto& aStored) {
+      return ULEB128Size(aTag) + SumBytes(aStored);
+    };
+    return aVariantTs.match(taggedBytes);
+  }
+
+  // Write the tag as ULEB128, then the object currently stored in the
+  // variant, through its own Serializer.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const Variant<Ts...>& aVariantTs) {
+    const auto taggedWrite = [&aEW](auto aTag, const auto& aStored) {
+      aEW.WriteULEB128(aTag);
+      aEW.WriteObject(aStored);
+    };
+    aVariantTs.match(taggedWrite);
+  }
+};
+
+template <typename... Ts>
+struct ProfileBufferEntryReader::Deserializer<Variant<Ts...>> {
+ private:
+  // Invoked once per alternative index by the fold expression in
+  // `VariantReadInto()`; only the invocation whose `I` matches the tag
+  // deserializes the object.
+  template <size_t I>
+  static void VariantIReadInto(ProfileBufferEntryReader& aER,
+                               Variant<Ts...>& aVariantTs, unsigned aTag) {
+    if (I != aTag) {
+      // Not the stored alternative, nothing to do.
+      return;
+    }
+    // Make sure the variant holds the target type. Note that this may create
+    // a default object.
+    if (!aVariantTs.template is<I>()) {
+      aVariantTs = Variant<Ts...>(VariantIndex<I>{});
+    }
+    aER.ReadIntoObject(aVariantTs.template as<I>());
+  }
+
+  // Read the tag, then offer it to each alternative index in turn.
+  template <size_t... Indices>
+  static void VariantReadInto(ProfileBufferEntryReader& aER,
+                              Variant<Ts...>& aVariantTs,
+                              std::index_sequence<Indices...>) {
+    const unsigned storedTag = aER.ReadULEB128<unsigned>();
+    (VariantIReadInto<Indices>(aER, aVariantTs, storedTag), ...);
+  }
+
+ public:
+  // Read the tag and the object into `aVariantTs`, switching the variant to
+  // the stored alternative if needed.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       Variant<Ts...>& aVariantTs) {
+    // With a 0..N-1 index pack, the alternative selected by the tag will
+    // deserialize itself.
+    using AlternativeIndices = std::index_sequence_for<Ts...>;
+    VariantReadInto(aER, aVariantTs, AlternativeIndices());
+  }
+
+  // Build a variant from the tag and object read from the entry.
+  static Variant<Ts...> Read(ProfileBufferEntryReader& aER) {
+    // Note that this creates a default `Variant` of the first type, and then
+    // overwrites it. Consider using `ReadInto` for more control if needed.
+    Variant<Ts...> result(VariantIndex<0>{});
+    ReadInto(aER, result);
+    return result;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferEntrySerialization_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferIndex.h b/mozglue/baseprofiler/public/ProfileBufferIndex.h
new file mode 100644
index 0000000000..5cda6bd89e
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferIndex.h
@@ -0,0 +1,97 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferIndex_h
+#define ProfileBufferIndex_h
+
+#include "mozilla/Attributes.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace mozilla {
+
+// Generic index into a Profiler buffer, mostly for internal usage.
+// Intended to appear infinite (64 bits, so it should effectively never wrap).
+// 0 (zero) is reserved as a nullptr-like value; it may indicate a failure
+// result, or it may point at the earliest available block.
+using ProfileBufferIndex = uint64_t;
+
+// Externally-opaque class encapsulating a block index, i.e. a
+// ProfileBufferIndex that is guaranteed to point at the start of a Profile
+// buffer block (until it is destroyed, but then that index cannot be reused and
+// functions should gracefully handle expired blocks).
+// Users may get these from Profile buffer functions, to later access previous
+// blocks; they should avoid converting and operating on their value.
+class ProfileBufferBlockIndex {
+ public:
+  // Default construction stores the internal 0 value, which Profile buffers
+  // must guarantee to be before any valid entries; All public APIs should
+  // fail gracefully, doing and/or returning Nothing.
+  ProfileBufferBlockIndex() : mBlockIndex{0} {}
+
+  // Literal `nullptr` converts implicitly to the internal 0 value, for
+  // convenient init/reset/comparison with the 0 index.
+  MOZ_IMPLICIT ProfileBufferBlockIndex(std::nullptr_t) : mBlockIndex{0} {}
+
+  // Explicit conversion to bool, works in `if` and other tests.
+  // False only for the default `ProfileBufferBlockIndex{}` value.
+  explicit operator bool() const { return !!mBlockIndex; }
+
+  // Comparison operators, all derived from `==` and `<` on the stored index.
+  // The default `ProfileBufferBlockIndex{}` value is always the lowest.
+  [[nodiscard]] bool operator==(const ProfileBufferBlockIndex& aRhs) const {
+    return aRhs.mBlockIndex == mBlockIndex;
+  }
+  [[nodiscard]] bool operator!=(const ProfileBufferBlockIndex& aRhs) const {
+    return !(*this == aRhs);
+  }
+  [[nodiscard]] bool operator<(const ProfileBufferBlockIndex& aRhs) const {
+    return aRhs.mBlockIndex > mBlockIndex;
+  }
+  [[nodiscard]] bool operator<=(const ProfileBufferBlockIndex& aRhs) const {
+    return !(aRhs < *this);
+  }
+  [[nodiscard]] bool operator>(const ProfileBufferBlockIndex& aRhs) const {
+    return aRhs < *this;
+  }
+  [[nodiscard]] bool operator>=(const ProfileBufferBlockIndex& aRhs) const {
+    return !(*this < aRhs);
+  }
+
+  // Explicit conversion to ProfileBufferIndex, mostly used by internal Profile
+  // buffer code.
+  [[nodiscard]] ProfileBufferIndex ConvertToProfileBufferIndex() const {
+    return mBlockIndex;
+  }
+
+  // Explicit creation from ProfileBufferIndex, mostly used by internal
+  // Profile buffer code.
+  [[nodiscard]] static ProfileBufferBlockIndex CreateFromProfileBufferIndex(
+      ProfileBufferIndex aIndex) {
+    return ProfileBufferBlockIndex{aIndex};
+  }
+
+ private:
+  // Private to prevent easy construction from any value. Use
+  // `CreateFromProfileBufferIndex()` instead.
+  // The point of this indirection is to make these objects harder to create,
+  // because only the profiler code should need to do it. Ideally, this class
+  // should be used wherever a block index is stored, but so much code uses
+  // `uint64_t` that changing it all would be a big task. So for now
+  // conversions to/from numbers are allowed, but made as ugly as possible so
+  // that they don't get too common; and if one day we want to tackle a global
+  // change, all these locations should be easy to find thanks to the explicit
+  // conversion functions.
+  explicit ProfileBufferBlockIndex(ProfileBufferIndex aBlockIndex)
+      : mBlockIndex{aBlockIndex} {}
+
+  ProfileBufferIndex mBlockIndex;
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferIndex_h
diff --git a/mozglue/baseprofiler/public/ProfileChunkedBuffer.h b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h
new file mode 100644
index 0000000000..d4d55eafcb
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h
@@ -0,0 +1,1872 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileChunkedBuffer_h
+#define ProfileChunkedBuffer_h
+
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/ProfileBufferChunkManager.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/RefCounted.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Unused.h"
+
+#include <cstdio>
+#include <utility>
+
+namespace mozilla {
+
+namespace detail {
+
+// Internal accessor pointing at a position inside a chunk.
+// It can handle two groups of chunks (typically the extant chunks stored in
+// the store manager, and the current chunk).
+// The main operations are:
+// - ReadEntrySize() to read an entry size, 0 means failure.
+// - operator+=(Length) to skip a number of bytes.
+// - EntryReader() creates an entry reader at the current position for a given
+//   size (it may fail with an empty reader), and skips the entry.
+// Note that there is no "past-the-end" position -- as soon as InChunkPointer
+// reaches the end, it becomes effectively null.
+class InChunkPointer {
+ public:
+  using Byte = ProfileBufferChunk::Byte;
+  using Length = ProfileBufferChunk::Length;
+
+  // Nullptr-like InChunkPointer, may be used as end iterator.
+  InChunkPointer()
+      : mChunk(nullptr), mNextChunkGroup(nullptr), mOffsetInChunk(0) {}
+
+  // Points at the block at `aBlockIndex` (if still in range) within one or
+  // two chunk groups; becomes null if that block cannot be reached.
+  // This constructor should only be used with *trusted* block index values!
+  InChunkPointer(const ProfileBufferChunk* aChunk,
+                 const ProfileBufferChunk* aNextChunkGroup,
+                 ProfileBufferBlockIndex aBlockIndex)
+      : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) {
+    if (!mChunk && mNextChunkGroup) {
+      // First group is empty, start directly in the second group.
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+    }
+    if (mChunk) {
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else {
+      mOffsetInChunk = 0;
+    }
+
+    // Move to the requested block; on failure (block gone, or it doesn't look
+    // valid) reset to a null pointer.
+    const bool reachedBlock = AdvanceToGlobalRangePosition(aBlockIndex);
+    if (!reachedBlock) {
+      mChunk = nullptr;
+      mNextChunkGroup = nullptr;
+    }
+  }
+
+  // Points at the first block at or after `aIndex` within one or two chunk
+  // groups (the first block if 0). This may be slow, so avoid overusing it.
+  InChunkPointer(const ProfileBufferChunk* aChunk,
+                 const ProfileBufferChunk* aNextChunkGroup,
+                 ProfileBufferIndex aIndex = ProfileBufferIndex(0))
+      : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) {
+    if (!mChunk && mNextChunkGroup) {
+      // First group is empty, start directly in the second group.
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+    }
+    if (mChunk) {
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else {
+      mOffsetInChunk = 0;
+    }
+
+    // Move to the requested position; if it's gone, reset to a null pointer.
+    const bool reachedPosition = AdvanceToGlobalRangePosition(aIndex);
+    if (!reachedPosition) {
+      mChunk = nullptr;
+      mNextChunkGroup = nullptr;
+    }
+  }
+
+  // Current position in the global range, i.e., the current chunk's range
+  // start plus the offset inside that chunk.
+  // 0 if null (including if we've reached the end).
+  [[nodiscard]] ProfileBufferIndex GlobalRangePosition() const {
+    const ProfileBufferIndex position =
+        IsNull() ? 0 : (mChunk->RangeStart() + mOffsetInChunk);
+    return position;
+  }
+
+  // Move InChunkPointer forward to the block starting exactly at the given
+  // global block position, which is trusted to be a real block start -- but
+  // it may be obsolete. A null index, or one behind us, stays where it is.
+  // Returns false if null; MOZ_ASSERTs if we don't end on a valid block.
+  [[nodiscard]] bool AdvanceToGlobalRangePosition(
+      ProfileBufferBlockIndex aBlockIndex) {
+    if (IsNull()) {
+      // Pointer is null already. (Not asserting because it's acceptable.)
+      return false;
+    }
+    if (!aBlockIndex) {
+      // Special null position, just stay where we are.
+      return ShouldPointAtValidBlock();
+    }
+    if (aBlockIndex.ConvertToProfileBufferIndex() < GlobalRangePosition()) {
+      // Past the requested position, stay where we are (assuming the current
+      // position was valid).
+      return ShouldPointAtValidBlock();
+    }
+    for (;;) {
+      if (aBlockIndex.ConvertToProfileBufferIndex() <
+          mChunk->RangeStart() + mChunk->OffsetPastLastBlock()) {
+        // Target position is in this chunk's written space, move to it.
+        mOffsetInChunk =
+            aBlockIndex.ConvertToProfileBufferIndex() - mChunk->RangeStart();
+        return ShouldPointAtValidBlock();
+      }
+      // Position is after this chunk's written space, try next chunk.
+      GoToNextChunk();
+      if (IsNull()) {
+        return false;  // Ran out of chunks before reaching the position.
+      }
+      // Skip whatever block tail there is, we don't allow pointing in the
+      // middle of a block.
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      if (aBlockIndex.ConvertToProfileBufferIndex() < GlobalRangePosition()) {
+        // Past the requested position, meaning that the given position was in-
+        // between blocks -> Failure.
+        MOZ_ASSERT(false, "AdvanceToGlobalRangePosition - In-between blocks");
+        return false;
+      }
+    }
+  }
+
+  // Move InChunkPointer forward to the block at or after the given global
+  // range position. 0 stays where it is (if valid already).
+  [[nodiscard]] bool AdvanceToGlobalRangePosition(
+      ProfileBufferIndex aPosition) {
+    if (aPosition == 0) {
+      // Special position '0': stay where we are, success if already valid.
+      return !IsNull();
+    }
+    for (;;) {
+      ProfileBufferIndex currentPosition = GlobalRangePosition();
+      if (currentPosition == 0) {
+        // Pointer is null.
+        return false;
+      }
+      if (aPosition <= currentPosition) {
+        // At or past the requested position, stay where we are.
+        return true;
+      }
+      if (aPosition < mChunk->RangeStart() + mChunk->OffsetPastLastBlock()) {
+        // Target position is in this chunk's written space, move to it.
+        for (;;) {
+          // Skip the current block; reading its size may run off the last
+          // chunk, in which case `mChunk` is null and must not be used.
+          mOffsetInChunk += ReadEntrySize();
+          if (MOZ_UNLIKELY(IsNull())) {
+            return false;
+          }
+          if (mOffsetInChunk >= mChunk->OffsetPastLastBlock()) {
+            // End of chunk (can happen for the last block), try next chunk.
+            break;
+          }
+          if (aPosition <= mChunk->RangeStart() + mOffsetInChunk) {
+            // We're at or after the position, return at this block position.
+            return true;
+          }
+        }
+      }
+      // Position is after this chunk, try next chunk.
+      GoToNextChunk();
+      if (IsNull()) {
+        return false;
+      }
+      // Skip any block tail, we don't allow pointing in the middle of a block.
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+    }
+  }
+
+  [[nodiscard]] Byte ReadByte() {
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    const Byte current = mChunk->ByteAt(mOffsetInChunk);
+    if (MOZ_UNLIKELY(++mOffsetInChunk == mChunk->OffsetPastLastBlock())) {
+      Adjust();  // Consumed the last written byte of this chunk, move on.
+    }
+    return current;
+  }
+
+  // Read and skip a ULEB128-encoded entry size.
+  // Returns 0 on failure: 0-byte entries are not allowed, so 0 is used when
+  // we run out of data or when the size is bigger than a chunk. A non-zero
+  // size doesn't guarantee that many bytes follow! (See EntryReader() below.)
+  [[nodiscard]] Length ReadEntrySize() {
+    ULEB128Reader<Length> sizeReader;
+    if (IsNull()) {
+      return 0;
+    }
+    bool sizeIsComplete = false;
+    while (!sizeIsComplete) {
+      sizeIsComplete = sizeReader.FeedByteIsComplete(ReadByte());
+      if (MOZ_UNLIKELY(IsNull())) {
+        // Ran out of chunks, so no actual entry can follow this size anyway.
+        return 0;
+      }
+    }
+    // Entries larger than a chunk are not allowed.
+    const Length entrySize = sizeReader.Value();
+    if (MOZ_UNLIKELY(entrySize > mChunk->BufferBytes())) {
+      return 0;
+    }
+    return entrySize;
+  }
+
+  InChunkPointer& operator+=(Length aLength) {  // Skip `aLength` bytes.
+    MOZ_ASSERT(!IsNull());
+    mOffsetInChunk += aLength;
+    Adjust();  // May move to a later chunk, or become null at the end.
+    return *this;
+  }
+
+  [[nodiscard]] ProfileBufferEntryReader EntryReader(Length aLength) {
+    if (IsNull() || aLength == 0) {
+      return ProfileBufferEntryReader();  // Empty reader on failure.
+    }
+
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+
+    // We should be pointing at the entry, past the entry size.
+    const ProfileBufferIndex entryIndex = GlobalRangePosition();
+    // Verify that there's enough space before for the size (starting at index
+    // 1 at least).
+    MOZ_ASSERT(entryIndex >= 1u + ULEB128Size(aLength));
+
+    // Bytes left in this chunk's written space, from the current position.
+    const Length remaining = mChunk->OffsetPastLastBlock() - mOffsetInChunk;
+    Span<const Byte> mem0 = mChunk->BufferSpan();
+    mem0 = mem0.From(mOffsetInChunk);
+    if (aLength <= remaining) {
+      // Move to the end of this block, which could make this null if we have
+      // reached the end of all buffers.
+      *this += aLength;
+      return ProfileBufferEntryReader(
+          mem0.To(aLength),
+          // Block starts before the entry size.
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              entryIndex - ULEB128Size(aLength)),
+          // Block ends right after the entry (could be null for last entry).
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              GlobalRangePosition()));
+    }
+
+    // Entry continues in the next chunk, go there for its 2nd part.
+    GoToNextChunk();
+    if (IsNull()) {
+      return ProfileBufferEntryReader();  // No next chunk: empty reader.
+    }
+
+    Span<const Byte> mem1 = mChunk->BufferSpan();
+    const Length tail = aLength - remaining;
+    MOZ_ASSERT(tail <= mChunk->BufferBytes());
+    MOZ_ASSERT(tail == mChunk->OffsetFirstBlock());
+    // We are in the correct chunk, move the offset to the end of the block.
+    mOffsetInChunk = tail;
+    // And adjust as needed, which could make this null if we have reached the
+    // end of all buffers.
+    Adjust();
+    return ProfileBufferEntryReader(
+        mem0, mem1.To(tail),
+        // Block starts before the entry size.
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            entryIndex - ULEB128Size(aLength)),
+        // Block ends right after the entry (could be null for last entry).
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            GlobalRangePosition()));
+  }
+
+  [[nodiscard]] bool IsNull() const { return mChunk == nullptr; }
+
+  [[nodiscard]] bool operator==(const InChunkPointer& aOther) const {
+    if (mChunk != aOther.mChunk) {
+      return false;  // Also covers "exactly one of them is null".
+    }
+    return IsNull() || mOffsetInChunk == aOther.mOffsetInChunk;
+  }
+
+  [[nodiscard]] bool operator!=(const InChunkPointer& aOther) const {
+    return !operator==(aOther);
+  }
+
+  [[nodiscard]] Byte operator*() const {  // Peek at the current byte.
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    return mChunk->ByteAt(mOffsetInChunk);
+  }
+
+  InChunkPointer& operator++() {  // Advance by one byte.
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    if (MOZ_UNLIKELY(++mOffsetInChunk == mChunk->OffsetPastLastBlock())) {
+      mOffsetInChunk = 0;  // Past this chunk's last written byte.
+      GoToNextChunk();
+      Adjust();
+    }
+    return *this;
+  }
+
+ private:
+  void GoToNextChunk() {  // Step to the next usable chunk, or become null.
+    MOZ_ASSERT(!IsNull());
+    const ProfileBufferIndex expectedNextRangeStart =
+        mChunk->RangeStart() + mChunk->BufferBytes();
+
+    mChunk = mChunk->GetNext();
+    if (!mChunk) {
+      // Reached the end of the current chunk group, try the next one (which
+      // may be null too, especially on the 2nd try).
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+    }
+
+    if (mChunk && mChunk->RangeStart() == 0) {
+      // Reached a chunk without a valid (non-null) range start, assume there
+      // are only unused chunks from here on.
+      mChunk = nullptr;
+    }
+
+    MOZ_ASSERT(!mChunk || mChunk->RangeStart() == expectedNextRangeStart,
+               "We don't handle discontinuous buffers (yet)");
+    // Non-DEBUG fallback: Stop reading past discontinuities.
+    // (They should be rare, only happening on temporary OOMs.)
+    // TODO: Handle discontinuities (by skipping over incomplete blocks).
+    if (mChunk && mChunk->RangeStart() != expectedNextRangeStart) {
+      mChunk = nullptr;
+    }
+  }
+
+  // We want `InChunkPointer` to always point at a valid byte (or be null).
+  // After some operations, `mOffsetInChunk` may point past the end of the
+  // current `mChunk`, in which case we need to adjust our position to be inside
+  // the appropriate chunk. E.g., if we're 10 bytes after the end of the current
+  // chunk, we should end up at offset 10 in the next chunk.
+  // Note that we may "fall off" the last chunk and make this `InChunkPointer`
+  // effectively null.
+  void Adjust() {
+    while (mChunk && mOffsetInChunk >= mChunk->OffsetPastLastBlock()) {
+      // TODO: Try to adjust offset between chunks relative to mRangeStart
+      // differences. But we don't handle discontinuities yet.
+      if (mOffsetInChunk < mChunk->BufferBytes()) {
+        mOffsetInChunk -= mChunk->BufferBytes();  // NOTE(review): can wrap?
+      } else {
+        mOffsetInChunk -= mChunk->OffsetPastLastBlock();
+      }
+      GoToNextChunk();
+    }
+  }
+
+  // Check if the current position is likely to point at a valid block.
+  // (Size should be reasonable, and block should fully fit inside buffer.)
+  // MOZ_ASSERTs on failure, to catch incorrect uses of block indices (which
+  // should only point at valid blocks if still in range). Non-asserting build
+  // fallback should still be handled.
+  [[nodiscard]] bool ShouldPointAtValidBlock() const {
+    if (IsNull()) {
+      // Pointer is null, no blocks here.
+      MOZ_ASSERT(false, "ShouldPointAtValidBlock - null pointer");
+      return false;
+    }
+    // Work on a copy (so `*this` is not modified) and read the entry size.
+    InChunkPointer pointer = *this;
+    Length entrySize = pointer.ReadEntrySize();
+    if (entrySize == 0) {
+      // Entry size of zero means we read 0 or a way-too-big value.
+      MOZ_ASSERT(false, "ShouldPointAtValidBlock - invalid size");
+      return false;
+    }
+    // The copy becomes null if the entry's last byte is outside the buffer.
+    pointer += entrySize - 1;
+    MOZ_ASSERT(!pointer.IsNull(),
+               "ShouldPointAtValidBlock - past end of buffer");
+    return !pointer.IsNull();
+  }
+
+  const ProfileBufferChunk* mChunk;  // Current chunk; nullptr = null pointer.
+  const ProfileBufferChunk* mNextChunkGroup;  // Group to visit after mChunk's.
+  Length mOffsetInChunk;  // Byte offset of the pointed-at byte inside mChunk.
+};
+
+}  // namespace detail
+
+// Thread-safe buffer that can store blocks of different sizes during defined
+// sessions, using Chunks (from a ChunkManager) as storage.
+//
+// Each *block* contains an *entry* and the entry size:
+// [ entry_size | entry ] [ entry_size | entry ] ...
+//
+// *In-session* is a period of time during which `ProfileChunkedBuffer` allows
+// reading and writing.
+// *Out-of-session*, the `ProfileChunkedBuffer` object is still valid, but
+// contains no data, and gracefully denies accesses.
+//
+// To write an entry, the buffer reserves a block of sufficient size (to contain
+// user data of predetermined size), writes the entry size, and lets the caller
+// fill the entry contents using a ProfileBufferEntryWriter. E.g.:
+// ```
+// ProfileChunkedBuffer cb(...);
+// cb.ReserveAndPut([]() { return sizeof(123); },
+//                  [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+//                    if (aEW) { aEW->WriteObject(123); }
+//                  });
+// ```
+// Other `Put...` functions may be used as shortcuts for simple entries.
+// The objects given to the caller's callbacks should only be used inside the
+// callbacks and not stored elsewhere, because they keep their own references to
+// chunk memory and therefore should not live longer.
+// Different types of objects may be serialized into an entry, see
+// `ProfileBufferEntryWriter::Serializer` for more information.
+//
+// When reading data, the buffer iterates over blocks (it knows how to read the
+// entry size, and therefore move to the next block), and lets the caller read
+// the entry inside of each block. E.g.:
+// ```
+// cb.ReadEach([](ProfileBufferEntryReader& aER) {
+//   /* Use ProfileBufferEntryReader functions to read serialized objects. */
+//   int n = aER.ReadObject<int>();
+// });
+// ```
+// Different types of objects may be deserialized from an entry, see
+// `ProfileBufferEntryReader::Deserializer` for more information.
+//
+// Writers may retrieve the block index corresponding to an entry
+// (`ProfileBufferBlockIndex` is an opaque type preventing the user from easily
+// modifying it). That index may later be used with `ReadAt` to get back to the
+// entry in that particular block -- if it still exists.
+class ProfileChunkedBuffer {
+ public:
+  using Byte = ProfileBufferChunk::Byte;
+  using Length = ProfileBufferChunk::Length;
+
+  enum class ThreadSafety { WithoutMutex, WithMutex };  // Given to ctors.
+
+  // No chunk manager given: Starts out-of-session (nothing to read or write).
+  explicit ProfileChunkedBuffer(ThreadSafety aThreadSafety)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {}
+
+  // Start in-session with an external chunk manager, which is not owned here.
+  ProfileChunkedBuffer(ThreadSafety aThreadSafety,
+                       ProfileBufferChunkManager& aChunkManager)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {
+    SetChunkManager(aChunkManager);
+  }
+
+  // Start in-session (if non-null) with a chunk manager owned by this buffer.
+  ProfileChunkedBuffer(ThreadSafety aThreadSafety,
+                       UniquePtr<ProfileBufferChunkManager>&& aChunkManager)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {
+    SetChunkManager(std::move(aChunkManager));
+  }
+
+  ~ProfileChunkedBuffer() {
+    // Do proper clean-up by resetting the chunk manager (this takes the lock).
+    ResetChunkManager();
+  }
+
+  // Whether the mutex is activated. This cannot change during the lifetime of
+  // this buffer, so there's no need to lock.
+  [[nodiscard]] bool IsThreadSafe() const { return mMutex.IsActivated(); }
+
+  [[nodiscard]] bool IsInSession() const {  // In-session = has chunk manager.
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return mChunkManager != nullptr;
+  }
+
+  // Stop using the current chunk manager.
+  // If we own the current chunk manager, it will be destroyed.
+  // This will always clear currently-held chunks, if any.
+  void ResetChunkManager() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    Unused << ResetChunkManager(lock);  // Discards the owned manager, if any.
+  }
+
+  // Set the current chunk manager, after resetting any previous one.
+  // The caller is responsible for keeping the chunk manager alive as long as
+  // it's used here (until the next (Re)SetChunkManager, or
+  // ~ProfileChunkedBuffer).
+  void SetChunkManager(ProfileBufferChunkManager& aChunkManager) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    Unused << ResetChunkManager(lock);
+    SetChunkManager(aChunkManager, lock);
+  }
+
+  // Set the current chunk manager, and keep ownership of it.
+  void SetChunkManager(UniquePtr<ProfileBufferChunkManager>&& aChunkManager) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    Unused << ResetChunkManager(lock);
+    mOwnedChunkManager = std::move(aChunkManager);
+    if (mOwnedChunkManager) {  // A null manager is stored but not put in use.
+      SetChunkManager(*mOwnedChunkManager, lock);
+    }
+  }
+
+  // Stop using the current chunk manager; the caller gets it if owned here.
+  [[nodiscard]] UniquePtr<ProfileBufferChunkManager> ExtractChunkManager() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return ResetChunkManager(lock);
+  }
+
+  // Clear the contents of this buffer, ready to receive new chunks.
+  // Note that memory is not freed: No chunks are destroyed, they are all
+  // recycled.
+  // Also the range doesn't reset, instead it continues at some point after the
+  // previous range. This may be useful if the caller may be keeping indexes
+  // into old chunks that have now been cleared, using these indexes will fail
+  // gracefully (instead of potentially pointing into new data).
+  void Clear() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      // Out-of-session.
+      return;
+    }
+
+    mRangeStart = mRangeEnd = mNextChunkRangeStart;
+    mPushedBlockCount = 0;
+    mClearedBlockCount = 0;
+    mFailedPutBytes = 0;
+
+    // Recycle all released chunks as "next" chunks. This will reduce the number
+    // of future allocations. Also, when using ProfileBufferChunkManagerSingle,
+    // this retrieves the one chunk if it was released.
+    UniquePtr<ProfileBufferChunk> releasedChunks =
+        mChunkManager->GetExtantReleasedChunks();
+    if (releasedChunks) {
+      // Released chunks should be in the "Done" state, they need to be marked
+      // "recycled" before they can be reused.
+      for (ProfileBufferChunk* chunk = releasedChunks.get(); chunk;
+           chunk = chunk->GetNext()) {
+        chunk->MarkRecycled();
+      }
+      mNextChunks = ProfileBufferChunk::Join(std::move(mNextChunks),
+                                             std::move(releasedChunks));
+    }
+
+    if (mCurrentChunk) {
+      // We already have a current chunk (empty or in-use), mark it "done" and
+      // then "recycled", ready to be reused.
+      mCurrentChunk->MarkDone();
+      mCurrentChunk->MarkRecycled();
+    } else {
+      if (!mNextChunks) {
+        // No current chunk, and no next chunks to recycle, nothing more to do.
+        // The next "Put" operation will try to allocate a chunk as needed.
+        return;
+      }
+
+      // No current chunk, take the first next chunk (the rest stays "next").
+      mCurrentChunk = std::exchange(mNextChunks, mNextChunks->ReleaseNext());
+    }
+
+    // Here, there was already a current chunk, or one has just been taken.
+    // Make sure it's ready to receive new entries.
+    InitializeCurrentChunk(lock);
+  }
+
+  // Maximum buffer length in bytes, or Nothing when out-of-session.
+  Maybe<size_t> BufferLength() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (mChunkManager) {
+      return Some(mChunkManager->MaxTotalSize());
+    }
+    return Nothing{};
+  }
+
+  [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return SizeOfExcludingThis(aMallocSizeOf, lock);  // Locked overload.
+  }
+
+  [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return SizeOfExcludingThis(aMallocSizeOf, lock) + aMallocSizeOf(this);
+  }
+
+  // Plain snapshot of the buffer's range and counters, see `GetState()`.
+  struct State {
+    // Index at (or before) the first block.
+    ProfileBufferIndex mRangeStart{1};
+
+    // Index just past the last block; same as mRangeStart when empty.
+    ProfileBufferIndex mRangeEnd{1};
+
+    // How many blocks have been pushed into this buffer.
+    uint64_t mPushedBlockCount{0};
+
+    // How many blocks have been removed from this buffer.
+    // Note: Live entries = pushed - cleared.
+    uint64_t mClearedBlockCount{0};
+
+    // How many bytes could not be put into this buffer.
+    uint64_t mFailedPutBytes{0};
+  };
+
+  // Get a snapshot of the current state.
+  // When out-of-session, mRangeStart==mRangeEnd, and
+  // mPushedBlockCount==mClearedBlockCount==0.
+  // Note that these may change right after this thread-safe call, so they
+  // should only be used for statistical purposes.
+  [[nodiscard]] State GetState() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return State{mRangeStart, mRangeEnd, mPushedBlockCount, mClearedBlockCount,
+                 mFailedPutBytes};
+  }
+
+  [[nodiscard]] bool IsEmpty() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return mRangeStart == mRangeEnd;  // Empty range = no blocks.
+  }
+
+  // True if this buffer's mutex is activated and locked on this thread.
+  // This should be used if some functions may call an already-locked buffer,
+  // e.g.: Put -> memory hook -> profiler_add_native_allocation_marker -> Put.
+  [[nodiscard]] bool IsThreadSafeAndLockedOnCurrentThread() const {
+    return mMutex.IsActivatedAndLockedOnCurrentThread();
+  }
+
+  // Lock the buffer mutex, run the provided callback, and return its result.
+  // This can be useful when the caller needs to explicitly lock down this
+  // buffer, but not do anything else with it.
+  template <typename Callback>
+  auto LockAndRun(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return std::forward<Callback>(aCallback)();
+  }
+
+  // Reserve a block that can hold an entry of the given `aCallbackEntryBytes()`
+  // size, write the entry size (ULEB128-encoded), and invoke and return
+  // `aCallback(Maybe<ProfileBufferEntryWriter>&)`.
+  // Note: `aCallbackEntryBytes` is a callback instead of a simple value, to
+  // delay this potentially-expensive computation until after we've checked that
+  // we're in-session; use `Put(Length, Callback)` below if you know the size
+  // already.
+  template <typename CallbackEntryBytes, typename Callback>
+  auto ReserveAndPut(CallbackEntryBytes&& aCallbackEntryBytes,
+                     Callback&& aCallback)
+      -> decltype(std::forward<Callback>(aCallback)(
+          std::declval<Maybe<ProfileBufferEntryWriter>&>())) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+
+    // This can only be read in the 2nd lambda below after it has been written
+    // by the first lambda.
+    Length entryBytes;
+
+    return ReserveAndPutRaw(
+        [&]() {
+          entryBytes = std::forward<CallbackEntryBytes>(aCallbackEntryBytes)();
+          MOZ_ASSERT(entryBytes != 0, "Empty entries are not allowed");
+          return ULEB128Size(entryBytes) + entryBytes;  // Whole block size.
+        },
+        [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+          if (aMaybeEntryWriter.isSome()) {
+            aMaybeEntryWriter->WriteULEB128(entryBytes);
+            MOZ_ASSERT(aMaybeEntryWriter->RemainingBytes() == entryBytes);
+          }
+          return std::forward<Callback>(aCallback)(aMaybeEntryWriter);
+        },
+        lock);
+  }
+
+  template <typename Callback>  // Like ReserveAndPut, with a known size.
+  auto Put(Length aEntryBytes, Callback&& aCallback) {
+    return ReserveAndPut([aEntryBytes]() { return aEntryBytes; },
+                         std::forward<Callback>(aCallback));
+  }
+
+  // Add a new entry copied from `aSrc` (`aBytes` long), return its block index.
+  ProfileBufferBlockIndex PutFrom(const void* aSrc, Length aBytes) {
+    return ReserveAndPut(
+        [aBytes]() { return aBytes; },
+        [aSrc, aBytes](Maybe<ProfileBufferEntryWriter>& aWriter) {
+          if (aWriter.isSome()) {
+            aWriter->WriteBytes(aSrc, aBytes);
+            return aWriter->CurrentBlockIndex();
+          }
+          return ProfileBufferBlockIndex{};  // No writer, nothing written.
+        });
+  }
+
+  // Add one new entry containing *all* the given objects (each one written
+  // through its Serializer).
+  // Returns the block index of the new entry, or a default-constructed
+  // `ProfileBufferBlockIndex` if no entry writer could be obtained.
+  template <typename... Ts>
+  ProfileBufferBlockIndex PutObjects(const Ts&... aTs) {
+    static_assert(sizeof...(Ts) > 0,
+                  "PutObjects must be given at least one object.");
+    // Total serialized size, computed lazily by `ReserveAndPut`.
+    auto sumOfObjectBytes = [&]() {
+      return ProfileBufferEntryWriter::SumBytes(aTs...);
+    };
+    auto writeAllObjects = [&](Maybe<ProfileBufferEntryWriter>& aWriter) {
+      if (aWriter.isSome()) {
+        aWriter->WriteObjects(aTs...);
+        return aWriter->CurrentBlockIndex();
+      }
+      // No space could be reserved.
+      return ProfileBufferBlockIndex{};
+    };
+    return ReserveAndPut(std::move(sumOfObjectBytes),
+                         std::move(writeAllObjects));
+  }
+
+  // Single-object convenience form of `PutObjects`: add a new entry holding
+  // the serialized `aOb`, and return what `PutObjects` returns for it.
+  template <typename T>
+  ProfileBufferBlockIndex PutObject(const T& aOb) {
+    ProfileBufferBlockIndex blockIndex = PutObjects(aOb);
+    return blockIndex;
+  }
+
+  // Extract *every* chunk related to this buffer as one joined list, in this
+  // order: the extant released chunks from the ChunkManager, the current
+  // chunk (marked done), and the not-yet-used next chunks.
+  // Returns nullptr when out-of-session.
+  // Afterwards the stored range is empty, starting at the position where the
+  // next chunk would begin. The buffer is not expected to be used again, but
+  // it still can be: a current chunk will be obtained again when needed.
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetAllChunks() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      // Out-of-session.
+      return nullptr;
+    }
+
+    UniquePtr<ProfileBufferChunk> allChunks =
+        mChunkManager->GetExtantReleasedChunks();
+
+    // Collect a possibly-fulfilled chunk request; whether a request is still
+    // pending doesn't matter here.
+    Unused << HandleRequestedChunk_IsPending(lock);
+
+    if (MOZ_LIKELY(!!mCurrentChunk)) {
+      mCurrentChunk->MarkDone();
+      allChunks = ProfileBufferChunk::Join(std::move(allChunks),
+                                           std::move(mCurrentChunk));
+    }
+    allChunks =
+        ProfileBufferChunk::Join(std::move(allChunks), std::move(mNextChunks));
+
+    mChunkManager->ForgetUnreleasedChunks();
+
+    // Nothing is stored anymore: collapse the range to an empty one.
+    mRangeEnd = mNextChunkRangeStart;
+    mRangeStart = mNextChunkRangeStart;
+    return allChunks;
+  }
+
+  class Reader;
+
+  // Class that can iterate through blocks and provide
+  // `ProfileBufferEntryReader`s.
+  // Created through `Reader`, lives within a lock guard lifetime.
+  // Each iterator keeps three pieces of state: the block index of the current
+  // block, an entry reader for that block, and an in-chunk pointer from which
+  // the following block will be read on the next `operator++`.
+  class BlockIterator {
+   public:
+#ifdef DEBUG
+    ~BlockIterator() {
+      // No BlockIterator should live outside of a mutexed call.
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Comparison with other iterator, mostly used in range-for loops.
+    // Only the current block indices are compared; both iterators must come
+    // from the same buffer (asserted in debug builds).
+    [[nodiscard]] bool operator==(const BlockIterator& aRhs) const {
+      MOZ_ASSERT(mBuffer == aRhs.mBuffer);
+      return mCurrentBlockIndex == aRhs.mCurrentBlockIndex;
+    }
+    [[nodiscard]] bool operator!=(const BlockIterator& aRhs) const {
+      MOZ_ASSERT(mBuffer == aRhs.mBuffer);
+      return mCurrentBlockIndex != aRhs.mCurrentBlockIndex;
+    }
+
+    // Advance to next BlockIterator.
+    // The new current block index is the position `mNextBlockPointer` is at,
+    // and the new current entry is read from there.
+    // NOTE(review): this presumably relies on `ReadEntrySize()` and
+    // `EntryReader()` moving `mNextBlockPointer` past the entry -- confirm in
+    // `detail::InChunkPointer`.
+    BlockIterator& operator++() {
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+      mCurrentBlockIndex =
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              mNextBlockPointer.GlobalRangePosition());
+      mCurrentEntry =
+          mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize());
+      return *this;
+    }
+
+    // Dereferencing creates a `ProfileBufferEntryReader` object for the entry
+    // inside this block.
+    // (Note: It would be possible to return a `const
+    // ProfileBufferEntryReader&`, but not useful in practice, because in most
+    // case the user will want to read, which is non-const.)
+    // The returned reader is a copy of the stored one, so reading from it
+    // does not modify this iterator.
+    [[nodiscard]] ProfileBufferEntryReader operator*() const {
+      return mCurrentEntry;
+    }
+
+    // True if this iterator is just past the last entry.
+    // This is detected by the current entry reader having no bytes.
+    [[nodiscard]] bool IsAtEnd() const {
+      return mCurrentEntry.RemainingBytes() == 0;
+    }
+
+    // Can be used as reference to come back to this entry with `GetEntryAt()`.
+    [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+      return mCurrentBlockIndex;
+    }
+
+    // Index past the end of this block, which is the start of the next block.
+    // Must not be called on an iterator that is at the end (asserted in debug
+    // builds).
+    [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+      MOZ_ASSERT(!IsAtEnd());
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mNextBlockPointer.GlobalRangePosition());
+    }
+
+    // Index of the first block in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const {
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mBuffer->mRangeStart);
+    }
+
+    // Index past the last block in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const {
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mBuffer->mRangeEnd);
+    }
+
+   private:
+    // Only a Reader can instantiate a BlockIterator.
+    friend class Reader;
+
+    // Build an iterator over the two given chunk lists (either may be null),
+    // positioned according to `aBlockIndex`.
+    // The initializers below depend on the member declaration order:
+    // `mNextBlockPointer` is constructed first, and is then used to compute
+    // `mCurrentBlockIndex` and `mCurrentEntry`.
+    BlockIterator(const ProfileChunkedBuffer& aBuffer,
+                  const ProfileBufferChunk* aChunks0,
+                  const ProfileBufferChunk* aChunks1,
+                  ProfileBufferBlockIndex aBlockIndex)
+        : mNextBlockPointer(aChunks0, aChunks1, aBlockIndex),
+          mCurrentBlockIndex(
+              ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                  mNextBlockPointer.GlobalRangePosition())),
+          mCurrentEntry(
+              mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize())),
+          mBuffer(WrapNotNull(&aBuffer)) {
+      // No BlockIterator should live outside of a mutexed call.
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+    }
+
+    // Pointer used to read the block following the current one.
+    // Must stay the first data member, see the constructor.
+    detail::InChunkPointer mNextBlockPointer;
+
+    // Index of the block this iterator currently designates.
+    ProfileBufferBlockIndex mCurrentBlockIndex;
+
+    // Reader for the entry in the current block; it has no bytes when this
+    // iterator is at the end (see `IsAtEnd()`).
+    ProfileBufferEntryReader mCurrentEntry;
+
+    // Using a non-null pointer instead of a reference, to allow copying.
+    // This BlockIterator should only live inside one of the thread-safe
+    // ProfileChunkedBuffer functions, for this reference to stay valid.
+    NotNull<const ProfileChunkedBuffer*> mBuffer;
+  };
+
+  // Class that can create `BlockIterator`s (e.g., for range-for), or just
+  // iterate through entries; lives within a lock guard lifetime.
+  // A Reader views two chunk lists (`mChunks0` then `mChunks1`, either may be
+  // null); it can only be constructed by `ProfileChunkedBuffer`, and can be
+  // neither copied nor moved.
+  class MOZ_RAII Reader {
+   public:
+    Reader(const Reader&) = delete;
+    Reader& operator=(const Reader&) = delete;
+    Reader(Reader&&) = delete;
+    Reader& operator=(Reader&&) = delete;
+
+#ifdef DEBUG
+    ~Reader() {
+      // No Reader should live outside of a mutexed call.
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Index of the first block in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const {
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mBuffer.mRangeStart);
+    }
+
+    // Index past the last block in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const {
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mBuffer.mRangeEnd);
+    }
+
+    // Iterators to the first and past-the-last blocks.
+    // Compatible with range-for (see `ForEach` below as example).
+    // `begin()` is built from both chunk lists with a null block index.
+    [[nodiscard]] BlockIterator begin() const {
+      return BlockIterator(mBuffer, mChunks0, mChunks1, nullptr);
+    }
+    // Note that a `BlockIterator` at the `end()` should not be dereferenced, as
+    // there is no actual block there!
+    // `end()` is built without any chunk list.
+    [[nodiscard]] BlockIterator end() const {
+      return BlockIterator(mBuffer, nullptr, nullptr, nullptr);
+    }
+
+    // Get a `BlockIterator` at the given `ProfileBufferBlockIndex`, clamped to
+    // the stored range. Note that a `BlockIterator` at the `end()` should not
+    // be dereferenced, as there is no actual block there!
+    [[nodiscard]] BlockIterator At(ProfileBufferBlockIndex aBlockIndex) const {
+      if (aBlockIndex < BufferRangeStart()) {
+        // Anything before the range (including null ProfileBufferBlockIndex) is
+        // clamped at the beginning.
+        return begin();
+      }
+      // Otherwise we at least expect the index to be valid (pointing exactly at
+      // a live block, or just past the end.)
+      return BlockIterator(mBuffer, mChunks0, mChunks1, aBlockIndex);
+    }
+
+    // Run `aCallback(ProfileBufferEntryReader&)` on each entry from first to
+    // last. Callback should not store `ProfileBufferEntryReader`, as it may
+    // become invalid after this thread-safe call.
+    // Each entry reader handed to the callback is a local copy obtained by
+    // dereferencing the iterator.
+    template <typename Callback>
+    void ForEach(Callback&& aCallback) const {
+      for (ProfileBufferEntryReader reader : *this) {
+        aCallback(reader);
+      }
+    }
+
+    // If this reader only points at one chunk with some data, this data will be
+    // exposed as a single entry.
+    // A chunk is considered to have data when its first-block offset differs
+    // from its past-last-block offset. If no chunk, or more than one chunk,
+    // has data, a default-constructed `ProfileBufferEntryReader` is returned.
+    [[nodiscard]] ProfileBufferEntryReader SingleChunkDataAsEntry() {
+      const ProfileBufferChunk* onlyNonEmptyChunk = nullptr;
+      // Scan both chunk lists, remembering the one chunk that has data.
+      for (const ProfileBufferChunk* chunkList : {mChunks0, mChunks1}) {
+        for (const ProfileBufferChunk* chunk = chunkList; chunk;
+             chunk = chunk->GetNext()) {
+          if (chunk->OffsetFirstBlock() != chunk->OffsetPastLastBlock()) {
+            if (onlyNonEmptyChunk) {
+              // More than one non-empty chunk.
+              return ProfileBufferEntryReader();
+            }
+            onlyNonEmptyChunk = chunk;
+          }
+        }
+      }
+      if (!onlyNonEmptyChunk) {
+        // No non-empty chunks.
+        return ProfileBufferEntryReader();
+      }
+      // Here, we have found one chunk that had some data.
+      // The reader spans the bytes from the first block to past the last
+      // block; its two block indices are the chunk's range start, and that
+      // range start plus the number of bytes spanned.
+      return ProfileBufferEntryReader(
+          onlyNonEmptyChunk->BufferSpan().FromTo(
+              onlyNonEmptyChunk->OffsetFirstBlock(),
+              onlyNonEmptyChunk->OffsetPastLastBlock()),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              onlyNonEmptyChunk->RangeStart()),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              onlyNonEmptyChunk->RangeStart() +
+              (onlyNonEmptyChunk->OffsetPastLastBlock() -
+               onlyNonEmptyChunk->OffsetFirstBlock())));
+    }
+
+   private:
+    // Only a ProfileChunkedBuffer can instantiate a Reader.
+    friend class ProfileChunkedBuffer;
+
+    // Create a reader over the chunk lists `aChunks0` followed by `aChunks1`
+    // (either may be null), on behalf of `aBuffer`, whose mutex must be held
+    // by the current thread.
+    explicit Reader(const ProfileChunkedBuffer& aBuffer,
+                    const ProfileBufferChunk* aChunks0,
+                    const ProfileBufferChunk* aChunks1)
+        : mBuffer(aBuffer), mChunks0(aChunks0), mChunks1(aChunks1) {
+      // No Reader should live outside of a mutexed call.
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+    }
+
+    // This Reader should only live inside one of the thread-safe
+    // ProfileChunkedBuffer functions, for this reference to stay valid.
+    const ProfileChunkedBuffer& mBuffer;
+    // Heads of the two chunk lists being read (not owned, may be null).
+    const ProfileBufferChunk* mChunks0;
+    const ProfileBufferChunk* mChunks1;
+  };
+
+  // Invoke `aCallback(ProfileChunkedBuffer::Reader*)` while holding the buffer
+  // lock, and return whatever the callback returns.
+  // If in-session, the callback is given a pointer to a temporary `Reader`
+  // over the extant released chunks followed by the current chunk.
+  // If out-of-session, the callback is given a null `Reader*`.
+  // Callback should not store the `Reader` pointer, because it becomes invalid
+  // after this call.
+  template <typename Callback>
+  [[nodiscard]] auto Read(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_LIKELY(mChunkManager)) {
+      // In-session.
+      return mChunkManager->PeekExtantReleasedChunks(
+          [&](const ProfileBufferChunk* aOldestChunk) {
+            Reader sessionReader(*this, aOldestChunk, mCurrentChunk.get());
+            return std::forward<Callback>(aCallback)(&sessionReader);
+          });
+    }
+    // Out-of-session.
+    return std::forward<Callback>(aCallback)(static_cast<Reader*>(nullptr));
+  }
+
+  // Walk through the entries stored in the two given chunk lists (either/both
+  // chunk pointers may be null), and invoke
+  // `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])` on each
+  // entry; the callback must read or at least skip everything in the entry.
+  // The walk stops at the first entry size of zero, or at the first entry
+  // reader that has no bytes.
+  template <typename Callback>
+  static void ReadEach(const ProfileBufferChunk* aChunks0,
+                       const ProfileBufferChunk* aChunks1,
+                       Callback&& aCallback) {
+    // Does the callback also want the block index of each entry?
+    constexpr bool kTakesBlockIndex =
+        std::is_invocable_v<Callback, ProfileBufferEntryReader&,
+                            ProfileBufferBlockIndex>;
+    static_assert(std::is_invocable_v<Callback, ProfileBufferEntryReader&> ||
+                      kTakesBlockIndex,
+                  "ReadEach callback must take ProfileBufferEntryReader& and "
+                  "optionally a ProfileBufferBlockIndex");
+    for (detail::InChunkPointer cursor{aChunks0, aChunks1};
+         !cursor.IsNull();) {
+      // The position right before an entry size *is* a block index.
+      const ProfileBufferBlockIndex blockIndex =
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              cursor.GlobalRangePosition());
+      const Length entrySize = cursor.ReadEntrySize();
+      if (entrySize == 0) {
+        // No entry size could be read here, stop.
+        return;
+      }
+      ProfileBufferEntryReader entryReader = cursor.EntryReader(entrySize);
+      if (entryReader.RemainingBytes() == 0) {
+        // No entry could be read here, stop.
+        return;
+      }
+      MOZ_ASSERT(entryReader.RemainingBytes() == entrySize);
+      if constexpr (kTakesBlockIndex) {
+        aCallback(entryReader, blockIndex);
+      } else {
+        Unused << blockIndex;
+        aCallback(entryReader);
+      }
+      // The callback is required to consume the whole entry.
+      MOZ_ASSERT(entryReader.RemainingBytes() == 0);
+    }
+  }
+
+  // Locked version of the static `ReadEach`: invoke
+  // `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])` on each
+  // entry of the extant released chunks followed by the current chunk; the
+  // callback must read or at least skip everything.
+  // Does nothing when out-of-session.
+  template <typename Callback>
+  void ReadEach(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_LIKELY(mChunkManager)) {
+      // In-session.
+      mChunkManager->PeekExtantReleasedChunks(
+          [&](const ProfileBufferChunk* aOldestChunk) {
+            ReadEach(aOldestChunk, mCurrentChunk.get(),
+                     std::forward<Callback>(aCallback));
+          });
+    }
+  }
+
+  // Look for the entry at `aMinimumBlockIndex` in the two given chunk lists
+  // (either/both chunk pointers may be null), then invoke and return
+  // `aCallback(Maybe<ProfileBufferEntryReader>&&)`.
+  // The `Maybe` is `Nothing` if there are no chunks, if the requested position
+  // cannot be reached, or if no complete non-empty entry can be read there.
+  // Callback should not store `ProfileBufferEntryReader`, because it may
+  // become invalid after this call; in debug builds, it is asserted after the
+  // callback that a provided reader was entirely read.
+  template <typename Callback>
+  [[nodiscard]] static auto ReadAt(ProfileBufferBlockIndex aMinimumBlockIndex,
+                                   const ProfileBufferChunk* aChunks0,
+                                   const ProfileBufferChunk* aChunks1,
+                                   Callback&& aCallback) {
+    static_assert(
+        std::is_invocable_v<Callback, Maybe<ProfileBufferEntryReader>&&>,
+        "ReadAt callback must take a Maybe<ProfileBufferEntryReader>&&");
+    Maybe<ProfileBufferEntryReader> entryAtIndex;
+    if (detail::InChunkPointer cursor{aChunks0, aChunks1}; !cursor.IsNull()) {
+      const auto minimumPosition =
+          aMinimumBlockIndex.ConvertToProfileBufferIndex();
+      // Either the cursor already is at or past the requested position, or we
+      // try to advance it there (only attempted in the latter case).
+      const bool positioned =
+          cursor.GlobalRangePosition() >= minimumPosition ||
+          cursor.AdvanceToGlobalRangePosition(minimumPosition);
+      if (positioned) {
+        MOZ_ASSERT(cursor.GlobalRangePosition() >= minimumPosition);
+        // Here we're pointing at the start of a block, try to read the entry
+        // size. (Entries cannot be empty, so 0 means failure.)
+        const Length entrySize = cursor.ReadEntrySize();
+        if (entrySize != 0) {
+          entryAtIndex.emplace(cursor.EntryReader(entrySize));
+          if (entryAtIndex->RemainingBytes() != 0) {
+            MOZ_ASSERT(entryAtIndex->RemainingBytes() == entrySize);
+          } else {
+            // An empty entry reader means there was no complete block at the
+            // given index.
+            entryAtIndex.reset();
+          }
+        }
+      }
+    }
+#ifdef DEBUG
+    // Runs after the callback has returned.
+    auto assertAllRead = MakeScopeExit([&]() {
+      MOZ_ASSERT(!entryAtIndex || entryAtIndex->RemainingBytes() == 0);
+    });
+#endif  // DEBUG
+    return std::forward<Callback>(aCallback)(std::move(entryAtIndex));
+  }
+
+  // Locked version of the static `ReadAt`: invoke
+  // `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at the given
+  // ProfileBufferBlockIndex, looking in the extant released chunks followed by
+  // the current chunk, and return whatever `aCallback` returns.
+  // The `Maybe` will be `Nothing` if out-of-session, or if that entry doesn't
+  // exist anymore, or if we've reached just past the last entry.
+  // Callback should not store `ProfileBufferEntryReader`, because it may
+  // become invalid after this call.
+  template <typename Callback>
+  [[nodiscard]] auto ReadAt(ProfileBufferBlockIndex aBlockIndex,
+                            Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_LIKELY(mChunkManager)) {
+      // In-session.
+      return mChunkManager->PeekExtantReleasedChunks(
+          [&](const ProfileBufferChunk* aOldestChunk) {
+            return ReadAt(aBlockIndex, aOldestChunk, mCurrentChunk.get(),
+                          std::forward<Callback>(aCallback));
+          });
+    }
+    // Out-of-session.
+    return std::forward<Callback>(aCallback)(Nothing{});
+  }
+
+  // Copy every entry of `aSrc` at the end of this buffer, one `Put` per entry.
+  // Returns the block index of the first copied entry. Returns a null index
+  // if a `Put` failed (copying stops at the first failure), and a
+  // default-constructed index if there was nothing to copy.
+  // NOTE(review): on the failure paths the source entry reader is left
+  // unread, although `ReadEach` asserts that each entry was entirely read --
+  // confirm this is acceptable in debug builds.
+  ProfileBufferBlockIndex AppendContents(const ProfileChunkedBuffer& aSrc) {
+    ProfileBufferBlockIndex firstBlockIndex;
+    // Cleared by the first failed write, after which nothing more is written.
+    bool allWritten = true;
+    aSrc.ReadEach([&](ProfileBufferEntryReader& aER) {
+      if (!allWritten) {
+        return;
+      }
+      allWritten =
+          Put(aER.RemainingBytes(), [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+            if (aEW.isNothing()) {
+              return false;
+            }
+            if (!firstBlockIndex) {
+              // Remember where the appended contents start.
+              firstBlockIndex = aEW->CurrentBlockIndex();
+            }
+            aEW->WriteFromReader(aER, aER.RemainingBytes());
+            return true;
+          });
+    });
+    if (!allWritten) {
+      return nullptr;
+    }
+    return firstBlockIndex;
+  }
+
+#ifdef DEBUG
+  // Debug-only: print a description of this buffer to `aFile`, while holding
+  // the buffer lock: first the range and counters, then (if in-session) each
+  // chunk prefixed with "R " (released), "C " (current) or "N " (next), then
+  // the state of the chunk request.
+  void Dump(std::FILE* aFile = stdout) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    fprintf(aFile,
+            "ProfileChunkedBuffer[%p] State: range %u-%u pushed=%u cleared=%u "
+            "(live=%u) failed-puts=%u bytes",
+            this, unsigned(mRangeStart), unsigned(mRangeEnd),
+            unsigned(mPushedBlockCount), unsigned(mClearedBlockCount),
+            unsigned(mPushedBlockCount) - unsigned(mClearedBlockCount),
+            unsigned(mFailedPutBytes));
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      fprintf(aFile, " - Out-of-session\n");
+      return;
+    }
+    fprintf(aFile, " - chunks:\n");
+
+    // Set as soon as any chunk has been dumped.
+    bool dumpedAnyChunk = false;
+
+    // Released chunks, still known to the chunk manager.
+    mChunkManager->PeekExtantReleasedChunks(
+        [&](const ProfileBufferChunk* aOldestChunk) {
+          const ProfileBufferChunk* released = aOldestChunk;
+          while (released) {
+            fprintf(aFile, "R ");
+            released->Dump(aFile);
+            dumpedAnyChunk = true;
+            released = released->GetNext();
+          }
+        });
+
+    // Current chunk, if any.
+    if (mCurrentChunk) {
+      fprintf(aFile, "C ");
+      mCurrentChunk->Dump(aFile);
+      dumpedAnyChunk = true;
+    }
+
+    // Next chunks, not used yet.
+    const ProfileBufferChunk* upcoming = mNextChunks.get();
+    while (upcoming) {
+      fprintf(aFile, "N ");
+      upcoming->Dump(aFile);
+      dumpedAnyChunk = true;
+      upcoming = upcoming->GetNext();
+    }
+
+    switch (mRequestedChunkHolder->GetState()) {
+      case RequestedChunkRefCountedHolder::State::Unused:
+        fprintf(aFile, " - No request pending.\n");
+        break;
+      case RequestedChunkRefCountedHolder::State::Requested:
+        fprintf(aFile, " - Request pending.\n");
+        break;
+      case RequestedChunkRefCountedHolder::State::Fulfilled:
+        fprintf(aFile, " - Request fulfilled.\n");
+        break;
+    }
+
+    if (!dumpedAnyChunk) {
+      fprintf(aFile, " No chunks.\n");
+    }
+  }
+#endif  // DEBUG
+
+ private:
+  // Used to de/serialize a ProfileChunkedBuffer (e.g., containing a backtrace).
+  friend ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>;
+  friend ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer>;
+  friend ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>>;
+  friend ProfileBufferEntryReader::Deserializer<
+      UniquePtr<ProfileChunkedBuffer>>;
+
+  // End the current session, if any: detach from the chunk manager, drop the
+  // current and next chunks, make the stored range empty (at the current range
+  // end), and reset the block and failure counters.
+  // Returns the owned chunk manager, which may be null; always returns null
+  // when already out-of-session.
+  [[nodiscard]] UniquePtr<ProfileBufferChunkManager> ResetChunkManager(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock&) {
+    if (!mChunkManager) {
+      // Already out-of-session, nothing to reset.
+      return nullptr;
+    }
+
+    // Detach from the chunk manager.
+    mRequestedChunkHolder = nullptr;
+    mChunkManager->ForgetUnreleasedChunks();
+#ifdef DEBUG
+    mChunkManager->DeregisteredFrom(this);
+#endif
+    mChunkManager = nullptr;
+    UniquePtr<ProfileBufferChunkManager> ownedChunkManager =
+        std::move(mOwnedChunkManager);
+
+    // Drop all the chunks we were holding.
+    if (mCurrentChunk) {
+      mCurrentChunk->MarkDone();
+      mCurrentChunk = nullptr;
+    }
+    mNextChunks = nullptr;
+
+    // Empty range, and fresh counters.
+    mNextChunkRangeStart = mRangeEnd;
+    mRangeStart = mRangeEnd;
+    mPushedBlockCount = 0;
+    mClearedBlockCount = 0;
+    mFailedPutBytes = 0;
+
+    return ownedChunkManager;
+  }
+
+  // Start a session using `aChunkManager` (there must be no chunk manager set
+  // yet): register with it, install the chunk-destroyed callback that keeps
+  // `mRangeStart` and `mClearedBlockCount` up to date, get a first current
+  // chunk, and request a following chunk.
+  void SetChunkManager(
+      ProfileBufferChunkManager& aChunkManager,
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    MOZ_ASSERT(!mChunkManager);
+    mChunkManager = &aChunkManager;
+#ifdef DEBUG
+    mChunkManager->RegisteredWith(this);
+#endif
+
+    mChunkManager->SetChunkDestroyedCallback(
+        [this](const ProfileBufferChunk& aChunk) {
+          // When a chunk is destroyed, the stored range must start at or after
+          // the end of that chunk.
+          const ProfileBufferIndex chunkRangeEnd =
+              aChunk.RangeStart() + aChunk.BufferBytes();
+          for (;;) {
+            ProfileBufferIndex rangeStart = mRangeStart;
+            if (MOZ_UNLIKELY(rangeStart >= chunkRangeEnd)) {
+              // The range start is already past this chunk, leave it alone.
+              // Nothing here ever moves `mRangeStart` backwards, so retrying
+              // would spin forever.
+              break;
+            }
+            // Only retry if `mRangeStart` changed since it was read above.
+            if (MOZ_LIKELY(
+                    mRangeStart.compareExchange(rangeStart, chunkRangeEnd))) {
+              break;
+            }
+          }
+          mClearedBlockCount += aChunk.BlockCount();
+        });
+
+    // We start with one chunk right away, and request a following one now
+    // so it should be available before the current chunk is full.
+    SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock);
+    mRequestedChunkHolder = MakeRefPtr<RequestedChunkRefCountedHolder>();
+    RequestChunk(aLock);
+  }
+
+  // Measure the memory used on behalf of this buffer, with `aMallocSizeOf`:
+  // the chunk manager, plus the current chunk and the next chunks if present.
+  // Returns 0 when out-of-session.
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf,
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock&) const {
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      // Out-of-session.
+      return 0;
+    }
+    const size_t managerBytes =
+        mChunkManager->SizeOfIncludingThis(aMallocSizeOf);
+    const size_t currentChunkBytes =
+        mCurrentChunk ? mCurrentChunk->SizeOfIncludingThis(aMallocSizeOf) : 0;
+    const size_t nextChunksBytes =
+        mNextChunks ? mNextChunks->SizeOfIncludingThis(aMallocSizeOf) : 0;
+    return managerBytes + currentChunkBytes + nextChunksBytes;
+  }
+
+  // Prepare the current chunk (which must exist) for use: place it at
+  // `mNextChunkRangeStart`, move `mNextChunkRangeStart` past its buffer, and
+  // reserve an initial tail block of 0 bytes.
+  void InitializeCurrentChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock&) {
+    MOZ_ASSERT(!!mCurrentChunk);
+    ProfileBufferChunk& currentChunk = *mCurrentChunk;
+    currentChunk.SetRangeStart(mNextChunkRangeStart);
+    mNextChunkRangeStart += currentChunk.BufferBytes();
+    Unused << currentChunk.ReserveInitialBlockAsTail(0);
+  }
+
+  // Make `aChunk` (which may be null) the current chunk, and initialize it if
+  // it is not null.
+  void SetAndInitializeCurrentChunk(
+      UniquePtr<ProfileBufferChunk>&& aChunk,
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    mCurrentChunk = std::move(aChunk);
+    if (!mCurrentChunk) {
+      // No chunk was given, nothing to initialize.
+      return;
+    }
+    InitializeCurrentChunk(aLock);
+  }
+
+  // Ask the chunk manager for one more chunk, unless a request is already
+  // pending. A previously-fulfilled request, if any, is handled first.
+  void RequestChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    if (!HandleRequestedChunk_IsPending(aLock)) {
+      // No request is pending, we may start a new one.
+
+      // Ensure the `RequestedChunkHolder` knows we're starting a request.
+      mRequestedChunkHolder->StartRequest();
+
+      // The callback carries its own `RefPtr` to the `RequestedChunkHolder`,
+      // so the holder is guaranteed to live until the callback is invoked,
+      // even if this `ProfileChunkedBuffer` changes its `ChunkManager` or is
+      // destroyed.
+      mChunkManager->RequestChunk(
+          [holder = RefPtr<RequestedChunkRefCountedHolder>(
+               mRequestedChunkHolder)](UniquePtr<ProfileBufferChunk> aChunk) {
+            holder->AddRequestedChunk(std::move(aChunk));
+          });
+    }
+  }
+
+  // Handle the outcome of the chunk request, if it has been fulfilled: a
+  // received chunk becomes the current chunk if there is none (and another
+  // request is attempted), otherwise it is stored among the next chunks.
+  // Returns true if a request is considered pending on return, false if not.
+  // Must only be called in-session.
+  [[nodiscard]] bool HandleRequestedChunk_IsPending(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    MOZ_ASSERT(!!mChunkManager);
+    MOZ_ASSERT(!!mRequestedChunkHolder);
+
+    using HolderState = RequestedChunkRefCountedHolder::State;
+
+    if (mRequestedChunkHolder->GetState() == HolderState::Unused) {
+      // No request was made.
+      return false;
+    }
+
+    // A request is either in-flight or fulfilled.
+    Maybe<UniquePtr<ProfileBufferChunk>> fulfilledRequest =
+        mRequestedChunkHolder->GetChunkIfFulfilled();
+    if (fulfilledRequest.isNothing()) {
+      // Request is still pending.
+      return true;
+    }
+
+    // Since we extracted the provided chunk, the holder should now be unused.
+    MOZ_ASSERT(mRequestedChunkHolder->GetState() == HolderState::Unused);
+
+    // Request has been fulfilled, but possibly with a null chunk.
+    UniquePtr<ProfileBufferChunk>& receivedChunk = *fulfilledRequest;
+    if (!receivedChunk) {
+      return false;
+    }
+
+    if (!mCurrentChunk) {
+      // There is no current chunk, use the received one right away.
+      SetAndInitializeCurrentChunk(std::move(receivedChunk), aLock);
+      // We've just received a chunk and made it current, request a next chunk
+      // for later.
+      MOZ_ASSERT(!mNextChunks);
+      RequestChunk(aLock);
+      return true;
+    }
+
+    // There is already a current chunk, keep the received one for later.
+    if (mNextChunks) {
+      mNextChunks->InsertNext(std::move(receivedChunk));
+    } else {
+      mNextChunks = std::move(receivedChunk);
+    }
+    return false;
+  }
+
+  // Get a pointer to the current chunk. If there is none, first try to use a
+  // recently-fulfilled chunk request, then try to get a chunk directly from
+  // the chunk manager.
+  // May return nullptr, the caller must handle that case.
+  [[nodiscard]] ProfileBufferChunk* GetOrCreateCurrentChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    if (MOZ_LIKELY(!!mCurrentChunk)) {
+      return mCurrentChunk.get();
+    }
+
+    // No current chunk ready.
+    MOZ_ASSERT(!mNextChunks,
+               "There shouldn't be next chunks when there is no current one");
+
+    // See if a request has recently been fulfilled, ignore pending status.
+    Unused << HandleRequestedChunk_IsPending(aLock);
+
+    if (MOZ_UNLIKELY(!mCurrentChunk)) {
+      // There was no pending chunk, try to get one right now.
+      // This may still fail, but we can't do anything else about it.
+      SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock);
+    }
+    return mCurrentChunk.get();
+  }
+
+  // Get a pointer to the first of the next chunks (there must already be a
+  // current chunk). If there is none, first try to use a recently-fulfilled
+  // chunk request, then try to get a chunk directly from the chunk manager.
+  // May return nullptr, the caller must handle that case; a chunk request is
+  // then attempted, for later.
+  [[nodiscard]] ProfileBufferChunk* GetOrCreateNextChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    MOZ_ASSERT(!!mCurrentChunk,
+               "Why ask for a next chunk when there isn't even a current one?");
+    if (MOZ_LIKELY(!!mNextChunks)) {
+      return mNextChunks.get();
+    }
+
+    // No next chunk ready, see if a request has recently been fulfilled,
+    // ignore pending status.
+    Unused << HandleRequestedChunk_IsPending(aLock);
+    if (MOZ_LIKELY(!!mNextChunks)) {
+      return mNextChunks.get();
+    }
+
+    // There was no pending chunk, try to get one right now.
+    mNextChunks = mChunkManager->GetChunk();
+    ProfileBufferChunk* const freshChunk = mNextChunks.get();
+    if (MOZ_UNLIKELY(!freshChunk)) {
+      // This failed, and we can't do anything else about it now; attempt a
+      // request for later. The failure is reported to the caller regardless
+      // of what this request does.
+      RequestChunk(aLock);
+    }
+    return freshChunk;
+  }
+
+  // Reserve a block of `aCallbackBlockBytes()` size, and invoke and return
+  // `aCallback(Maybe<ProfileBufferEntryWriter>&)`. Note that this is the "raw"
+  // version that doesn't write the entry size at the beginning of the block.
+  // Note: `aCallbackBlockBytes` is a callback instead of a simple value, to
+  // delay this potentially-expensive computation until after we've checked that
+  // we're in-session; use `Put(Length, Callback)` below if you know the size
+  // already.
+  template <typename CallbackBlockBytes, typename Callback>
+  auto ReserveAndPutRaw(CallbackBlockBytes&& aCallbackBlockBytes,
+                        Callback&& aCallback,
+                        baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock,
+                        uint64_t aBlockCount = 1) {
+    // The entry writer that will point into one or two chunks to write
+    // into, empty by default (failure).
+    Maybe<ProfileBufferEntryWriter> maybeEntryWriter;
+
+    // The current chunk will be filled if we need to write more than its
+    // remaining space.
+    bool currentChunkFilled = false;
+
+    // If the current chunk gets filled, we may or may not initialize the next
+    // chunk!
+    bool nextChunkInitialized = false;
+
+    if (MOZ_LIKELY(mChunkManager)) {
+      // In-session.
+
+      const Length blockBytes =
+          std::forward<CallbackBlockBytes>(aCallbackBlockBytes)();
+
+      if (ProfileBufferChunk* current = GetOrCreateCurrentChunk(aLock);
+          MOZ_LIKELY(current)) {
+        if (blockBytes <= current->RemainingBytes()) {
+          // Block fits in current chunk with only one span.
+          currentChunkFilled = blockBytes == current->RemainingBytes();
+          const auto [mem0, blockIndex] = current->ReserveBlock(blockBytes);
+          MOZ_ASSERT(mem0.LengthBytes() == blockBytes);
+          maybeEntryWriter.emplace(
+              mem0, blockIndex,
+              ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                  blockIndex.ConvertToProfileBufferIndex() + blockBytes));
+          MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
+          mRangeEnd += blockBytes;
+          mPushedBlockCount += aBlockCount;
+        } else {
+          // Block doesn't fit fully in current chunk, it needs to overflow into
+          // the next one.
+          // Whether or not we can write this entry, the current chunk is now
+          // considered full, so it will be released. (Otherwise we could refuse
+          // this entry, but later accept a smaller entry into this chunk, which
+          // would be somewhat inconsistent.)
+          currentChunkFilled = true;
+          // Make sure the next chunk is available (from a previous request),
+          // otherwise create one on the spot.
+          if (ProfileBufferChunk* next = GetOrCreateNextChunk(aLock);
+              MOZ_LIKELY(next)) {
+            // Here, we know we have a current and a next chunk.
+            // Reserve head of block at the end of the current chunk.
+            const auto [mem0, blockIndex] =
+                current->ReserveBlock(current->RemainingBytes());
+            MOZ_ASSERT(mem0.LengthBytes() < blockBytes);
+            MOZ_ASSERT(current->RemainingBytes() == 0);
+            // Set the next chunk range, and reserve the needed space for the
+            // tail of the block.
+            next->SetRangeStart(mNextChunkRangeStart);
+            mNextChunkRangeStart += next->BufferBytes();
+            const auto mem1 = next->ReserveInitialBlockAsTail(
+                blockBytes - mem0.LengthBytes());
+            MOZ_ASSERT(next->RemainingBytes() != 0);
+            nextChunkInitialized = true;
+            // Block is split in two spans.
+            maybeEntryWriter.emplace(
+                mem0, mem1, blockIndex,
+                ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                    blockIndex.ConvertToProfileBufferIndex() + blockBytes));
+            MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
+            mRangeEnd += blockBytes;
+            mPushedBlockCount += aBlockCount;
+          } else {
+            // Cannot get a new chunk. Record put failure.
+            mFailedPutBytes += blockBytes;
+          }
+        }
+      } else {
+        // Cannot get a current chunk. Record put failure.
+        mFailedPutBytes += blockBytes;
+      }
+    }  // end of `if (MOZ_LIKELY(mChunkManager))`
+
+    // Here, we either have a `Nothing` (failure), or a non-empty entry writer
+    // pointing at the start of the block.
+
+    // After we invoke the callback and return, we may need to handle the
+    // current chunk being filled.
+    auto handleFilledChunk = MakeScopeExit([&]() {
+      // If the entry writer was not already empty, the callback *must* have
+      // filled the full entry.
+      MOZ_ASSERT(!maybeEntryWriter || maybeEntryWriter->RemainingBytes() == 0);
+
+      if (currentChunkFilled) {
+        // Extract current (now filled) chunk.
+        UniquePtr<ProfileBufferChunk> filled = std::move(mCurrentChunk);
+
+        if (mNextChunks) {
+          // Cycle to the next chunk.
+          mCurrentChunk =
+              std::exchange(mNextChunks, mNextChunks->ReleaseNext());
+
+          // Make sure it is initialized (it is now the current chunk).
+          if (!nextChunkInitialized) {
+            InitializeCurrentChunk(aLock);
+          }
+        }
+
+        // And finally mark filled chunk done and release it.
+        filled->MarkDone();
+        mChunkManager->ReleaseChunk(std::move(filled));
+
+        // Request another chunk if needed.
+        // In most cases, here we should have one current chunk and no next
+        // chunk, so we want to do a request so there hopefully will be a next
+        // chunk available when the current one gets filled.
+        // But we also do a request if we don't even have a current chunk (if
+        // it's too late, it's ok because the next `ReserveAndPutRaw` will just
+        // allocate one on the spot.)
+        // And if we already have a next chunk, there's no need for more now.
+        if (!mCurrentChunk || !mNextChunks) {
+          RequestChunk(aLock);
+        }
+      }
+    });
+
+    return std::forward<Callback>(aCallback)(maybeEntryWriter);
+  }
+
+  // Reserve a block of `aBlockBytes` bytes, then invoke `aCallback` with a
+  // `Maybe<ProfileBufferEntryWriter>&` and return its result.
+  // "Raw" means that the entry size is *not* written at the block's beginning.
+  template <typename Callback>
+  auto ReserveAndPutRaw(Length aBlockBytes, Callback&& aCallback,
+                        uint64_t aBlockCount) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock autoLock(mMutex);
+    return ReserveAndPutRaw(
+        [bytes = aBlockBytes]() { return bytes; },
+        std::forward<Callback>(aCallback), autoLock, aBlockCount);
+  }
+
+  // Mutex guarding the following members.
+  mutable baseprofiler::detail::BaseProfilerMaybeMutex mMutex;
+
+  // Pointer to the current Chunk Manager (or null when out-of-session.)
+  // It may be owned locally (see below) or externally.
+  ProfileBufferChunkManager* mChunkManager = nullptr;
+
+  // Only non-null when we own the current Chunk Manager.
+  UniquePtr<ProfileBufferChunkManager> mOwnedChunkManager;
+
+  UniquePtr<ProfileBufferChunk> mCurrentChunk;  // Chunk being filled, if any.
+
+  UniquePtr<ProfileBufferChunk> mNextChunks;  // Chain of upcoming chunks.
+
+  // Ref-counted hand-off point through which a `ChunkManager` delivers a
+  // requested chunk to a `ProfileChunkedBuffer`.
+  // Ref-counting is required because a request may be fulfilled
+  // asynchronously, and either party may be destroyed while it is pending.
+  // It has its own mutex: the `ProfileChunkedBuffer` mutex cannot be used, as
+  // that buffer (and its mutex) could be destroyed during the request.
+  class RequestedChunkRefCountedHolder
+      : public external::AtomicRefCounted<RequestedChunkRefCountedHolder> {
+   public:
+    MOZ_DECLARE_REFCOUNTED_TYPENAME(RequestedChunkRefCountedHolder)
+
+    enum class State { Unused, Requested, Fulfilled };
+
+    // Snapshot of the current state. It may already be stale by the time the
+    // caller looks at it, so use with care; e.g., `ProfileChunkedBuffer` can
+    // check whether a request is pending or fulfilled, to avoid starting
+    // another one.
+    [[nodiscard]] State GetState() const {
+      baseprofiler::detail::BaseProfilerAutoLock guard(mRequestMutex);
+      return mState;
+    }
+
+    // `ProfileChunkedBuffer` must call this when it requests a chunk.
+    // Only one request may be in-flight at any time.
+    void StartRequest() {
+      baseprofiler::detail::BaseProfilerAutoLock guard(mRequestMutex);
+      MOZ_ASSERT(mState == State::Unused, "Already requested or fulfilled");
+      mState = State::Requested;
+    }
+
+    // The `ChunkManager` must call this to deliver the requested chunk.
+    // If it cannot provide one (because of memory limits, or because it gets
+    // destroyed), it must still call this, with a nullptr.
+    void AddRequestedChunk(UniquePtr<ProfileBufferChunk>&& aChunk) {
+      baseprofiler::detail::BaseProfilerAutoLock guard(mRequestMutex);
+      MOZ_ASSERT(mState == State::Requested);
+      mRequestedChunk = std::move(aChunk);
+      mState = State::Fulfilled;
+    }
+
+    // Result of a request (a `Some` result resets the holder to `Unused`):
+    // - Nothing -> Request is not fulfilled yet.
+    // - Some(nullptr) -> The `ChunkManager` was not able to provide a chunk.
+    // - Some(chunk) -> Requested chunk.
+    [[nodiscard]] Maybe<UniquePtr<ProfileBufferChunk>> GetChunkIfFulfilled() {
+      Maybe<UniquePtr<ProfileBufferChunk>> result;
+      baseprofiler::detail::BaseProfilerAutoLock guard(mRequestMutex);
+      MOZ_ASSERT(mState == State::Requested || mState == State::Fulfilled);
+      if (mState != State::Fulfilled) {
+        return result;
+      }
+      mState = State::Unused;
+      result.emplace(std::move(mRequestedChunk));
+      return result;
+    }
+
+   private:
+    // Protects all members below.
+    mutable baseprofiler::detail::BaseProfilerMutex mRequestMutex;
+    State mState = State::Unused;
+    UniquePtr<ProfileBufferChunk> mRequestedChunk;
+  };
+
+  // Requested-chunk holder, kept alive when in-session, but may also live
+  // longer if a request is in-flight.
+  RefPtr<RequestedChunkRefCountedHolder> mRequestedChunkHolder;
+
+  // Range start of the next chunk to become current. Starting at 1 because
+  // 0 is a reserved index similar to nullptr.
+  ProfileBufferIndex mNextChunkRangeStart = 1;
+
+  // Index to the first block.
+  // Atomic because it may be increased when a Chunk is destroyed, and the
+  // callback may be invoked from anywhere, including from inside one of our
+  // locked sections, so we cannot protect it with a mutex.
+  Atomic<ProfileBufferIndex, MemoryOrdering::ReleaseAcquire> mRangeStart{1};
+
+  // Index past the last block. Equals mRangeStart if empty.
+  ProfileBufferIndex mRangeEnd = 1;
+
+  // Number of blocks that have been pushed into this buffer.
+  uint64_t mPushedBlockCount = 0;
+
+  // Number of blocks that have been removed from this buffer.
+  // Note: Live entries = pushed - cleared.
+  // Atomic because it may be updated when a Chunk is destroyed, and the
+  // callback may be invoked from anywhere, including from inside one of our
+  // locked sections, so we cannot protect it with a mutex.
+  Atomic<uint64_t, MemoryOrdering::ReleaseAcquire> mClearedBlockCount{0};
+
+  // Number of bytes that could not be put into this buffer.
+  uint64_t mFailedPutBytes = 0;
+};
+
+// ----------------------------------------------------------------------------
+// ProfileChunkedBuffer serialization
+
+// A ProfileChunkedBuffer can hide another one!
+// This will be used to store marker backtraces; They can be read back into a
+// UniquePtr<ProfileChunkedBuffer>.
+// Format: len (ULEB128) | start | buffer (len bytes) | pushed | cleared
+// len==0 marks an out-of-session buffer, or empty buffer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer> {
+  static Length Bytes(const ProfileChunkedBuffer& aBuffer) {
+    return aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+      if (!aReader) {
+        // Out-of-session, we only need 1 byte to store a length of 0.
+        return ULEB128Size<Length>(0);
+      }
+      ProfileBufferEntryReader reader = aReader->SingleChunkDataAsEntry();
+      const ProfileBufferIndex start =
+          reader.CurrentBlockIndex().ConvertToProfileBufferIndex();
+      const ProfileBufferIndex end =
+          reader.NextBlockIndex().ConvertToProfileBufferIndex();
+      MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
+      const Length len = static_cast<Length>(end - start);
+      if (len == 0) {
+        // In-session but empty, also store a length of 0.
+        return ULEB128Size<Length>(0);
+      }
+      // In-session. Stats are sized as the `uint64_t`s that `Write()` emits.
+      return static_cast<Length>(ULEB128Size(len) + sizeof(start) + len +
+                                 sizeof(uint64_t) /* pushed */ +
+                                 sizeof(uint64_t) /* cleared */);
+    });
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileChunkedBuffer& aBuffer) {
+    aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+      if (!aReader) {
+        // Out-of-session, only store a length of 0.
+        aEW.WriteULEB128<Length>(0);
+        return;
+      }
+      ProfileBufferEntryReader reader = aReader->SingleChunkDataAsEntry();
+      const ProfileBufferIndex start =
+          reader.CurrentBlockIndex().ConvertToProfileBufferIndex();
+      const ProfileBufferIndex end =
+          reader.NextBlockIndex().ConvertToProfileBufferIndex();
+      MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
+      const Length len = static_cast<Length>(end - start);
+      MOZ_ASSERT(len <= aEW.RemainingBytes());
+      if (len == 0) {
+        // In-session but empty, only store a length of 0.
+        aEW.WriteULEB128<Length>(0);
+        return;
+      }
+      // In-session.
+      // Store buffer length, and start index (the end index is not stored).
+      aEW.WriteULEB128(len);
+      aEW.WriteObject(start);
+      // Write all the bytes.
+      aEW.WriteFromReader(reader, reader.RemainingBytes());
+      // And write stats, each as a `uint64_t`.
+      aEW.WriteObject(static_cast<uint64_t>(aBuffer.mPushedBlockCount));
+      aEW.WriteObject(static_cast<uint64_t>(aBuffer.mClearedBlockCount));
+      // Note: Failed pushes are not important to serialize.
+    });
+  }
+};
+
+// Deserialization of a ProfileChunkedBuffer, into an existing empty buffer
+// (which is either out-of-session, or in-session with enough room).
+template <>
+struct ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfileChunkedBuffer& aBuffer) {
+    // The destination is going to be overwritten, so it must be empty.
+    MOZ_ASSERT(aBuffer.GetState().mRangeStart == aBuffer.GetState().mRangeEnd);
+    // First comes the length of the stored buffer.
+    const auto len = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
+    if (len == 0) {
+      // 0-length means an "uninteresting" buffer, nothing more to read.
+      return;
+    }
+    // From here on, there is a non-empty buffer to read.
+
+    // Read the start index (the end index is not stored).
+    const auto rangeStart = aER.ReadObject<ProfileBufferIndex>();
+    aBuffer.mRangeStart = rangeStart;
+    // The buffer is still empty at this point, so the end is the start for
+    // now. It will be updated in `ReserveAndPutRaw()` below.
+    aBuffer.mRangeEnd = rangeStart;
+
+    if (!aBuffer.IsInSession()) {
+      // Output buffer is out-of-session, give it a new chunk manager that
+      // will provide a single chunk of just the right size.
+      aBuffer.SetChunkManager(MakeUnique<ProfileBufferChunkManagerSingle>(len));
+      MOZ_ASSERT(aBuffer.BufferLength().value() >= len);
+    } else {
+      // Output buffer is in-session (i.e., it already has a memory buffer
+      // attached). The caller must have allocated enough space.
+      MOZ_RELEASE_ASSERT(aBuffer.BufferLength().value() >= len);
+    }
+
+    // Copy the stored bytes into the buffer.
+    aBuffer.ReserveAndPutRaw(
+        len,
+        [&aER, len](Maybe<ProfileBufferEntryWriter>& aMaybeEW) {
+          MOZ_RELEASE_ASSERT(aMaybeEW.isSome());
+          aMaybeEW->WriteFromReader(aER, len);
+        },
+        0);
+    // Then restore the stats, which come after the bytes.
+    aBuffer.mPushedBlockCount = aER.ReadObject<uint64_t>();
+    aBuffer.mClearedBlockCount = aER.ReadObject<uint64_t>();
+    // Failed puts are not important to keep.
+    aBuffer.mFailedPutBytes = 0;
+  }
+
+  // We cannot output a ProfileChunkedBuffer object (not copyable); instead use
+  // `ReadInto()` or `aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>()`.
+  static ProfileChunkedBuffer Read(ProfileBufferEntryReader& aER) = delete;
+};
+
+// A ProfileChunkedBuffer is usually referenced through a UniquePtr, for
+// convenience we support (de)serializing that UniquePtr directly.
+// This is compatible with the non-UniquePtr serialization above, with a null
+// pointer being treated like an out-of-session or empty buffer; and any of
+// these would be deserialized into a null pointer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>> {
+  static Length Bytes(const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Size of the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      return SumBytes(*aBufferUPtr);
+    }
+    // Null pointer, same as an empty buffer: just the size of a 0 length.
+    return ULEB128Size<Length>(0);
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Write the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      aEW.WriteObject(*aBufferUPtr);
+    } else {
+      // Null pointer, same as an empty buffer: only write a length of 0.
+      aEW.WriteULEB128<Length>(0);
+    }
+  }
+};
+
+// Serialization through a raw (non-owning) pointer to ProfileChunkedBuffer.
+// Use Deserializer<UniquePtr<ProfileChunkedBuffer>> to read it back.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer*> {
+  static Length Bytes(ProfileChunkedBuffer* aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Size of the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      return SumBytes(*aBufferUPtr);
+    }
+    // Null pointer, same as an empty buffer: just the size of a 0 length.
+    return ULEB128Size<Length>(0);
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    ProfileChunkedBuffer* aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Write the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      aEW.WriteObject(*aBufferUPtr);
+    } else {
+      // Null pointer, same as an empty buffer: only write a length of 0.
+      aEW.WriteULEB128<Length>(0);
+    }
+  }
+};
+
+// Deserialization into a UniquePtr<ProfileChunkedBuffer>; null if len==0.
+template <>
+struct ProfileBufferEntryReader::Deserializer<UniquePtr<ProfileChunkedBuffer>> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       UniquePtr<ProfileChunkedBuffer>& aBuffer) {
+    aBuffer = Read(aER);
+  }
+
+  static UniquePtr<ProfileChunkedBuffer> Read(ProfileBufferEntryReader& aER) {
+    // Remember where the serialized data starts (i.e., before the length), so
+    // that reading can be restarted from there below.
+    ProfileBufferEntryReader rewindPoint = aER;
+    // Peek at the stored buffer length.
+    const auto storedLen = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
+    if (storedLen == 0) {
+      // 0-length means an "uninteresting" buffer, just return nullptr.
+      return nullptr;
+    }
+    // There is a non-empty buffer to read.
+    // Allocate an empty ProfileChunkedBuffer without mutex to receive it.
+    auto bufferUPtr = MakeUnique<ProfileChunkedBuffer>(
+        ProfileChunkedBuffer::ThreadSafety::WithoutMutex);
+    // Go back to before the length, and let the non-UniquePtr Deserializer
+    // read everything (length included) into the new buffer.
+    aER = rewindPoint;
+    aER.ReadIntoObject(*bufferUPtr);
+    return bufferUPtr;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileChunkedBuffer_h
diff --git a/mozglue/baseprofiler/public/ProfilingCategoryList.h b/mozglue/baseprofiler/public/ProfilingCategoryList.h
new file mode 100644
index 0000000000..437f24aaa1
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfilingCategoryList.h
@@ -0,0 +1,122 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef baseprofiler_ProfilingCategoryList_h
+#define baseprofiler_ProfilingCategoryList_h
+
+// Profiler sub-categories are applied to each sampled stack to describe the
+// type of workload that the CPU is busy with. Only one sub-category can be
+// assigned so be mindful that these are non-overlapping. The active category is
+// set by pushing a label to the profiling stack, or by the unwinder in cases
+// such as JITs. A profile sample in arbitrary C++/Rust will typically be
+// categorized based on the top of the label stack.
+//
+// The list of available color names for categories is:
+//    transparent  blue       green
+//    grey         lightblue  magenta
+//    orange       purple     yellow
+//
+// NOTE(review): two categories below use color names that are not in this
+// list: TEST uses "darkgray" and IPC uses "lightgreen". Either the list above
+// is incomplete or those names are not supported by the consumer of this
+// macro -- TODO confirm which, and update the list or the categories
+// accordingly.
+
+// clang-format off
+
+#define MOZ_PROFILING_CATEGORY_LIST(BEGIN_CATEGORY, SUBCATEGORY, END_CATEGORY) \
+  BEGIN_CATEGORY(IDLE, "Idle", "transparent") \
+    SUBCATEGORY(IDLE, IDLE, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(OTHER, "Other", "grey") \
+    SUBCATEGORY(OTHER, OTHER, "Other") \
+    SUBCATEGORY(OTHER, OTHER_PreferenceRead, "Preference Read") \
+    SUBCATEGORY(OTHER, OTHER_Profiling, "Profiling") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(TEST, "Test", "darkgray") \
+    SUBCATEGORY(TEST, TEST, "Test") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(LAYOUT, "Layout", "purple") \
+    SUBCATEGORY(LAYOUT, LAYOUT, "Other") \
+    SUBCATEGORY(LAYOUT, LAYOUT_FrameConstruction, "Frame construction") \
+    SUBCATEGORY(LAYOUT, LAYOUT_Reflow, "Reflow") \
+    SUBCATEGORY(LAYOUT, LAYOUT_CSSParsing, "CSS parsing") \
+    SUBCATEGORY(LAYOUT, LAYOUT_SelectorQuery, "Selector query") \
+    SUBCATEGORY(LAYOUT, LAYOUT_StyleComputation, "Style computation") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JS, "JavaScript", "yellow") \
+    SUBCATEGORY(JS, JS, "Other") \
+    SUBCATEGORY(JS, JS_Parsing, "Parsing") \
+    SUBCATEGORY(JS, JS_BaselineCompilation, "JIT Compile (baseline)") \
+    SUBCATEGORY(JS, JS_IonCompilation, "JIT Compile (ion)") \
+    SUBCATEGORY(JS, JS_Interpreter, "Interpreter") \
+    SUBCATEGORY(JS, JS_BaselineInterpret, "JIT (baseline-interpreter)") \
+    SUBCATEGORY(JS, JS_Baseline, "JIT (baseline)") \
+    SUBCATEGORY(JS, JS_IonMonkey, "JIT (ion)") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(GCCC, "GC / CC", "orange") \
+    SUBCATEGORY(GCCC, GCCC, "Other") \
+    SUBCATEGORY(GCCC, GCCC_MinorGC, "Minor GC") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC, "Major GC (Other)") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC_Mark, "Major GC (Mark)") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC_Sweep, "Major GC (Sweep)") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC_Compact, "Major GC (Compact)") \
+    SUBCATEGORY(GCCC, GCCC_UnmarkGray, "Unmark Gray") \
+    SUBCATEGORY(GCCC, GCCC_Barrier, "Barrier") \
+    SUBCATEGORY(GCCC, GCCC_FreeSnowWhite, "CC (Free Snow White)") \
+    SUBCATEGORY(GCCC, GCCC_BuildGraph, "CC (Build Graph)") \
+    SUBCATEGORY(GCCC, GCCC_ScanRoots, "CC (Scan Roots)") \
+    SUBCATEGORY(GCCC, GCCC_CollectWhite, "CC (Collect White)") \
+    SUBCATEGORY(GCCC, GCCC_Finalize, "CC (Finalize)") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(NETWORK, "Network", "lightblue") \
+    SUBCATEGORY(NETWORK, NETWORK, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(GRAPHICS, "Graphics", "green") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS, "Other") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_DisplayListBuilding, "DisplayList building") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_DisplayListMerging, "DisplayList merging") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_LayerBuilding, "Layer building") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_TileAllocation, "Tile allocation") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_WRDisplayList, "WebRender display list") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_Rasterization, "Rasterization") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_FlushingAsyncPaints, "Flushing async paints") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_ImageDecoding, "Image decoding") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(DOM, "DOM", "blue") \
+    SUBCATEGORY(DOM, DOM, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_ANDROID, "Android", "yellow") \
+    SUBCATEGORY(JAVA_ANDROID, JAVA_ANDROID, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_ANDROIDX, "AndroidX", "orange") \
+    SUBCATEGORY(JAVA_ANDROIDX, JAVA_ANDROIDX, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_LANGUAGE, "Java", "blue") \
+    SUBCATEGORY(JAVA_LANGUAGE, JAVA_LANGUAGE, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_MOZILLA, "Mozilla", "green") \
+    SUBCATEGORY(JAVA_MOZILLA, JAVA_MOZILLA, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_KOTLIN, "Kotlin", "purple") \
+    SUBCATEGORY(JAVA_KOTLIN, JAVA_KOTLIN, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_BLOCKED, "Blocked", "lightblue") \
+    SUBCATEGORY(JAVA_BLOCKED, JAVA_BLOCKED, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(IPC, "IPC", "lightgreen") \
+    SUBCATEGORY(IPC, IPC, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(MEDIA, "Media", "orange") \
+    SUBCATEGORY(MEDIA, MEDIA, "Other") \
+    SUBCATEGORY(MEDIA, MEDIA_CUBEB, "Cubeb") \
+    SUBCATEGORY(MEDIA, MEDIA_PLAYBACK, "Playback") \
+    SUBCATEGORY(MEDIA, MEDIA_RT, "Real-time rendering") \
+  END_CATEGORY
+
+// clang-format on
+
+#endif  // baseprofiler_ProfilingCategoryList_h
diff --git a/mozglue/baseprofiler/public/leb128iterator.h b/mozglue/baseprofiler/public/leb128iterator.h
new file mode 100644
index 0000000000..636baf916f
--- /dev/null
+++ b/mozglue/baseprofiler/public/leb128iterator.h
@@ -0,0 +1,207 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// LEB128 utilities that can read/write unsigned LEB128 numbers from/to
+// iterators.
+//
+// LEB128 = Little Endian Base 128, where small numbers take few bytes, but
+// large numbers are still allowed, which is ideal when serializing numbers that
+// are likely to be small.
+// Each byte contains 7 bits from the number, starting at the "little end", the
+// top bit is 0 for the last byte, 1 otherwise.
+// Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc.
+//
+// Iterators only need to provide:
+// - `*it` to return a reference to the next byte to be read from or written to.
+// - `++it` to advance the iterator after a byte is written.
+//
+// The caller must always provide sufficient space to write any number, by:
+// - pre-allocating a large enough buffer, or
+// - allocating more space when `++it` reaches the end and/or `*it` is invoked
+//   after the end, or
+// - moving the underlying pointer to an appropriate location (e.g., wrapping
+//   around a circular buffer).
+// The caller must also provide enough bytes to read a full value (i.e., at
+// least one byte should have its top bit unset), and a type large enough to
+// hold the stored value.
+//
+// Note: There are insufficient checks for validity! These functions are
+// intended to be used together, i.e., the user should only `ReadULEB128()` from
+// a sufficiently-large buffer that the same user filled with `WriteULEB128()`.
+// Using with externally-sourced data (e.g., DWARF) is *not* recommended.
+//
+// https://en.wikipedia.org/wiki/LEB128
+
+#ifndef leb128iterator_h
+#define leb128iterator_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Likely.h"
+
+#include <climits>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+namespace mozilla {
+
+// Number of bytes that `aValue` occupies once encoded as ULEB128.
+template <typename T>
+constexpr uint_fast8_t ULEB128Size(T aValue) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ULEB128Size only takes unsigned types");
+  // Each output byte carries 7 bits of the value, so we count how many
+  // times 7 bits can be shifted out before nothing is left.
+  // A `do` loop guarantees at least one iteration, which takes care of the
+  // special case of 0: it still needs 1 output byte.
+  uint_fast8_t byteCount = 0;
+  do {
+    // One more byte is needed for the 7 least significant bits...
+    byteCount += 1;
+    // ... which are then dropped.
+    aValue >>= 7;
+    // Expecting small values, so `aValue == 0` should be the likely case.
+  } while (!MOZ_LIKELY(aValue == 0));
+  return byteCount;
+}
+
+// Upper bound of `ULEB128Size()` over all possible values of type `T`.
+template <typename T>
+constexpr uint_fast8_t ULEB128MaxSize() {
+  return ULEB128Size(std::numeric_limits<T>::max());
+}
+
+// Encode `aValue` as ULEB128 and write it through `aIterator`.
+// On return, the iterator points just past the last written byte.
+template <typename T, typename It>
+void WriteULEB128(T aValue, It& aIterator) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "WriteULEB128 only takes unsigned types");
+  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
+  static_assert(sizeof(IteratorValue) == 1,
+                "WriteULEB128 expects an iterator to single bytes");
+  // A `do` loop always runs at least once, which takes care of the special
+  // case of 0: one byte must still be written for it.
+  do {
+    // Take the 7 least significant bits out of `aValue`...
+    const uint_fast8_t lowBits = aValue & 0x7Fu;
+    // ... and drop them, leaving what remains to be written (if anything).
+    aValue >>= 7;
+    // Output these 7 bits, with the 8th bit set if `aValue` is not 0 yet
+    // (meaning there will be more bytes after this one.)
+    // Expecting small values, so `aValue == 0` should be the likely case.
+    // Note: No absolute need to force-cast to IteratorValue, because only
+    // the bottom 8 bits may be set here. However the compiler could warn
+    // about a narrowing conversion from potentially-multibyte uint_fast8_t
+    // down to whatever single-byte type `*aIterator` expects, so the cast
+    // is made explicit.
+    *aIterator = static_cast<IteratorValue>(
+        MOZ_LIKELY(aValue == 0) ? lowBits : (lowBits | 0x80u));
+    // The iterator always moves on to the next byte after a write.
+    ++aIterator;
+    // Keep going for as long as there are bits left to write, i.e., until
+    // `aValue` has been entirely consumed.
+    // Expecting small values, so stopping here should be the likely case.
+  } while (!MOZ_LIKELY(aValue == 0));
+}
+
+// Decode one ULEB128 value read through `aIterator`.
+// On return, the iterator points just past the last byte of that value.
+template <typename T, typename It>
+T ReadULEB128(It& aIterator) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ReadULEB128 must return an unsigned type");
+  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
+  static_assert(sizeof(IteratorValue) == 1,
+                "ReadULEB128 expects an iterator to single bytes");
+  // The decoded value is assembled here, 7 bits at a time...
+  T decoded = 0;
+  // ... from the least significant bits upwards: `bitOffset` is where the
+  // next 7 bits belong in `decoded`, it moves up by 7 after every byte that
+  // announces (through its 8th bit) that more bytes follow.
+  for (uint_fast8_t bitOffset = 0;; bitOffset += 7) {
+    // Safety check that we're not going to shift by the type size or more,
+    // which is Undefined Behavior in C++. (Always fine on the first pass,
+    // where the offset is 0.)
+    MOZ_ASSERT(bitOffset < CHAR_BIT * sizeof(T));
+    // Fetch one byte through the iterator.
+    // `static_cast` just in case IteratorValue is not implicitly convertible
+    // to uint_fast8_t. It wouldn't matter if the sign was extended, only the
+    // bottom 8 bits are looked at below.
+    const uint_fast8_t byte = static_cast<uint_fast8_t>(*aIterator);
+    // The iterator always moves on after a read.
+    ++aIterator;
+    // Put the 7 bits of payload in their place in `decoded`.
+    decoded |= static_cast<T>(byte & 0x7fu) << bitOffset;
+    // An unset 8th bit marks the last byte of the value.
+    // Expecting small values, so this should be the likely case.
+    if (MOZ_LIKELY((byte & 0x80u) == 0)) {
+      return decoded;
+    }
+    // Otherwise there are more bytes to read.
+  }
+}
+
+// constexpr ULEB128 decoder that is fed one byte at a time.
+// Mostly useful when dealing with non-trivial byte feeds.
+template <typename T>
+class ULEB128Reader {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ULEB128Reader must handle an unsigned type");
+
+ public:
+  constexpr ULEB128Reader() = default;
+
+  // A stateful parser has no use for copy/assignment, so forbid them.
+  constexpr ULEB128Reader(const ULEB128Reader&) = delete;
+  constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete;
+
+  // Feed the next byte into the parser.
+  // Returns true if this was the last byte of the value.
+  [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) {
+    MOZ_ASSERT(!IsComplete());
+    // Put the 7 bits of payload in their place in the value.
+    mValue |= static_cast<T>(aByte & 0x7fu) << mShift;
+    // An unset 8th bit marks the last byte of the value.
+    const bool isLastByte = (aByte & 0x80u) == 0;
+    // Expecting small values, so this should be the likely case.
+    if (MOZ_LIKELY(isLastByte)) {
+      mShift = kCompleteShift;
+    } else {
+      // More bytes to come, carrying more significant bits above these 7.
+      mShift += 7;
+      // Safety check that we're not going to shift by the type size or more,
+      // which is Undefined Behavior in C++.
+      MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T));
+    }
+    return isLastByte;
+  }
+
+  constexpr void Reset() {
+    mValue = 0;
+    mShift = 0;
+  }
+
+  [[nodiscard]] constexpr bool IsComplete() const {
+    return mShift == kCompleteShift;
+  }
+
+  [[nodiscard]] constexpr T Value() const {
+    MOZ_ASSERT(IsComplete());
+    return mValue;
+  }
+
+ private:
+  // Sentinel stored in `mShift` once parsing is complete.
+  constexpr static unsigned kCompleteShift = 0x10000u;
+
+  T mValue = 0;
+  unsigned mShift = 0;
+};
+
+}  // namespace mozilla
+
+#endif  // leb128iterator_h