summaryrefslogtreecommitdiffstats
path: root/mozglue/baseprofiler/core
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /mozglue/baseprofiler/core
parentInitial commit. (diff)
downloadfirefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz
firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'mozglue/baseprofiler/core')
-rw-r--r--mozglue/baseprofiler/core/EHABIStackWalk.cpp592
-rw-r--r--mozglue/baseprofiler/core/EHABIStackWalk.h30
-rw-r--r--mozglue/baseprofiler/core/PageInformation.cpp50
-rw-r--r--mozglue/baseprofiler/core/PageInformation.h77
-rw-r--r--mozglue/baseprofiler/core/PlatformMacros.h130
-rw-r--r--mozglue/baseprofiler/core/ProfileBuffer.cpp210
-rw-r--r--mozglue/baseprofiler/core/ProfileBuffer.h186
-rw-r--r--mozglue/baseprofiler/core/ProfileBufferEntry.cpp1337
-rw-r--r--mozglue/baseprofiler/core/ProfileBufferEntry.h358
-rw-r--r--mozglue/baseprofiler/core/ProfileJSONWriter.cpp52
-rw-r--r--mozglue/baseprofiler/core/ProfiledThreadData.cpp187
-rw-r--r--mozglue/baseprofiler/core/ProfiledThreadData.h119
-rw-r--r--mozglue/baseprofiler/core/ProfilerBacktrace.cpp123
-rw-r--r--mozglue/baseprofiler/core/ProfilerBacktrace.h162
-rw-r--r--mozglue/baseprofiler/core/ProfilerMarkers.cpp221
-rw-r--r--mozglue/baseprofiler/core/ProfilingCategory.cpp71
-rw-r--r--mozglue/baseprofiler/core/ProfilingStack.cpp52
-rw-r--r--mozglue/baseprofiler/core/RegisteredThread.cpp42
-rw-r--r--mozglue/baseprofiler/core/RegisteredThread.h166
-rw-r--r--mozglue/baseprofiler/core/ThreadInfo.h61
-rw-r--r--mozglue/baseprofiler/core/VTuneProfiler.cpp92
-rw-r--r--mozglue/baseprofiler/core/VTuneProfiler.h84
-rw-r--r--mozglue/baseprofiler/core/platform-linux-android.cpp550
-rw-r--r--mozglue/baseprofiler/core/platform-macos.cpp233
-rw-r--r--mozglue/baseprofiler/core/platform-win32.cpp351
-rw-r--r--mozglue/baseprofiler/core/platform.cpp3712
-rw-r--r--mozglue/baseprofiler/core/platform.h132
-rw-r--r--mozglue/baseprofiler/core/shared-libraries-linux.cc835
-rw-r--r--mozglue/baseprofiler/core/shared-libraries-macos.cc182
-rw-r--r--mozglue/baseprofiler/core/shared-libraries-win32.cc277
-rw-r--r--mozglue/baseprofiler/core/vtune/ittnotify.h4127
31 files changed, 14801 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.cpp b/mozglue/baseprofiler/core/EHABIStackWalk.cpp
new file mode 100644
index 0000000000..0c2c855c9b
--- /dev/null
+++ b/mozglue/baseprofiler/core/EHABIStackWalk.cpp
@@ -0,0 +1,592 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI, as described in:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
+ *
+ * This handles only the ARM-defined "personality routines" (chapter
+ * 9), and doesn't track the value of FP registers, because profiling
+ * needs only the chain of PC/SP values.
+ *
+ * Because the exception handling info may not be accurate for all
+ * possible places where an async signal could occur (e.g., in a
+ * prologue or epilogue), this bounds-checks all stack accesses.
+ *
+ * This file uses "struct" for structures in the exception tables and
+ * "class" otherwise. We should avoid violating the C++11
+ * standard-layout rules in the former.
+ */
+
+#include "BaseProfiler.h"
+
+#include "EHABIStackWalk.h"
+
+#include "BaseProfilerSharedLibraries.h"
+#include "platform.h"
+
+#include "mozilla/Atomics.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+
+#include <algorithm>
+#include <elf.h>
+#include <stdint.h>
+#include <vector>
+#include <string>
+
+#ifndef PT_ARM_EXIDX
+# define PT_ARM_EXIDX 0x70000001
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+struct PRel31 {  // Exception-table word holding a 31-bit self-relative offset.
+  uint32_t mBits;
+  bool topBit() const { return mBits & 0x80000000; }
+  uint32_t value() const { return mBits & 0x7fffffff; }
+  // Sign-extend bit 30. Shift while unsigned: a signed overflow here is UB.
+  int32_t offset() const { return static_cast<int32_t>(mBits << 1) >> 1; }
+  const void* compute() const {  // address of this word plus offset()
+    return reinterpret_cast<const char*>(this) + offset();
+  }
+ private:
+  PRel31(const PRel31& copied) = delete;
+  PRel31() = delete;
+};
+
+struct EHEntry {  // Pair of table words: a start PC and its unwind data.
+  PRel31 startPC;
+  PRel31 exidx;
+
+ private:
+  EHEntry() = delete;
+  EHEntry(const EHEntry&) = delete;
+};
+
+class EHState {
+  // Any core register may act as a "frame pointer" while unwinding, so all
+  // sixteen of them have to be tracked here.
+  uint32_t mRegs[16];
+
+ public:
+  explicit EHState(const mcontext_t&);
+  bool unwind(const EHEntry* aEntry, const void* stackBase);
+  uint32_t& operator[](int aIndex) { return mRegs[aIndex]; }
+  const uint32_t& operator[](int aIndex) const { return mRegs[aIndex]; }
+};
+
+enum { R_SP = 13, R_LR = 14, R_PC = 15 };  // indices into EHState's registers
+
+class EHTable {  // Unwind entries and executable address range of one ELF image.
+  uint32_t mStartPC;
+  uint32_t mEndPC;
+  uint32_t mBaseAddress;
+  const EHEntry* mEntriesBegin;
+  const EHEntry* mEntriesEnd;
+  std::string mName;
+
+ public:
+  EHTable(const void* aELF, size_t aSize, const std::string& aName);
+  bool isValid() const { return mEntriesBegin != mEntriesEnd; }
+  const EHEntry* lookup(uint32_t aPC) const;
+  uint32_t startPC() const { return mStartPC; }
+  uint32_t endPC() const { return mEndPC; }
+  uint32_t baseAddress() const { return mBaseAddress; }
+  const std::string& name() const { return mName; }
+};
+
+class EHAddrSpace {  // Set of EHTables that can be searched by PC.
+  std::vector<uint32_t> mStarts;
+  std::vector<EHTable> mTables;
+  static Atomic<const EHAddrSpace*> sCurrent;
+
+ public:
+  explicit EHAddrSpace(const std::vector<EHTable>& aTables);
+  static const EHAddrSpace* Get();
+  static void Update();
+  const EHTable* lookup(uint32_t aPC) const;
+};
+
+void EHABIStackWalkInit() { EHAddrSpace::Update(); }  // collect tables up front
+
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, const size_t aNumFrames) {
+  // Writes the PC/SP of the frame in aContext and of each caller that can be
+  // unwound into aPCs/aSPs, up to aNumFrames entries; returns the count.
+  const EHAddrSpace* const addrSpace = EHAddrSpace::Get();
+  EHState regs(aContext);
+
+  size_t written = 0;
+  while (written < aNumFrames) {
+    const uint32_t framePC = regs[R_PC];
+    const uint32_t frameSP = regs[R_SP];
+    aPCs[written] = reinterpret_cast<void*>(framePC);
+    aSPs[written] = reinterpret_cast<void*>(frameSP);
+    ++written;
+
+    // TODO: cache these lookups. Binary-searching libxul is
+    // expensive (possibly more expensive than doing the actual
+    // unwind), and even a small cache should help.
+    const EHTable* owner = addrSpace ? addrSpace->lookup(framePC) : nullptr;
+    const EHEntry* entry = owner ? owner->lookup(framePC) : nullptr;
+    // Stop when no table/entry covers the PC or the unwind step fails.
+    if (!entry || !regs.unwind(entry, stackBase)) break;
+  }
+  return written;
+}
+
+class EHInterp {  // Runs the unwind instructions of one EHEntry on an EHState.
+ public:
+ // Note that stackLimit is exclusive and stackBase is inclusive
+ // (i.e, stackLimit < SP <= stackBase), following the convention
+ // set by the AAPCS spec.
+ EHInterp(EHState& aState, const EHEntry* aEntry, uint32_t aStackLimit,
+ uint32_t aStackBase)
+ : mState(aState),
+ mStackLimit(aStackLimit),
+ mStackBase(aStackBase),
+ mNextWord(0),
+ mWordsLeft(0),
+ mFailed(false) {
+ const PRel31& exidx = aEntry->exidx;
+ uint32_t firstWord;
+
+ if (exidx.mBits == 1) { // EXIDX_CANTUNWIND
+ mFailed = true;
+ return;
+ }
+ if (exidx.topBit()) {  // top bit set: the word itself is the unwind data
+ firstWord = exidx.mBits;
+ } else {  // otherwise the word is an offset to the unwind data
+ mNextWord = reinterpret_cast<const uint32_t*>(exidx.compute());
+ firstWord = *mNextWord++;
+ }
+
+ switch (firstWord >> 24) {  // top byte selects the data format
+ case 0x80: // short
+ mWord = firstWord << 8;  // drop the format byte; 3 instruction bytes remain
+ mBytesLeft = 3;
+ break;
+ case 0x81:
+ case 0x82: // long; catch descriptor size ignored
+ mWord = firstWord << 16;  // drop format and count bytes; 2 bytes remain
+ mBytesLeft = 2;
+ mWordsLeft = (firstWord >> 16) & 0xff;  // count of further data words
+ break;
+ default:
+ // unknown personality
+ mFailed = true;
+ }
+ }
+
+ bool unwind();
+
+ private:
+ // TODO: GCC has been observed not CSEing repeated reads of
+ // mState[R_SP] with writes to mFailed between them, suggesting that
+ // it hasn't determined that they can't alias and is thus missing
+ // optimization opportunities. So, we may want to flatten EHState
+ // into this class; this may also make the code simpler.
+ EHState& mState;
+ uint32_t mStackLimit;
+ uint32_t mStackBase;
+ const uint32_t* mNextWord;  // next unread data word, when data is out of line
+ uint32_t mWord;  // current word; next() rotates it one byte per call
+ uint8_t mWordsLeft;  // whole words still to be fetched through mNextWord
+ uint8_t mBytesLeft;  // unread instruction bytes remaining in mWord
+ bool mFailed;  // set on any error; checked by the interpreter loop
+
+ enum {
+ I_ADDSP = 0x00, // 0sxxxxxx (subtract if s)
+ M_ADDSP = 0x80,
+ I_POPMASK = 0x80, // 1000iiii iiiiiiii (if any i set)
+ M_POPMASK = 0xf0,
+ I_MOVSP = 0x90, // 1001nnnn
+ M_MOVSP = 0xf0,
+ I_POPN = 0xa0, // 1010lnnn
+ M_POPN = 0xf0,
+ I_FINISH = 0xb0, // 10110000
+ I_POPLO = 0xb1, // 10110001 0000iiii (if any i set)
+ I_ADDSPBIG = 0xb2, // 10110010 uleb128
+ I_POPFDX = 0xb3, // 10110011 sssscccc
+ I_POPFDX8 = 0xb8, // 10111nnn
+ M_POPFDX8 = 0xf8,
+ // "Intel Wireless MMX" extensions omitted.
+ I_POPFDD = 0xc8, // 1100100h sssscccc
+ M_POPFDD = 0xfe,
+ I_POPFDD8 = 0xd0, // 11010nnn
+ M_POPFDD8 = 0xf8
+ };
+
+ uint8_t next() {  // next instruction byte; I_FINISH once the data runs out
+ if (mBytesLeft == 0) {
+ if (mWordsLeft == 0) {
+ return I_FINISH;
+ }
+ mWordsLeft--;
+ mWord = *mNextWord++;
+ mBytesLeft = 4;
+ }
+ mBytesLeft--;
+ mWord = (mWord << 8) | (mWord >> 24); // rotate
+ return mWord;  // truncated to the byte just rotated into the low 8 bits
+ }
+
+ uint32_t& vSP() { return mState[R_SP]; }
+ uint32_t* ptrSP() { return reinterpret_cast<uint32_t*>(vSP()); }
+
+ void checkStackBase() {  // fail if SP moved above the (inclusive) base
+ if (vSP() > mStackBase) mFailed = true;
+ }
+ void checkStackLimit() {  // fail if SP reached the (exclusive) limit
+ if (vSP() <= mStackLimit) mFailed = true;
+ }
+ void checkStackAlign() {  // fail if SP is not 4-byte aligned
+ if ((vSP() & 3) != 0) mFailed = true;
+ }
+ void checkStack() {
+ checkStackBase();
+ checkStackLimit();
+ checkStackAlign();
+ }
+
+ void popRange(uint8_t first, uint8_t last, uint16_t mask) {  // bit 0 = first
+ bool hasSP = false;
+ uint32_t tmpSP;
+ if (mask == 0) mFailed = true;  // callers' encodings require a set bit
+ for (uint8_t r = first; r <= last; ++r) {
+ if (mask & 1) {
+ if (r == R_SP) {  // a popped SP is held back until the loop is done
+ hasSP = true;
+ tmpSP = *ptrSP();
+ } else
+ mState[r] = *ptrSP();
+ vSP() += 4;
+ checkStackBase();
+ if (mFailed) return;
+ }
+ mask >>= 1;
+ }
+ if (hasSP) {
+ vSP() = tmpSP;
+ checkStack();
+ }
+ }
+};
+
+bool EHState::unwind(const EHEntry* aEntry, const void* stackBasePtr) {
+  // Inclusive upper bound for SP, as an integer.
+  const uint32_t upperBound = reinterpret_cast<uint32_t>(stackBasePtr);
+  // The unwinding program cannot set SP to less than the initial value.
+  const uint32_t lowerBound = mRegs[R_SP] - 4;
+  return EHInterp(*this, aEntry, lowerBound, upperBound).unwind();
+}
+
+bool EHInterp::unwind() {  // true once Finish is reached, false on failure
+ mState[R_PC] = 0;  // 0 means "the program has not restored PC"
+ checkStack();
+ while (!mFailed) {
+ uint8_t insn = next();
+#if DEBUG_EHABI_UNWIND  // NOTE(review): the guard further down uses #ifdef
+ LOG("unwind insn = %02x", (unsigned)insn);
+#endif
+ // Try to put the common cases first.
+
+ // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
+ // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4
+ if ((insn & M_ADDSP) == I_ADDSP) {
+ uint32_t offset = ((insn & 0x3f) << 2) + 4;
+ if (insn & 0x40) {
+ vSP() -= offset;
+ checkStackLimit();
+ } else {
+ vSP() += offset;
+ checkStackBase();
+ }
+ continue;
+ }
+
+ // 10100nnn: Pop r4-r[4+nnn]
+ // 10101nnn: Pop r4-r[4+nnn], r14
+ if ((insn & M_POPN) == I_POPN) {
+ uint8_t n = (insn & 0x07) + 1;
+ bool lr = insn & 0x08;
+ uint32_t* ptr = ptrSP();  // values are read from the pre-adjustment SP
+ vSP() += (n + (lr ? 1 : 0)) * 4;
+ checkStackBase();  // NOTE(review): the loads below run even if this fails
+ for (uint8_t r = 4; r < 4 + n; ++r) mState[r] = *ptr++;
+ if (lr) mState[R_LR] = *ptr++;
+ continue;
+ }
+
+ // 10110000: Finish
+ if (insn == I_FINISH) {
+ if (mState[R_PC] == 0) {  // PC was not popped: return through LR
+ mState[R_PC] = mState[R_LR];
+ // Non-standard change (bug 916106): Prevent the caller from
+ // re-using LR. Since the caller is by definition not a leaf
+ // routine, it will have to restore LR from somewhere to
+ // return to its own caller, so we can safely zero it here.
+ // This makes a difference only if an error in unwinding
+ // (e.g., caused by starting from within a prologue/epilogue)
+ // causes us to load a pointer to a leaf routine as LR; if we
+ // don't do something, we'll go into an infinite loop of
+ // "returning" to that same function.
+ mState[R_LR] = 0;
+ }
+ return true;
+ }
+
+ // 1001nnnn: Set vsp = r[nnnn]
+ if ((insn & M_MOVSP) == I_MOVSP) {
+ vSP() = mState[insn & 0x0f];
+ checkStack();
+ continue;
+ }
+
+ // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD)
+ // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD)
+ if ((insn & M_POPFDD) == I_POPFDD) {
+ uint8_t n = (next() & 0x0f) + 1;  // FP values are skipped, not recorded
+ // Note: if the 16+ssss+cccc > 31, the encoding is reserved.
+ // As the space is currently unused, we don't try to check.
+ vSP() += 8 * n;
+ checkStackBase();
+ continue;
+ }
+
+ // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD)
+ if ((insn & M_POPFDD8) == I_POPFDD8) {
+ uint8_t n = (insn & 0x07) + 1;
+ vSP() += 8 * n;
+ checkStackBase();
+ continue;
+ }
+
+ // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
+ if (insn == I_ADDSPBIG) {
+ uint32_t acc = 0;
+ uint8_t shift = 0;
+ uint8_t byte;
+ do {
+ if (shift >= 32) return false;  // operand does not fit in 32 bits
+ byte = next();
+ acc |= (byte & 0x7f) << shift;
+ shift += 7;
+ } while (byte & 0x80);  // high bit set: another operand byte follows
+ uint32_t offset = 0x204 + (acc << 2);
+ // The calculations above could have overflowed.
+ // But the one we care about is this:
+ if (vSP() + offset < vSP()) mFailed = true;
+ vSP() += offset;
+ // ...so that this is the only other check needed:
+ checkStackBase();
+ continue;
+ }
+
+ // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4}
+ if ((insn & M_POPMASK) == I_POPMASK) {
+ popRange(4, 15, ((insn & 0x0f) << 8) | next());
+ continue;
+ }
+
+ // 10110001 0000iiii (i not all 0): Pop under mask {r3-r0}
+ if (insn == I_POPLO) {
+ popRange(0, 3, next() & 0x0f);
+ continue;
+ }
+
+ // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX)
+ if (insn == I_POPFDX) {
+ uint8_t n = (next() & 0x0f) + 1;
+ vSP() += 8 * n + 4;
+ checkStackBase();
+ continue;
+ }
+
+ // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX)
+ if ((insn & M_POPFDX8) == I_POPFDX8) {
+ uint8_t n = (insn & 0x07) + 1;
+ vSP() += 8 * n + 4;
+ checkStackBase();
+ continue;
+ }
+
+ // unhandled instruction
+#ifdef DEBUG_EHABI_UNWIND
+ LOG("Unhandled EHABI instruction 0x%02x", insn);
+#endif
+ mFailed = true;
+ }
+ return false;  // the loop only exits here after mFailed was set
+}
+
+bool operator<(const EHTable& lhs, const EHTable& rhs) {
+  return rhs.startPC() > lhs.startPC();  // tables are ordered by start PC
+}
+
+// Async signal unsafe.
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables)
+    : mTables(aTables) {
+  // Order the tables by start PC and mirror the starts for lookup().
+  std::sort(mTables.begin(), mTables.end());
+  DebugOnly<uint32_t> prevEnd = 0;
+  for (const EHTable& table : mTables) {
+    MOZ_ASSERT(table.startPC() >= prevEnd);
+    mStarts.push_back(table.startPC());
+    prevEnd = table.endPC();
+  }
+}
+
+const EHTable* EHAddrSpace::lookup(uint32_t aPC) const {
+  // First start address strictly above aPC; the candidate table precedes it.
+  const auto above = std::upper_bound(mStarts.begin(), mStarts.end(), aPC);
+  if (above == mStarts.begin()) return nullptr;  // aPC is below every table
+  const EHTable& candidate = mTables[(above - mStarts.begin()) - 1];
+  if (aPC >= candidate.endPC()) return nullptr;
+  return &candidate;
+}
+
+const EHEntry* EHTable::lookup(uint32_t aPC) const {
+  MOZ_ASSERT(aPC >= mStartPC);
+  if (aPC >= mEndPC) return nullptr;
+
+  const EHEntry* lo = mEntriesBegin;
+  const EHEntry* hi = mEntriesEnd;
+  MOZ_ASSERT(lo < hi);
+  if (aPC < reinterpret_cast<uint32_t>(lo->startPC.compute())) return nullptr;
+  // Bisect [lo, hi), keeping lo on an entry that starts at or below aPC.
+  while (hi - lo > 1) {
+#ifdef EHABI_UNWIND_MORE_ASSERTS
+    if ((hi - 1)->startPC.compute() < lo->startPC.compute()) {
+      MOZ_CRASH("unsorted exidx");
+    }
+#endif
+    const EHEntry* probe = lo + (hi - lo) / 2;
+    if (reinterpret_cast<uint32_t>(probe->startPC.compute()) <= aPC) {
+      lo = probe;
+    } else {
+      hi = probe;
+    }
+  }
+  return lo;
+}
+
+#if MOZ_LITTLE_ENDIAN()  // ELF data-encoding value matching this build
+static const unsigned char hostEndian = ELFDATA2LSB;
+#elif MOZ_BIG_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2MSB;
+#else
+# error "No endian?"
+#endif
+
+// Async signal unsafe: std::vector::reserve, std::string copy ctor.
+EHTable::EHTable(const void* aELF, size_t aSize, const std::string& aName)
+ : mStartPC(~0), // largest uint32_t
+ mEndPC(0),
+ mEntriesBegin(nullptr),
+ mEntriesEnd(nullptr),
+ mName(aName) {
+ const uint32_t fileHeaderAddr = reinterpret_cast<uint32_t>(aELF);
+
+ if (aSize < sizeof(Elf32_Ehdr)) return;  // too small for an ELF header
+
+ const Elf32_Ehdr& file = *(reinterpret_cast<Elf32_Ehdr*>(fileHeaderAddr));
+ if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 ||
+ file.e_ident[EI_CLASS] != ELFCLASS32 ||
+ file.e_ident[EI_DATA] != hostEndian ||
+ file.e_ident[EI_VERSION] != EV_CURRENT || file.e_machine != EM_ARM ||
+ file.e_version != EV_CURRENT)
+ // e_flags?
+ return;  // not a 32-bit ARM ELF image in host byte order
+
+ MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize);
+ const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0;
+ for (unsigned i = 0; i < file.e_phnum; ++i) {
+ const Elf32_Phdr& phdr = *(reinterpret_cast<Elf32_Phdr*>(
+ fileHeaderAddr + file.e_phoff + i * file.e_phentsize));
+ if (phdr.p_type == PT_ARM_EXIDX) {  // segment describing the index table
+ exidxHdr = &phdr;
+ } else if (phdr.p_type == PT_LOAD) {
+ if (phdr.p_offset == 0) {  // the segment that maps file offset 0
+ zeroHdr = &phdr;
+ }
+ if (phdr.p_flags & PF_X) {  // grow the PC range over executable segments
+ mStartPC = std::min(mStartPC, phdr.p_vaddr);
+ mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz);
+ }
+ }
+ }
+ if (!exidxHdr) return;  // NOTE(review): mBaseAddress stays unset on early exit
+ if (!zeroHdr) return;
+ mBaseAddress = fileHeaderAddr - zeroHdr->p_vaddr;  // in-memory address of vaddr 0
+ mStartPC += mBaseAddress;
+ mEndPC += mBaseAddress;
+ mEntriesBegin =
+ reinterpret_cast<const EHEntry*>(mBaseAddress + exidxHdr->p_vaddr);
+ mEntriesEnd = reinterpret_cast<const EHEntry*>(
+ mBaseAddress + exidxHdr->p_vaddr + exidxHdr->p_memsz);
+}
+
+Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr);
+
+// Async signal safe; returns nullptr until Update() has published a value.
+const EHAddrSpace* EHAddrSpace::Get() { return sCurrent; }
+
+// Collect unwinding information from loaded objects. Calls after the
+// first have no effect. Async signal unsafe.
+void EHAddrSpace::Update() {
+  const EHAddrSpace* space = sCurrent;
+  if (space != nullptr) return;  // an address space is already published
+
+  SharedLibraryInfo libs = SharedLibraryInfo::GetInfoForSelf();
+  std::vector<EHTable> found;
+  for (size_t idx = 0; idx != libs.GetSize(); ++idx) {
+    const SharedLibrary& lib = libs.GetEntry(idx);
+    // FIXME: This isn't correct if the start address isn't p_offset 0, because
+    // the start address will not point at the file header. But this is worked
+    // around by magic number checks in the EHTable constructor.
+    const void* const image = reinterpret_cast<const void*>(lib.GetStart());
+    EHTable table(image, lib.GetEnd() - lib.GetStart(), lib.GetDebugPath());
+    // Only objects that yielded exidx entries are kept.
+    if (table.isValid()) found.push_back(table);
+  }
+
+  space = new EHAddrSpace(found);
+  if (sCurrent.compareExchange(nullptr, space)) return;
+  // Another caller published first: discard ours and re-read theirs.
+  delete space;
+  space = sCurrent;
+}
+
+EHState::EHState(const mcontext_t& context) {  // copy r0-r15 from the context
+#ifdef linux
+ mRegs[0] = context.arm_r0;
+ mRegs[1] = context.arm_r1;
+ mRegs[2] = context.arm_r2;
+ mRegs[3] = context.arm_r3;
+ mRegs[4] = context.arm_r4;
+ mRegs[5] = context.arm_r5;
+ mRegs[6] = context.arm_r6;
+ mRegs[7] = context.arm_r7;
+ mRegs[8] = context.arm_r8;
+ mRegs[9] = context.arm_r9;
+ mRegs[10] = context.arm_r10;
+ mRegs[11] = context.arm_fp;  // r11
+ mRegs[12] = context.arm_ip;  // r12
+ mRegs[13] = context.arm_sp;  // R_SP
+ mRegs[14] = context.arm_lr;  // R_LR
+ mRegs[15] = context.arm_pc;  // R_PC
+#else
+# error "Unhandled OS for ARM EHABI unwinding"
+#endif
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.h b/mozglue/baseprofiler/core/EHABIStackWalk.h
new file mode 100644
index 0000000000..d5f4edc0d7
--- /dev/null
+++ b/mozglue/baseprofiler/core/EHABIStackWalk.h
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI; see the comment at the top of
+ * the .cpp file for details.
+ */
+
+#ifndef mozilla_EHABIStackWalk_h__
+#define mozilla_EHABIStackWalk_h__
+
+#include <stddef.h>
+#include <ucontext.h>
+
+namespace mozilla {
+namespace baseprofiler {
+
+void EHABIStackWalkInit();
+// Records up to aNumFrames (SP, PC) pairs into aSPs/aPCs; returns how many.
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+ void** aPCs, size_t aNumFrames);
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif
diff --git a/mozglue/baseprofiler/core/PageInformation.cpp b/mozglue/baseprofiler/core/PageInformation.cpp
new file mode 100644
index 0000000000..7ce47e86d7
--- /dev/null
+++ b/mozglue/baseprofiler/core/PageInformation.cpp
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PageInformation.h"
+
+#include "BaseProfiler.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+PageInformation::PageInformation(uint64_t aBrowsingContextID,
+ uint64_t aInnerWindowID,
+ const std::string& aUrl,
+ uint64_t aEmbedderInnerWindowID)
+ : mBrowsingContextID(aBrowsingContextID),
+ mInnerWindowID(aInnerWindowID),
+ mUrl(aUrl),
+ mEmbedderInnerWindowID(aEmbedderInnerWindowID),
+ mRefCnt(0) {}  // the count starts at zero; callers AddRef() to take a reference
+
+bool PageInformation::Equals(PageInformation* aOtherPageInfo) const {
+  // Inner window IDs are unique per page, so comparing them is sufficient;
+  // the browsing context ID and the url need not be checked.
+  return aOtherPageInfo->InnerWindowID() == mInnerWindowID;
+}
+
+void PageInformation::StreamJSON(SpliceableJSONWriter& aWriter) const {
+  // The uint64_t IDs are written as doubles. Browsing Context and Inner Window
+  // IDs are created using `nsContentUtils::GenerateProcessSpecificId`, which
+  // is specifically designed to only use 53 of the 64 bits, so they stay
+  // lossless when passed into and out of JS as a double.
+  aWriter.StartObjectElement();
+  aWriter.DoubleProperty("browsingContextID", mBrowsingContextID);
+  aWriter.DoubleProperty("innerWindowID", mInnerWindowID);
+  aWriter.StringProperty("url", mUrl);
+  aWriter.DoubleProperty("embedderInnerWindowID", mEmbedderInnerWindowID);
+  aWriter.EndObject();
+}
+
+size_t PageInformation::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+ return aMallocSizeOf(this);  // only `this` is measured; nothing is added for mUrl
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/PageInformation.h b/mozglue/baseprofiler/core/PageInformation.h
new file mode 100644
index 0000000000..158b172bdf
--- /dev/null
+++ b/mozglue/baseprofiler/core/PageInformation.h
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PageInformation_h
+#define PageInformation_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class SpliceableJSONWriter;
+
+// This class contains information that's relevant to a single page only
+// while the page information is important and registered with the profiler,
+// but regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+// When the page gets unregistered, we keep the profiler buffer position
+// to determine if we are still using this page. If not, we unregister
+// it in the next page registration.
+class PageInformation final {
+ public:
+  PageInformation(uint64_t aBrowsingContextID, uint64_t aInnerWindowID,
+                  const std::string& aUrl, uint64_t aEmbedderInnerWindowID);
+
+  // Using hand-rolled ref-counting, because RefCounted.h macros don't produce
+  // the same code between mozglue and libxul, see bug 1536656.
+  MFBT_API void AddRef() const { ++mRefCnt; }
+  MFBT_API void Release() const {
+    MOZ_ASSERT(int32_t(mRefCnt) > 0);
+    if (--mRefCnt == 0) {  // Only dropping the last reference destroys us.
+      delete this;
+    }
+  }
+
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+  bool Equals(PageInformation* aOtherPageInfo) const;
+  void StreamJSON(SpliceableJSONWriter& aWriter) const;
+
+  uint64_t InnerWindowID() const { return mInnerWindowID; }
+  uint64_t BrowsingContextID() const { return mBrowsingContextID; }
+  const std::string& Url() const { return mUrl; }
+  uint64_t EmbedderInnerWindowID() const { return mEmbedderInnerWindowID; }
+
+  Maybe<uint64_t> BufferPositionWhenUnregistered() const {  // empty until set
+    return mBufferPositionWhenUnregistered;
+  }
+
+  void NotifyUnregistered(uint64_t aBufferPosition) {
+    mBufferPositionWhenUnregistered = Some(aBufferPosition);
+  }
+
+ private:
+  const uint64_t mBrowsingContextID;
+  const uint64_t mInnerWindowID;
+  const std::string mUrl;
+  const uint64_t mEmbedderInnerWindowID;
+
+  // Holds the buffer position when page is unregistered.
+  // It's used to determine if we still use this page in the profiler or
+  // not.
+  Maybe<uint64_t> mBufferPositionWhenUnregistered;
+
+  mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // PageInformation_h
diff --git a/mozglue/baseprofiler/core/PlatformMacros.h b/mozglue/baseprofiler/core/PlatformMacros.h
new file mode 100644
index 0000000000..c72e94c128
--- /dev/null
+++ b/mozglue/baseprofiler/core/PlatformMacros.h
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PLATFORM_MACROS_H
+#define PLATFORM_MACROS_H
+
+// Define platform selection macros in a consistent way. Don't add anything
+// else to this file, so it can remain freestanding. The primary factorisation
+// is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined
+// too, since they are sometimes convenient.
+//
+// Note: "GP" is short for "Gecko Profiler".
+
+#undef GP_PLAT_x86_android
+#undef GP_PLAT_amd64_android
+#undef GP_PLAT_arm_android
+#undef GP_PLAT_arm64_android
+#undef GP_PLAT_x86_linux
+#undef GP_PLAT_amd64_linux
+#undef GP_PLAT_arm_linux
+#undef GP_PLAT_mips64_linux
+#undef GP_PLAT_amd64_darwin
+#undef GP_PLAT_arm64_darwin
+#undef GP_PLAT_x86_windows
+#undef GP_PLAT_amd64_windows
+#undef GP_PLAT_arm64_windows
+
+#undef GP_ARCH_x86
+#undef GP_ARCH_amd64
+#undef GP_ARCH_arm
+#undef GP_ARCH_arm64
+#undef GP_ARCH_mips64
+
+#undef GP_OS_android
+#undef GP_OS_linux
+#undef GP_OS_darwin
+#undef GP_OS_windows
+
+// We test __ANDROID__ before __linux__ because __linux__ is defined on both
+// Android and Linux, whereas GP_OS_android is not defined on vanilla Linux.
+
+#if defined(__ANDROID__) && defined(__i386__)
+# define GP_PLAT_x86_android 1
+# define GP_ARCH_x86 1
+# define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__x86_64__)
+# define GP_PLAT_amd64_android 1
+# define GP_ARCH_amd64 1
+# define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__arm__)
+# define GP_PLAT_arm_android 1
+# define GP_ARCH_arm 1
+# define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__aarch64__)
+# define GP_PLAT_arm64_android 1
+# define GP_ARCH_arm64 1
+# define GP_OS_android 1
+
+#elif defined(__linux__) && defined(__i386__)
+# define GP_PLAT_x86_linux 1
+# define GP_ARCH_x86 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__x86_64__)
+# define GP_PLAT_amd64_linux 1
+# define GP_ARCH_amd64 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__arm__)
+# define GP_PLAT_arm_linux 1
+# define GP_ARCH_arm 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__aarch64__)
+# define GP_PLAT_arm64_linux 1
+# define GP_ARCH_arm64 1
+# define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__mips64)
+# define GP_PLAT_mips64_linux 1
+# define GP_ARCH_mips64 1
+# define GP_OS_linux 1
+
+#elif defined(__APPLE__) && defined(__aarch64__)
+# define GP_PLAT_arm64_darwin 1
+# define GP_ARCH_arm64 1
+# define GP_OS_darwin 1
+
+#elif defined(__APPLE__) && defined(__x86_64__)
+# define GP_PLAT_amd64_darwin 1
+# define GP_ARCH_amd64 1
+# define GP_OS_darwin 1
+
+#elif defined(__FreeBSD__) && defined(__x86_64__)
+# define GP_PLAT_amd64_freebsd 1
+# define GP_ARCH_amd64 1
+# define GP_OS_freebsd 1
+
+#elif defined(__FreeBSD__) && defined(__aarch64__)
+# define GP_PLAT_arm64_freebsd 1
+# define GP_ARCH_arm64 1
+# define GP_OS_freebsd 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+ (defined(_M_IX86) || defined(__i386__))
+# define GP_PLAT_x86_windows 1
+# define GP_ARCH_x86 1
+# define GP_OS_windows 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+ (defined(_M_X64) || defined(__x86_64__))
+# define GP_PLAT_amd64_windows 1
+# define GP_ARCH_amd64 1
+# define GP_OS_windows 1
+
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+# define GP_PLAT_arm64_windows 1
+# define GP_ARCH_arm64 1
+# define GP_OS_windows 1
+
+#else
+# error "Unsupported platform"
+#endif
+
+#endif /* ndef PLATFORM_MACROS_H */
diff --git a/mozglue/baseprofiler/core/ProfileBuffer.cpp b/mozglue/baseprofiler/core/ProfileBuffer.cpp
new file mode 100644
index 0000000000..f39244ee91
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBuffer.cpp
@@ -0,0 +1,210 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBuffer.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+ProfileBuffer::ProfileBuffer(ProfileChunkedBuffer& aBuffer)  // Wraps aBuffer as this ProfileBuffer's entry storage.
+ : mEntries(aBuffer) {  // mEntries is a reference member: entries live in aBuffer itself.
+ // Assume the given buffer is in-session.
+ MOZ_ASSERT(mEntries.IsInSession());
+}
+
+/* static */  // Copies aEntry into aProfileChunkedBuffer and returns the block index reported by PutFrom().
+ProfileBufferBlockIndex ProfileBuffer::AddEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer,
+ const ProfileBufferEntry& aEntry) {
+ switch (aEntry.GetKind()) {  // One case per kind, generated from the SWITCH_KIND macro below.
+#define SWITCH_KIND(KIND, TYPE, SIZE) \
+ case ProfileBufferEntry::Kind::KIND: { \
+ return aProfileChunkedBuffer.PutFrom(&aEntry, 1 + (SIZE)); \
+ break; \
+ }
+
+ FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(SWITCH_KIND)  // Each case stores 1 + SIZE bytes starting at &aEntry (presumably the kind byte plus the payload -- confirm against ProfileBufferEntry.h).
+
+#undef SWITCH_KIND
+ default:  // Kind without a generated case.
+ MOZ_ASSERT(false, "Unhandled baseprofiler::ProfilerBuffer entry KIND");
+ return ProfileBufferBlockIndex{};  // Falls back to a value-initialized index.
+ }
+}
+
+// Called from signal, call only reentrant functions. Stores aEntry in mEntries and returns its position converted to a plain buffer index.
+uint64_t ProfileBuffer::AddEntry(const ProfileBufferEntry& aEntry) {
+ return AddEntry(mEntries, aEntry).ConvertToProfileBufferIndex();
+}
+
+/* static */  // Wraps aThreadId in a ThreadId entry and adds it to aProfileChunkedBuffer.
+ProfileBufferBlockIndex ProfileBuffer::AddThreadIdEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer, int aThreadId) {
+ return AddEntry(aProfileChunkedBuffer,
+ ProfileBufferEntry::ThreadId(aThreadId));
+}
+
+uint64_t ProfileBuffer::AddThreadIdEntry(int aThreadId) {  // Adds a ThreadId entry for aThreadId to mEntries; returns its position as a plain buffer index.
+ return AddThreadIdEntry(mEntries, aThreadId).ConvertToProfileBufferIndex();
+}
+
+void ProfileBuffer::CollectCodeLocation(  // Records one frame: Label and FrameFlags always, every other entry only when its argument is present.
+ const char* aLabel, const char* aStr, uint32_t aFrameFlags,
+ uint64_t aInnerWindowID, const Maybe<uint32_t>& aLineNumber,
+ const Maybe<uint32_t>& aColumnNumber,
+ const Maybe<ProfilingCategoryPair>& aCategoryPair) {
+ AddEntry(ProfileBufferEntry::Label(aLabel));
+ AddEntry(ProfileBufferEntry::FrameFlags(uint64_t(aFrameFlags)));
+
+ if (aStr) {
+ // Store the string using one or more DynamicStringFragment entries.
+ size_t strLen = strlen(aStr) + 1; // +1 for the null terminator
+ // If larger than the prescribed limit, we will cut the string and end it
+ // with an ellipsis.
+ const bool tooBig = strLen > kMaxFrameKeyLength;
+ if (tooBig) {
+ strLen = kMaxFrameKeyLength;
+ }
+ char chars[ProfileBufferEntry::kNumChars];  // NOTE(review): not initialized; when the final fragment is shorter than kNumChars, the bytes after its '\0' are leftovers.
+ for (size_t j = 0;; j += ProfileBufferEntry::kNumChars) {
+ // Store up to kNumChars characters in the entry.
+ size_t len = ProfileBufferEntry::kNumChars;
+ const bool last = j + len >= strLen;  // True once this fragment reaches the (possibly clamped) end of the string.
+ if (last) {
+ // Only the last entry may be smaller than kNumChars.
+ len = strLen - j;
+ if (tooBig) {
+ // That last entry is part of a too-big string, replace the end
+ // characters with an ellipsis "...".
+ len = std::max(len, size_t(4));  // Guarantees room for the three dots plus the terminator.
+ chars[len - 4] = '.';
+ chars[len - 3] = '.';
+ chars[len - 2] = '.';
+ chars[len - 1] = '\0';
+ // Make sure the memcpy will not overwrite our ellipsis!
+ len -= 4;
+ }
+ }
+ memcpy(chars, &aStr[j], len);
+ AddEntry(ProfileBufferEntry::DynamicStringFragment(chars));
+ if (last) {
+ break;
+ }
+ }
+ }
+
+ if (aInnerWindowID) {  // A zero ID is not recorded.
+ AddEntry(ProfileBufferEntry::InnerWindowID(aInnerWindowID));
+ }
+
+ if (aLineNumber) {
+ AddEntry(ProfileBufferEntry::LineNumber(*aLineNumber));
+ }
+
+ if (aColumnNumber) {
+ AddEntry(ProfileBufferEntry::ColumnNumber(*aColumnNumber));
+ }
+
+ if (aCategoryPair.isSome()) {
+ AddEntry(ProfileBufferEntry::CategoryPair(int(*aCategoryPair)));  // The enum value is stored as an int.
+ }
+}
+
+size_t ProfileBuffer::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {  // Reports only what mEntries.SizeOfExcludingThis() measures.
+ // Measurement of the following members may be added later if DMD finds it
+ // is worthwhile:
+ // - memory pointed to by the elements within mEntries
+ return mEntries.SizeOfExcludingThis(aMallocSizeOf);
+}
+
+size_t ProfileBuffer::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {  // aMallocSizeOf(this) plus SizeOfExcludingThis().
+ return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+}
+
+void ProfileBuffer::CollectOverheadStats(TimeDuration aSamplingTime,  // Folds one sampling's timings into the stats members and appends them to the buffer.
+ TimeDuration aLocking,
+ TimeDuration aCleaning,
+ TimeDuration aCounters,
+ TimeDuration aThreads) {
+ double timeUs = aSamplingTime.ToMilliseconds() * 1000.0;  // Milliseconds -> microseconds.
+ if (mFirstSamplingTimeUs == 0.0) {  // 0.0 is the member's initial value, i.e. nothing recorded yet.
+ mFirstSamplingTimeUs = timeUs;
+ } else {
+ // Note that we'll have 1 fewer interval than other numbers (because
+ // we need both ends of an interval to know its duration). The final
+ // difference should be insignificant over the expected many thousands
+ // of iterations.
+ mIntervalsUs.Count(timeUs - mLastSamplingTimeUs);
+ }
+ mLastSamplingTimeUs = timeUs;
+ // Time to take the lock before sampling.
+ double lockingUs = aLocking.ToMilliseconds() * 1000.0;
+ // Time to discard expired data.
+ double cleaningUs = aCleaning.ToMilliseconds() * 1000.0;
+ // Time to gather all counters.
+ double countersUs = aCounters.ToMilliseconds() * 1000.0;
+ // Time to sample all threads.
+ double threadsUs = aThreads.ToMilliseconds() * 1000.0;
+
+ // Add to our gathered stats.
+ mOverheadsUs.Count(lockingUs + cleaningUs + countersUs + threadsUs);  // Total overhead is the sum of the four parts.
+ mLockingsUs.Count(lockingUs);
+ mCleaningsUs.Count(cleaningUs);
+ mCountersUs.Count(countersUs);
+ mThreadsUs.Count(threadsUs);
+
+ // Record details in buffer.
+ AddEntry(ProfileBufferEntry::ProfilerOverheadTime(timeUs));  // One time entry followed by four durations, in this fixed order.
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(lockingUs));
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(cleaningUs));
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(countersUs));
+ AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(threadsUs));
+}
+
+ProfilerBufferInfo ProfileBuffer::GetProfilerBufferInfo() const {  // Bundles the buffer range, an entry count and the sampling stats members.
+ return {BufferRangeStart(),
+ BufferRangeEnd(),
+ static_cast<uint32_t>(*mEntries.BufferLength() /
+ 8), // 8 bytes per entry. NOTE(review): BufferLength() is dereferenced unchecked -- presumably safe only while in-session; confirm.
+ mIntervalsUs,
+ mOverheadsUs,
+ mLockingsUs,
+ mCleaningsUs,
+ mCountersUs,
+ mThreadsUs};
+}
+
+/* ProfileBufferCollector */
+
+void ProfileBufferCollector::CollectNativeLeafAddr(void* aAddr) {  // Stores aAddr as a NativeLeafAddr entry in mBuf.
+ mBuf.AddEntry(ProfileBufferEntry::NativeLeafAddr(aAddr));
+}
+
+void ProfileBufferCollector::CollectProfilingStackFrame(  // Forwards one ProfilingStackFrame to mBuf.CollectCodeLocation().
+ const ProfilingStackFrame& aFrame) {
+ // WARNING: this function runs within the profiler's "critical section".
+
+ MOZ_ASSERT(aFrame.isLabelFrame() ||
+ (aFrame.isJsFrame() && !aFrame.isOSRFrame()));
+
+ const char* label = aFrame.label();
+ const char* dynamicString = aFrame.dynamicString();
+ Maybe<uint32_t> line;  // Left empty: no line number is recorded by this function.
+ Maybe<uint32_t> column;  // Left empty: no column number is recorded by this function.
+
+ MOZ_ASSERT(aFrame.isLabelFrame());  // Stricter than the assertion above: only label frames pass.
+
+ mBuf.CollectCodeLocation(label, dynamicString, aFrame.flags(),
+ aFrame.realmID(), line, column,  // realmID() is passed in the aInnerWindowID position.
+ Some(aFrame.categoryPair()));
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfileBuffer.h b/mozglue/baseprofiler/core/ProfileBuffer.h
new file mode 100644
index 0000000000..b7a0af5e93
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBuffer.h
@@ -0,0 +1,186 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PROFILE_BUFFER_H
+#define MOZ_PROFILE_BUFFER_H
+
+#include "ProfileBufferEntry.h"
+
+#include "mozilla/Maybe.h"
+#include "mozilla/PowerOfTwo.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Class storing most profiling data in a ProfileChunkedBuffer.
+//
+// This class is used as a queue of entries which, after construction, never
+// allocates. This makes it safe to use in the profiler's "critical section".
+class ProfileBuffer final {
+ public:
+ // ProfileBuffer constructor
+ // @param aBuffer The in-session ProfileChunkedBuffer to use as buffer
+ // manager.
+ explicit ProfileBuffer(ProfileChunkedBuffer& aBuffer);
+
+ ProfileChunkedBuffer& UnderlyingChunkedBuffer() const { return mEntries; }  // Hands out the non-const buffer reference even from a const ProfileBuffer.
+
+ bool IsThreadSafe() const { return mEntries.IsThreadSafe(); }
+
+ // Add |aEntry| to the buffer, ignoring what kind of entry it is.
+ // Returns the position of the entry.
+ uint64_t AddEntry(const ProfileBufferEntry& aEntry);
+
+ // Add to the buffer a sample start (ThreadId) entry for aThreadId.
+ // Returns the position of the entry.
+ uint64_t AddThreadIdEntry(int aThreadId);
+
+ void CollectCodeLocation(const char* aLabel, const char* aStr,
+ uint32_t aFrameFlags, uint64_t aInnerWindowID,
+ const Maybe<uint32_t>& aLineNumber,
+ const Maybe<uint32_t>& aColumnNumber,
+ const Maybe<ProfilingCategoryPair>& aCategoryPair);
+
+ // Maximum size of a frameKey string that we'll handle.
+ static const size_t kMaxFrameKeyLength = 512;  // CollectCodeLocation compares this against strlen + 1, so the terminator counts.
+
+ // Stream JSON for samples in the buffer to aWriter, using the supplied
+ // UniqueStacks object.
+ // Only streams samples for the given thread ID and which were taken at or
+ // after aSinceTime. If ID is 0, ignore the stored thread ID; this should only
+ // be used when the buffer contains only one sample.
+ // Return the thread ID of the streamed sample(s), or 0.
+ int StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
+ double aSinceTime, UniqueStacks& aUniqueStacks) const;
+
+ void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime,
+ UniqueStacks& aUniqueStacks) const;
+ void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
+ double aSinceTime) const;
+ void StreamProfilerOverheadToJSON(SpliceableJSONWriter& aWriter,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime) const;
+ void StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime) const;
+
+ // Find (via |aLastSample|) the most recent sample for the thread denoted by
+ // |aThreadId| and clone it, patching in the current time as appropriate.
+ // Mutate |aLastSample| to point to the newly inserted sample.
+ // Returns whether duplication was successful.
+ bool DuplicateLastSample(int aThreadId, const TimeStamp& aProcessStartTime,
+ Maybe<uint64_t>& aLastSample);
+
+ void DiscardSamplesBeforeTime(double aTime);
+
+ size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const;
+ size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+
+ void CollectOverheadStats(TimeDuration aSamplingTime, TimeDuration aLocking,
+ TimeDuration aCleaning, TimeDuration aCounters,
+ TimeDuration aThreads);
+
+ ProfilerBufferInfo GetProfilerBufferInfo() const;
+
+ private:
+ // Add |aEntry| to the provided ProfileChunkedBuffer.
+ // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+ // that is not attached to a `ProfileBuffer`.
+ static ProfileBufferBlockIndex AddEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer,
+ const ProfileBufferEntry& aEntry);
+
+ // Add a sample start (ThreadId) entry for aThreadId to the provided
+ // ProfileChunkedBuffer. Returns the position of the entry.
+ // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+ // that is not attached to a `ProfileBuffer`.
+ static ProfileBufferBlockIndex AddThreadIdEntry(
+ ProfileChunkedBuffer& aProfileChunkedBuffer, int aThreadId);
+
+ // The storage in which this ProfileBuffer stores its entries.
+ ProfileChunkedBuffer& mEntries;  // Held by reference; bound once in the constructor.
+
+ public:
+ // `BufferRangeStart()` and `BufferRangeEnd()` return `uint64_t` values
+ // corresponding to the first entry and past the last entry stored in
+ // `mEntries`.
+ //
+ // The returned values are not guaranteed to be stable, because other threads
+ // may also be accessing the buffer concurrently. But they will always
+ // increase, and can therefore give an indication of how far these values have
+ // *at least* reached. In particular:
+ // - Entries whose index is strictly less than `BufferRangeStart()` have been
+ // discarded by now, so any related data may also be safely discarded.
+ // - It is safe to try and read entries at any index strictly less than
+ // `BufferRangeEnd()` -- but note that these reads may fail by the time you
+ // request them, as old entries get overwritten by new ones.
+ uint64_t BufferRangeStart() const { return mEntries.GetState().mRangeStart; }
+ uint64_t BufferRangeEnd() const { return mEntries.GetState().mRangeEnd; }
+
+ private:
+ // Single pre-allocated chunk (to avoid spurious mallocs), used when:
+ // - Duplicating sleeping stacks (hence scExpectedMaximumStackSize).
+ // - Adding JIT info.
+ // - Streaming stacks to JSON.
+ // Mutable because it's accessed from non-multithreaded const methods.
+ mutable ProfileBufferChunkManagerSingle mWorkerChunkManager{
+ ProfileBufferChunk::Create(
+ ProfileBufferChunk::SizeofChunkMetadata() +
+ ProfileBufferChunkManager::scExpectedMaximumStackSize)};
+
+ // Time from launch (us) when first sampling was recorded.
+ double mFirstSamplingTimeUs = 0.0;  // CollectOverheadStats treats 0.0 as "not recorded yet".
+ // Time from launch (us) when last sampling was recorded.
+ double mLastSamplingTimeUs = 0.0;
+ // Sampling stats: Interval (us) between successive samplings.
+ ProfilerStats mIntervalsUs;
+ // Sampling stats: Total duration (us) of each sampling. (Split detail below.)
+ ProfilerStats mOverheadsUs;
+ // Sampling stats: Time (us) to acquire the lock before sampling.
+ ProfilerStats mLockingsUs;
+ // Sampling stats: Time (us) to discard expired data.
+ ProfilerStats mCleaningsUs;
+ // Sampling stats: Time (us) to collect counter data.
+ ProfilerStats mCountersUs;
+ // Sampling stats: Time (us) to sample thread stacks.
+ ProfilerStats mThreadsUs;
+};
+
+/**
+ * Helper type used to implement ProfilerStackCollector. This type is used as
+ * the collector for MergeStacks by ProfileBuffer. It holds a reference to the
+ * buffer and the position in that buffer of the sample being collected; no
+ * feature flags are stored in this class.
+ */
+class ProfileBufferCollector final : public ProfilerStackCollector {
+ public:
+ ProfileBufferCollector(ProfileBuffer& aBuf, uint64_t aSamplePos)
+ : mBuf(aBuf), mSamplePositionInBuffer(aSamplePos) {}
+
+ Maybe<uint64_t> SamplePositionInBuffer() override {  // Always returns a value: the position given at construction.
+ return Some(mSamplePositionInBuffer);
+ }
+
+ Maybe<uint64_t> BufferRangeStart() override {  // Always returns a value, read from mBuf at call time.
+ return Some(mBuf.BufferRangeStart());
+ }
+
+ virtual void CollectNativeLeafAddr(void* aAddr) override;
+ virtual void CollectProfilingStackFrame(
+ const ProfilingStackFrame& aFrame) override;
+
+ private:
+ ProfileBuffer& mBuf;  // Buffer that receives the collected entries.
+ uint64_t mSamplePositionInBuffer;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif
diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.cpp b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp
new file mode 100644
index 0000000000..3d3f68b655
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp
@@ -0,0 +1,1337 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBufferEntry.h"
+
+#include <ostream>
+#include <type_traits>
+
+#include "mozilla/Logging.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/StackWalk.h"
+
+#include "BaseProfiler.h"
+#include "mozilla/BaseProfilerMarkers.h"
+#include "platform.h"
+#include "ProfileBuffer.h"
+#include "ProfilerBacktrace.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN ProfileBufferEntry
+
+ProfileBufferEntry::ProfileBufferEntry()  // Default entry: INVALID kind with the storage initialized from eight zeros.
+ : mKind(Kind::INVALID), mStorage{0, 0, 0, 0, 0, 0, 0, 0} {}
+
+// aString must be a static string: only the pointer value is copied into mStorage, not the characters.
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, const char* aString)
+ : mKind(aKind) {
+ memcpy(mStorage, &aString, sizeof(aString));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, char aChars[kNumChars])  // Stores a string fragment by value.
+ : mKind(aKind) {
+ memcpy(mStorage, aChars, kNumChars);  // Always copies exactly kNumChars bytes, whatever the fragment's length.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, void* aPtr) : mKind(aKind) {  // Stores the pointer value itself.
+ memcpy(mStorage, &aPtr, sizeof(aPtr));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, double aDouble)  // Stores the object representation of aDouble.
+ : mKind(aKind) {
+ memcpy(mStorage, &aDouble, sizeof(aDouble));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt) : mKind(aKind) {  // Writes only sizeof(int) bytes; this constructor does not touch the rest of mStorage.
+ memcpy(mStorage, &aInt, sizeof(aInt));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64)  // Stores the object representation of aInt64.
+ : mKind(aKind) {
+ memcpy(mStorage, &aInt64, sizeof(aInt64));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64)  // Stores the object representation of aUint64.
+ : mKind(aKind) {
+ memcpy(mStorage, &aUint64, sizeof(aUint64));
+}
+
+const char* ProfileBufferEntry::GetString() const {  // Reads a pointer value back from the start of mStorage; the entry does not hold the characters.
+ const char* result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+void* ProfileBufferEntry::GetPtr() const {  // Reads a void* back from the start of mStorage.
+ void* result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+double ProfileBufferEntry::GetDouble() const {  // Reads a double back from the start of mStorage.
+ double result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+int ProfileBufferEntry::GetInt() const {  // Reads sizeof(int) bytes back from the start of mStorage.
+ int result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+int64_t ProfileBufferEntry::GetInt64() const {  // Reads an int64_t back from the start of mStorage.
+ int64_t result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+uint64_t ProfileBufferEntry::GetUint64() const {  // Reads a uint64_t back from the start of mStorage.
+ uint64_t result;
+ memcpy(&result, mStorage, sizeof(result));
+ return result;
+}
+
+void ProfileBufferEntry::CopyCharsInto(char (&aOutArray)[kNumChars]) const {  // Copies all kNumChars stored bytes; no terminator is appended here.
+ memcpy(aOutArray, mStorage, kNumChars);
+}
+
+// END ProfileBufferEntry
+////////////////////////////////////////////////////////////////////////
+
+// As mentioned in ProfileBufferEntry.h, the JSON format contains many
+// arrays whose elements are laid out according to various schemas to help
+// de-duplication. This RAII class helps write these arrays by keeping track of
+// the last non-null element written and adding the appropriate number of null
+// elements when writing new non-null elements. It also automatically opens and
+// closes an array element on the given JSON writer.
+//
+// You grant the AutoArraySchemaWriter exclusive access to the JSONWriter (and,
+// for AutoArraySchemaWithStringsWriter, to the UniqueJSONStrings object) for its
+// lifetime. Do not access them independently while the writer object is alive.
+// NOTE(review): this class defines no FreeFormElement(); derived classes reach
+// the underlying writer through the protected Writer() accessor instead.
+//
+// Example usage:
+//
+// // Define the schema of elements in this type of array: [FOO, BAR, BAZ]
+// enum Schema : uint32_t {
+// FOO = 0,
+// BAR = 1,
+// BAZ = 2
+// };
+//
+// AutoArraySchemaWriter writer(someJsonWriter);
+// if (shouldWriteFoo) {
+// writer.IntElement(FOO, getFoo());
+// }
+// ... etc ...
+//
+// The elements need to be added in-order.
+class MOZ_RAII AutoArraySchemaWriter {
+ public:
+ explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter)
+ : mJSONWriter(aWriter), mNextFreeIndex(0) {
+ mJSONWriter.StartArrayElement(SpliceableJSONWriter::SingleLineStyle);  // Opens the array that the destructor closes.
+ }
+
+ ~AutoArraySchemaWriter() { mJSONWriter.EndArray(); }
+
+ template <typename T>
+ void IntElement(uint32_t aIndex, T aValue) {  // Writes aValue at schema slot aIndex, null-filling any skipped slots first.
+ static_assert(!std::is_same_v<T, uint64_t>,
+ "Narrowing uint64 -> int64 conversion not allowed");
+ FillUpTo(aIndex);
+ mJSONWriter.IntElement(static_cast<int64_t>(aValue));
+ }
+
+ void DoubleElement(uint32_t aIndex, double aValue) {
+ FillUpTo(aIndex);
+ mJSONWriter.DoubleElement(aValue);
+ }
+
+ void BoolElement(uint32_t aIndex, bool aValue) {
+ FillUpTo(aIndex);
+ mJSONWriter.BoolElement(aValue);
+ }
+
+ protected:
+ SpliceableJSONWriter& Writer() { return mJSONWriter; }
+
+ void FillUpTo(uint32_t aIndex) {  // Must be called with non-decreasing, not-yet-written indexes (asserted below).
+ MOZ_ASSERT(aIndex >= mNextFreeIndex);
+ mJSONWriter.NullElements(aIndex - mNextFreeIndex);  // One null per schema slot skipped since the last write.
+ mNextFreeIndex = aIndex + 1;  // The caller writes slot aIndex right after this returns.
+ }
+
+ private:
+ SpliceableJSONWriter& mJSONWriter;
+ uint32_t mNextFreeIndex;  // Lowest schema slot that has not been written yet.
+};
+
+// Same as AutoArraySchemaWriter, but this can also write strings (output as
+// indexes into the table of unique strings).
+class MOZ_RAII AutoArraySchemaWithStringsWriter : public AutoArraySchemaWriter {
+ public:
+ AutoArraySchemaWithStringsWriter(SpliceableJSONWriter& aWriter,
+ UniqueJSONStrings& aStrings)
+ : AutoArraySchemaWriter(aWriter), mStrings(aStrings) {}
+
+ void StringElement(uint32_t aIndex, const Span<const char>& aValue) {  // Null-fills up to aIndex, then lets mStrings write the element for aValue.
+ FillUpTo(aIndex);
+ mStrings.WriteElement(Writer(), aValue);
+ }
+
+ private:
+ UniqueJSONStrings& mStrings;  // Held by reference for the lifetime of this writer.
+};
+
+UniqueStacks::StackKey UniqueStacks::BeginStack(const FrameKey& aFrame) {  // Starts a stack consisting of the single frame aFrame (registered on first use).
+ return StackKey(GetOrAddFrameIndex(aFrame));
+}
+
+UniqueStacks::StackKey UniqueStacks::AppendFrame(const StackKey& aStack,  // Returns a key for aStack extended by aFrame; registers aStack and aFrame if they are new.
+ const FrameKey& aFrame) {
+ return StackKey(aStack, GetOrAddStackIndex(aStack),
+ GetOrAddFrameIndex(aFrame));
+}
+
+bool UniqueStacks::FrameKey::NormalFrameData::operator==(  // Equal only when all six compared members match.
+ const NormalFrameData& aOther) const {
+ return mLocation == aOther.mLocation &&
+ mRelevantForJS == aOther.mRelevantForJS &&
+ mInnerWindowID == aOther.mInnerWindowID && mLine == aOther.mLine &&
+ mColumn == aOther.mColumn && mCategoryPair == aOther.mCategoryPair;
+}
+
+UniqueStacks::UniqueStacks() : mUniqueStrings(MakeUnique<UniqueJSONStrings>()) {  // Allocates the string table and opens both table writers as bare lists.
+ mFrameTableWriter.StartBareList();  // Closed by SpliceFrameTableElements().
+ mStackTableWriter.StartBareList();  // Closed by SpliceStackTableElements().
+}
+
+uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) {  // Returns aStack's index, registering and streaming it on first sight.
+ uint32_t count = mStackToIndexMap.count();  // Becomes the index of a newly added stack.
+ auto entry = mStackToIndexMap.lookupForAdd(aStack);
+ if (entry) {
+ MOZ_ASSERT(entry->value() < count);
+ return entry->value();
+ }
+
+ MOZ_RELEASE_ASSERT(mStackToIndexMap.add(entry, aStack, count));  // A failed insertion trips this release assertion.
+ StreamStack(aStack);  // Streamed once, at the moment the stack is first added.
+ return count;
+}
+
+uint32_t UniqueStacks::GetOrAddFrameIndex(const FrameKey& aFrame) {  // Returns aFrame's index, registering and streaming it on first sight.
+ uint32_t count = mFrameToIndexMap.count();  // Becomes the index of a newly added frame.
+ auto entry = mFrameToIndexMap.lookupForAdd(aFrame);
+ if (entry) {
+ MOZ_ASSERT(entry->value() < count);
+ return entry->value();
+ }
+
+ MOZ_RELEASE_ASSERT(mFrameToIndexMap.add(entry, aFrame, count));  // A failed insertion trips this release assertion.
+ StreamNonJITFrame(aFrame);  // Streamed once, at the moment the frame is first added.
+ return count;
+}
+
+void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) {  // Closes the frame list and moves its accumulated output into aWriter.
+ mFrameTableWriter.EndBareList();
+ aWriter.TakeAndSplice(mFrameTableWriter.TakeChunkedWriteFunc());
+}
+
+void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) {  // Closes the stack list and moves its accumulated output into aWriter.
+ mStackTableWriter.EndBareList();
+ aWriter.TakeAndSplice(mStackTableWriter.TakeChunkedWriteFunc());
+}
+
+void UniqueStacks::StreamStack(const StackKey& aStack) {  // Appends one [PREFIX, FRAME] row to the stack table writer.
+ enum Schema : uint32_t { PREFIX = 0, FRAME = 1 };
+
+ AutoArraySchemaWriter writer(mStackTableWriter);
+ if (aStack.mPrefixStackIndex.isSome()) {  // Without a prefix, the FRAME write below null-fills the PREFIX slot.
+ writer.IntElement(PREFIX, *aStack.mPrefixStackIndex);
+ }
+ writer.IntElement(FRAME, aStack.mFrameIndex);
+}
+
+void UniqueStacks::StreamNonJITFrame(const FrameKey& aFrame) {  // Appends one row describing aFrame to the frame table writer.
+ using NormalFrameData = FrameKey::NormalFrameData;
+
+ enum Schema : uint32_t {
+ LOCATION = 0,
+ RELEVANT_FOR_JS = 1,
+ INNER_WINDOW_ID = 2,
+ IMPLEMENTATION = 3,  // Never written by this function; null-filled when a later column is written.
+ OPTIMIZATIONS = 4,  // Never written by this function; null-filled when a later column is written.
+ LINE = 5,
+ COLUMN = 6,
+ CATEGORY = 7,
+ SUBCATEGORY = 8
+ };
+
+ AutoArraySchemaWithStringsWriter writer(mFrameTableWriter, *mUniqueStrings);
+
+ const NormalFrameData& data = aFrame.mData.as<NormalFrameData>();
+ writer.StringElement(LOCATION, data.mLocation);
+ writer.BoolElement(RELEVANT_FOR_JS, data.mRelevantForJS);
+
+ // It's okay to convert uint64_t to double here because DOM always creates IDs
+ // that are convertible to double.
+ writer.DoubleElement(INNER_WINDOW_ID, data.mInnerWindowID);
+
+ if (data.mLine.isSome()) {
+ writer.IntElement(LINE, *data.mLine);
+ }
+ if (data.mColumn.isSome()) {
+ writer.IntElement(COLUMN, *data.mColumn);
+ }
+ if (data.mCategoryPair.isSome()) {  // Category and subcategory are written together or not at all.
+ const ProfilingCategoryPairInfo& info =
+ GetProfilingCategoryPairInfo(*data.mCategoryPair);
+ writer.IntElement(CATEGORY, uint32_t(info.mCategory));
+ writer.IntElement(SUBCATEGORY, info.mSubcategoryIndex);
+ }
+}
+
+struct CStringWriteFunc : public JSONWriteFunc {  // JSONWriteFunc that appends everything written to a caller-supplied std::string.
+ std::string& mBuffer; // The struct must not outlive this buffer
+ explicit CStringWriteFunc(std::string& aBuffer) : mBuffer(aBuffer) {}
+
+ void Write(const Span<const char>& aStr) override {
+ mBuffer.append(aStr.data(), aStr.size());  // Appends exactly aStr.size() bytes.
+ }
+};
+
+struct ProfileSample {  // Values for one row written by WriteSample().
+ uint32_t mStack;  // Written as the STACK column; has no default initializer.
+ double mTime;  // Written as the TIME column; has no default initializer.
+ Maybe<double> mResponsiveness;  // Written as the EVENT_DELAY column only when set.
+};
+
+static void WriteSample(SpliceableJSONWriter& aWriter,  // Writes aSample as one [STACK, TIME, EVENT_DELAY] array element.
+ const ProfileSample& aSample) {
+ enum Schema : uint32_t {
+ STACK = 0,
+ TIME = 1,
+ EVENT_DELAY = 2,
+ };
+
+ AutoArraySchemaWriter writer(aWriter);
+
+ writer.IntElement(STACK, aSample.mStack);
+
+ writer.DoubleElement(TIME, aSample.mTime);
+
+ if (aSample.mResponsiveness.isSome()) {  // Otherwise the row simply ends after TIME.
+ writer.DoubleElement(EVENT_DELAY, *aSample.mResponsiveness);
+ }
+}
+
+class EntryGetter {  // Forward cursor over a ProfileChunkedBuffer::Reader that stops only on "legacy" ProfileBufferEntry blocks.
+ public:
+ explicit EntryGetter(ProfileChunkedBuffer::Reader& aReader,
+ uint64_t aInitialReadPos = 0)
+ : mBlockIt(
+ aReader.At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+ aInitialReadPos))),
+ mBlockItEnd(aReader.end()) {
+ if (!ReadLegacyOrEnd()) {
+ // The first block is a modern entry: advance to the next legacy entry (or the end).
+ Next();
+ }
+ }
+
+ bool Has() const { return mBlockIt != mBlockItEnd; }  // False once the iterator has reached the end.
+
+ const ProfileBufferEntry& Get() const {  // Returns the copy made by the most recent successful read.
+ MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Get()`");
+ return mEntry;
+ }
+
+ void Next() {  // Advances to the next legacy entry, or to the end if there is none.
+ MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Next()`");
+ for (;;) {
+ ++mBlockIt;
+ if (ReadLegacyOrEnd()) {
+ // Either we're at the end, or we could read a legacy entry -> Done.
+ break;
+ }
+ // Otherwise loop around until we hit the end or a legacy entry.
+ }
+ }
+
+ ProfileBufferBlockIndex CurBlockIndex() const {
+ return mBlockIt.CurrentBlockIndex();
+ }
+
+ uint64_t CurPos() const {
+ return CurBlockIndex().ConvertToProfileBufferIndex();
+ }
+
+ private:
+ // Try to read the entry at the current `mBlockIt` position.
+ // * If we're at the end of the buffer, just return `true`.
+ // * If there is a "legacy" entry (containing a real `ProfileBufferEntry`),
+ // read it into `mEntry`, and return `true` as well.
+ // * Otherwise the entry contains a "modern" type that cannot be read into
+ // `mEntry`, return `false` (so `EntryGetter` can skip to another entry).
+ bool ReadLegacyOrEnd() {
+ if (!Has()) {
+ return true;
+ }
+ // Read the entry "kind", which is always at the start of all entries.
+ ProfileBufferEntryReader aER = *mBlockIt;
+ auto type = static_cast<ProfileBufferEntry::Kind>(
+ aER.ReadObject<ProfileBufferEntry::KindUnderlyingType>());
+ MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) <
+ static_cast<ProfileBufferEntry::KindUnderlyingType>(
+ ProfileBufferEntry::Kind::MODERN_LIMIT));
+ if (type >= ProfileBufferEntry::Kind::LEGACY_LIMIT) {  // Kinds at or above LEGACY_LIMIT are the "modern" ones.
+ aER.SetRemainingBytes(0);  // Presumably marks the unread rest of the block as consumed -- confirm.
+ return false;
+ }
+ // Here, we have a legacy item, we need to read it from the start.
+ // Because the above `ReadObject` moved the reader, we need to reset it to
+ // the start of the entry before reading the whole entry.
+ aER = *mBlockIt;
+ aER.ReadBytes(&mEntry, aER.RemainingBytes());  // NOTE(review): copies the whole block into mEntry; assumes a legacy block never exceeds sizeof(ProfileBufferEntry) -- confirm.
+ return true;
+ }
+
+ ProfileBufferEntry mEntry;  // Copy of the legacy entry at the current position.
+ ProfileChunkedBuffer::BlockIterator mBlockIt;
+ const ProfileChunkedBuffer::BlockIterator mBlockItEnd;
+};
+
+// The following grammar shows legal sequences of profile buffer entries.
+// The sequences beginning with a ThreadId entry are known as "samples".
+//
+// (
+// ( /* Samples */
+// ThreadId
+// Time
+// ( NativeLeafAddr
+// | Label FrameFlags? DynamicStringFragment* InnerWindowID? LineNumber? ColumnNumber? CategoryPair?
+// | JitReturnAddr
+// )+
+// Responsiveness?
+// )
+// | MarkerData
+// | ( /* Counters */
+// CounterId
+// Time
+// (
+// CounterKey
+// Count
+// Number?
+// )*
+// )
+// | CollectionStart
+// | CollectionEnd
+// | Pause
+// | Resume
+// | ( ProfilerOverheadTime /* Sampling start timestamp */
+// ProfilerOverheadDuration /* Lock acquisition */
+// ProfilerOverheadDuration /* Expired data cleaning */
+// ProfilerOverheadDuration /* Counters */
+// ProfilerOverheadDuration /* Threads */
+// )
+// )*
+//
+// The most complicated part is the stack entry sequence that begins with
+// Label. Here are some examples.
+//
+// - ProfilingStack frames without a dynamic string:
+//
+// Label("js::RunScript")
+// CategoryPair(ProfilingCategoryPair::JS)
+//
+// Label("XREMain::XRE_main")
+// LineNumber(4660)
+// CategoryPair(ProfilingCategoryPair::OTHER)
+//
+// Label("ElementRestyler::ComputeStyleChangeFor")
+// LineNumber(3003)
+// CategoryPair(ProfilingCategoryPair::CSS)
+//
+// - ProfilingStack frames with a dynamic string:
+//
+// Label("nsObserverService::NotifyObservers")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+// DynamicStringFragment("domwindo")
+// DynamicStringFragment("wopened")
+// LineNumber(291)
+// CategoryPair(ProfilingCategoryPair::OTHER)
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+// DynamicStringFragment("closeWin")
+// DynamicStringFragment("dow (chr")
+// DynamicStringFragment("ome://gl")
+// DynamicStringFragment("obal/con")
+// DynamicStringFragment("tent/glo")
+// DynamicStringFragment("balOverl")
+// DynamicStringFragment("ay.js:5)")
+// DynamicStringFragment("") # this string holds the closing '\0'
+// LineNumber(25)
+// CategoryPair(ProfilingCategoryPair::JS)
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+// DynamicStringFragment("bound (s")
+// DynamicStringFragment("elf-host")
+// DynamicStringFragment("ed:914)")
+// LineNumber(945)
+// CategoryPair(ProfilingCategoryPair::JS)
+//
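+// - A profiling stack frame with an overly long dynamic string (NOTE: ProfileBuffer::CollectCodeLocation cuts such strings at kMaxFrameKeyLength and ends them with "...", so the "(too long)" text below is not what that function writes):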
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+// DynamicStringFragment("(too lon")
+// DynamicStringFragment("g)")
+// LineNumber(100)
+// CategoryPair(ProfilingCategoryPair::NETWORK)
+//
+// - A wasm JIT frame:
+//
+// Label("")
+// FrameFlags(uint64_t(0))
+// DynamicStringFragment("wasm-fun")
+// DynamicStringFragment("ction[87")
+// DynamicStringFragment("36] (blo")
+// DynamicStringFragment("b:http:/")
+// DynamicStringFragment("/webasse")
+// DynamicStringFragment("mbly.org")
+// DynamicStringFragment("/3dc5759")
+// DynamicStringFragment("4-ce58-4")
+// DynamicStringFragment("626-975b")
+// DynamicStringFragment("-08ad116")
+// DynamicStringFragment("30bc1:38")
+// DynamicStringFragment("29856)")
+//
+// - A JS frame in a synchronous sample:
+//
+// Label("")
+// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+// DynamicStringFragment("u (https")
+// DynamicStringFragment("://perf-")
+// DynamicStringFragment("html.io/")
+// DynamicStringFragment("ac0da204")
+// DynamicStringFragment("aaa44d75")
+// DynamicStringFragment("a800.bun")
+// DynamicStringFragment("dle.js:2")
+// DynamicStringFragment("5)")
+
+// Because this is a format entirely internal to the Profiler, any parsing
+// error indicates a bug in the ProfileBuffer writing or the parser itself,
+// or possibly flaky hardware. The macro body is a bare { } block rather than do/while(0) so that its `continue` applies to the loop at the call site. NOTE(review): the fprintf format has no trailing newline, so successive messages run together on stderr.
+#define ERROR_AND_CONTINUE(msg) \
+ { \
+ fprintf(stderr, "ProfileBuffer parse error: %s", msg); \
+ MOZ_ASSERT(false, msg); \
+ continue; \
+ }
+/* Streams the stack samples stored in the buffer to aWriter.
+ *
+ * Only samples whose ThreadId entry equals aThreadId are streamed (every
+ * sample is accepted when aThreadId is 0). Samples whose Time is older than
+ * aSinceTime, and samples without any frame, are skipped. Frames and stacks
+ * are deduplicated through aUniqueStacks, and each remaining sample is
+ * emitted with WriteSample().
+ *
+ * Returns the thread id of the last sample that was written, or 0 if no
+ * sample was written. */
+int ProfileBuffer::StreamSamplesToJSON(SpliceableJSONWriter& aWriter,
+ int aThreadId, double aSinceTime,
+ UniqueStacks& aUniqueStacks) const {
+ UniquePtr<char[]> dynStrBuf = MakeUnique<char[]>(kMaxFrameKeyLength);  // Scratch space for one frame's dynamic string; reused for every label frame.
+
+ return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader,
+ "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+ "running");
+
+ int processedThreadId = 0;  // Becomes the return value.
+
+ EntryGetter e(*aReader);
+
+ for (;;) {
+ // This block skips entries until we find the start of the next sample.
+ // This is useful in three situations.
+ //
+ // - The circular buffer overwrites old entries, so when we start parsing
+ // we might be in the middle of a sample, and we must skip forward to
+ // the start of the next sample.
+ //
+ // - We skip samples that don't have an appropriate ThreadId or Time.
+ //
+ // - We skip range Pause, Resume, CollectionStart, Counter and
+ // CollectionEnd entries between samples.
+ while (e.Has()) {
+ if (e.Get().IsThreadId()) {
+ break;
+ }
+ e.Next();
+ }
+
+ if (!e.Has()) {
+ break;
+ }
+
+ // Due to the skip-to-next-sample loop above, if we have an entry here it
+ // must be a ThreadId entry.
+ MOZ_ASSERT(e.Get().IsThreadId());
+
+ int threadId = e.Get().GetInt();
+ e.Next();
+
+ // Ignore samples that are for the wrong thread.
+ if (threadId != aThreadId && aThreadId != 0) {  // aThreadId == 0 accepts any thread.
+ continue;
+ }
+
+ MOZ_ASSERT(aThreadId != 0 || processedThreadId == 0,
+ "aThreadId==0 should only be used with 1-sample buffer");
+
+ ProfileSample sample;
+
+ if (e.Has() && e.Get().IsTime()) {
+ sample.mTime = e.Get().GetDouble();
+ e.Next();
+
+ // Ignore samples that are too old.
+ if (sample.mTime < aSinceTime) {
+ continue;
+ }
+ } else {
+ ERROR_AND_CONTINUE("expected a Time entry");
+ }
+
+ UniqueStacks::StackKey stack =
+ aUniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)"));
+
+ int numFrames = 0;  // Frames appended below; the "(root)" frame is not counted.
+ while (e.Has()) {
+ if (e.Get().IsNativeLeafAddr()) {
+ numFrames++;
+
+ void* pc = e.Get().GetPtr();
+ e.Next();
+
+ static const uint32_t BUF_SIZE = 256;
+ char buf[BUF_SIZE];
+
+ // Bug 753041: We need a double cast here to tell GCC that we don't
+ // want to sign extend 32-bit addresses starting with 0xFXXXXXX.
+ unsigned long long pcULL = (unsigned long long)(uintptr_t)pc;
+ SprintfLiteral(buf, "%#llx", pcULL);
+
+ // If the "MOZ_PROFILER_SYMBOLICATE" env-var is set, we add a local
+ // symbolication description to the PC address. This is off by
+ // default, and mainly intended for local development.
+ // The env-var is read only once, when this static is initialized.
+ static const bool preSymbolicate = []() {
+ const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE");
+ return symbolicate && symbolicate[0] != '\0';
+ }();
+ if (preSymbolicate) {
+ MozCodeAddressDetails details;
+ if (MozDescribeCodeAddress(pc, &details)) {
+ // Replace \0 terminator with space.
+ const uint32_t pcLen = strlen(buf);
+ buf[pcLen] = ' ';
+ // Add description after space. Note: Using a frame number of 0,
+ // as using `numFrames` wouldn't help here, and would prevent
+ // combining same function calls that happen at different depths.
+ // TODO: Remove unsightly "#00: " if too annoying. :-)
+ MozFormatCodeAddressDetails(
+ buf + pcLen + 1, BUF_SIZE - (pcLen + 1), 0, pc, &details);
+ }
+ }
+
+ stack = aUniqueStacks.AppendFrame(stack, UniqueStacks::FrameKey(buf));
+
+ } else if (e.Get().IsLabel()) {
+ numFrames++;
+
+ const char* label = e.Get().GetString();
+ e.Next();
+
+ using FrameFlags = ProfilingStackFrame::Flags;
+ uint32_t frameFlags = 0;  // Stays 0 when no FrameFlags entry follows the Label.
+ if (e.Has() && e.Get().IsFrameFlags()) {
+ frameFlags = uint32_t(e.Get().GetUint64());
+ e.Next();
+ }
+
+ bool relevantForJS =
+ frameFlags & uint32_t(FrameFlags::RELEVANT_FOR_JS);
+
+ // Copy potential dynamic string fragments into dynStrBuf, so that
+ // dynStrBuf will then contain the entire dynamic string.
+ size_t i = 0;  // Number of chars copied into dynStrBuf so far.
+ dynStrBuf[0] = '\0';
+ while (e.Has()) {
+ if (e.Get().IsDynamicStringFragment()) {
+ char chars[ProfileBufferEntry::kNumChars];
+ e.Get().CopyCharsInto(chars);
+ for (char c : chars) {
+ if (i < kMaxFrameKeyLength) {  // Chars beyond the buffer capacity are dropped.
+ dynStrBuf[i] = c;
+ i++;
+ }
+ }
+ e.Next();
+ } else {
+ break;
+ }
+ }
+ dynStrBuf[kMaxFrameKeyLength - 1] = '\0';  // Terminate even when the buffer was filled completely.
+ bool hasDynamicString = (i != 0);
+
+ // Combine the static label and the dynamic string; the template flags
+ // select the "label.dyn", "get label.dyn" or "set label.dyn" forms.
+ std::string frameLabel;
+ if (label[0] != '\0' && hasDynamicString) {
+ if (frameFlags & uint32_t(FrameFlags::STRING_TEMPLATE_METHOD)) {
+ frameLabel += label;
+ frameLabel += '.';
+ frameLabel += dynStrBuf.get();
+ } else if (frameFlags &
+ uint32_t(FrameFlags::STRING_TEMPLATE_GETTER)) {
+ frameLabel += "get ";
+ frameLabel += label;
+ frameLabel += '.';
+ frameLabel += dynStrBuf.get();
+ } else if (frameFlags &
+ uint32_t(FrameFlags::STRING_TEMPLATE_SETTER)) {
+ frameLabel += "set ";
+ frameLabel += label;
+ frameLabel += '.';
+ frameLabel += dynStrBuf.get();
+ } else {
+ frameLabel += label;
+ frameLabel += ' ';
+ frameLabel += dynStrBuf.get();
+ }
+ } else if (hasDynamicString) {
+ frameLabel += dynStrBuf.get();
+ } else {
+ frameLabel += label;
+ }
+
+ uint64_t innerWindowID = 0;  // The optional entries below are read in this fixed order.
+ if (e.Has() && e.Get().IsInnerWindowID()) {
+ innerWindowID = uint64_t(e.Get().GetUint64());
+ e.Next();
+ }
+
+ Maybe<unsigned> line;
+ if (e.Has() && e.Get().IsLineNumber()) {
+ line = Some(unsigned(e.Get().GetInt()));
+ e.Next();
+ }
+
+ Maybe<unsigned> column;
+ if (e.Has() && e.Get().IsColumnNumber()) {
+ column = Some(unsigned(e.Get().GetInt()));
+ e.Next();
+ }
+
+ Maybe<ProfilingCategoryPair> categoryPair;
+ if (e.Has() && e.Get().IsCategoryPair()) {
+ categoryPair =
+ Some(ProfilingCategoryPair(uint32_t(e.Get().GetInt())));
+ e.Next();
+ }
+
+ stack = aUniqueStacks.AppendFrame(
+ stack, UniqueStacks::FrameKey(std::move(frameLabel),
+ relevantForJS, innerWindowID, line,
+ column, categoryPair));
+
+ } else {
+ break;
+ }
+ }
+
+ if (numFrames == 0) {
+ // It is possible to have empty stacks if native stackwalking is
+ // disabled. Skip samples with empty stacks. (See Bug 1497985).
+ // Thus, don't use ERROR_AND_CONTINUE, but just continue.
+ continue;
+ }
+
+ sample.mStack = aUniqueStacks.GetOrAddStackIndex(stack);
+
+ if (e.Has() && e.Get().IsResponsiveness()) {
+ sample.mResponsiveness = Some(e.Get().GetDouble());
+ e.Next();
+ }
+
+ WriteSample(aWriter, sample);
+
+ processedThreadId = threadId;  // Remember the thread of the sample just written.
+ }
+
+ return processedThreadId;
+ });
+}
+
+// Visits every entry of the buffer and streams the Marker entries to aWriter
+// through DeserializeAfterKindAndStream(), which is given aThreadId. A stack
+// carried by a marker is streamed as a ProfilerBacktrace, deduplicated
+// through aUniqueStacks. aProcessStartTime and aSinceTime are not used here.
+void ProfileBuffer::StreamMarkersToJSON(SpliceableJSONWriter& aWriter,
+                                        int aThreadId,
+                                        const TimeStamp& aProcessStartTime,
+                                        double aSinceTime,
+                                        UniqueStacks& aUniqueStacks) const {
+  using EntryKind = ProfileBufferEntry::Kind;
+  using EntryKindInt = ProfileBufferEntry::KindUnderlyingType;
+
+  mEntries.ReadEach([&](ProfileBufferEntryReader& aER) {
+    // Each entry starts with its kind tag.
+    const auto entryKind =
+        static_cast<EntryKind>(aER.ReadObject<EntryKindInt>());
+    MOZ_ASSERT(static_cast<EntryKindInt>(entryKind) <
+               static_cast<EntryKindInt>(EntryKind::MODERN_LIMIT));
+
+    if (entryKind == EntryKind::Marker &&
+        ::mozilla::base_profiler_markers_detail::
+            DeserializeAfterKindAndStream(
+                aER, aWriter, aThreadId,
+                [&](ProfileChunkedBuffer& aChunkedBuffer) {
+                  ProfilerBacktrace backtrace("", &aChunkedBuffer);
+                  backtrace.StreamJSON(aWriter, TimeStamp::ProcessCreation(),
+                                       aUniqueStacks);
+                })) {
+      // The marker was fully read and streamed; nothing left to skip.
+      return;
+    }
+
+    // Not a marker, or marker for another thread.
+    // We probably didn't read the whole entry, so we need to skip to the end.
+    aER.SetRemainingBytes(0);
+  });
+}
+/* Streams the sampling-overhead entries recorded at or after aSinceTime as a
+ * "profilerOverhead" object property on aWriter.
+ *
+ * The object contains a "samples" object (a schema plus one data row per
+ * ProfilerOverheadTime entry that is followed by four
+ * ProfilerOverheadDuration entries) and, when at least one interval between
+ * two samplings was seen, a "statistics" object. All other entry kinds are
+ * skipped. aProcessStartTime is not used by this function. */
+void ProfileBuffer::StreamProfilerOverheadToJSON(
+ SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime,
+ double aSinceTime) const {
+ mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader,
+ "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+ "running");
+
+ EntryGetter e(*aReader);
+
+ enum Schema : uint32_t {  // Column indices; same order as the WriteField() calls below.
+ TIME = 0,
+ LOCKING = 1,
+ MARKER_CLEANING = 2,
+ COUNTERS = 3,
+ THREADS = 4
+ };
+
+ aWriter.StartObjectProperty("profilerOverhead");
+ aWriter.StartObjectProperty("samples");
+ // Stream all sampling overhead data. We skip other entries, because we
+ // process them in StreamSamplesToJSON()/etc.
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("time");
+ schema.WriteField("locking");
+ schema.WriteField("expiredMarkerCleaning");
+ schema.WriteField("counters");
+ schema.WriteField("threads");
+ }
+
+ aWriter.StartArrayProperty("data");
+ double firstTime = 0.0;  // 0.0 also means "no row seen yet" (checked below).
+ double lastTime = 0.0;
+ ProfilerStats intervals, overheads, lockings, cleanings, counters, threads;
+ while (e.Has()) {
+ // valid sequence: ProfilerOverheadTime, ProfilerOverheadDuration * 4
+ if (e.Get().IsProfilerOverheadTime()) {
+ double time = e.Get().GetDouble();
+ if (time >= aSinceTime) {
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {  // On error the loop resumes at the current entry, without advancing.
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime");
+ }
+ double locking = e.Get().GetDouble();
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime,ProfilerOverheadDuration");
+ }
+ double cleaning = e.Get().GetDouble();
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime,ProfilerOverheadDuration*2");
+ }
+ double counter = e.Get().GetDouble();
+ e.Next();
+ if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+ ERROR_AND_CONTINUE(
+ "expected a ProfilerOverheadDuration entry after "
+ "ProfilerOverheadTime,ProfilerOverheadDuration*3");
+ }
+ double thread = e.Get().GetDouble();  // Not advanced here; the e.Next() at the bottom of the loop does it.
+
+ if (firstTime == 0.0) {
+ firstTime = time;
+ } else {
+ // Note that we'll have 1 fewer interval than other numbers (because
+ // we need both ends of an interval to know its duration). The final
+ // difference should be insignificant over the expected many
+ // thousands of iterations.
+ intervals.Count(time - lastTime);
+ }
+ lastTime = time;
+ overheads.Count(locking + cleaning + counter + thread);
+ lockings.Count(locking);
+ cleanings.Count(cleaning);
+ counters.Count(counter);
+ threads.Count(thread);
+
+ AutoArraySchemaWriter writer(aWriter);
+ writer.DoubleElement(TIME, time);
+ writer.DoubleElement(LOCKING, locking);
+ writer.DoubleElement(MARKER_CLEANING, cleaning);
+ writer.DoubleElement(COUNTERS, counter);
+ writer.DoubleElement(THREADS, thread);
+ }
+ }
+ e.Next();
+ }
+ aWriter.EndArray(); // data
+ aWriter.EndObject(); // samples
+
+ // Only output statistics if there is at least one full interval (and
+ // therefore at least two samplings.)
+ if (intervals.n > 0) {
+ aWriter.StartObjectProperty("statistics");
+ aWriter.DoubleProperty("profiledDuration", lastTime - firstTime);
+ aWriter.IntProperty("samplingCount", overheads.n);
+ aWriter.DoubleProperty("overheadDurations", overheads.sum);
+ aWriter.DoubleProperty("overheadPercentage",
+ overheads.sum / (lastTime - firstTime));  // NOTE(review): written as a plain ratio (not multiplied by 100), and undefined if both samplings share one timestamp - confirm.
+#define PROFILER_STATS(name, var) \
+ aWriter.DoubleProperty("mean" name, (var).sum / (var).n); \
+ aWriter.DoubleProperty("min" name, (var).min); \
+ aWriter.DoubleProperty("max" name, (var).max);
+ PROFILER_STATS("Interval", intervals);
+ PROFILER_STATS("Overhead", overheads);
+ PROFILER_STATS("Lockings", lockings);
+ PROFILER_STATS("Cleaning", cleanings);
+ PROFILER_STATS("Counter", counters);
+ PROFILER_STATS("Thread", threads);
+#undef PROFILER_STATS
+ aWriter.EndObject(); // statistics
+ }
+ aWriter.EndObject(); // profilerOverhead
+ });
+}
+/* One counter sample as decoded by ProfileBuffer::StreamCountersToJSON(),
+ * together with the containers used to group such samples. */
+struct CounterKeyedSample {
+ double mTime;  // Value of the sample's Time entry.
+ uint64_t mNumber;  // Value of the Number entry, or 0 when there was none.
+ int64_t mCount;  // Value of the Count entry.
+};
+
+using CounterKeyedSamples = Vector<CounterKeyedSample>;  // Samples recorded for one counter key.
+
+using CounterMap = HashMap<uint64_t, CounterKeyedSamples>;  // CounterKey value -> its samples.
+
+// Returns a reference to the value stored under aKey in aMap. When the key is
+// not present yet, a value-initialized value is inserted first; a failed
+// insertion is fatal (release assertion).
+template <typename HashM, typename Key>
+static auto& LookupOrAdd(HashM& aMap, Key&& aKey) {
+  auto slot = aMap.lookupForAdd(aKey);
+  if (slot) {
+    // Already present: hand back the existing value.
+    return slot->value();
+  }
+
+  using Value = typename HashM::Entry::ValueType;
+  MOZ_RELEASE_ASSERT(aMap.add(slot, std::forward<Key>(aKey), Value{}));
+  MOZ_ASSERT(!!slot);
+  return slot->value();
+}
+
+// Streams the counter samples recorded at or after aSinceTime as a "counters"
+// array property on aWriter. Nothing at all is written when the buffer holds
+// no CounterId entry. aProcessStartTime is not used by this function.
+//
+// The buffer is first decoded into a map (counter -> key -> samples), which is
+// then dumped to JSON. "number" and "count" are written as deltas against the
+// previously written row of the same sample group, and a row is only written
+// when one of them changed.
+void ProfileBuffer::StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+                                         const TimeStamp& aProcessStartTime,
+                                         double aSinceTime) const {
+  // Because this is a format entirely internal to the Profiler, any parsing
+  // error indicates a bug in the ProfileBuffer writing or the parser itself,
+  // or possibly flaky hardware.
+
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader);
+
+    enum Schema : uint32_t { TIME = 0, NUMBER = 1, COUNT = 2 };
+
+    // Stream all counters. We skip other entries, because we process them in
+    // StreamSamplesToJSON()/etc.
+    //
+    // Valid sequence in the buffer:
+    // CounterID
+    // Time
+    // ( CounterKey Count Number? )*
+    //
+    // And the JSON (example):
+    // "counters": [
+    //   {
+    //     "name": "malloc",
+    //     "category": "Memory",
+    //     "description": "Amount of allocated memory",
+    //     "sample_groups": [
+    //       {
+    //         "id": 0,
+    //         "samples": {
+    //           "schema": {"time": 0, "number": 1, "count": 2},
+    //           "data": [
+    //             [
+    //               16117.033968000002,
+    //               2446216,
+    //               6801320
+    //             ],
+    //             [
+    //               16118.037638,
+    //               2446216,
+    //               6801320
+    //             ],
+    //           ],
+    //         }
+    //       }
+    //     ]
+    //   },
+    // ],
+
+    // Build the map of counters and populate it
+    HashMap<void*, CounterMap> counters;
+
+    while (e.Has()) {
+      if (!e.Get().IsCounterId()) {
+        // skip all non-Counters, including if we start in the middle of a
+        // counter
+        e.Next();
+        continue;
+      }
+
+      // From here on, every entry that gets parsed is consumed explicitly, so
+      // the entry following a counter sample (possibly the CounterId of the
+      // next counter) is examined by the next iteration instead of being
+      // skipped.
+      void* id = e.Get().GetPtr();
+      CounterMap& counter = LookupOrAdd(counters, id);
+      e.Next();
+      if (!e.Has() || !e.Get().IsTime()) {
+        ERROR_AND_CONTINUE("expected a Time entry");
+      }
+      double time = e.Get().GetDouble();
+      e.Next();
+      if (time < aSinceTime) {
+        // skip counter sample - only the counter id and time were consumed,
+        // the loop at the top skips the rest
+        continue;
+      }
+
+      while (e.Has() && e.Get().IsCounterKey()) {
+        uint64_t key = e.Get().GetUint64();
+        CounterKeyedSamples& data = LookupOrAdd(counter, key);
+        e.Next();
+        if (!e.Has() || !e.Get().IsCount()) {
+          ERROR_AND_CONTINUE("expected a Count entry");
+        }
+        // Count is stored as int64_t, Number as uint64_t.
+        int64_t count = e.Get().GetInt64();
+        e.Next();
+        // The Number entry is optional; a missing one means 0.
+        uint64_t number = 0;
+        if (e.Has() && e.Get().IsNumber()) {
+          number = e.Get().GetUint64();
+          // Consume it, so that a following CounterKey is parsed as well.
+          e.Next();
+        }
+        CounterKeyedSample sample = {time, number, count};
+        MOZ_RELEASE_ASSERT(data.append(sample));
+      }
+    }
+    // we have a map of a map of counter entries; dump them to JSON
+    if (counters.count() == 0) {
+      return;
+    }
+
+    aWriter.StartArrayProperty("counters");
+    for (auto iter = counters.iter(); !iter.done(); iter.next()) {
+      CounterMap& counter = iter.get().value();
+      // The CounterId payload is the address of the counter object itself.
+      const BaseProfilerCount* base_counter =
+          static_cast<const BaseProfilerCount*>(iter.get().key());
+
+      aWriter.Start();
+      aWriter.StringProperty("name", MakeStringSpan(base_counter->mLabel));
+      aWriter.StringProperty("category",
+                             MakeStringSpan(base_counter->mCategory));
+      aWriter.StringProperty("description",
+                             MakeStringSpan(base_counter->mDescription));
+
+      aWriter.StartArrayProperty("sample_groups");
+      for (auto counter_iter = counter.iter(); !counter_iter.done();
+           counter_iter.next()) {
+        CounterKeyedSamples& samples = counter_iter.get().value();
+        uint64_t key = counter_iter.get().key();
+
+        size_t size = samples.length();
+        if (size == 0) {
+          // A key without any decoded sample (e.g. after a parse error).
+          continue;
+        }
+
+        aWriter.StartObjectElement();
+        {
+          aWriter.IntProperty("id", static_cast<int64_t>(key));
+          aWriter.StartObjectProperty("samples");
+          {
+            // XXX Can we assume a missing count means 0?
+            JSONSchemaWriter schema(aWriter);
+            schema.WriteField("time");
+            schema.WriteField("number");
+            schema.WriteField("count");
+          }
+
+          aWriter.StartArrayProperty("data");
+          uint64_t previousNumber = 0;
+          int64_t previousCount = 0;
+          for (size_t i = 0; i < size; i++) {
+            // Encode as deltas, and only encode if different than the last
+            // sample
+            if (i == 0 || samples[i].mNumber != previousNumber ||
+                samples[i].mCount != previousCount) {
+              MOZ_ASSERT(i == 0 || samples[i].mTime >= samples[i - 1].mTime);
+              MOZ_ASSERT(samples[i].mNumber >= previousNumber);
+              MOZ_ASSERT(samples[i].mNumber - previousNumber <=
+                         uint64_t(std::numeric_limits<int64_t>::max()));
+
+              AutoArraySchemaWriter writer(aWriter);
+              writer.DoubleElement(TIME, samples[i].mTime);
+              writer.IntElement(
+                  NUMBER,
+                  static_cast<int64_t>(samples[i].mNumber - previousNumber));
+              writer.IntElement(COUNT, samples[i].mCount - previousCount);
+              previousNumber = samples[i].mNumber;
+              previousCount = samples[i].mCount;
+            }
+          }
+          aWriter.EndArray();   // data
+          aWriter.EndObject();  // samples
+        }
+        aWriter.EndObject();  // sample_groups item
+      }
+      aWriter.EndArray();  // sample groups
+      aWriter.End();       // for each counter
+    }
+    aWriter.EndArray();  // counters
+  });
+}
+
+#undef ERROR_AND_CONTINUE
+
+// Writes one paused-range object to aWriter, with the properties "startTime",
+// "endTime" and "reason". A time that is Nothing() is written as JSON null.
+static void AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason,
+                           const Maybe<double>& aStartTime,
+                           const Maybe<double>& aEndTime) {
+  aWriter.Start();
+
+  if (aStartTime.isNothing()) {
+    aWriter.NullProperty("startTime");
+  } else {
+    aWriter.DoubleProperty("startTime", *aStartTime);
+  }
+
+  if (aEndTime.isNothing()) {
+    aWriter.NullProperty("endTime");
+  } else {
+    aWriter.DoubleProperty("endTime", *aEndTime);
+  }
+
+  aWriter.StringProperty("reason", MakeStringSpan(aReason));
+
+  aWriter.End();
+}
+
+// Scans the whole buffer and writes one paused-range object per Pause/Resume
+// pair ("profiler-paused") and per CollectionStart/CollectionEnd pair
+// ("collecting"). A range whose start entry was not seen gets a null start
+// time; a range still open at the end of the buffer gets a null end time.
+// aSinceTime is not used by this function.
+void ProfileBuffer::StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
+                                             double aSinceTime) const {
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader);
+
+    // Start times of the ranges that are currently open, if any.
+    Maybe<double> pauseStart;
+    Maybe<double> collectionStart;
+
+    for (; e.Has(); e.Next()) {
+      const auto& entry = e.Get();
+      switch (entry.GetKind()) {
+        case ProfileBufferEntry::Kind::Pause:
+          pauseStart = Some(entry.GetDouble());
+          break;
+        case ProfileBufferEntry::Kind::Resume:
+          AddPausedRange(aWriter, "profiler-paused", pauseStart,
+                         Some(entry.GetDouble()));
+          pauseStart = Nothing();
+          break;
+        case ProfileBufferEntry::Kind::CollectionStart:
+          collectionStart = Some(entry.GetDouble());
+          break;
+        case ProfileBufferEntry::Kind::CollectionEnd:
+          AddPausedRange(aWriter, "collecting", collectionStart,
+                         Some(entry.GetDouble()));
+          collectionStart = Nothing();
+          break;
+        default:
+          // Every other kind of entry is irrelevant here.
+          break;
+      }
+    }
+
+    // Ranges that never got closed are reported without an end time.
+    if (pauseStart) {
+      AddPausedRange(aWriter, "profiler-paused", pauseStart, Nothing());
+    }
+    if (collectionStart) {
+      AddPausedRange(aWriter, "collecting", collectionStart, Nothing());
+    }
+  });
+}
+/* Appends a copy of the sample that starts at position *aLastSample (which
+ * must be a ThreadId entry for aThreadId) to the buffer, with its Time entry
+ * replaced by the current time relative to aProcessStartTime.
+ *
+ * The copy is first staged in a temporary buffer and then appended to
+ * mEntries. Counter, overhead and Responsiveness entries are not copied.
+ *
+ * Returns false without adding anything when aLastSample is Nothing(), when
+ * the referenced sample is no longer in the buffer, or when the staged copy
+ * did not fit in the temporary buffer; in the last two cases aLastSample is
+ * reset. On success aLastSample is set to the value returned by
+ * AddThreadIdEntry() for the new copy, and true is returned. */
+bool ProfileBuffer::DuplicateLastSample(int aThreadId,
+ const TimeStamp& aProcessStartTime,
+ Maybe<uint64_t>& aLastSample) {
+ if (!aLastSample) {
+ return false;
+ }
+
+ ProfileChunkedBuffer tempBuffer(
+ ProfileChunkedBuffer::ThreadSafety::WithoutMutex, mWorkerChunkManager);  // Staging buffer for the copy.
+
+ auto retrieveWorkerChunk = MakeScopeExit(
+ [&]() { mWorkerChunkManager.Reset(tempBuffer.GetAllChunks()); });  // On scope exit, resets mWorkerChunkManager with tempBuffer's chunks.
+
+ const bool ok = mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader,
+ "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+ "running");
+
+ EntryGetter e(*aReader, *aLastSample);
+
+ if (e.CurPos() != *aLastSample) {
+ // The last sample is no longer within the buffer range, so we cannot
+ // use it. Reset the stored buffer position to Nothing().
+ aLastSample.reset();
+ return false;
+ }
+
+ MOZ_RELEASE_ASSERT(e.Has() && e.Get().IsThreadId() &&
+ e.Get().GetInt() == aThreadId);
+
+ e.Next();  // Skip the ThreadId entry; a new one is added further below.
+
+ // Go through the whole entry and duplicate it, until we find the next
+ // one.
+ while (e.Has()) {
+ switch (e.Get().GetKind()) {
+ case ProfileBufferEntry::Kind::Pause:
+ case ProfileBufferEntry::Kind::Resume:
+ case ProfileBufferEntry::Kind::PauseSampling:
+ case ProfileBufferEntry::Kind::ResumeSampling:
+ case ProfileBufferEntry::Kind::CollectionStart:
+ case ProfileBufferEntry::Kind::CollectionEnd:
+ case ProfileBufferEntry::Kind::ThreadId:
+ // We're done.
+ return true;
+ case ProfileBufferEntry::Kind::Time:
+ // Copy with new time
+ AddEntry(tempBuffer,
+ ProfileBufferEntry::Time(
+ (TimeStamp::NowUnfuzzed() - aProcessStartTime)
+ .ToMilliseconds()));
+ break;
+ case ProfileBufferEntry::Kind::CounterKey:
+ case ProfileBufferEntry::Kind::Number:
+ case ProfileBufferEntry::Kind::Count:
+ case ProfileBufferEntry::Kind::Responsiveness:
+ // Don't copy anything not part of a thread's stack sample
+ break;
+ case ProfileBufferEntry::Kind::CounterId:
+ // CounterId is normally followed by Time - if so, we'd like
+ // to skip it. If we duplicate Time, it won't hurt anything, just
+ // waste buffer space (and this can happen if the CounterId has
+ // fallen off the end of the buffer, but Time (and Number/Count)
+ // are still in the buffer).
+ e.Next();
+ if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) {
+ // this would only happen if there was an invalid sequence
+ // in the buffer. Don't skip it.
+ continue;  // Continues the while loop, re-examining the current entry.
+ }
+ // The Time entry is skipped by the e.Next() after the switch.
+ break;
+ case ProfileBufferEntry::Kind::ProfilerOverheadTime:
+ // ProfilerOverheadTime is normally followed by
+ // ProfilerOverheadDuration*4 - if so, we'd like to skip it. Don't
+ // duplicate, as we are in the middle of a sampling and will soon
+ // capture its own overhead.
+ e.Next();
+ // A missing ProfilerOverheadDuration would only happen if there was an
+ // invalid sequence in the buffer. Don't skip the unexpected entry.
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ e.Next();
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ e.Next();
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ e.Next();
+ if (e.Has() &&
+ e.Get().GetKind() !=
+ ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+ continue;
+ }
+ // The ProfilerOverheadTime and three ProfilerOverheadDuration entries
+ // were skipped above; the fourth is skipped by the e.Next() below.
+ break;
+ default: {
+ // Copy anything else we don't know about.
+ AddEntry(tempBuffer, e.Get());
+ break;
+ }
+ }
+ e.Next();
+ }
+ return true;
+ });
+
+ if (!ok) {
+ return false;
+ }
+
+ // If the buffer was big enough, there won't be any cleared blocks.
+ if (tempBuffer.GetState().mClearedBlockCount != 0) {
+ // No need to try to read stack again as it won't fit. Reset the stored
+ // buffer position to Nothing().
+ aLastSample.reset();
+ return false;
+ }
+
+ aLastSample = Some(AddThreadIdEntry(aThreadId));  // Starts the duplicate in mEntries; presumably the returned value is its buffer position.
+
+ tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+ MOZ_ASSERT(aReader, "tempBuffer cannot be out-of-session");
+
+ EntryGetter e(*aReader);
+
+ while (e.Has()) {
+ AddEntry(e.Get());  // Append each staged entry to mEntries.
+ e.Next();
+ }
+ });
+
+ return true;
+}
+
+// Intentionally a no-op: aTime is ignored and no sample is discarded.
+// The duration limit will be removed from Firefox, see bug 1632365.
+void ProfileBuffer::DiscardSamplesBeforeTime(double aTime) {
+  static_cast<void>(aTime);
+}
+
+// END ProfileBuffer
+////////////////////////////////////////////////////////////////////////
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.h b/mozglue/baseprofiler/core/ProfileBufferEntry.h
new file mode 100644
index 0000000000..6422a34a85
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBufferEntry.h
@@ -0,0 +1,358 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntry_h
+#define ProfileBufferEntry_h
+
+#include "BaseProfilingCategory.h"
+#include "gtest/MozGtestFriend.h"
+#include "mozilla/BaseProfileJSONWriter.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferEntryKinds.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+
+#include <string>
+#include <utility>
+
+namespace mozilla {
+namespace baseprofiler {
+/* A single tagged entry of the profile buffer: a Kind plus kNumChars bytes of
+ * storage.
+ *
+ * Entries are created through the static per-kind factory functions that the
+ * CTOR macro generates for every kind listed by
+ * FOR_EACH_PROFILE_BUFFER_ENTRY_KIND (e.g. ProfileBufferEntry::Time(double)),
+ * and their kind is queried with GetKind() or the generated Is<Kind>()
+ * predicates. The payload accessors are private; ProfileBuffer is a friend. */
+class ProfileBufferEntry {
+ public:
+ using KindUnderlyingType = ::mozilla::ProfileBufferEntryKindUnderlyingType;
+ using Kind = ::mozilla::ProfileBufferEntryKind;
+
+ ProfileBufferEntry();
+
+ static constexpr size_t kNumChars = ::mozilla::ProfileBufferEntryNumChars;  // Size in bytes of mStorage.
+
+ private:
+ // One constructor per payload type; they are reached through the static
+ // factory functions generated by CTOR below.
+ // aString must be a static string.
+ ProfileBufferEntry(Kind aKind, const char* aString);
+ ProfileBufferEntry(Kind aKind, char aChars[kNumChars]);
+ ProfileBufferEntry(Kind aKind, void* aPtr);
+ ProfileBufferEntry(Kind aKind, double aDouble);
+ ProfileBufferEntry(Kind aKind, int64_t aInt64);
+ ProfileBufferEntry(Kind aKind, uint64_t aUint64);
+ ProfileBufferEntry(Kind aKind, int aInt);
+
+ public:
+#define CTOR(KIND, TYPE, SIZE) \
+ static ProfileBufferEntry KIND(TYPE aVal) { \
+ return ProfileBufferEntry(Kind::KIND, aVal); \
+ }
+ FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR)
+#undef CTOR
+
+ Kind GetKind() const { return mKind; }
+
+#define IS_KIND(KIND, TYPE, SIZE) \
+ bool Is##KIND() const { return mKind == Kind::KIND; }
+ FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(IS_KIND)
+#undef IS_KIND
+
+ private:
+ FRIEND_TEST(ThreadProfile, InsertOneEntry);
+ FRIEND_TEST(ThreadProfile, InsertOneEntryWithTinyBuffer);
+ FRIEND_TEST(ThreadProfile, InsertEntriesNoWrap);
+ FRIEND_TEST(ThreadProfile, InsertEntriesWrap);
+ FRIEND_TEST(ThreadProfile, MemoryMeasure);
+ friend class ProfileBuffer;  // ProfileBuffer reads payloads through the private accessors below.
+
+ Kind mKind;  // The tag returned by GetKind().
+ uint8_t mStorage[kNumChars];  // Raw payload bytes; presumably decoded by the Get*() accessors below - confirm in the .cpp.
+
+ // Payload accessors. NOTE(review): nothing visible here checks that the
+ // accessor matches mKind; callers test Is<Kind>() first.
+ const char* GetString() const;
+ void* GetPtr() const;
+ double GetDouble() const;
+ int GetInt() const;
+ int64_t GetInt64() const;
+ uint64_t GetUint64() const;
+ void CopyCharsInto(char (&aOutArray)[kNumChars]) const;  // Used by ProfileBuffer for DynamicStringFragment entries.
+};
+
+// Packed layout: 1 byte for the tag + 8 bytes for the value.
+static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size");
+
+// Deduplicates frames and stacks while a profile is being streamed.
+//
+// Every distinct frame (FrameKey) and every distinct stack (StackKey: a
+// prefix stack plus one frame) is mapped to an index; the corresponding table
+// rows are accumulated in chunked JSON writers and later spliced into the
+// output with SpliceFrameTableElements() / SpliceStackTableElements().
+class UniqueStacks {
+ public:
+  // Identifies one frame: its location string plus optional attributes.
+  struct FrameKey {
+    // Frame described by a location string only; it is not relevant for JS,
+    // has an inner window ID of 0, and no line, column or category.
+    explicit FrameKey(const char* aLocation)
+        : mData(NormalFrameData{std::string(aLocation), false, 0, Nothing(),
+                                Nothing()}) {}
+
+    // Frame with the full set of attributes. aLocation is moved into the key
+    // (it is taken by rvalue reference precisely so that no copy is needed).
+    FrameKey(std::string&& aLocation, bool aRelevantForJS,
+             uint64_t aInnerWindowID, const Maybe<unsigned>& aLine,
+             const Maybe<unsigned>& aColumn,
+             const Maybe<ProfilingCategoryPair>& aCategoryPair)
+        : mData(NormalFrameData{std::move(aLocation), aRelevantForJS,
+                                aInnerWindowID, aLine, aColumn,
+                                aCategoryPair}) {}
+
+    FrameKey(const FrameKey& aToCopy) = default;
+
+    uint32_t Hash() const;
+    bool operator==(const FrameKey& aOther) const {
+      return mData == aOther.mData;
+    }
+
+    struct NormalFrameData {
+      bool operator==(const NormalFrameData& aOther) const;
+
+      std::string mLocation;
+      bool mRelevantForJS;
+      uint64_t mInnerWindowID;
+      Maybe<unsigned> mLine;
+      Maybe<unsigned> mColumn;
+      Maybe<ProfilingCategoryPair> mCategoryPair;
+    };
+    Variant<NormalFrameData> mData;
+  };
+
+  // Hash policy that lets FrameKey be used as a HashMap key.
+  struct FrameKeyHasher {
+    using Lookup = FrameKey;
+
+    // Combines all fields of the frame data; absent optional fields and an
+    // empty location do not contribute to the hash.
+    static HashNumber hash(const FrameKey& aLookup) {
+      HashNumber hash = 0;
+      if (aLookup.mData.is<FrameKey::NormalFrameData>()) {
+        const FrameKey::NormalFrameData& data =
+            aLookup.mData.as<FrameKey::NormalFrameData>();
+        if (!data.mLocation.empty()) {
+          hash = AddToHash(hash, HashString(data.mLocation.c_str()));
+        }
+        hash = AddToHash(hash, data.mRelevantForJS);
+        hash = mozilla::AddToHash(hash, data.mInnerWindowID);
+        if (data.mLine.isSome()) {
+          hash = AddToHash(hash, *data.mLine);
+        }
+        if (data.mColumn.isSome()) {
+          hash = AddToHash(hash, *data.mColumn);
+        }
+        if (data.mCategoryPair.isSome()) {
+          hash = AddToHash(hash, static_cast<uint32_t>(*data.mCategoryPair));
+        }
+      }
+      return hash;
+    }
+
+    static bool match(const FrameKey& aKey, const FrameKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(FrameKey& aKey, const FrameKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  // Identifies one stack as "prefix stack + last frame", both given as
+  // indices. The hash is computed once, at construction, and cached.
+  struct StackKey {
+    Maybe<uint32_t> mPrefixStackIndex;  // Nothing() for a root stack.
+    uint32_t mFrameIndex;
+
+    // Root stack consisting of the single frame aFrame.
+    explicit StackKey(uint32_t aFrame)
+        : mFrameIndex(aFrame), mHash(HashGeneric(aFrame)) {}
+
+    // Stack made of aPrefix (whose index is aPrefixStackIndex) followed by
+    // aFrame; the hash is derived from the prefix's hash.
+    StackKey(const StackKey& aPrefix, uint32_t aPrefixStackIndex,
+             uint32_t aFrame)
+        : mPrefixStackIndex(Some(aPrefixStackIndex)),
+          mFrameIndex(aFrame),
+          mHash(AddToHash(aPrefix.mHash, aFrame)) {}
+
+    HashNumber Hash() const { return mHash; }
+
+    // Equality compares the indices only, not the cached hash.
+    bool operator==(const StackKey& aOther) const {
+      return mPrefixStackIndex == aOther.mPrefixStackIndex &&
+             mFrameIndex == aOther.mFrameIndex;
+    }
+
+   private:
+    HashNumber mHash;
+  };
+
+  // Hash policy that lets StackKey be used as a HashMap key.
+  struct StackKeyHasher {
+    using Lookup = StackKey;
+
+    static HashNumber hash(const StackKey& aLookup) { return aLookup.Hash(); }
+
+    static bool match(const StackKey& aKey, const StackKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(StackKey& aKey, const StackKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  UniqueStacks();
+
+  // Return a StackKey for aFrame as the stack's root frame (no prefix).
+  [[nodiscard]] StackKey BeginStack(const FrameKey& aFrame);
+
+  // Return a new StackKey that is obtained by appending aFrame to aStack.
+  [[nodiscard]] StackKey AppendFrame(const StackKey& aStack,
+                                     const FrameKey& aFrame);
+
+  [[nodiscard]] uint32_t GetOrAddFrameIndex(const FrameKey& aFrame);
+  [[nodiscard]] uint32_t GetOrAddStackIndex(const StackKey& aStack);
+
+  void SpliceFrameTableElements(SpliceableJSONWriter& aWriter);
+  void SpliceStackTableElements(SpliceableJSONWriter& aWriter);
+
+ private:
+  void StreamNonJITFrame(const FrameKey& aFrame);
+  void StreamStack(const StackKey& aStack);
+
+ public:
+  UniquePtr<UniqueJSONStrings> mUniqueStrings;
+
+ private:
+  // Frame table rows written so far, and the frame -> index map.
+  SpliceableChunkedJSONWriter mFrameTableWriter;
+  HashMap<FrameKey, uint32_t, FrameKeyHasher> mFrameToIndexMap;
+
+  // Stack table rows written so far, and the stack -> index map.
+  SpliceableChunkedJSONWriter mStackTableWriter;
+  HashMap<StackKey, uint32_t, StackKeyHasher> mStackToIndexMap;
+};
+
+//
+// Thread profile JSON Format
+// --------------------------
+//
+// The profile contains much duplicate information. The output JSON of the
+// profile attempts to deduplicate strings, frames, and stack prefixes, to cut
+// down on size and to increase JSON streaming speed. Deduplicated values are
+// streamed as indices into their respective tables.
+//
+// Further, arrays of objects with the same set of properties (e.g., samples,
+// frames) are output as arrays according to a schema instead of an object
+// with property names. A property that is not present is represented in the
+// array as null or undefined.
+//
+// The format of the thread profile JSON is shown by the following example
+// with 1 sample and 1 marker:
+//
+// {
+// "name": "Foo",
+// "tid": 42,
+// "samples":
+// {
+// "schema":
+// {
+// "stack": 0, /* index into stackTable */
+// "time": 1, /* number */
+// "eventDelay": 2, /* number */
+// },
+// "data":
+// [
+// [ 1, 0.0, 0.0 ] /* { stack: 1, time: 0.0, eventDelay: 0.0 } */
+// ]
+// },
+//
+// "markers":
+// {
+// "schema":
+// {
+// "name": 0, /* index into stringTable */
+// "time": 1, /* number */
+// "data": 2 /* arbitrary JSON */
+// },
+// "data":
+// [
+// [ 3, 0.1 ] /* { name: 'example marker', time: 0.1 } */
+// ]
+// },
+//
+// "stackTable":
+// {
+// "schema":
+// {
+// "prefix": 0, /* index into stackTable */
+// "frame": 1 /* index into frameTable */
+// },
+// "data":
+// [
+// [ null, 0 ], /* (root) */
+// [ 0, 1 ] /* (root) > foo.js */
+// ]
+// },
+//
+// "frameTable":
+// {
+// "schema":
+// {
+// "location": 0, /* index into stringTable */
+// "relevantForJS": 1, /* bool */
+// "innerWindowID": 2, /* inner window ID of global JS `window` object */
+// "implementation": 3, /* index into stringTable */
+// "optimizations": 4, /* arbitrary JSON */
+// "line": 5, /* number */
+// "column": 6, /* number */
+// "category": 7, /* index into profile.meta.categories */
+// "subcategory": 8 /* index into
+// profile.meta.categories[category].subcategories */
+// },
+// "data":
+// [
+// [ 0 ], /* { location: '(root)' } */
+// [ 1, 2 ] /* { location: 'foo.js',
+// implementation: 'baseline' } */
+// ]
+// },
+//
+// "stringTable":
+// [
+// "(root)",
+// "foo.js",
+// "baseline",
+// "example marker"
+// ]
+// }
+//
+// Process:
+// {
+// "name": "Bar",
+// "pid": 24,
+// "threads":
+// [
+// <0-N threads from above>
+// ],
+// "counters": /* includes the memory counter */
+// [
+// {
+// "name": "qwerty",
+// "category": "uiop",
+// "description": "this is qwerty uiop",
+// "sample_groups":
+// [
+// {
+// "id": 42, /* number (thread id, or object identifier (tab), etc) */
+// "samples":
+// {
+// "schema":
+// {
+// "time": 0, /* number */
+// "number": 1, /* number (of times the counter was touched) */
+// "count": 2 /* number (total for the counter) */
+// },
+// "data":
+// [
+// [ 0.1, 1824,
+// 454622 ] /* { time: 0.1, number: 1824, count: 454622 } */
+// ]
+// },
+// },
+// /* more sample-group objects with different id's */
+// ]
+// },
+// /* more counters */
+// ],
+// }
+//
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif /* ndef ProfileBufferEntry_h */
diff --git a/mozglue/baseprofiler/core/ProfileJSONWriter.cpp b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
new file mode 100644
index 0000000000..966ff2f515
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla::baseprofiler {
+
+UniqueJSONStrings::UniqueJSONStrings(JSONWriter::CollectionStyle aStyle) {
+ mStringTableWriter.StartBareList(aStyle);
+}
+
+UniqueJSONStrings::UniqueJSONStrings(const UniqueJSONStrings& aOther,
+ JSONWriter::CollectionStyle aStyle) {
+ mStringTableWriter.StartBareList(aStyle);
+ uint32_t count = aOther.mStringHashToIndexMap.count();
+ if (count != 0) {
+ MOZ_RELEASE_ASSERT(mStringHashToIndexMap.reserve(count));
+ for (auto iter = aOther.mStringHashToIndexMap.iter(); !iter.done();
+ iter.next()) {
+ mStringHashToIndexMap.putNewInfallible(iter.get().key(),
+ iter.get().value());
+ }
+ mStringTableWriter.CopyAndSplice(
+ aOther.mStringTableWriter.ChunkedWriteFunc());
+ }
+}
+
+UniqueJSONStrings::~UniqueJSONStrings() = default;
+
+void UniqueJSONStrings::SpliceStringTableElements(
+ SpliceableJSONWriter& aWriter) {
+ aWriter.TakeAndSplice(mStringTableWriter.TakeChunkedWriteFunc());
+}
+
+uint32_t UniqueJSONStrings::GetOrAddIndex(const Span<const char>& aStr) {
+ uint32_t count = mStringHashToIndexMap.count();
+ HashNumber hash = HashString(aStr.data(), aStr.size());
+ auto entry = mStringHashToIndexMap.lookupForAdd(hash);
+ if (entry) {
+ MOZ_ASSERT(entry->value() < count);
+ return entry->value();
+ }
+
+ MOZ_RELEASE_ASSERT(mStringHashToIndexMap.add(entry, hash, count));
+ mStringTableWriter.StringElement(aStr);
+ return count;
+}
+
+} // namespace mozilla::baseprofiler
diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.cpp b/mozglue/baseprofiler/core/ProfiledThreadData.cpp
new file mode 100644
index 0000000000..4dc600d97c
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfiledThreadData.cpp
@@ -0,0 +1,187 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfiledThreadData.h"
+
+#include "BaseProfiler.h"
+#include "ProfileBuffer.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+#if defined(GP_OS_darwin)
+# include <pthread.h>
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+ProfiledThreadData::ProfiledThreadData(ThreadInfo* aThreadInfo)
+ : mThreadInfo(aThreadInfo) {}
+
+ProfiledThreadData::~ProfiledThreadData() {}
+
+void ProfiledThreadData::StreamJSON(const ProfileBuffer& aBuffer,
+ SpliceableJSONWriter& aWriter,
+ const std::string& aProcessName,
+ const std::string& aETLDplus1,
+ const TimeStamp& aProcessStartTime,
+ double aSinceTime) {
+ UniqueStacks uniqueStacks;
+
+ MOZ_ASSERT(uniqueStacks.mUniqueStrings);
+ aWriter.SetUniqueStrings(*uniqueStacks.mUniqueStrings);
+
+ aWriter.Start();
+ {
+ StreamSamplesAndMarkers(mThreadInfo->Name(), mThreadInfo->ThreadId(),
+ aBuffer, aWriter, aProcessName, aETLDplus1,
+ aProcessStartTime, mThreadInfo->RegisterTime(),
+ mUnregisterTime, aSinceTime, uniqueStacks);
+
+ aWriter.StartObjectProperty("stackTable");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("prefix");
+ schema.WriteField("frame");
+ }
+
+ aWriter.StartArrayProperty("data");
+ { uniqueStacks.SpliceStackTableElements(aWriter); }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ aWriter.StartObjectProperty("frameTable");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("location");
+ schema.WriteField("relevantForJS");
+ schema.WriteField("innerWindowID");
+ schema.WriteField("implementation");
+ schema.WriteField("optimizations");
+ schema.WriteField("line");
+ schema.WriteField("column");
+ schema.WriteField("category");
+ schema.WriteField("subcategory");
+ }
+
+ aWriter.StartArrayProperty("data");
+ { uniqueStacks.SpliceFrameTableElements(aWriter); }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ aWriter.StartArrayProperty("stringTable");
+ {
+ std::move(*uniqueStacks.mUniqueStrings)
+ .SpliceStringTableElements(aWriter);
+ }
+ aWriter.EndArray();
+ }
+ aWriter.End();
+
+ aWriter.ResetUniqueStrings();
+}
+
+int StreamSamplesAndMarkers(
+ const char* aName, int aThreadId, const ProfileBuffer& aBuffer,
+ SpliceableJSONWriter& aWriter, const std::string& aProcessName,
+ const std::string& aETLDplus1, const TimeStamp& aProcessStartTime,
+ const TimeStamp& aRegisterTime, const TimeStamp& aUnregisterTime,
+ double aSinceTime, UniqueStacks& aUniqueStacks) {
+ int processedThreadId = 0;
+
+ aWriter.StringProperty(
+ "processType",
+ "(unknown)" /* XRE_GeckoProcessTypeToString(XRE_GetProcessType()) */);
+
+ {
+ std::string name = aName;
+ // We currently need to distinguish threads output by Base Profiler from
+ // those in Gecko Profiler, as the frontend could get confused and lose
+ // tracks with the same name.
+ // TODO: As part of the profilers de-duplication, thread data from both
+ // profilers should end up in the same track, at which point this won't be
+ // necessary anymore. See meta bug 1557566.
+ name += " (pre-xul)";
+ aWriter.StringProperty("name", name);
+ }
+
+ // Use given process name (if any).
+ if (!aProcessName.empty()) {
+ aWriter.StringProperty("processName", aProcessName);
+ }
+ if (!aETLDplus1.empty()) {
+ aWriter.StringProperty("eTLD+1", aETLDplus1);
+ }
+
+ if (aRegisterTime) {
+ aWriter.DoubleProperty(
+ "registerTime", (aRegisterTime - aProcessStartTime).ToMilliseconds());
+ } else {
+ aWriter.NullProperty("registerTime");
+ }
+
+ if (aUnregisterTime) {
+ aWriter.DoubleProperty(
+ "unregisterTime",
+ (aUnregisterTime - aProcessStartTime).ToMilliseconds());
+ } else {
+ aWriter.NullProperty("unregisterTime");
+ }
+
+ aWriter.StartObjectProperty("samples");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("stack");
+ schema.WriteField("time");
+ schema.WriteField("eventDelay");
+ }
+
+ aWriter.StartArrayProperty("data");
+ {
+ processedThreadId = aBuffer.StreamSamplesToJSON(
+ aWriter, aThreadId, aSinceTime, aUniqueStacks);
+ }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ aWriter.StartObjectProperty("markers");
+ {
+ {
+ JSONSchemaWriter schema(aWriter);
+ schema.WriteField("name");
+ schema.WriteField("startTime");
+ schema.WriteField("endTime");
+ schema.WriteField("phase");
+ schema.WriteField("category");
+ schema.WriteField("data");
+ }
+
+ aWriter.StartArrayProperty("data");
+ {
+ aBuffer.StreamMarkersToJSON(aWriter, aThreadId, aProcessStartTime,
+ aSinceTime, aUniqueStacks);
+ }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+
+ aWriter.IntProperty("pid",
+ static_cast<int64_t>(profiler_current_process_id()));
+ aWriter.IntProperty(
+ "tid",
+ static_cast<int64_t>(aThreadId != 0 ? aThreadId : processedThreadId));
+
+ return processedThreadId;
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.h b/mozglue/baseprofiler/core/ProfiledThreadData.h
new file mode 100644
index 0000000000..c45c02a7bb
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfiledThreadData.h
@@ -0,0 +1,119 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfiledThreadData_h
+#define ProfiledThreadData_h
+
+#include "BaseProfilingStack.h"
+#include "platform.h"
+#include "ProfileBufferEntry.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class ProfileBuffer;
+
+// This class contains information about a thread that is only relevant while
+// the profiler is running, for any threads (both alive and dead) whose thread
+// name matches the "thread filter" in the current profiler run.
+// ProfiledThreadData objects may be kept alive even after the thread is
+// unregistered, as long as there is still data for that thread in the profiler
+// buffer.
+//
+// Accesses to this class are protected by the profiler state lock.
+//
+// Created as soon as the following are true for the thread:
+// - The profiler is running, and
+// - the thread matches the profiler's thread filter, and
+// - the thread is registered with the profiler.
+// So it gets created in response to either (1) the profiler being started (for
+// an existing registered thread) or (2) the thread being registered (if the
+// profiler is already running).
+//
+// The thread may be unregistered during the lifetime of ProfiledThreadData.
+// If that happens, NotifyUnregistered() is called.
+//
+// This class is the right place to store buffer positions. Profiler buffer
+// positions become invalid if the profiler buffer is destroyed, which happens
+// when the profiler is stopped.
+class ProfiledThreadData final {
+ public:
+ explicit ProfiledThreadData(ThreadInfo* aThreadInfo);
+ ~ProfiledThreadData();
+
+ void NotifyUnregistered(uint64_t aBufferPosition) {
+ mLastSample = Nothing();
+ MOZ_ASSERT(!mBufferPositionWhenReceivedJSContext,
+ "JSContext should have been cleared before the thread was "
+ "unregistered");
+ mUnregisterTime = TimeStamp::NowUnfuzzed();
+ mBufferPositionWhenUnregistered = Some(aBufferPosition);
+ }
+ Maybe<uint64_t> BufferPositionWhenUnregistered() {
+ return mBufferPositionWhenUnregistered;
+ }
+
+ Maybe<uint64_t>& LastSample() { return mLastSample; }
+
+ void StreamJSON(const ProfileBuffer& aBuffer, SpliceableJSONWriter& aWriter,
+ const std::string& aProcessName,
+ const std::string& aETLDplus1,
+ const TimeStamp& aProcessStartTime, double aSinceTime);
+
+ const RefPtr<ThreadInfo> Info() const { return mThreadInfo; }
+
+ void NotifyReceivedJSContext(uint64_t aCurrentBufferPosition) {
+ mBufferPositionWhenReceivedJSContext = Some(aCurrentBufferPosition);
+ }
+
+ private:
+ // Group A:
+ // The following fields are interesting for the entire lifetime of a
+ // ProfiledThreadData object.
+
+ // This thread's thread info.
+ const RefPtr<ThreadInfo> mThreadInfo;
+
+ // Group B:
+ // The following fields are only used while this thread is alive and
+ // registered. They become Nothing() once the thread is unregistered.
+
+ // When sampling, this holds the position in ActivePS::mBuffer of the most
+ // recent sample for this thread, or Nothing() if there is no sample for this
+ // thread in the buffer.
+ Maybe<uint64_t> mLastSample;
+
+ // Only non-Nothing() if the thread currently has a JSContext.
+ Maybe<uint64_t> mBufferPositionWhenReceivedJSContext;
+
+ // Group C:
+ // The following fields are only used once this thread has been unregistered.
+
+ Maybe<uint64_t> mBufferPositionWhenUnregistered;
+ TimeStamp mUnregisterTime;
+};
+
+// Stream all samples and markers from aBuffer with the given aThreadId (or 0
+// for everything, which is assumed to be a single backtrace sample.)
+// Returns the thread id of the output sample(s), or 0 if none was present.
+int StreamSamplesAndMarkers(
+ const char* aName, int aThreadId, const ProfileBuffer& aBuffer,
+ SpliceableJSONWriter& aWriter, const std::string& aProcessName,
+ const std::string& aETLDplus1, const TimeStamp& aProcessStartTime,
+ const TimeStamp& aRegisterTime, const TimeStamp& aUnregisterTime,
+ double aSinceTime, UniqueStacks& aUniqueStacks);
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // ProfiledThreadData_h
diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.cpp b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp
new file mode 100644
index 0000000000..166e72fd9c
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp
@@ -0,0 +1,123 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerBacktrace.h"
+
+#include "BaseProfiler.h"
+#include "ProfileBuffer.h"
+#include "ProfiledThreadData.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+ProfilerBacktrace::ProfilerBacktrace(
+ const char* aName,
+ UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage,
+ UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull /* = nullptr */)
+ : mName(aName),
+ mOptionalProfileChunkedBufferStorage(
+ std::move(aProfileChunkedBufferStorage)),
+ mProfileChunkedBuffer(mOptionalProfileChunkedBufferStorage.get()),
+ mOptionalProfileBufferStorage(std::move(aProfileBufferStorageOrNull)),
+ mProfileBuffer(mOptionalProfileBufferStorage.get()) {
+ if (mProfileBuffer) {
+ MOZ_RELEASE_ASSERT(mProfileChunkedBuffer,
+ "If we take ownership of a ProfileBuffer, we must also "
+ "receive ownership of a ProfileChunkedBuffer");
+ MOZ_RELEASE_ASSERT(
+ mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+ "If we take ownership of a ProfileBuffer, we must also receive "
+ "ownership of its ProfileChunkedBuffer");
+ }
+ MOZ_ASSERT(
+ !mProfileChunkedBuffer || !mProfileChunkedBuffer->IsThreadSafe(),
+ "ProfilerBacktrace only takes a non-thread-safe ProfileChunkedBuffer");
+}
+
+ProfilerBacktrace::ProfilerBacktrace(
+ const char* aName,
+ ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull /* = nullptr */,
+ ProfileBuffer* aExternalProfileBufferOrNull /* = nullptr */)
+ : mName(aName),
+ mProfileChunkedBuffer(aExternalProfileChunkedBufferOrNull),
+ mProfileBuffer(aExternalProfileBufferOrNull) {
+ if (!mProfileChunkedBuffer) {
+ if (mProfileBuffer) {
+ // We don't have a ProfileChunkedBuffer but we have a ProfileBuffer, use
+ // the latter's ProfileChunkedBuffer.
+ mProfileChunkedBuffer = &mProfileBuffer->UnderlyingChunkedBuffer();
+ MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(),
+ "ProfilerBacktrace only takes a non-thread-safe "
+ "ProfileChunkedBuffer");
+ }
+ } else {
+ if (mProfileBuffer) {
+ MOZ_RELEASE_ASSERT(
+ mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+ "If we reference both ProfileChunkedBuffer and ProfileBuffer, they "
+ "must already be connected");
+ }
+ MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(),
+ "ProfilerBacktrace only takes a non-thread-safe "
+ "ProfileChunkedBuffer");
+ }
+}
+
+ProfilerBacktrace::~ProfilerBacktrace() {}
+
+int ProfilerBacktrace::StreamJSON(SpliceableJSONWriter& aWriter,
+ const TimeStamp& aProcessStartTime,
+ UniqueStacks& aUniqueStacks) {
+ int processedThreadId = 0;
+
+ // Unlike ProfiledThreadData::StreamJSON, we don't need to call
+ // ProfileBuffer::AddJITInfoForRange because ProfileBuffer does not contain
+ // any JitReturnAddr entries. For synchronous samples, JIT frames get expanded
+ // at sample time.
+ if (mProfileBuffer) {
+ processedThreadId = StreamSamplesAndMarkers(
+ mName.c_str(), 0, *mProfileBuffer, aWriter, "", "", aProcessStartTime,
+ /* aRegisterTime */ TimeStamp(),
+ /* aUnregisterTime */ TimeStamp(),
+ /* aSinceTime */ 0, aUniqueStacks);
+ } else if (mProfileChunkedBuffer) {
+ ProfileBuffer profileBuffer(*mProfileChunkedBuffer);
+ processedThreadId = StreamSamplesAndMarkers(
+ mName.c_str(), 0, profileBuffer, aWriter, "", "", aProcessStartTime,
+ /* aRegisterTime */ TimeStamp(),
+ /* aUnregisterTime */ TimeStamp(),
+ /* aSinceTime */ 0, aUniqueStacks);
+ }
+ // If there are no buffers, the backtrace is empty and nothing is streamed.
+
+ return processedThreadId;
+}
+
+} // namespace baseprofiler
+
+// static. Reads a chunked buffer then a name; null buffer => null result.
+template <typename Destructor>
+UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>
+ProfileBufferEntryReader::
+ Deserializer<UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>>::Read(
+ ProfileBufferEntryReader& aER) {
+ auto profileChunkedBuffer = aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>();
+ if (!profileChunkedBuffer) {
+ return nullptr;
+ }
+ MOZ_ASSERT(
+ !profileChunkedBuffer->IsThreadSafe(),
+ "ProfilerBacktrace only stores non-thread-safe ProfileChunkedBuffers");
+ std::string name = aER.ReadObject<std::string>();
+ return UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>{
+ new baseprofiler::ProfilerBacktrace(name.c_str(),
+ std::move(profileChunkedBuffer))};
+}
+
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.h b/mozglue/baseprofiler/core/ProfilerBacktrace.h
new file mode 100644
index 0000000000..b5365cd4ac
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerBacktrace.h
@@ -0,0 +1,162 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __PROFILER_BACKTRACE_H
+#define __PROFILER_BACKTRACE_H
+
+#include "mozilla/UniquePtrExtensions.h"
+
+#include <string>
+
+namespace mozilla {
+
+class ProfileChunkedBuffer;
+class TimeStamp;
+
+namespace baseprofiler {
+
+class ProfileBuffer;
+class SpliceableJSONWriter;
+class ThreadInfo;
+class UniqueStacks;
+
+// ProfilerBacktrace encapsulates a synchronous sample.
+// It can work with a ProfileBuffer and/or a ProfileChunkedBuffer (if both, they
+// must already be linked together). The ProfileChunkedBuffer contains all the
+// data; the ProfileBuffer is not strictly needed, only provide it if it is
+// already available at the call site.
+// And these buffers can either be:
+// - owned here (so that the ProfilerBacktrace object can be kept for later
+// use), OR
+// - referenced through pointers (in cases where the backtrace is immediately
+// streamed out, so we only need temporary references to external buffers);
+// these pointers may be null for empty backtraces.
+class ProfilerBacktrace {
+ public:
+ // Take ownership of external buffers and use them to keep, and to stream a
+ // backtrace. If a ProfileBuffer is given, its underlying chunked buffer must
+ // be provided as well.
+ explicit ProfilerBacktrace(
+ const char* aName,
+ UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage,
+ UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull = nullptr);
+
+ // Take pointers to external buffers and use them to stream a backtrace.
+ // If null, the backtrace is effectively empty.
+ // If both are provided, they must already be connected.
+ explicit ProfilerBacktrace(
+ const char* aName,
+ ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull = nullptr,
+ ProfileBuffer* aExternalProfileBufferOrNull = nullptr);
+
+ ~ProfilerBacktrace();
+
+ [[nodiscard]] bool IsEmpty() const {
+ return !mProfileChunkedBuffer ||
+ ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>::Bytes(
+ *mProfileChunkedBuffer) <= ULEB128Size(0u);
+ }
+
+ // ProfilerBacktraces' stacks are deduplicated in the context of the
+ // profile that contains the backtrace as a marker payload.
+ //
+ // That is, markers that contain backtraces should not need their own stack,
+ // frame, and string tables. They should instead reuse their parent
+ // profile's tables.
+ int StreamJSON(SpliceableJSONWriter& aWriter,
+ const TimeStamp& aProcessStartTime,
+ UniqueStacks& aUniqueStacks);
+
+ private:
+ // Used to de/serialize a ProfilerBacktrace.
+ friend ProfileBufferEntryWriter::Serializer<ProfilerBacktrace>;
+ friend ProfileBufferEntryReader::Deserializer<ProfilerBacktrace>;
+
+ std::string mName;
+
+ // `ProfileChunkedBuffer` in which `mProfileBuffer` stores its data; must be
+ // located before `mProfileBuffer` so that it's destroyed after.
+ UniquePtr<ProfileChunkedBuffer> mOptionalProfileChunkedBufferStorage;
+ // If null, there is no need to check mProfileBuffer's (if present) underlying
+ // buffer because this is done when constructed.
+ ProfileChunkedBuffer* mProfileChunkedBuffer;
+
+ UniquePtr<ProfileBuffer> mOptionalProfileBufferStorage;
+ ProfileBuffer* mProfileBuffer;
+};
+
+} // namespace baseprofiler
+
+// Format: [ UniquePtr<ProfileChunkedBuffer> | name ]
+// Initial len==0 marks a nullptr or empty backtrace.
+template <>
+struct ProfileBufferEntryWriter::Serializer<baseprofiler::ProfilerBacktrace> {
+ static Length Bytes(const baseprofiler::ProfilerBacktrace& aBacktrace) {
+ if (!aBacktrace.mProfileChunkedBuffer) {
+ // No buffer.
+ return ULEB128Size(0u);
+ }
+ auto bufferBytes = SumBytes(*aBacktrace.mProfileChunkedBuffer);
+ if (bufferBytes <= ULEB128Size(0u)) {
+ // Empty buffer.
+ return ULEB128Size(0u);
+ }
+ return bufferBytes + SumBytes(aBacktrace.mName);
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const baseprofiler::ProfilerBacktrace& aBacktrace) {
+ if (!aBacktrace.mProfileChunkedBuffer ||
+ SumBytes(*aBacktrace.mProfileChunkedBuffer) <= ULEB128Size(0u)) {
+ // No buffer, or empty buffer.
+ aEW.WriteULEB128(0u);
+ return;
+ }
+ aEW.WriteObject(*aBacktrace.mProfileChunkedBuffer);
+ aEW.WriteObject(aBacktrace.mName);
+ }
+};
+
+template <typename Destructor>
+struct ProfileBufferEntryWriter::Serializer<
+ UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> {
+ static Length Bytes(const UniquePtr<baseprofiler::ProfilerBacktrace,
+ Destructor>& aBacktrace) {
+ if (!aBacktrace) {
+ // Null backtrace pointer (treated like an empty backtrace).
+ return ULEB128Size(0u);
+ }
+ return SumBytes(*aBacktrace);
+ }
+
+ static void Write(ProfileBufferEntryWriter& aEW,
+ const UniquePtr<baseprofiler::ProfilerBacktrace,
+ Destructor>& aBacktrace) {
+ if (!aBacktrace) {
+ // Null backtrace pointer (treated like an empty backtrace).
+ aEW.WriteULEB128(0u);
+ return;
+ }
+ aEW.WriteObject(*aBacktrace);
+ }
+};
+
+template <typename Destructor>
+struct ProfileBufferEntryReader::Deserializer<
+ UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> {
+ static void ReadInto(
+ ProfileBufferEntryReader& aER,
+ UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>& aBacktrace) {
+ aBacktrace = Read(aER);
+ }
+
+ static UniquePtr<baseprofiler::ProfilerBacktrace, Destructor> Read(
+ ProfileBufferEntryReader& aER);
+};
+
+} // namespace mozilla
+
+#endif // __PROFILER_BACKTRACE_H
diff --git a/mozglue/baseprofiler/core/ProfilerMarkers.cpp b/mozglue/baseprofiler/core/ProfilerMarkers.cpp
new file mode 100644
index 0000000000..bff2a9ebdd
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerMarkers.cpp
@@ -0,0 +1,221 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/BaseProfilerMarkers.h"
+
+#include "mozilla/Likely.h"
+
+#include <limits>
+
+namespace mozilla {
+namespace base_profiler_markers_detail {
+
+// We need an atomic type that can hold a `DeserializerTag`. (Atomic doesn't
+// work with too-small types.)
+using DeserializerTagAtomic = unsigned;
+
+// Number of currently-registered deserializers and other marker type functions.
+static Atomic<DeserializerTagAtomic, MemoryOrdering::Relaxed>
+ sDeserializerCount{0};
+
+// This needs to be big enough to handle all possible marker types. If one day
+// this needs to be higher, the underlying DeserializerTag type will have to be
+// changed.
+static constexpr DeserializerTagAtomic DeserializerMax = 250;
+
+static_assert(
+ DeserializerMax <= std::numeric_limits<Streaming::DeserializerTag>::max(),
+ "The maximum number of deserializers must fit in the DeserializerTag type");
+
+// Array of marker type functions.
+// 1-based, i.e.: [0] -> tag 1, [DeserializerMax - 1] -> tag DeserializerMax.
+// Elements are added at the next available atomically-incremented
+// `sDeserializerCount` (minus 1) whenever a new marker type is used in a
+// Firefox session; the content is kept between profiler runs in that session.
+// There is theoretically a race between the increment and the time the entry is
+// fully written, but in practice all new elements are written (during
+// profiling, using a marker type for the first time) long before they are read
+// (after profiling is paused).
+static Streaming::MarkerTypeFunctions
+ sMarkerTypeFunctions1Based[DeserializerMax];
+
+/* static */ Streaming::DeserializerTag Streaming::TagForMarkerTypeFunctions(
+ Streaming::MarkerDataDeserializer aDeserializer,
+ Streaming::MarkerTypeNameFunction aMarkerTypeNameFunction,
+ Streaming::MarkerSchemaFunction aMarkerSchemaFunction) {
+ MOZ_RELEASE_ASSERT(!!aDeserializer);
+ MOZ_RELEASE_ASSERT(!!aMarkerTypeNameFunction);
+ MOZ_RELEASE_ASSERT(!!aMarkerSchemaFunction);
+
+ DeserializerTagAtomic tag = ++sDeserializerCount;
+ MOZ_RELEASE_ASSERT(
+ tag <= DeserializerMax,
+ "Too many deserializers, consider increasing DeserializerMax. "
+ "Or is a deserializer stored again and again?");
+ sMarkerTypeFunctions1Based[tag - 1] = {aDeserializer, aMarkerTypeNameFunction,
+ aMarkerSchemaFunction};
+
+ return static_cast<DeserializerTag>(tag);
+}
+
+/* static */ Streaming::MarkerDataDeserializer Streaming::DeserializerForTag(
+ Streaming::DeserializerTag aTag) {
+ MOZ_RELEASE_ASSERT(
+ aTag > 0 && static_cast<DeserializerTagAtomic>(aTag) <=
+ static_cast<DeserializerTagAtomic>(sDeserializerCount),
+ "Out-of-range tag value");
+ return sMarkerTypeFunctions1Based[aTag - 1].mMarkerDataDeserializer;
+}
+
+/* static */ Span<const Streaming::MarkerTypeFunctions>
+Streaming::MarkerTypeFunctionsArray() {
+ return {sMarkerTypeFunctions1Based, sDeserializerCount};
+}
+
+} // namespace base_profiler_markers_detail
+
+void MarkerSchema::Stream(JSONWriter& aWriter,
+ const Span<const char>& aName) && {
+ // The caller should have started a JSON array, in which we can add an object
+ // that defines a marker schema.
+
+ if (mLocations.empty()) {
+ // SpecialFrontendLocation case, don't output anything for this type.
+ return;
+ }
+
+ aWriter.StartObjectElement();
+ {
+ aWriter.StringProperty("name", aName);
+
+ if (!mChartLabel.empty()) {
+ aWriter.StringProperty("chartLabel", mChartLabel);
+ }
+
+ if (!mTooltipLabel.empty()) {
+ aWriter.StringProperty("tooltipLabel", mTooltipLabel);
+ }
+
+ if (!mTableLabel.empty()) {
+ aWriter.StringProperty("tableLabel", mTableLabel);
+ }
+
+ aWriter.StartArrayProperty("display");
+ {
+ for (Location location : mLocations) {
+ aWriter.StringElement(LocationToStringSpan(location));
+ }
+ }
+ aWriter.EndArray();
+
+ aWriter.StartArrayProperty("data");
+ {
+ for (const DataRow& row : mData) {
+ aWriter.StartObjectElement();
+ {
+ row.match(
+ [&aWriter](const DynamicData& aData) {
+ aWriter.StringProperty("key", aData.mKey);
+ if (aData.mLabel) {
+ aWriter.StringProperty("label", *aData.mLabel);
+ }
+ aWriter.StringProperty("format",
+ FormatToStringSpan(aData.mFormat));
+ if (aData.mSearchable) {
+ aWriter.BoolProperty(
+ "searchable",
+ *aData.mSearchable == Searchable::searchable);
+ }
+ },
+ [&aWriter](const StaticData& aStaticData) {
+ aWriter.StringProperty("label", aStaticData.mLabel);
+ aWriter.StringProperty("value", aStaticData.mValue);
+ });
+ }
+ aWriter.EndObject();
+ }
+ }
+ aWriter.EndArray();
+ }
+ aWriter.EndObject();
+}
+
+/* static */
+Span<const char> MarkerSchema::LocationToStringSpan(
+ MarkerSchema::Location aLocation) {
+ switch (aLocation) {
+ case Location::markerChart:
+ return mozilla::MakeStringSpan("marker-chart");
+ case Location::markerTable:
+ return mozilla::MakeStringSpan("marker-table");
+ case Location::timelineOverview:
+ return mozilla::MakeStringSpan("timeline-overview");
+ case Location::timelineMemory:
+ return mozilla::MakeStringSpan("timeline-memory");
+ case Location::timelineIPC:
+ return mozilla::MakeStringSpan("timeline-ipc");
+ case Location::timelineFileIO:
+ return mozilla::MakeStringSpan("timeline-fileio");
+ case Location::stackChart:
+ return mozilla::MakeStringSpan("stack-chart");
+ default:
+ MOZ_CRASH("Unexpected Location enum");
+ return {};
+ }
+}
+
+/* static */
+Span<const char> MarkerSchema::FormatToStringSpan(
+ MarkerSchema::Format aFormat) {
+ switch (aFormat) {
+ case Format::url:
+ return mozilla::MakeStringSpan("url");
+ case Format::filePath:
+ return mozilla::MakeStringSpan("file-path");
+ case Format::string:
+ return mozilla::MakeStringSpan("string");
+ case Format::duration:
+ return mozilla::MakeStringSpan("duration");
+ case Format::time:
+ return mozilla::MakeStringSpan("time");
+ case Format::seconds:
+ return mozilla::MakeStringSpan("seconds");
+ case Format::milliseconds:
+ return mozilla::MakeStringSpan("milliseconds");
+ case Format::microseconds:
+ return mozilla::MakeStringSpan("microseconds");
+ case Format::nanoseconds:
+ return mozilla::MakeStringSpan("nanoseconds");
+ case Format::bytes:
+ return mozilla::MakeStringSpan("bytes");
+ case Format::percentage:
+ return mozilla::MakeStringSpan("percentage");
+ case Format::integer:
+ return mozilla::MakeStringSpan("integer");
+ case Format::decimal:
+ return mozilla::MakeStringSpan("decimal");
+ default:
+ MOZ_CRASH("Unexpected Format enum");
+ return {};
+ }
+}
+
+} // namespace mozilla
+
+namespace mozilla::baseprofiler {
+template MFBT_API ProfileBufferBlockIndex AddMarker(const ProfilerString8View&,
+ const MarkerCategory&,
+ MarkerOptions&&,
+ markers::TextMarker,
+ const std::string&);
+
+template MFBT_API ProfileBufferBlockIndex
+AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&,
+ const MarkerCategory&, MarkerOptions&&, markers::NoPayload);
+
+template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer(
+ ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&,
+ MarkerOptions&&, markers::TextMarker, const std::string&);
+} // namespace mozilla::baseprofiler
diff --git a/mozglue/baseprofiler/core/ProfilingCategory.cpp b/mozglue/baseprofiler/core/ProfilingCategory.cpp
new file mode 100644
index 0000000000..8ff2b15555
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilingCategory.cpp
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilingCategory.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// clang-format off
+
+// ProfilingSubcategory_X:
+// One enum for each category X, listing that category's subcategories. This
+// allows the sProfilingCategoryPairInfo macro construction below to look up a
+// per-category index for a subcategory.
+#define SUBCATEGORY_ENUMS_BEGIN_CATEGORY(name, labelAsString, color) \
+ enum class ProfilingSubcategory_##name : uint32_t {
+#define SUBCATEGORY_ENUMS_SUBCATEGORY(category, name, labelAsString) \
+ name,
+#define SUBCATEGORY_ENUMS_END_CATEGORY \
+ };
+MOZ_PROFILING_CATEGORY_LIST(SUBCATEGORY_ENUMS_BEGIN_CATEGORY,
+ SUBCATEGORY_ENUMS_SUBCATEGORY,
+ SUBCATEGORY_ENUMS_END_CATEGORY)
+#undef SUBCATEGORY_ENUMS_BEGIN_CATEGORY
+#undef SUBCATEGORY_ENUMS_SUBCATEGORY
+#undef SUBCATEGORY_ENUMS_END_CATEGORY
+
+// sProfilingCategoryPairInfo:
+// A list of ProfilingCategoryPairInfos with the same order as
+// ProfilingCategoryPair, which can be used to map a ProfilingCategoryPair to
+// its information.
+#define CATEGORY_INFO_BEGIN_CATEGORY(name, labelAsString, color)
+#define CATEGORY_INFO_SUBCATEGORY(category, name, labelAsString) \
+ {ProfilingCategory::category, \
+ uint32_t(ProfilingSubcategory_##category::name), labelAsString},
+#define CATEGORY_INFO_END_CATEGORY
+const ProfilingCategoryPairInfo sProfilingCategoryPairInfo[] = {
+ MOZ_PROFILING_CATEGORY_LIST(CATEGORY_INFO_BEGIN_CATEGORY,
+ CATEGORY_INFO_SUBCATEGORY,
+ CATEGORY_INFO_END_CATEGORY)
+};
+#undef CATEGORY_INFO_BEGIN_CATEGORY
+#undef CATEGORY_INFO_SUBCATEGORY
+#undef CATEGORY_INFO_END_CATEGORY
+
+// clang-format on
+
+const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo(
+ ProfilingCategoryPair aCategoryPair) {
+ static_assert(
+ MOZ_ARRAY_LENGTH(sProfilingCategoryPairInfo) ==
+ uint32_t(ProfilingCategoryPair::COUNT),
+ "sProfilingCategoryPairInfo and ProfilingCategoryPair need to have the "
+ "same order and the same length");
+ // Release-assert the index is in range before using it on the table.
+ uint32_t categoryPairIndex = uint32_t(aCategoryPair);
+ MOZ_RELEASE_ASSERT(categoryPairIndex <=
+ uint32_t(ProfilingCategoryPair::LAST));
+ return sProfilingCategoryPairInfo[categoryPairIndex];
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfilingStack.cpp b/mozglue/baseprofiler/core/ProfilingStack.cpp
new file mode 100644
index 0000000000..f5cd2ddd04
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilingStack.cpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilingStack.h"
+
+#include <algorithm>
+
+#include "mozilla/IntegerRange.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/UniquePtrExtensions.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+ProfilingStack::~ProfilingStack() {
+  // Label macros cache a reference to this ProfilingStack (to skip a TLS
+  // lookup). Any label still live here would later touch freed memory, so
+  // crash deterministically now instead.
+  MOZ_RELEASE_ASSERT(stackPointer == 0);
+
+  delete[] frames;
+}
+
+void ProfilingStack::ensureCapacitySlow() {
+ MOZ_ASSERT(stackPointer >= capacity);
+ const uint32_t kInitialCapacity = 128;
+ // Grow `frames`: double `capacity` (128 if empty), but to at least sp + 1.
+ uint32_t sp = stackPointer;
+ auto newCapacity =
+ std::max(sp + 1, capacity ? capacity * 2 : kInitialCapacity);
+
+ auto* newFrames = new ProfilingStackFrame[newCapacity];
+
+ // It's important that `frames` / `capacity` / `stackPointer` remain
+ // consistent here at all times.
+ for (auto i : IntegerRange(capacity)) {
+ newFrames[i] = frames[i];
+ }
+ // Install the new array and capacity before freeing the old array.
+ ProfilingStackFrame* oldFrames = frames;
+ frames = newFrames;
+ capacity = newCapacity;
+ delete[] oldFrames;
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/RegisteredThread.cpp b/mozglue/baseprofiler/core/RegisteredThread.cpp
new file mode 100644
index 0000000000..85a7fc2c6d
--- /dev/null
+++ b/mozglue/baseprofiler/core/RegisteredThread.cpp
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "RegisteredThread.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Records aInfo and derives the racy state and platform data from its id.
+RegisteredThread::RegisteredThread(ThreadInfo* aInfo, void* aStackTop)
+    : mRacyRegisteredThread(aInfo->ThreadId()),
+      mPlatformData(AllocPlatformData(aInfo->ThreadId())),
+      mStackTop(aStackTop),
+      mThreadInfo(aInfo) {
+#if defined(GP_OS_darwin)
+  // No guessing needed on mac: ask pthreads for the calling thread's value.
+  mStackTop = pthread_get_stackaddr_np(pthread_self());
+#endif
+}
+
+RegisteredThread::~RegisteredThread() = default;
+
+size_t RegisteredThread::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  // Only the object itself is counted at present.
+  //
+  // Measurement of the following members may be added later if DMD finds it
+  // is worthwhile:
+  // - mPlatformData
+  //
+  // The following members are not measured:
+  // - mThreadInfo: because it is non-owning
+
+  return aMallocSizeOf(this);
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/RegisteredThread.h b/mozglue/baseprofiler/core/RegisteredThread.h
new file mode 100644
index 0000000000..6ae12b823f
--- /dev/null
+++ b/mozglue/baseprofiler/core/RegisteredThread.h
@@ -0,0 +1,166 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RegisteredThread_h
+#define RegisteredThread_h
+
+#include "platform.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// This class contains the state for a single thread that is accessible without
+// protection from gPSMutex in platform.cpp. Because there is no external
+// protection against data races, it must provide internal protection. Hence
+// the "Racy" prefix.
+//
+class RacyRegisteredThread final {
+ public:
+  explicit RacyRegisteredThread(int aThreadId)
+      : mThreadId(aThreadId), mSleep(AWAKE), mIsBeingProfiled(false) {}
+
+  ~RacyRegisteredThread() {}
+
+  void SetIsBeingProfiled(bool aIsBeingProfiled) {
+    mIsBeingProfiled = aIsBeingProfiled;
+  }
+
+  bool IsBeingProfiled() const { return mIsBeingProfiled; }
+
+  // This is called on every profiler restart. Put things that should happen at
+  // that time here.
+  void ReinitializeOnResume() {
+    // This is needed to cause an initial sample to be taken from sleeping
+    // threads that had been observed prior to the profiler stopping and
+    // restarting. Otherwise sleeping threads would not have any samples to
+    // copy forward while sleeping.
+    (void)mSleep.compareExchange(SLEEPING_OBSERVED, SLEEPING_NOT_OBSERVED);
+  }
+
+  // This returns true for the second and subsequent calls in each sleep cycle.
+  bool CanDuplicateLastSampleDueToSleep() {
+    if (mSleep == AWAKE) {
+      return false;
+    }
+
+    if (mSleep.compareExchange(SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  // Call this whenever the current thread sleeps. Calling it twice in a row
+  // without an intervening SetAwake() call is an error.
+  void SetSleeping() {
+    MOZ_ASSERT(mSleep == AWAKE);
+    mSleep = SLEEPING_NOT_OBSERVED;
+  }
+
+  // Call this whenever the current thread wakes. Calling it twice in a row
+  // without an intervening SetSleeping() call is an error.
+  void SetAwake() {
+    MOZ_ASSERT(mSleep != AWAKE);
+    mSleep = AWAKE;
+  }
+
+  bool IsSleeping() const { return mSleep != AWAKE; }
+
+  int ThreadId() const { return mThreadId; }
+
+  class ProfilingStack& ProfilingStack() {
+    return mProfilingStack;
+  }
+  const class ProfilingStack& ProfilingStack() const { return mProfilingStack; }
+
+ private:
+  class ProfilingStack mProfilingStack;
+
+  // mThreadId contains the thread ID of the current thread. It is safe to read
+  // this from multiple threads concurrently, as it will never be mutated.
+  const int mThreadId;
+
+  // mSleep tracks whether the thread is sleeping, and if so, whether it has
+  // been previously observed. This is used for an optimization: in some cases,
+  // when a thread is asleep, we duplicate the previous sample, which is
+  // cheaper than taking a new sample.
+  //
+  // mSleep is atomic because it is accessed from multiple threads.
+  //
+  // - This thread sets it via SetSleeping() and SetAwake().
+  // - CanDuplicateLastSampleDueToSleep() and ReinitializeOnResume() move it
+  //   between the two SLEEPING_* states; SamplerThread::Run() reads it.
+  //
+  // There are two cases where racing between threads can cause an issue.
+  //
+  // - If CanDuplicateLastSampleDueToSleep() returns false but that result is
+  //   invalidated before being acted upon, we will take a full sample
+  //   unnecessarily. This is additional work but won't cause any correctness
+  //   issues. (In actual fact, this case is impossible. In order to go from
+  //   CanDuplicateLastSampleDueToSleep() returning false to it returning true
+  //   requires an intermediate call to it in order for mSleep to go from
+  //   SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.)
+  //
+  // - If CanDuplicateLastSampleDueToSleep() returns true but that result is
+  //   invalidated before being acted upon -- i.e. the thread wakes up before
+  //   DuplicateLastSample() is called -- we will duplicate the previous
+  //   sample. This is inaccurate, but only slightly... we will effectively
+  //   treat the thread as having slept a tiny bit longer than it really did.
+  //
+  // This latter inaccuracy could be avoided by moving the
+  // CanDuplicateLastSampleDueToSleep() check within the thread-freezing code,
+  // e.g. the section where Tick() is called. But that would reduce the
+  // effectiveness of the optimization because more code would have to be run
+  // before we can tell that duplication is allowed.
+  //
+  static const int AWAKE = 0;
+  static const int SLEEPING_NOT_OBSERVED = 1;
+  static const int SLEEPING_OBSERVED = 2;
+  Atomic<int> mSleep;
+
+  // Is this thread being profiled? (e.g., should markers be recorded?)
+  Atomic<bool, MemoryOrdering::Relaxed> mIsBeingProfiled;
+};
+
+// This class contains information that's relevant to a single thread only
+// while that thread is running and registered with the profiler, but
+// regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+class RegisteredThread final {
+ public:
+ RegisteredThread(ThreadInfo* aInfo, void* aStackTop);
+ ~RegisteredThread();
+ // `class` is spelled out because the accessor has the same name as the type.
+ class RacyRegisteredThread& RacyRegisteredThread() {
+ return mRacyRegisteredThread;
+ }
+ const class RacyRegisteredThread& RacyRegisteredThread() const {
+ return mRacyRegisteredThread;
+ }
+
+ PlatformData* GetPlatformData() const { return mPlatformData.get(); }
+ const void* StackTop() const { return mStackTop; }
+ // Memory reporting; defined out of line.
+ size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+ // Returns a RefPtr by value, i.e. a new reference to the ThreadInfo.
+ const RefPtr<ThreadInfo> Info() const { return mThreadInfo; }
+
+ private:
+ class RacyRegisteredThread mRacyRegisteredThread;
+
+ const UniquePlatformData mPlatformData;
+ const void* mStackTop;
+
+ const RefPtr<ThreadInfo> mThreadInfo;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // RegisteredThread_h
diff --git a/mozglue/baseprofiler/core/ThreadInfo.h b/mozglue/baseprofiler/core/ThreadInfo.h
new file mode 100644
index 0000000000..4be84a45a9
--- /dev/null
+++ b/mozglue/baseprofiler/core/ThreadInfo.h
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ThreadInfo_h
+#define ThreadInfo_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/TimeStamp.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// This class contains information about a thread which needs to be stored
+// across restarts of the profiler and which can be useful even after the
+// thread has stopped running.
+// It uses threadsafe refcounting and only contains immutable data.
+class ThreadInfo final {
+ public:
+  ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread,
+             const TimeStamp& aRegisterTime = TimeStamp::NowUnfuzzed())
+      : mName(aName),
+        mRegisterTime(aRegisterTime),
+        mThreadId(aThreadId),
+        mIsMainThread(aIsMainThread),
+        mRefCnt(0) {
+    // Sanity checks on the range of the native thread ID.
+    MOZ_ASSERT(aThreadId >= 0, "native thread ID is < 0");
+    MOZ_ASSERT(aThreadId <= INT32_MAX, "native thread ID is > INT32_MAX");
+  }
+
+  // Ref-counting is written by hand because the RefCounted.h macros don't
+  // produce the same code between mozglue and libxul, see bug 1536656.
+  MFBT_API void AddRef() const { ++mRefCnt; }
+  MFBT_API void Release() const {
+    MOZ_ASSERT(int32_t(mRefCnt) > 0);
+    if (--mRefCnt != 0) return;
+    // That was the last reference.
+    delete this;
+  }
+
+  const char* Name() const { return mName.c_str(); }
+  TimeStamp RegisterTime() const { return mRegisterTime; }
+  int ThreadId() const { return mThreadId; }
+  bool IsMainThread() const { return mIsMainThread; }
+
+ private:
+  const std::string mName;
+  const TimeStamp mRegisterTime;
+  const int mThreadId;
+  const bool mIsMainThread;
+
+  mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt;
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif // ThreadInfo_h
diff --git a/mozglue/baseprofiler/core/VTuneProfiler.cpp b/mozglue/baseprofiler/core/VTuneProfiler.cpp
new file mode 100644
index 0000000000..2911c39f08
--- /dev/null
+++ b/mozglue/baseprofiler/core/VTuneProfiler.cpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_WIN
+# undef UNICODE
+# undef _UNICODE
+#endif
+
+#include "VTuneProfiler.h"
+
+#include <memory>
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+VTuneProfiler* VTuneProfiler::mInstance = nullptr;
+
+void VTuneProfiler::Initialize() {
+ // Currently a no-op: the detection code below is disabled, so mInstance is
+ // not set here. The disabled code is a 'dirty trick': if the ittnotify DLL
+ // was not found __itt_event_create always returns 0, otherwise it returns
+ // incrementing numbers (wasting 2 events, which should be okay).
+ // TODO re-implement here if vtune is needed
+ // __itt_event testEvent =
+ // __itt_event_create("Test event", strlen("Test event"));
+ // testEvent = __itt_event_create("Test event 2", strlen("Test event 2"));
+
+ // if (testEvent) {
+ // mInstance = new VTuneProfiler();
+ // }
+}
+
+void VTuneProfiler::Shutdown() {}  // currently does nothing
+
+void VTuneProfiler::TraceInternal(const char* aName, TracingKind aKind) {
+ // Currently a no-op; the disabled implementation is kept for reference.
+ // TODO re-implement here if vtune is needed
+ // std::string str(aName);
+
+ // auto iter = mStrings.find(str);
+
+ // __itt_event event;
+ // if (iter != mStrings.end()) {
+ // event = iter->second;
+ // } else {
+ // event = __itt_event_create(aName, str.length());
+ // mStrings.insert({str, event});
+ // }
+
+ // if (aKind == TRACING_INTERVAL_START || aKind == TRACING_EVENT) {
+ // // VTune will consider starts not matched with an end to be
+ // // single point in time events.
+ // __itt_event_start(event);
+ // } else {
+ // __itt_event_end(event);
+ // }
+}
+
+void VTuneProfiler::RegisterThreadInternal(const char* aName) {
+ // Currently a no-op. TODO re-implement here if vtune is needed
+ // std::string str(aName);
+
+ // if (!str.compare("Main Thread (Base Profiler)")) {
+ // // Process main thread.
+ // switch (XRE_GetProcessType()) {
+ // case GeckoProcessType::GeckoProcessType_Default:
+ // __itt_thread_set_name("Main Process");
+ // break;
+ // case GeckoProcessType::GeckoProcessType_Content:
+ // __itt_thread_set_name("Content Process");
+ // break;
+ // case GeckoProcessType::GeckoProcessType_GMPlugin:
+ // __itt_thread_set_name("Plugin Process");
+ // break;
+ // case GeckoProcessType::GeckoProcessType_GPU:
+ // __itt_thread_set_name("GPU Process");
+ // break;
+ // default:
+ // __itt_thread_set_name("Unknown Process");
+ // }
+ // return;
+ // }
+ // __itt_thread_set_name(aName);
+}
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/VTuneProfiler.h b/mozglue/baseprofiler/core/VTuneProfiler.h
new file mode 100644
index 0000000000..cf94ab7242
--- /dev/null
+++ b/mozglue/baseprofiler/core/VTuneProfiler.h
@@ -0,0 +1,84 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VTuneProfiler_h
+#define VTuneProfiler_h
+
+// The intent here is to add 0 overhead for regular users. In order to build
+// the VTune profiler code at all --enable-vtune-instrumentation needs to be
+// set as a build option. Even then, when none of the environment variables
+// is specified that allow us to find the ittnotify DLL, these functions
+// should be minimal overhead. When starting Firefox under VTune, these
+// env vars will be automatically defined, otherwise INTEL_LIBITTNOTIFY32/64
+// should be set to point at the ittnotify DLL.
+#ifndef MOZ_VTUNE_INSTRUMENTATION
+
+# define VTUNE_INIT()
+# define VTUNE_SHUTDOWN()
+
+# define VTUNE_TRACING(name, kind)
+# define VTUNE_REGISTER_THREAD(name)
+
+#else
+
+# include "BaseProfiler.h"
+
+// This is the regular Intel header, these functions are actually defined for
+// us inside js/src/vtune by an intel C file which actually dynamically resolves
+// them to the correct DLL. Through libxul these will 'magically' resolve.
+# include "vtune/ittnotify.h"
+
+# include <stddef.h>
+# include <unordered_map>
+# include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class VTuneProfiler {
+ public:
+ static void Initialize();
+ static void Shutdown();
+ // Kind of trace point passed to Trace().
+ enum TracingKind {
+ TRACING_EVENT,
+ TRACING_INTERVAL_START,
+ TRACING_INTERVAL_END,
+ };
+ // Both static entry points below do nothing while mInstance is null.
+ static void Trace(const char* aName, TracingKind aKind) {
+ if (mInstance) {
+ mInstance->TraceInternal(aName, aKind);
+ }
+ }
+ static void RegisterThread(const char* aName) {
+ if (mInstance) {
+ mInstance->RegisterThreadInternal(aName);
+ }
+ }
+
+ private:
+ void TraceInternal(const char* aName, TracingKind aKind);
+ void RegisterThreadInternal(const char* aName);
+
+ // This is null when the ittnotify DLL could not be found.
+ static VTuneProfiler* mInstance;
+ // NOTE(review): presumably caches the __itt_event created per name; confirm.
+ std::unordered_map<std::string, __itt_event> mStrings;
+};
+
+# define VTUNE_INIT() VTuneProfiler::Initialize()
+# define VTUNE_SHUTDOWN() VTuneProfiler::Shutdown()
+
+# define VTUNE_TRACING(name, kind) VTuneProfiler::Trace(name, kind)
+# define VTUNE_REGISTER_THREAD(name) VTuneProfiler::RegisterThread(name)
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif
+
+#endif /* VTuneProfiler_h */
diff --git a/mozglue/baseprofiler/core/platform-linux-android.cpp b/mozglue/baseprofiler/core/platform-linux-android.cpp
new file mode 100644
index 0000000000..210bc4dd31
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-linux-android.cpp
@@ -0,0 +1,550 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google, Inc. nor the names of its contributors
+// may be used to endorse or promote products derived from this
+// software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// This file is used for both Linux and Android.
+
+#include <stdio.h>
+#include <math.h>
+
+#include <pthread.h>
+#if defined(GP_OS_freebsd)
+# include <sys/thr.h>
+#endif
+#include <semaphore.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <ucontext.h>
+// Ubuntu Dapper requires memory pages to be marked as
+// executable. Otherwise, OS raises an exception when executing code
+// in that page.
+#include <sys/types.h> // mmap & munmap
+#include <sys/mman.h> // mmap & munmap
+#include <sys/stat.h> // open
+#include <fcntl.h> // open
+#include <unistd.h> // sysconf
+#include <semaphore.h>
+#ifdef __GLIBC__
+# include <execinfo.h> // backtrace, backtrace_symbols
+#endif // def __GLIBC__
+#include <strings.h> // index
+#include <errno.h>
+#include <stdarg.h>
+
+#include "prenv.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/DebugOnly.h"
+
+#include <string.h>
+#include <list>
+
+using namespace mozilla;
+
+namespace mozilla {
+namespace baseprofiler {
+
+int profiler_current_process_id() { return getpid(); }  // result of getpid()
+
+int profiler_current_thread_id() {
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+  // Issue the raw syscall: glibc doesn't provide a wrapper for gettid().
+  return static_cast<int>(static_cast<pid_t>(syscall(SYS_gettid)));
+#elif defined(GP_OS_freebsd)
+  long tid;
+  (void)thr_self(&tid);
+  return static_cast<int>(tid);
+#else
+#  error "bad platform"
+#endif
+}
+
+static int64_t MicrosecondsSince1970() {  // gettimeofday(), in microseconds
+  struct timeval now;
+  gettimeofday(&now, nullptr);
+  return int64_t(now.tv_sec) * 1000000 + int64_t(now.tv_usec);
+}
+
+void* GetStackTop(void* aGuess) { return aGuess; }  // the guess is used as-is
+
+// Fills aRegs (context pointer, PC, SP, FP, LR) from the ucontext aContext.
+static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
+  aRegs.mContext = aContext;
+  mcontext_t& mcontext = aContext->uc_mcontext;
+  // Extracting the sample from the context is extremely machine dependent.
+#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_amd64_freebsd)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
+#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
+#elif defined(GP_PLAT_arm64_freebsd)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr);
+#elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.gregs[31]);  // $ra
+#else
+#  error "bad platform"
+#endif
+}
+
+#if defined(GP_OS_android)
+# define SYS_tgkill __NR_tgkill
+#endif
+// tgkill(): forwards its three arguments to the SYS_tgkill raw syscall.
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+int tgkill(pid_t tgid, pid_t tid, int signalno) {
+ return syscall(SYS_tgkill, tgid, tid, signalno);
+}
+#endif
+
+#if defined(GP_OS_freebsd)
+# define tgkill thr_kill2
+#endif
+
+class PlatformData {
+ public:
+ explicit PlatformData(int aThreadId) {}
+ // Stores nothing: the constructor ignores aThreadId and there are no members.
+ ~PlatformData() {}
+};
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The only way to reliably interrupt a Linux thread and inspect its register
+// and stack state is by sending a signal to it, and doing the work inside the
+// signal handler. But we don't want to run much code inside the signal
+// handler, since POSIX severely restricts what we can do in signal handlers.
+// So we use a system of semaphores to suspend the thread and allow the
+// sampler thread to do all the work of unwinding and copying out whatever
+// data it wants.
+//
+// A four-message protocol is used to reliably suspend and later resume the
+// thread to be sampled (the samplee):
+//
+// Sampler (signal sender) thread              Samplee (thread to be sampled)
+//
+// Prepare the SigHandlerCoordinator
+// and point sSigHandlerCoordinator at it
+//
+// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
+// wait(mMessage2)                             Copy register state
+//                                               into sSigHandlerCoordinator
+//                         <------ MSG 2 ----- post(mMessage2)
+// Samplee is now suspended.                   wait(mMessage3)
+//   Examine its stack/register
+//   state at leisure
+//
+// Release samplee:
+//   post(mMessage3)       ------- MSG 3 ----->
+// wait(mMessage4)                             Samplee now resumes. Tell
+//                                               the sampler that we are done.
+//                         <------ MSG 4 ------ post(mMessage4)
+// Now we know the samplee's signal            (leave signal handler)
+//   handler has finished using
+//   sSigHandlerCoordinator. We can
+//   safely reuse it for some other thread.
+//
+
+// A type used to coordinate between the sampler (signal sending) thread and
+// the thread currently being sampled (the samplee, which receives the
+// signals).
+//
+// The first message is sent using a SIGPROF signal delivery. The subsequent
+// three are sent using sem_wait/sem_post pairs. They are named accordingly
+// in the following struct.
+struct SigHandlerCoordinator {
+  SigHandlerCoordinator() {
+    PodZero(&mUContext);
+    int status = sem_init(&mMessage2, /* pshared */ 0, 0);
+    status |= sem_init(&mMessage3, /* pshared */ 0, 0);
+    status |= sem_init(&mMessage4, /* pshared */ 0, 0);
+    MOZ_ASSERT(status == 0);
+  }
+
+  ~SigHandlerCoordinator() {
+    int status = sem_destroy(&mMessage2);
+    status |= sem_destroy(&mMessage3);
+    status |= sem_destroy(&mMessage4);
+    MOZ_ASSERT(status == 0);
+  }
+  // One semaphore per protocol message that is not carried by the signal.
+  sem_t mMessage2;  // To sampler: "context is in sSigHandlerCoordinator"
+  sem_t mMessage3;  // To samplee: "resume"
+  sem_t mMessage4;  // To sampler: "finished with sSigHandlerCoordinator"
+  ucontext_t mUContext;  // Context at signal
+};
+
+struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
+
+static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
+ // Avoid TSan warning about clobbering errno.
+ int savedErrno = errno;
+ // Sanity checks: expected signal, and a coordinator has been published.
+ MOZ_ASSERT(aSignal == SIGPROF);
+ MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
+
+ // By sending us this signal, the sampler thread has sent us message 1 in
+ // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
+ // for use, please copy your register context into it."
+ Sampler::sSigHandlerCoordinator->mUContext =
+ *static_cast<ucontext_t*>(aContext);
+
+ // Send message 2: tell the sampler thread that the context has been copied
+ // into |sSigHandlerCoordinator->mUContext|. sem_post can never fail by
+ // being interrupted by a signal, so there's no loop around this call.
+ int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
+ MOZ_ASSERT(r == 0);
+
+ // At this point, the sampler thread assumes we are suspended, so we must
+ // not touch any global state here.
+
+ // Wait for message 3: the sampler thread tells us to resume.
+ while (true) {
+ r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
+ if (r == -1 && errno == EINTR) {
+ // Interrupted by a signal. Try again.
+ continue;
+ }
+ // We don't expect any other kind of failure
+ MOZ_ASSERT(r == 0);
+ break;
+ }
+
+ // Send message 4: tell the sampler thread that we are finished accessing
+ // |sSigHandlerCoordinator|. After this point it is not safe to touch
+ // |sSigHandlerCoordinator|.
+ r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
+ MOZ_ASSERT(r == 0);
+ // Put back the errno value that was saved on entry.
+ errno = savedErrno;
+}
+
+// Installs SigprofHandler for SIGPROF, remembering the previous disposition.
+Sampler::Sampler(PSLockRef aLock)
+    // The sampler thread's ID is unknown until that thread runs, so
+    // mSamplerTid starts as the placeholder -1 and is filled in for real in
+    // SuspendAndSampleAndResumeThread().
+    : mMyPid(profiler_current_process_id()), mSamplerTid(-1) {
+#if defined(USE_EHABI_STACKWALK)
+  EHABIStackWalkInit();
+#endif
+
+  // NOTE: LUL is deliberately not initialized here; SamplerThread's
+  // constructor does that instead. The profiler_suspend_and_sample_thread
+  // entry point must be able to sample without waiting for LUL to be
+  // initialized.
+
+  // Request profiling signals.
+  struct sigaction action;
+  action.sa_flags = SA_RESTART | SA_SIGINFO;
+  sigemptyset(&action.sa_mask);
+  action.sa_sigaction = SigprofHandler;
+  const int rv = sigaction(SIGPROF, &action, &mOldSigprofHandler);
+  if (rv != 0) {
+    MOZ_CRASH("Error installing SIGPROF handler in the profiler");
+  }
+}
+
+void Sampler::Disable(PSLockRef aLock) {
+  // The SIGPROF disposition is global state, so put the old handler back
+  // right away, while gPSMutex is locked.
+  sigaction(SIGPROF, &mOldSigprofHandler, nullptr);
+}
+
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+ PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+ const TimeStamp& aNow, const Func& aProcessRegs) {
+ // Only one sampler thread can be sampling at once. So we expect to have
+ // complete control over |sSigHandlerCoordinator|.
+ MOZ_ASSERT(!sSigHandlerCoordinator);
+ // Record the calling (sampler) thread's id the first time through.
+ if (mSamplerTid == -1) {
+ mSamplerTid = profiler_current_thread_id();
+ }
+ int sampleeTid = aRegisteredThread.Info()->ThreadId();
+ MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
+
+ //----------------------------------------------------------------//
+ // Suspend the samplee thread and get its context.
+ // The signal handler reaches |coord| through the static pointer set below.
+ SigHandlerCoordinator coord; // on sampler thread's stack
+ sSigHandlerCoordinator = &coord;
+
+ // Send message 1 to the samplee (the thread to be sampled), by
+ // signalling at it.
+ // This could fail if the thread doesn't exist anymore.
+ int r = tgkill(mMyPid, sampleeTid, SIGPROF);
+ if (r == 0) {
+ // Wait for message 2 from the samplee, indicating that the context
+ // is available and that the thread is suspended.
+ while (true) {
+ r = sem_wait(&sSigHandlerCoordinator->mMessage2);
+ if (r == -1 && errno == EINTR) {
+ // Interrupted by a signal. Try again.
+ continue;
+ }
+ // We don't expect any other kind of failure.
+ MOZ_ASSERT(r == 0);
+ break;
+ }
+
+ //----------------------------------------------------------------//
+ // Sample the target thread.
+
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ //
+ // The profiler's "critical section" begins here. In the critical section,
+ // we must not do any dynamic memory allocation, nor try to acquire any lock
+ // or any other unshareable resource. This is because the thread to be
+ // sampled has been suspended at some entirely arbitrary point, and we have
+ // no idea which unsharable resources (locks, essentially) it holds. So any
+ // attempt to acquire any lock, including the implied locks used by the
+ // malloc implementation, risks deadlock. This includes TimeStamp::Now(),
+ // which gets a lock on Windows.
+
+ // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
+ // valid. We can poke around in it and unwind its stack as we like.
+
+ // Extract the current register values.
+ Registers regs;
+ PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
+ aProcessRegs(regs, aNow);
+
+ //----------------------------------------------------------------//
+ // Resume the target thread.
+
+ // Send message 3 to the samplee, which tells it to resume.
+ r = sem_post(&sSigHandlerCoordinator->mMessage3);
+ MOZ_ASSERT(r == 0);
+
+ // Wait for message 4 from the samplee, which tells us that it has
+ // finished with |sSigHandlerCoordinator|.
+ while (true) {
+ r = sem_wait(&sSigHandlerCoordinator->mMessage4);
+ if (r == -1 && errno == EINTR) {
+ continue;
+ }
+ MOZ_ASSERT(r == 0);
+ break;
+ }
+
+ // The profiler's critical section ends here. After this point, none of the
+ // critical section limitations documented above apply.
+ //
+ // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ }
+
+ // This isn't strictly necessary, but doing so does help pick up anomalies
+ // in which the signal handler is running when it shouldn't be.
+ sSigHandlerCoordinator = nullptr;
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+// Start routine handed to pthread_create(). |aArg| is the SamplerThread whose
+// Run() method is executed on the new thread.
+static void* ThreadEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+// Sets up the sampler and starts the sampling thread. The interval is
+// converted from milliseconds to microseconds, rounded to the nearest integer
+// and clamped to at least 1.
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
+#if defined(USE_LUL_STACKWALK)
+  // Create the LUL instance stored in CorePS if there is none yet.
+  if (!CorePS::Lul(aLock)) {
+    CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL));
+    lul::LUL* const unwinder = CorePS::Lul(aLock);
+
+    // Load all the unwind info that is available right now.
+    read_procmaps(unwinder);
+
+    // Enter unwind mode. From here on no unwind info can be added to or
+    // removed from this LUL instance; only Unwind() calls are possible.
+    unwinder->EnableUnwinding();
+
+    // Run the LUL unit tests when requested through the environment.
+    if (getenv("MOZ_PROFILER_LUL_TEST")) {
+      int testCount = 0;
+      int passedCount = 0;
+      RunLulUnitTests(&testCount, &passedCount, unwinder);
+    }
+  }
+#endif
+
+  // Launch the sampling thread, which repeatedly sends a SIGPROF signal.
+  // Sending the signal ourselves gives much better accuracy than relying on
+  // itimer.
+  const int createResult = pthread_create(&mThread, nullptr, ThreadEntry, this);
+  if (createResult != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+// Blocks until the sampling thread has exited.
+SamplerThread::~SamplerThread() {
+  pthread_join(mThread, nullptr);
+}
+
+// Sleeps the calling thread for |aMicroseconds|, resuming the sleep whenever
+// it is interrupted by a signal.
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  // Split the interval into whole seconds plus a sub-second remainder, since
+  // nanosleep requires tv_nsec to stay below one second. This single path
+  // handles every interval; usleep() may reject values >= 1,000,000 and
+  // gives no way to resume after an interruption.
+  struct timespec ts;
+  ts.tv_sec = aMicroseconds / 1000000;
+  ts.tv_nsec = (aMicroseconds % 1000000) * 1000L;
+
+  int rv;
+  do {
+    // When interrupted, nanosleep stores the remaining time back into ts, so
+    // calling it again only sleeps for what is left.
+    rv = ::nanosleep(&ts, &ts);
+  } while (rv != 0 && errno == EINTR);
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
+}
+
+// Stops sampling by disabling the sampler.
+void SamplerThread::Stop(PSLockRef aLock) {
+  // Disabling the sampler restores the old signal handler. The handler is
+  // global state, so it must be restored now, while gPSMutex is locked.
+  // This SamplerThread is still alive at this point, but that is harmless:
+  // the next iteration of the main loop in Run() will not get past the
+  // mActivityGeneration check and therefore will not send any signals.
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+
+// We use pthread_atfork() to temporarily disable signal delivery during any
+// fork() call. Without that, fork() can be repeatedly interrupted by signal
+// delivery, requiring it to be repeatedly restarted, which can lead to *long*
+// delays. See bug 837390.
+//
+// We provide no paf_child() function to run in the child after forking. This
+// is fine because we always immediately exec() after fork(), and exec()
+// clobbers all process state. (At one point we did have a paf_child()
+// function, but it caused problems related to locking gPSMutex. See bug
+// 1348374.)
+//
+// Unfortunately all this is only doable on non-Android because Bionic doesn't
+// have pthread_atfork.
+
+// Fork "prepare" handler: runs in the parent before the fork. Saves the
+// current IsSamplingPaused state and then pauses sampling.
+static void paf_prepare() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  // Nothing to save or pause while the profiler is inactive.
+  if (!ActivePS::Exists(lock)) {
+    return;
+  }
+
+  const bool pausedBeforeFork = ActivePS::IsSamplingPaused(lock);
+  ActivePS::SetWasSamplingPaused(lock, pausedBeforeFork);
+  ActivePS::SetIsSamplingPaused(lock, true);
+}
+
+// Fork "parent" handler: runs in the parent after the fork. Puts
+// IsSamplingPaused back to its pre-fork value and clears the saved value.
+static void paf_parent() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  // Nothing to restore while the profiler is inactive.
+  if (!ActivePS::Exists(lock)) {
+    return;
+  }
+
+  const bool pausedBeforeFork = ActivePS::WasSamplingPaused(lock);
+  ActivePS::SetIsSamplingPaused(lock, pausedBeforeFork);
+  ActivePS::SetWasSamplingPaused(lock, false);
+}
+
+// Platform-specific initialization: registers the fork handlers. There is
+// deliberately no child handler.
+static void PlatformInit(PSLockRef aLock) {
+  pthread_atfork(/* prepare */ paf_prepare, /* parent */ paf_parent,
+                 /* child */ nullptr);
+}
+
+#else
+
+// No fork handlers are installed in this configuration.
+static void PlatformInit(PSLockRef aLock) {
+}
+
+#endif
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Context used by synchronous samples. It's safe to have a single one because
+// only one synchronous sample can be taken at a time (due to
+// profiler_get_backtrace()'s PSAutoLock).
+// ucontext_t sSyncUContext;
+
+void Registers::SyncPopulate() {
+  // Synchronous sampling is not implemented here, so this always crashes.
+  // TODO: port getcontext from breakpad if profiler_get_backtrace is needed.
+  // The intended implementation is kept for reference:
+  //   if (!getcontext(&sSyncUContext)) {
+  //     PopulateRegsFromContext(*this, &sSyncUContext);
+  //   }
+  MOZ_CRASH("profiler_get_backtrace() unsupported");
+}
+#endif
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform-macos.cpp b/mozglue/baseprofiler/core/platform-macos.cpp
new file mode 100644
index 0000000000..fc847886ee
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-macos.cpp
@@ -0,0 +1,233 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <mach/mach_init.h>
+#include <mach-o/getsect.h>
+
+#include <AvailabilityMacros.h>
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <libkern/OSAtomic.h>
+#include <mach/mach.h>
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/thread_act.h>
+#include <mach/vm_statistics.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+#include <time.h>
+
+// this port is based off of v8 svn revision 9837
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Returns the ID of the current process.
+int profiler_current_process_id() {
+  return getpid();
+}
+
+// Returns the calling thread's ID as reported by the thread_selfid syscall.
+int profiler_current_thread_id() {
+  const pid_t selfId = static_cast<pid_t>(syscall(SYS_thread_selfid));
+  return static_cast<int>(selfId);
+}
+
+// Returns the time reported by gettimeofday() as a single microsecond count.
+static int64_t MicrosecondsSince1970() {
+  struct timeval now;
+  gettimeofday(&now, nullptr);
+
+  const int64_t wholeSeconds = now.tv_sec;
+  const int64_t extraMicroseconds = now.tv_usec;
+  return wholeSeconds * 1000000 + extraMicroseconds;
+}
+
+// Returns the stack address that pthreads reports for the calling thread.
+// |aGuess| is ignored on this platform.
+void* GetStackTop(void* aGuess) {
+  return pthread_get_stackaddr_np(pthread_self());
+}
+
+// Holds the Mach thread port of a profiled thread.
+class PlatformData {
+ public:
+  // Captures the Mach port of the calling thread. |aThreadId| is unused here.
+  explicit PlatformData(int aThreadId) : mProfiledThread(mach_thread_self()) {}
+
+  // Deallocates the Mach port obtained by the constructor.
+  ~PlatformData() { mach_port_deallocate(mach_task_self(), mProfiledThread); }
+
+  thread_act_t ProfiledThread() { return mProfiledThread; }
+
+ private:
+  // A Mach primitive is used instead of a pthread one because pthreads does
+  // not provide the thread manipulation primitives that are required. For
+  // details, consult the "Mac OS X Internals" book, Section 7.3.
+  thread_act_t mProfiledThread;
+};
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The sampler needs no set-up on this platform.
+Sampler::Sampler(PSLockRef aLock) {
+}
+
+// The sampler has nothing to undo on this platform.
+void Sampler::Disable(PSLockRef aLock) {
+}
+
+// Suspends the thread described by |aRegisteredThread|, reads its register
+// state and hands it, together with |aNow|, to |aProcessRegs|, then resumes
+// the thread. Nothing is sampled if the thread cannot be suspended.
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  const thread_act_t sampleeThread =
+      aRegisteredThread.GetPlatformData()->ProfiledThread();
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  // thread_suspend is used on OS X rather than pthread_kill (which Linux used
+  // at one time): pthread_kill has less consistent performance and causes
+  // strange crashes (see bug 1166778 and bug 1166808), and thread_suspend is
+  // also a lot simpler to use.
+
+  if (thread_suspend(sampleeThread) != KERN_SUCCESS) {
+    return;
+  }
+
+  //----------------------------------------------------------------//
+  // Sample the target thread.
+
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  //
+  // The profiler's "critical section" begins here. We must be very careful
+  // what we do while the samplee is suspended, or risk deadlock. See the
+  // corresponding comment in platform-linux-android.cpp for details.
+
+  // Pick the thread-state flavor for this architecture. REGISTER_FIELD maps a
+  // register name onto the matching member of the state structure; the
+  // member names carry a "__" prefix when __DARWIN_UNIX03 is set.
+#if defined(__x86_64__)
+  thread_state_flavor_t flavor = x86_THREAD_STATE64;
+  x86_thread_state64_t state;
+  mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+#  if __DARWIN_UNIX03
+#    define REGISTER_FIELD(name) __r##name
+#  else
+#    define REGISTER_FIELD(name) r##name
+#  endif  // __DARWIN_UNIX03
+#elif defined(__aarch64__)
+  thread_state_flavor_t flavor = ARM_THREAD_STATE64;
+  arm_thread_state64_t state;
+  mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT;
+#  if __DARWIN_UNIX03
+#    define REGISTER_FIELD(name) __##name
+#  else
+#    define REGISTER_FIELD(name) name
+#  endif  // __DARWIN_UNIX03
+#else
+#  error "unknown architecture"
+#endif
+
+  // Only sample when the register state could be read; the thread is resumed
+  // below either way.
+  if (KERN_SUCCESS == thread_get_state(sampleeThread, flavor,
+                                       reinterpret_cast<natural_t*>(&state),
+                                       &count)) {
+    Registers regs;
+#if defined(__x86_64__)
+    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
+    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
+#elif defined(__aarch64__)
+    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(pc));
+    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(fp));
+#else
+#  error "unknown architecture"
+#endif
+    regs.mLR = 0;
+
+    aProcessRegs(regs, aNow);
+  }
+
+#undef REGISTER_FIELD
+
+  //----------------------------------------------------------------//
+  // Resume the target thread.
+
+  thread_resume(sampleeThread);
+
+  // The profiler's critical section ends here.
+  //
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+// Start routine handed to pthread_create(). |aArg| is the SamplerThread whose
+// Run() method is executed on the new thread.
+static void* ThreadEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+// Sets up the sampler and starts the sampling thread. The interval is
+// converted from milliseconds to microseconds, rounded to the nearest integer
+// and clamped to at least 1.
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
+      mThread{nullptr} {
+  // Start the thread with default attributes.
+  const int createResult =
+      pthread_create(&mThread, /* attr */ nullptr, ThreadEntry, this);
+  if (createResult != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+// Blocks until the sampling thread has exited.
+SamplerThread::~SamplerThread() {
+  pthread_join(mThread, nullptr);
+}
+
+// Sleeps the calling thread for |aMicroseconds|, resuming the sleep whenever
+// it is interrupted by a signal.
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  // nanosleep(2) replaces the obsolescent usleep(). The interval is split
+  // into whole seconds plus a sub-second remainder because tv_nsec must stay
+  // below one second.
+  struct timespec ts;
+  ts.tv_sec = aMicroseconds / 1000000;
+  ts.tv_nsec = (aMicroseconds % 1000000) * 1000L;
+
+  // When interrupted, nanosleep stores the remaining time back into ts, so
+  // calling it again only sleeps for what is left. Any other failure ends
+  // the sleep early, as a failed usleep() did before.
+  while (::nanosleep(&ts, &ts) != 0 && errno == EINTR) {
+  }
+}
+
+// Stops sampling by disabling the sampler.
+void SamplerThread::Stop(PSLockRef aLock) {
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+// No platform-specific initialization is performed here.
+static void PlatformInit(PSLockRef aLock) {
+}
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Fills in the registers for a synchronous sample taken on the calling
+// thread: mSP and mFP are derived from the current frame pointer, mPC is the
+// return address of this function, and mLR is cleared.
+void Registers::SyncPopulate() {
+#  if defined(__x86_64__)
+  asm(
+      // Caller's %rsp = %rbp + 16: 8 bytes for the saved previous %rbp plus
+      // 8 bytes for the return address.
+      "leaq 0x10(%%rbp), %0\n\t"
+      // The previous %rbp is stored at the address held in %rbp.
+      "movq (%%rbp), %1\n\t"
+      : "=r"(mSP), "=r"(mFP));
+#  elif defined(__aarch64__)
+  asm(
+      // Caller's sp = fp + 16: 8 bytes for the saved previous fp plus
+      // 8 bytes for the return address.
+      "add %0, x29, #0x10\n\t"
+      // The previous fp is stored at the address held in fp (x29).
+      "ldr %1, [x29]\n\t"
+      : "=r"(mSP), "=r"(mFP));
+#  else
+#    error "unknown architecture"
+#  endif
+  mPC = reinterpret_cast<Address>(
+      __builtin_extract_return_addr(__builtin_return_address(0)));
+  mLR = 0;
+}
+#endif
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform-win32.cpp b/mozglue/baseprofiler/core/platform-win32.cpp
new file mode 100644
index 0000000000..22b8a8462b
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-win32.cpp
@@ -0,0 +1,351 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google, Inc. nor the names of its contributors
+// may be used to endorse or promote products derived from this
+// software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <windows.h>
+#include <mmsystem.h>
+#include <process.h>
+
+#include "nsWindowsDllInterceptor.h"
+#include "mozilla/StackWalk_windows.h"
+#include "mozilla/WindowsVersion.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Returns the ID of the current process.
+int profiler_current_process_id() {
+  return _getpid();
+}
+
+// Returns the native ID of the calling thread as an int. Debug builds assert
+// that the ID fits.
+int profiler_current_thread_id() {
+  const DWORD nativeId = GetCurrentThreadId();
+  MOZ_ASSERT(nativeId <= INT32_MAX, "native thread ID is > INT32_MAX");
+  return static_cast<int>(nativeId);
+}
+
+// Returns the current system time as microseconds since 1970-01-01.
+static int64_t MicrosecondsSince1970() {
+  // Read the time directly as a FILETIME (100-nanosecond intervals since
+  // 1601-01-01). Going through SYSTEMTIME would truncate the value to whole
+  // milliseconds and add a conversion step that can fail.
+  FILETIME ft;
+  GetSystemTimeAsFileTime(&ft);
+
+  int64_t prt;
+  static_assert(sizeof(ft) == sizeof(prt), "Expect FILETIME to be 64 bits");
+  memcpy(&prt, &ft, sizeof(prt));
+
+  // Number of 100-nanosecond intervals between 1601-01-01 and 1970-01-01.
+  const int64_t epochBias = 116444736000000000LL;
+
+  // Rebase onto 1970, then convert 100ns units to microseconds.
+  return (prt - epochBias) / 10;
+}
+
+// Returns the StackBase recorded in the calling thread's TIB. |aGuess| is
+// ignored on this platform.
+void* GetStackTop(void* aGuess) {
+  const PNT_TIB threadInfoBlock = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
+  return reinterpret_cast<void*>(threadInfoBlock->StackBase);
+}
+
+// Copies the program counter, stack pointer and frame pointer from |aContext|
+// into |aRegs|, and clears the link register field.
+static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) {
+  const auto toAddress = [](auto aRegisterValue) {
+    return reinterpret_cast<Address>(aRegisterValue);
+  };
+
+  aRegs.mLR = 0;
+#if defined(GP_ARCH_amd64)
+  aRegs.mPC = toAddress(aContext->Rip);
+  aRegs.mSP = toAddress(aContext->Rsp);
+  aRegs.mFP = toAddress(aContext->Rbp);
+#elif defined(GP_ARCH_x86)
+  aRegs.mPC = toAddress(aContext->Eip);
+  aRegs.mSP = toAddress(aContext->Esp);
+  aRegs.mFP = toAddress(aContext->Ebp);
+#elif defined(GP_ARCH_arm64)
+  aRegs.mPC = toAddress(aContext->Pc);
+  aRegs.mSP = toAddress(aContext->Sp);
+  aRegs.mFP = toAddress(aContext->Fp);
+#else
+#  error "bad arch"
+#endif
+}
+
+// Duplicates the current thread's pseudo handle into a real handle that
+// carries the access rights needed for profiling.
+// @return the real HANDLE, or nullptr if the duplication failed.
+static HANDLE GetRealCurrentThreadHandleForProfiling() {
+  const DWORD kProfilingAccess =
+      THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION;
+  const HANDLE thisProcess = ::GetCurrentProcess();
+
+  HANDLE realHandle;
+  const BOOL duplicated = ::DuplicateHandle(
+      /* source process */ thisProcess, ::GetCurrentThread(),
+      /* target process */ thisProcess, &realHandle, kProfilingAccess,
+      /* bInheritHandle */ FALSE, /* dwOptions */ 0);
+  return duplicated ? realHandle : nullptr;
+}
+
+// Owns a real handle to a profiled thread.
+class PlatformData {
+ public:
+  // Takes a handle to the calling thread, which is the thread that is going
+  // to be profiled. A real handle is needed because it will be used from the
+  // sampler thread.
+  explicit PlatformData(int aThreadId)
+      : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
+    MOZ_ASSERT(aThreadId == ::GetCurrentThreadId());
+  }
+
+  // Closes the handle, if one was obtained.
+  ~PlatformData() {
+    if (!mProfiledThread) {
+      return;
+    }
+    CloseHandle(mProfiledThread);
+    mProfiledThread = nullptr;
+  }
+
+  HANDLE ProfiledThread() { return mProfiledThread; }
+
+ private:
+  // nullptr when the handle could not be duplicated.
+  HANDLE mProfiledThread;
+};
+
+#if defined(USE_MOZ_STACK_WALK)
+// Returns the profiled thread's handle stored in |aData|.
+HANDLE GetThreadHandle(PlatformData* aData) {
+  return aData->ProfiledThread();
+}
+#endif
+
+// Sentinel handle value: ~SamplerThread() skips CloseHandle() when mThread
+// equals it.
+static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The sampler needs no set-up on this platform.
+Sampler::Sampler(PSLockRef aLock) {
+}
+
+// The sampler has nothing to undo on this platform.
+void Sampler::Disable(PSLockRef aLock) {
+}
+
+// Suspends the thread described by |aRegisteredThread|, captures its register
+// context and passes it, together with |aNow|, to |aProcessRegs|, then
+// resumes the thread. Returns without sampling when there is no thread
+// handle, or when suspending the thread or reading its context fails.
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  const HANDLE sampleeThread =
+      aRegisteredThread.GetPlatformData()->ProfiledThread();
+  if (!sampleeThread) {
+    return;
+  }
+
+  // Zero-initialized context that receives the samplee's register state.
+  CONTEXT context;
+  memset(&context, 0, sizeof(context));
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
+  if (SuspendThread(sampleeThread) == kSuspendFailed) {
+    return;
+  }
+
+  // SuspendThread is asynchronous, so the thread may still be running when
+  // it returns. Calling GetThreadContext first ensures that the thread is
+  // really suspended.
+  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+
+  // CONTEXT_CONTROL alone is faster, but on 64-bit it causes crashes in
+  // RtlVirtualUnwind (see bug 1120126), so all the flags are set there.
+#if defined(GP_ARCH_amd64)
+  context.ContextFlags = CONTEXT_FULL;
+#else
+  context.ContextFlags = CONTEXT_CONTROL;
+#endif
+  if (!GetThreadContext(sampleeThread, &context)) {
+    // The context could not be read: undo the suspension and give up.
+    ResumeThread(sampleeThread);
+    return;
+  }
+
+  //----------------------------------------------------------------//
+  // Sample the target thread.
+
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  //
+  // The profiler's "critical section" begins here. We must be very careful
+  // what we do while the samplee is suspended, or risk deadlock. See the
+  // corresponding comment in platform-linux-android.cpp for details.
+
+  Registers regs;
+  PopulateRegsFromContext(regs, &context);
+  aProcessRegs(regs, aNow);
+
+  //----------------------------------------------------------------//
+  // Resume the target thread.
+
+  ResumeThread(sampleeThread);
+
+  // The profiler's critical section ends here.
+  //
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+// Start routine handed to _beginthreadex(). |aArg| is the SamplerThread whose
+// Run() method is executed on the new thread.
+static unsigned int __stdcall ThreadEntry(void* aArg) {
+  static_cast<SamplerThread*>(aArg)->Run();
+  return 0;
+}
+
+// Sets up the sampler and starts the sampling thread. The interval is
+// converted from milliseconds to microseconds, rounded to the nearest integer
+// and clamped to at least 1.
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
+  // The timer resolution, which tends to be around 16ms, is left alone by
+  // default. Only when the requested interval is below 10ms do we try to
+  // adjust the resolution to match it.
+  // NOTE(review): for intervals below 1000us the integer division makes the
+  // requested period 0.
+  if (mIntervalMicroseconds < 10 * 1000) {
+    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  // Create the thread with _beginthreadex() rather than the Win32 function
+  // CreateThread(): CreateThread() does not initialize the thread-specific
+  // structures of the C runtime library.
+  const uintptr_t rawHandle = _beginthreadex(/* security */ nullptr,
+                                             /* stack_size */ 0, ThreadEntry,
+                                             /* arglist */ this,
+                                             /* initflag */ 0,
+                                             /* thrdaddr */ nullptr);
+  mThread = reinterpret_cast<HANDLE>(rawHandle);
+  if (!mThread) {
+    MOZ_CRASH("_beginthreadex failed");
+  }
+}
+
+// Waits for the sampling thread to finish, then releases its handle.
+SamplerThread::~SamplerThread() {
+  WaitForSingleObject(mThread, INFINITE);
+
+  // Close our own handle for the thread, unless there is none.
+  if (mThread == kNoThread) {
+    return;
+  }
+  CloseHandle(mThread);
+}
+
+// Pauses the calling thread for roughly |aMicroseconds|.
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  const uint32_t wholeMilliseconds = aMicroseconds / 1000;
+
+  // For now, keep the old behaviour of a minimum Sleep(1), even for
+  // smaller-than-usual sleeps after an overshoot, unless the user has
+  // explicitly opted into a sub-millisecond profiler interval.
+  if (mIntervalMicroseconds >= 1000) {
+    ::Sleep(std::max(1u, wholeMilliseconds));
+    return;
+  }
+
+  // Sub-millisecond interval: work out the deadline before sleeping.
+  const TimeStamp deadline = TimeStamp::NowUnfuzzed() +
+                             TimeDuration::FromMicroseconds(aMicroseconds);
+
+  // First, sleep for as many whole milliseconds as possible.
+  if (wholeMilliseconds > 0) {
+    ::Sleep(wholeMilliseconds);
+  }
+
+  // Then, spin until the deadline has passed.
+  while (TimeStamp::NowUnfuzzed() < deadline) {
+    YieldProcessor();
+  }
+}
+
+// Undoes the timer resolution change made by the constructor and disables
+// the sampler.
+void SamplerThread::Stop(PSLockRef aLock) {
+  // The timer resolution is reset now, while gPSMutex is locked, i.e. before
+  // any other SamplerThread can be created and call ::timeBeginPeriod().
+  //
+  // This SamplerThread is still alive at this point, but that is harmless:
+  // the next iteration of the main loop in Run() will not get past the
+  // mActivityGeneration check, so it will not make any more ::Sleep() calls.
+  const bool resolutionWasRaised = mIntervalMicroseconds < 10 * 1000;
+  if (resolutionWasRaised) {
+    ::timeEndPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+// No platform-specific initialization is performed here.
+static void PlatformInit(PSLockRef aLock) {
+}
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Fills in the registers from a context captured on the calling thread.
+void Registers::SyncPopulate() {
+  CONTEXT captured;
+  RtlCaptureContext(&captured);
+  PopulateRegsFromContext(*this, &captured);
+}
+#endif
+
+#if defined(GP_PLAT_amd64_windows)
+// Interceptor through which the ntdll.dll hooks below are installed.
+static WindowsDllInterceptor NtDllIntercept;
+
+// Signature of LdrUnloadDll, and the hook used to call the original function.
+using LdrUnloadDll_func = NTSTATUS(NTAPI*)(HMODULE module);
+static WindowsDllInterceptor::FuncHookType<LdrUnloadDll_func> stub_LdrUnloadDll;
+
+// Replacement for LdrUnloadDll: forwards to the original with stack walking
+// suppressed for the duration of the call.
+static NTSTATUS NTAPI patched_LdrUnloadDll(HMODULE module) {
+  // LdrUnloadDll holds the RtlLookupFunctionEntry lock, so the stack walker
+  // must not suspend this thread while the call is in progress.
+  AutoSuppressStackWalking suppress;
+  return stub_LdrUnloadDll(module);
+}
+
+// Signature of LdrResolveDelayLoadedAPI, and the hook used to call the
+// original function. The pointer parameters are declared as PVOID to avoid
+// pulling in obscure headers.
+using LdrResolveDelayLoadedAPI_func = PVOID(WINAPI*)(
+    PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
+    PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags);
+static WindowsDllInterceptor::FuncHookType<LdrResolveDelayLoadedAPI_func>
+    stub_LdrResolveDelayLoadedAPI;
+
+// Replacement for LdrResolveDelayLoadedAPI: forwards every argument to the
+// original with stack walking suppressed for the duration of the call.
+static PVOID WINAPI patched_LdrResolveDelayLoadedAPI(
+    PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
+    PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags) {
+  // LdrResolveDelayLoadedAPI holds the RtlLookupFunctionEntry lock, so the
+  // stack walker must not suspend this thread while the call is in progress.
+  AutoSuppressStackWalking suppress;
+  return stub_LdrResolveDelayLoadedAPI(ParentModuleBase, DelayloadDescriptor,
+                                       FailureDllHook, FailureSystemHook,
+                                       ThunkAddress, Flags);
+}
+
+// Installs the ntdll.dll hooks defined above. Only the first call does any
+// work.
+MFBT_API void InitializeWin64ProfilerHooks() {
+  // Both profilers may call this function, but it should only run once.
+  static bool sAlreadyRan = false;
+  if (sAlreadyRan) {
+    return;
+  }
+  sAlreadyRan = true;
+
+  NtDllIntercept.Init("ntdll.dll");
+  stub_LdrUnloadDll.Set(NtDllIntercept, "LdrUnloadDll", &patched_LdrUnloadDll);
+
+  // LdrResolveDelayLoadedAPI was introduced in Win8, so there is nothing
+  // more to hook on older versions.
+  if (!IsWin8OrLater()) {
+    return;
+  }
+  stub_LdrResolveDelayLoadedAPI.Set(NtDllIntercept,
+                                    "LdrResolveDelayLoadedAPI",
+                                    &patched_LdrResolveDelayLoadedAPI);
+}
+#endif // defined(GP_PLAT_amd64_windows)
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform.cpp b/mozglue/baseprofiler/core/platform.cpp
new file mode 100644
index 0000000000..14c48ce649
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform.cpp
@@ -0,0 +1,3712 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// There are three kinds of samples done by the profiler.
+//
+// - A "periodic" sample is the most complex kind. It is done in response to a
+// timer while the profiler is active. It involves writing a stack trace plus
+// a variety of other values (memory measurements, responsiveness
+// measurements, etc.) into the main ProfileBuffer. The sampling is done from
+// off-thread, and so SuspendAndSampleAndResumeThread() is used to get the
+// register values.
+//
+// - A "synchronous" sample is a simpler kind. It is done in response to an API
+// call (profiler_get_backtrace()). It involves writing a stack trace and
+// little else into a temporary ProfileBuffer, and wrapping that up in a
+// ProfilerBacktrace that can be subsequently used in a marker. The sampling
+// is done on-thread, and so Registers::SyncPopulate() is used to get the
+// register values.
+//
+// - A "backtrace" sample is the simplest kind. It is done in response to an
+// API call (profiler_suspend_and_sample_thread()). It involves getting a
+// stack trace via a ProfilerStackCollector; it does not write to a
+// ProfileBuffer. The sampling is done from off-thread, and so uses
+// SuspendAndSampleAndResumeThread() to get the register values.
+
+#include "platform.h"
+
+#include <algorithm>
+#include <errno.h>
+#include <fstream>
+#include <ostream>
+#include <set>
+#include <sstream>
+
+// #include "memory_hooks.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/AutoProfilerLabel.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/DoubleConversion.h"
+#include "mozilla/Printf.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+#include "mozilla/Services.h"
+#include "mozilla/Span.h"
+#include "mozilla/StackWalk.h"
+#include "mozilla/StaticPtr.h"
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Tuple.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "prdtoa.h"
+#include "prtime.h"
+
+#include "BaseProfiler.h"
+#include "BaseProfilingCategory.h"
+#include "PageInformation.h"
+#include "ProfiledThreadData.h"
+#include "ProfilerBacktrace.h"
+#include "ProfileBuffer.h"
+#include "RegisteredThread.h"
+#include "BaseProfilerSharedLibraries.h"
+#include "ThreadInfo.h"
+#include "VTuneProfiler.h"
+
+// Win32 builds always have frame pointers, so FramePointerStackWalk() always
+// works.
+#if defined(GP_PLAT_x86_windows)
+# define HAVE_NATIVE_UNWIND
+# define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// Win64 builds always omit frame pointers, so we use the slower
+// MozStackWalk(), which works in that case.
+#if defined(GP_PLAT_amd64_windows)
+# define HAVE_NATIVE_UNWIND
+# define USE_MOZ_STACK_WALK
+#endif
+
+// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
+// MozStackWalk().
+#if defined(GP_PLAT_arm64_windows)
+# define HAVE_NATIVE_UNWIND
+# define USE_MOZ_STACK_WALK
+#endif
+
+// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
+// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
+// on Mac.
+#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
+# define HAVE_NATIVE_UNWIND
+# define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// No stack-walking in baseprofiler on linux, android, bsd.
+// APIs now make it easier to capture backtraces from the Base Profiler, which
+// is currently not supported on these platforms, and would lead to a MOZ_CRASH
+// in Registers::SyncPopulate(). `#if 0` added in bug 1658232, follow-up bugs
+// should be referenced in meta bug 1557568.
+#if 0
+// Android builds use the ARM Exception Handling ABI to unwind.
+# if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+# define HAVE_NATIVE_UNWIND
+# define USE_EHABI_STACKWALK
+# include "EHABIStackWalk.h"
+# endif
+
+// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
+# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
+ defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
+ defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \
+ defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
+ defined(GP_PLAT_arm64_freebsd)
+# define HAVE_NATIVE_UNWIND
+# define USE_LUL_STACKWALK
+# include "lul/LulMain.h"
+# include "lul/platform-linux-lul.h"
+
+// On linux we use LUL for periodic samples and synchronous samples, but we use
+// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
+// (See the comment at the top of the file for a definition of
+// periodic/synchronous/backtrace.)
+//
+// FramePointerStackWalk can produce incomplete stacks when the current entry is
+// in a shared library without framepointers, however LUL can take a long time
+// to initialize, which is undesirable for consumers of
+// profiler_suspend_and_sample_thread like the Background Hang Reporter.
+# if defined(MOZ_PROFILING)
+# define USE_FRAME_POINTER_STACK_WALK
+# endif
+# endif
+#endif
+
+// We can only stackwalk without expensive initialization on platforms which
+// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
+// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
+// which can be expensive.
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+# define HAVE_FASTINIT_NATIVE_UNWIND
+#endif
+
+#ifdef MOZ_VALGRIND
+# include <valgrind/memcheck.h>
+#else
+# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+# include <ucontext.h>
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+using detail::RacyFeatures;
+
+// Returns true if messages at |aLevelToTest| should be logged. The maximum
+// level is read from the environment on the first call: 5 for
+// MOZ_BASE_PROFILER_VERBOSE_LOGGING, 4 for MOZ_BASE_PROFILER_DEBUG_LOGGING,
+// 3 for MOZ_BASE_PROFILER_LOGGING, and 0 if none of them is set.
+bool LogTest(int aLevelToTest) {
+  static const int sMaxLevel = [] {
+    if (getenv("MOZ_BASE_PROFILER_VERBOSE_LOGGING")) {
+      return 5;
+    }
+    if (getenv("MOZ_BASE_PROFILER_DEBUG_LOGGING")) {
+      return 4;
+    }
+    if (getenv("MOZ_BASE_PROFILER_LOGGING")) {
+      return 3;
+    }
+    return 0;
+  }();
+  return aLevelToTest <= sMaxLevel;
+}
+
+// printf-style output: goes to the Android log (tag "Gecko") when ANDROID is
+// defined, and to stderr otherwise.
+void PrintToConsole(const char* aFmt, ...) {
+  va_list varArgs;
+  va_start(varArgs, aFmt);
+#if defined(ANDROID)
+  __android_log_vprint(ANDROID_LOG_INFO, "Gecko", aFmt, varArgs);
+#else
+  vfprintf(stderr, aFmt, varArgs);
+#endif
+  va_end(varArgs);
+}
+
+// Statically initialized to 0, then set once from profiler_init(), which should
+// be called from the main thread before any other use of the profiler.
+int scProfilerMainThreadId;
+
+// Compile-time check of the feature list: returns true only if the feature
+// numbers start at 0 and increase by 1 from one entry to the next.
+constexpr static bool ValidateFeatures() {
+  // Number that the next entry in the list is required to carry.
+  int nextExpectedNumber = 0;
+
+#define CHECK_FEATURE(n_, str_, Name_, desc_) \
+  if (nextExpectedNumber != (n_)) {           \
+    return false;                             \
+  }                                           \
+  nextExpectedNumber += 1;
+
+  BASE_PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
+
+#undef CHECK_FEATURE
+
+  return true;
+}
+
+static_assert(ValidateFeatures(), "Feature list is invalid");
+
+// Builds the bitmask of features usable on this platform/configuration: every
+// feature in the table is switched on first, then the unsupported ones are
+// switched off again.
+static uint32_t AvailableFeatures() {
+  uint32_t supported = 0;
+
+  // Step 1: set the bit of every feature listed in the feature table.
+#define ADD_FEATURE(n_, str_, Name_, desc_) \
+  ProfilerFeature::Set##Name_(supported);
+
+  BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
+
+#undef ADD_FEATURE
+
+  // Step 2: clear what this platform/configuration cannot provide.
+  ProfilerFeature::ClearJava(supported);
+  ProfilerFeature::ClearJS(supported);
+  ProfilerFeature::ClearScreenshots(supported);
+#if !defined(HAVE_NATIVE_UNWIND)
+  // Stack walking is only kept when a native unwinder was configured.
+  ProfilerFeature::ClearStackWalk(supported);
+#endif
+  ProfilerFeature::ClearTaskTracer(supported);
+  ProfilerFeature::ClearJSTracer(supported);
+
+  return supported;
+}
+
+// Baseline feature set used in every context. Features listed here are not
+// necessarily available on the current platform.
+static uint32_t DefaultFeatures() {
+  const uint32_t defaults = ProfilerFeature::Threads |
+                            ProfilerFeature::StackWalk |
+                            ProfilerFeature::Leaf | ProfilerFeature::JS |
+                            ProfilerFeature::Java;
+  return defaults;
+}
+
+// Additional defaults that apply when MOZ_PROFILER_STARTUP is set. Features
+// listed here are not necessarily available on the current platform.
+static uint32_t StartupExtraDefaultFeatures() {
+  // MainThreadIO: startup is heavy on I/O operations, and main thread I/O is
+  // really important to see there.
+  // CPUUtilization: shows when startup alternates between CPU intensive tasks
+  // and being blocked.
+  const uint32_t startupExtras =
+      ProfilerFeature::MainThreadIO | ProfilerFeature::CPUUtilization;
+  return startupExtras;
+}
+
+class MOZ_RAII PSAutoTryLock;
+
+// The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS.
+// Use `PSAutoLock lock;` to take the lock until the end of the enclosing block.
+// External profilers may use this same lock for their own data, but as the lock
+// is non-recursive, *only* `f(PSLockRef, ...)` functions below should be
+// called, to avoid double-locking.
+class MOZ_RAII PSAutoLock {
+ public:
+ PSAutoLock() { gPSMutex.Lock(); }  // Takes gPSMutex.
+
+ ~PSAutoLock() { gPSMutex.Unlock(); }  // Releases gPSMutex, whichever constructor was used.
+
+ PSAutoLock(const PSAutoLock&) = delete;  // Not copyable.
+ void operator=(const PSAutoLock&) = delete;  // Not assignable.
+
+ [[nodiscard]] static bool IsLockedOnCurrentThread() {  // Forwards to gPSMutex.
+ return gPSMutex.IsLockedOnCurrentThread();
+ }
+
+ private:
+ // Allow PSAutoTryLock to access gPSMutex, and to call the following
+ // `PSAutoLock(int)` constructor through `Maybe<const PSAutoLock>::emplace()`.
+ friend class PSAutoTryLock;
+ friend class Maybe<const PSAutoLock>;
+
+ // Special constructor for an already-locked gPSMutex. The `int` parameter is
+ // necessary to distinguish it from the main constructor. It does not lock; it
+ // only asserts ownership, and the destructor above then performs the unlock.
+ explicit PSAutoLock(int) { gPSMutex.AssertCurrentThreadOwns(); }
+
+ static detail::BaseProfilerMutex gPSMutex;  // The single mutex shared by all PSAutoLock objects.
+};
+
+// RAII class that attempts to lock the profiler mutex. Example usage:
+// PSAutoTryLock tryLock;
+// if (tryLock.IsLocked()) { locked_foo(tryLock.LockRef()); }
+class MOZ_RAII PSAutoTryLock {
+ public:
+ PSAutoTryLock() {
+ if (PSAutoLock::gPSMutex.TryLock()) {
+ mMaybePSAutoLock.emplace(0);  // Wraps the lock just taken, via PSAutoLock(int).
+ }
+ }
+
+ // Return true if the mutex was acquired and locked.
+ [[nodiscard]] bool IsLocked() const { return mMaybePSAutoLock.isSome(); }
+
+ // Assuming the mutex is locked, return a reference to a `PSAutoLock` for that
+ // mutex, which can be passed as proof-of-lock.
+ [[nodiscard]] const PSAutoLock& LockRef() const {
+ MOZ_ASSERT(IsLocked());  // Callers must check IsLocked() first.
+ return mMaybePSAutoLock.ref();
+ }
+
+ private:
+ // `mMaybePSAutoLock` is `Nothing` if locking failed, otherwise it contains a
+ // `const PSAutoLock` holding the locked mutex, and whose reference may be
+ // passed to functions expecting a proof-of-lock.
+ Maybe<const PSAutoLock> mMaybePSAutoLock;
+};
+
+detail::BaseProfilerMutex PSAutoLock::gPSMutex;
+
+// Proof-of-lock parameter type: CorePS's and ActivePS's fields may only be
+// accessed by functions that take a PSLockRef argument.
+using PSLockRef = const PSAutoLock&;
+
+#define PS_GET(type_, name_) /* static getter name_(PSLockRef) returning sInstance->m##name_ */ \
+ static type_ name_(PSLockRef) { \
+ MOZ_ASSERT(sInstance); \
+ return sInstance->m##name_; \
+ }
+
+#define PS_GET_LOCKLESS(type_, name_) /* same getter, but without the PSLockRef proof-of-lock parameter */ \
+ static type_ name_() { \
+ MOZ_ASSERT(sInstance); \
+ return sInstance->m##name_; \
+ }
+
+#define PS_GET_AND_SET(type_, name_) /* PS_GET getter plus a static Set##name_(PSLockRef, value) setter */ \
+ PS_GET(type_, name_) \
+ static void Set##name_(PSLockRef, type_ a##name_) { \
+ MOZ_ASSERT(sInstance); \
+ sInstance->m##name_ = a##name_; \
+ }
+
+// All functions in this file can run on multiple threads unless they have an
+// NS_IsMainThread() assertion.
+
+// This class contains the profiler's core global state, i.e. that which is
+// valid even when the profiler is not active. Most profile operations can't do
+// anything useful when this class is not instantiated, so we release-assert
+// its non-nullness in all such operations.
+//
+// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
+// PSAutoLock reference as an argument as proof that the gPSMutex is currently
+// locked. This makes it clear when gPSMutex is locked and helps avoid
+// accidental unlocked accesses to global state. There are ways to circumvent
+// this mechanism, but please don't do so without *very* good reason and a
+// detailed explanation.
+//
+// The exceptions to this rule:
+//
+// - mProcessStartTime, because it's immutable;
+//
+// - each thread's RacyRegisteredThread object is accessible without locking via
+// TLSRegisteredThread::RacyRegisteredThread().
+class CorePS {
+ private:  // Instances are only made and destroyed through Create()/Destroy() below.
+ CorePS()
+ : mProcessStartTime(TimeStamp::ProcessCreation()),
+ // This needs its own mutex, because it is used concurrently from
+ // functions guarded by gPSMutex as well as others without safety (e.g.,
+ // profiler_add_marker). It is *not* used inside the critical section of
+ // the sampler, because mutexes cannot be used there.
+ mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex)
+#ifdef USE_LUL_STACKWALK
+ ,
+ mLul(nullptr)
+#endif
+ {
+ }
+
+ ~CorePS() {}
+
+ public:
+ static void Create(PSLockRef aLock) {  // Allocates the singleton; it must not exist yet.
+ MOZ_ASSERT(!sInstance);
+ sInstance = new CorePS();
+ }
+
+ static void Destroy(PSLockRef aLock) {  // Deletes the singleton and resets sInstance to null.
+ MOZ_ASSERT(sInstance);
+ delete sInstance;
+ sInstance = nullptr;
+ }
+
+ // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
+ // being locked. This is because CorePS is instantiated so early on the main
+ // thread that we don't have to worry about it being racy.
+ static bool Exists() { return !!sInstance; }
+
+ static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
+ size_t& aProfSize, size_t& aLulSize) {  // Both totals are added to (+=), never reset.
+ MOZ_ASSERT(sInstance);
+
+ aProfSize += aMallocSizeOf(sInstance);
+
+ for (auto& registeredThread : sInstance->mRegisteredThreads) {
+ aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
+ }
+
+ for (auto& registeredPage : sInstance->mRegisteredPages) {
+ aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
+ }
+
+ // Measurement of the following things may be added later if DMD finds it
+ // is worthwhile:
+ // - CorePS::mRegisteredThreads itself (its elements' children are
+ // measured above)
+ // - CorePS::mRegisteredPages itself (its elements' children are
+ // measured above)
+ // - CorePS::mInterposeObserver
+
+#if defined(USE_LUL_STACKWALK)
+ if (sInstance->mLul) {  // aLulSize is only touched when a LUL object exists.
+ aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
+ }
+#endif
+ }
+
+ // No PSLockRef is needed for this field because it's immutable.
+ PS_GET_LOCKLESS(const TimeStamp&, ProcessStartTime)
+
+ // No PSLockRef is needed for this field because it's thread-safe.
+ PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer)
+
+ PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)
+
+ static void AppendRegisteredThread(
+ PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {  // Takes ownership of aRegisteredThread.
+ MOZ_ASSERT(sInstance);
+ MOZ_RELEASE_ASSERT(
+ sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
+ }
+
+ static void RemoveRegisteredThread(PSLockRef,
+ RegisteredThread* aRegisteredThread) {
+ MOZ_ASSERT(sInstance);
+ // Remove aRegisteredThread from mRegisteredThreads; nothing happens if it is
+ // not in the list.
+ for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
+ if (rt.get() == aRegisteredThread) {
+ sInstance->mRegisteredThreads.erase(&rt);
+ return;  // Stop right after erasing; the loop does not continue over the modified vector.
+ }
+ }
+ }
+
+ PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
+
+ static void AppendRegisteredPage(PSLockRef,
+ RefPtr<PageInformation>&& aRegisteredPage) {
+ MOZ_ASSERT(sInstance);
+ struct RegisteredPageComparator {  // Predicate: does a stored page Equals() the new one?
+ PageInformation* aA;
+ bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
+ };
+
+ auto foundPageIter = std::find_if(
+ sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
+ RegisteredPageComparator{aRegisteredPage.get()});
+
+ if (foundPageIter != sInstance->mRegisteredPages.end()) {
+ if ((*foundPageIter)->Url() == "about:blank") {
+ // When a BrowsingContext is loaded, the first url loaded in it will be
+ // about:blank, and if the principal matches, the first document loaded
+ // in it will share an inner window. That's why we should delete the
+ // intermittent about:blank if they share the inner window.
+ sInstance->mRegisteredPages.erase(foundPageIter);
+ } else {
+ // Do not register the same page again.
+ return;
+ }
+ }
+ MOZ_RELEASE_ASSERT(
+ sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
+ }
+
+ static void RemoveRegisteredPage(PSLockRef,
+ uint64_t aRegisteredInnerWindowID) {
+ MOZ_ASSERT(sInstance);
+ // Remove RegisteredPage from mRegisteredPages by given inner window ID.
+ sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
+ return rd->InnerWindowID() == aRegisteredInnerWindowID;
+ });
+ }
+
+ static void ClearRegisteredPages(PSLockRef) {  // Drops every registered page.
+ MOZ_ASSERT(sInstance);
+ sInstance->mRegisteredPages.clear();
+ }
+
+ PS_GET(const Vector<BaseProfilerCount*>&, Counters)
+
+ static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
+ MOZ_ASSERT(sInstance);
+ // we don't own the counter; they may be stored in static objects
+ MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
+ }
+
+ static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
+ // we may be called to remove a counter after the profiler is stopped or
+ // late in shutdown.
+ if (sInstance) {
+ auto* counter = std::find(sInstance->mCounters.begin(),
+ sInstance->mCounters.end(), aCounter);
+ MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());  // aCounter must have been appended before.
+ sInstance->mCounters.erase(counter);
+ }
+ }
+
+#ifdef USE_LUL_STACKWALK
+ static lul::LUL* Lul(PSLockRef) {  // Non-owning pointer; may be null.
+ MOZ_ASSERT(sInstance);
+ return sInstance->mLul.get();
+ }
+ static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {  // Takes ownership of aLul.
+ MOZ_ASSERT(sInstance);
+ sInstance->mLul = std::move(aLul);
+ }
+#endif
+
+ PS_GET_AND_SET(const std::string&, ProcessName)
+ PS_GET_AND_SET(const std::string&, ETLDplus1)
+
+ private:
+ // The singleton instance
+ static CorePS* sInstance;
+
+ // The time that the process started.
+ const TimeStamp mProcessStartTime;
+
+ // The thread-safe blocks-oriented buffer into which all profiling data is
+ // recorded.
+ // ActivePS controls the lifetime of the underlying contents buffer: When
+ // ActivePS does not exist, mCoreBuffer is empty and rejects all reads&writes;
+ // see ActivePS for further details.
+ // Note: This needs to live here outside of ActivePS, because some producers
+ // are indirectly controlled (e.g., by atomic flags) and therefore may still
+ // attempt to write some data shortly after ActivePS has shutdown and deleted
+ // the underlying buffer in memory.
+ ProfileChunkedBuffer mCoreBuffer;
+
+ // Info on all the registered threads.
+ // ThreadIds in mRegisteredThreads are unique.
+ Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;
+
+ // Info on all the registered pages.
+ // InnerWindowIDs in mRegisteredPages are unique.
+ Vector<RefPtr<PageInformation>> mRegisteredPages;
+
+ // Non-owning pointers to all active counters
+ Vector<BaseProfilerCount*> mCounters;
+
+#ifdef USE_LUL_STACKWALK
+ // LUL's state. Null prior to the first activation, non-null thereafter.
+ UniquePtr<lul::LUL> mLul;
+#endif
+
+ // Process name, provided by child process initialization code.
+ std::string mProcessName;
+ // Private name, provided by child process initialization code (eTLD+1 in
+ // fission)
+ std::string mETLDplus1;
+};
+
+CorePS* CorePS::sInstance = nullptr;
+
+// Gives access to the process-wide core profile buffer owned by CorePS.
+// CorePS must already exist (asserted).
+ProfileChunkedBuffer& profiler_get_core_buffer() {
+  MOZ_ASSERT(CorePS::Exists());
+  ProfileChunkedBuffer& coreBuffer = CorePS::CoreBuffer();
+  return coreBuffer;
+}
+
+class SamplerThread;
+
+static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+ double aInterval);
+
+struct LiveProfiledThreadData {  // Pairs a still-registered thread with its profiling data.
+ RegisteredThread* mRegisteredThread;  // Raw pointer; not owned by this struct.
+ UniquePtr<ProfiledThreadData> mProfiledThreadData;  // Owned by this struct.
+};
+
+// The buffer size is provided as a number of "entries", this is their size in
+// bytes.
+constexpr static uint32_t scBytesPerEntry = 8;
+
+// This class contains the profiler's global state that is valid only when the
+// profiler is active. When not instantiated, the profiler is inactive.
+//
+// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
+// CorePS.
+//
+class ActivePS {
+ private:
+ // We need to decide how many chunks of what size we want to fit in the given
+ // total maximum capacity for this process, in the (likely) context of
+ // multiple processes doing the same choice and having an inter-process
+ // mechanism to control the overall memory limit.
+
+ // Minimum chunk size allowed, enough for at least one stack.
+ constexpr static uint32_t scMinimumChunkSize =
+ 2 * ProfileBufferChunkManager::scExpectedMaximumStackSize;
+
+ // Ideally we want at least 2 unreleased chunks to work with (1 current and 1
+ // next), and 2 released chunks (so that one can be recycled when old, leaving
+ // one with some data).
+ constexpr static uint32_t scMinimumNumberOfChunks = 4;
+
+ // And we want to limit chunks to a maximum size, which is a compromise
+ // between:
+ // - A big size, which helps with reducing the rate of allocations and IPCs.
+ // - A small size, which helps with equalizing the duration of recorded data
+ // (as the inter-process controller will discard the oldest chunks in all
+ // Firefox processes).
+ constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;
+
+ public:
+ // We should be able to store at least the minimum number of the smallest-
+ // possible chunks.
+ constexpr static uint32_t scMinimumBufferSize =
+ scMinimumNumberOfChunks * scMinimumChunkSize;
+ constexpr static uint32_t scMinimumBufferEntries =
+ scMinimumBufferSize / scBytesPerEntry;
+
+ // Limit to 2GiB.
+ constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
+ constexpr static uint32_t scMaximumBufferEntries =
+ scMaximumBufferSize / scBytesPerEntry;
+
+ // Clamps aEntries into [scMinimumBufferEntries, scMaximumBufferEntries].
+ constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
+ if (aEntries <= scMinimumBufferEntries) {
+ return scMinimumBufferEntries;
+ }
+ if (aEntries >= scMaximumBufferEntries) {
+ return scMaximumBufferEntries;
+ }
+ return aEntries;
+ }
+
+ private:
+ // Chunk size in bytes for a buffer of aEntries entries: the clamped buffer
+ // size divided by scMinimumNumberOfChunks, capped at scMaximumChunkSize. The
+ // arithmetic is done in size_t before narrowing back to uint32_t.
+ constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
+ return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
+ scBytesPerEntry / scMinimumNumberOfChunks,
+ size_t(scMaximumChunkSize)));
+ }
+
+ // Turns the requested feature set into the one actually used: unavailable
+ // features are dropped, and implied features are added.
+ static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
+ // Filter out any features unavailable in this platform/configuration.
+ aFeatures &= AvailableFeatures();
+
+ // Always enable ProfilerFeature::Threads if we have a filter, because
+ // users sometimes ask to filter by a list of threads but forget to
+ // explicitly specify ProfilerFeature::Threads.
+ if (aFilterCount > 0) {
+ aFeatures |= ProfilerFeature::Threads;
+ }
+
+ // Some features imply others.
+ if (aFeatures & ProfilerFeature::FileIOAll) {
+ aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
+ } else if (aFeatures & ProfilerFeature::FileIO) {
+ aFeatures |= ProfilerFeature::MainThreadIO;
+ }
+
+ return aFeatures;
+ }
+
+ // Sets up an active profiling session: takes the next generation number,
+ // installs this session's chunk manager into the core buffer, creates the
+ // sampler thread, and deep-copies the aFilterCount strings in aFilters.
+ ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
+ uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
+ const Maybe<double>& aDuration)
+ : mGeneration(sNextGeneration++),
+ mCapacity(aCapacity),
+ mDuration(aDuration),
+ mInterval(aInterval),
+ mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
+ mProfileBufferChunkManager(
+ size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry,
+ ChunkSizeForEntries(aCapacity.Value())),
+ mProfileBuffer([this]() -> ProfileChunkedBuffer& {
+ CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
+ return CorePS::CoreBuffer();
+ }()),
+ // The new sampler thread doesn't start sampling immediately because the
+ // main loop within Run() is blocked until this function's caller
+ // unlocks gPSMutex.
+ mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval)),
+ mIsPaused(false),
+ mIsSamplingPaused(false)
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+ ,
+ mWasSamplingPaused(false)
+#endif
+ {
+ // Deep copy aFilters.
+ MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
+ for (uint32_t i = 0; i < aFilterCount; ++i) {
+ mFilters[i] = aFilters[i];
+ }
+ }
+
+ // Detaches the chunk manager from the core buffer, undoing the
+ // SetChunkManager() call made by the constructor.
+ ~ActivePS() { CorePS::CoreBuffer().ResetChunkManager(); }
+
+ // Returns true if a thread named aThreadName passes the filter list, i.e.
+ // when there are no filters at all, when a filter is exactly "*", when a
+ // filter is a case-insensitive substring of the name, or when a filter has
+ // the form "pid:<n>" with <n> being the current process id.
+ bool ThreadSelected(const char* aThreadName) {
+ if (mFilters.empty()) {
+ return true;
+ }
+
+ // Byte-wise lowercasing. Each byte is taken as `unsigned char` first,
+ // because handing a negative `char` value (e.g., a non-ASCII byte where
+ // `char` is signed) to ::tolower is undefined behavior.
+ auto toLower = [](unsigned char aByte) {
+ return static_cast<char>(::tolower(aByte));
+ };
+
+ std::string name = aThreadName;
+ std::transform(name.begin(), name.end(), name.begin(), toLower);
+
+ for (uint32_t i = 0; i < mFilters.length(); ++i) {
+ std::string filter = mFilters[i];
+
+ if (filter == "*") {
+ return true;
+ }
+
+ std::transform(filter.begin(), filter.end(), filter.begin(), toLower);
+
+ // Crude, non UTF-8 compatible, case insensitive substring search
+ if (name.find(filter) != std::string::npos) {
+ return true;
+ }
+
+ // If the filter starts with pid:, check for a pid match
+ if (filter.find("pid:") == 0) {
+ std::string mypid = std::to_string(profiler_current_process_id());
+ if (filter.compare(4, std::string::npos, mypid) == 0) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ public:
+ // Instantiates the singleton; it must not already exist.
+ static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
+ uint32_t aFeatures, const char** aFilters,
+ uint32_t aFilterCount, const Maybe<double>& aDuration) {
+ MOZ_ASSERT(!sInstance);
+ sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
+ aFilterCount, aDuration);
+ }
+
+ // Deletes the singleton and returns its sampler thread, which is not
+ // destroyed here; the caller is expected to do so (see mSamplerThread).
+ [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
+ MOZ_ASSERT(sInstance);
+ auto samplerThread = sInstance->mSamplerThread;
+ delete sInstance;
+ sInstance = nullptr;
+
+ return samplerThread;
+ }
+
+ static bool Exists(PSLockRef) { return !!sInstance; }
+
+ // Returns true if the active session was started with exactly these
+ // settings (filters are compared in order, as C strings).
+ // NOTE(review): mFeatures stores the result of AdjustFeatures(), so
+ // aFeatures presumably needs to be adjusted the same way to compare equal;
+ // confirm against the callers.
+ static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
+ const Maybe<double>& aDuration, double aInterval,
+ uint32_t aFeatures, const char** aFilters,
+ uint32_t aFilterCount) {
+ MOZ_ASSERT(sInstance);
+ if (sInstance->mCapacity != aCapacity ||
+ sInstance->mDuration != aDuration ||
+ sInstance->mInterval != aInterval ||
+ sInstance->mFeatures != aFeatures ||
+ sInstance->mFilters.length() != aFilterCount) {
+ return false;
+ }
+
+ for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
+ if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Memory used by the singleton itself plus what mProfileBuffer reports.
+ static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
+ MOZ_ASSERT(sInstance);
+
+ size_t n = aMallocSizeOf(sInstance);
+
+ n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
+
+ // Measurement of the following members may be added later if DMD finds it
+ // is worthwhile:
+ // - mLiveProfiledThreads (both the array itself, and the contents)
+ // - mDeadProfiledThreads (both the array itself, and the contents)
+ //
+
+ return n;
+ }
+
+ // A thread is profiled when it is the main thread or the Threads feature is
+ // on, and its name passes ThreadSelected().
+ static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
+ MOZ_ASSERT(sInstance);
+ return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
+ sInstance->ThreadSelected(aInfo->Name()));
+ }
+
+ PS_GET(uint32_t, Generation)
+
+ PS_GET(PowerOfTwo32, Capacity)
+
+ PS_GET(Maybe<double>, Duration)
+
+ PS_GET(double, Interval)
+
+ PS_GET(uint32_t, Features)
+
+ // Generates one `static bool Feature<Name>(PSLockRef)` query per feature.
+#define PS_GET_FEATURE(n_, str_, Name_, desc_) \
+ static bool Feature##Name_(PSLockRef) { \
+ MOZ_ASSERT(sInstance); \
+ return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
+ }
+
+ BASE_PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
+
+#undef PS_GET_FEATURE
+
+ PS_GET(const Vector<std::string>&, Filters)
+
+ // Forwards to the chunk manager's FulfillChunkRequests().
+ static void FulfillChunkRequests(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ sInstance->mProfileBufferChunkManager.FulfillChunkRequests();
+ }
+
+ static ProfileBuffer& Buffer(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ return sInstance->mProfileBuffer;
+ }
+
+ static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ return sInstance->mLiveProfiledThreads;
+ }
+
+ // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
+ // for all threads that should be included in a profile, both for threads
+ // that are still registered, and for threads that have been unregistered but
+ // still have data in the buffer.
+ // For threads that have already been unregistered, the RegisteredThread
+ // pointer will be null.
+ // The returned array is sorted by thread register time.
+ // Do not hold on to the return value across thread registration or profiler
+ // restarts.
+ static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
+ ProfiledThreads(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array;
+ MOZ_RELEASE_ASSERT(
+ array.initCapacity(sInstance->mLiveProfiledThreads.length() +
+ sInstance->mDeadProfiledThreads.length()));
+ for (auto& t : sInstance->mLiveProfiledThreads) {
+ MOZ_RELEASE_ASSERT(array.append(
+ std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get())));
+ }
+ for (auto& t : sInstance->mDeadProfiledThreads) {
+ MOZ_RELEASE_ASSERT(
+ array.append(std::make_pair((RegisteredThread*)nullptr, t.get())));
+ }
+
+ std::sort(array.begin(), array.end(),
+ [](const std::pair<RegisteredThread*, ProfiledThreadData*>& a,
+ const std::pair<RegisteredThread*, ProfiledThreadData*>& b) {
+ return a.second->Info()->RegisterTime() <
+ b.second->Info()->RegisterTime();
+ });
+ return array;
+ }
+
+ // Returns the currently registered pages (from CorePS) followed by the dead
+ // pages still tracked by this session.
+ static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
+ MOZ_ASSERT(sInstance);
+ Vector<RefPtr<PageInformation>> array;
+ for (auto& d : CorePS::RegisteredPages(aLock)) {
+ MOZ_RELEASE_ASSERT(array.append(d));
+ }
+ for (auto& d : sInstance->mDeadProfiledPages) {
+ MOZ_RELEASE_ASSERT(array.append(d));
+ }
+ // We don't need to sort the pages like threads since we won't show them
+ // as a list.
+ return array;
+ }
+
+ // Do a linear search through mLiveProfiledThreads to find the
+ // ProfiledThreadData object for a RegisteredThread.
+ // Returns null when the thread is not among the live profiled threads.
+ static ProfiledThreadData* GetProfiledThreadData(
+ PSLockRef, RegisteredThread* aRegisteredThread) {
+ MOZ_ASSERT(sInstance);
+ for (const LiveProfiledThreadData& thread :
+ sInstance->mLiveProfiledThreads) {
+ if (thread.mRegisteredThread == aRegisteredThread) {
+ return thread.mProfiledThreadData.get();
+ }
+ }
+ return nullptr;
+ }
+
+ // Starts tracking a live thread; ownership of aProfiledThreadData moves into
+ // mLiveProfiledThreads.
+ static ProfiledThreadData* AddLiveProfiledThread(
+ PSLockRef, RegisteredThread* aRegisteredThread,
+ UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
+ MOZ_ASSERT(sInstance);
+ MOZ_RELEASE_ASSERT(
+ sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
+ aRegisteredThread, std::move(aProfiledThreadData)}));
+
+ // Return a weak pointer to the ProfiledThreadData object.
+ return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
+ }
+
+ // Moves the thread's profiling data from the live list to the dead list.
+ // Nothing is moved if the thread is not in the live list.
+ static void UnregisterThread(PSLockRef aLockRef,
+ RegisteredThread* aRegisteredThread) {
+ MOZ_ASSERT(sInstance);
+
+ DiscardExpiredDeadProfiledThreads(aLockRef);
+
+ // Find the right entry in the mLiveProfiledThreads array and remove the
+ // element, moving the ProfiledThreadData object for the thread into the
+ // mDeadProfiledThreads array.
+ // The thread's RegisteredThread object gets destroyed here.
+ for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
+ LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
+ if (thread.mRegisteredThread == aRegisteredThread) {
+ thread.mProfiledThreadData->NotifyUnregistered(
+ sInstance->mProfileBuffer.BufferRangeEnd());
+ MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
+ std::move(thread.mProfiledThreadData)));
+ sInstance->mLiveProfiledThreads.erase(
+ &sInstance->mLiveProfiledThreads[i]);
+ return;
+ }
+ }
+ }
+
+ PS_GET_AND_SET(bool, IsPaused)
+
+ // True if sampling is paused (through generic `SetIsPaused()` or specific
+ // `SetIsSamplingPaused()`).
+ static bool IsSamplingPaused(PSLockRef lock) {
+ MOZ_ASSERT(sInstance);
+ return IsPaused(lock) || sInstance->mIsSamplingPaused;
+ }
+
+ static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
+ MOZ_ASSERT(sInstance);
+ sInstance->mIsSamplingPaused = aIsSamplingPaused;
+ }
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+ PS_GET_AND_SET(bool, WasSamplingPaused)
+#endif
+
+ static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+ // Discard any dead threads that were unregistered before bufferRangeStart.
+ sInstance->mDeadProfiledThreads.eraseIf(
+ [bufferRangeStart](
+ const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
+ Maybe<uint64_t> bufferPosition =
+ aProfiledThreadData->BufferPositionWhenUnregistered();
+ MOZ_RELEASE_ASSERT(bufferPosition,
+ "should have unregistered this thread");
+ return *bufferPosition < bufferRangeStart;
+ });
+ }
+
+ // Moves every registered page with the given inner window ID from CorePS's
+ // registered pages into mDeadProfiledPages.
+ static void UnregisterPage(PSLockRef aLock,
+ uint64_t aRegisteredInnerWindowID) {
+ MOZ_ASSERT(sInstance);
+ auto& registeredPages = CorePS::RegisteredPages(aLock);
+ for (size_t i = 0; i < registeredPages.length(); i++) {
+ RefPtr<PageInformation>& page = registeredPages[i];
+ if (page->InnerWindowID() == aRegisteredInnerWindowID) {
+ page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
+ MOZ_RELEASE_ASSERT(
+ sInstance->mDeadProfiledPages.append(std::move(page)));
+ // Post-decrement so that the element shifted into slot i is examined
+ // next; when i is 0 the unsigned wrap-around is undone by the loop's i++.
+ registeredPages.erase(&registeredPages[i--]);
+ }
+ }
+ }
+
+ static void DiscardExpiredPages(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+ // Discard any dead pages that were unregistered before
+ // bufferRangeStart.
+ sInstance->mDeadProfiledPages.eraseIf(
+ [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
+ Maybe<uint64_t> bufferPosition =
+ aProfiledPage->BufferPositionWhenUnregistered();
+ MOZ_RELEASE_ASSERT(bufferPosition,
+ "should have unregistered this page");
+ return *bufferPosition < bufferRangeStart;
+ });
+ }
+
+ static void ClearUnregisteredPages(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ sInstance->mDeadProfiledPages.clear();
+ }
+
+ static void ClearExpiredExitProfiles(PSLockRef) {
+ MOZ_ASSERT(sInstance);
+ uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+ // Discard exit profiles that were gathered before our buffer RangeStart.
+ sInstance->mExitProfiles.eraseIf(
+ [bufferRangeStart](const ExitProfile& aExitProfile) {
+ return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
+ });
+ }
+
+ // Stores a copy of aExitProfile, tagged with the current end of the buffer
+ // range, after dropping exit profiles that have expired.
+ static void AddExitProfile(PSLockRef aLock, const std::string& aExitProfile) {
+ MOZ_ASSERT(sInstance);
+
+ ClearExpiredExitProfiles(aLock);
+
+ MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
+ ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()}));
+ }
+
+ // Hands over the JSON of all non-expired exit profiles and empties
+ // mExitProfiles.
+ static Vector<std::string> MoveExitProfiles(PSLockRef aLock) {
+ MOZ_ASSERT(sInstance);
+
+ ClearExpiredExitProfiles(aLock);
+
+ Vector<std::string> profiles;
+ MOZ_RELEASE_ASSERT(
+ profiles.initCapacity(sInstance->mExitProfiles.length()));
+ for (auto& profile : sInstance->mExitProfiles) {
+ MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
+ }
+ sInstance->mExitProfiles.clear();
+ return profiles;
+ }
+
+ private:
+ // The singleton instance.
+ static ActivePS* sInstance;
+
+ // We need to track activity generations. If we didn't we could have the
+ // following scenario.
+ //
+ // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
+ // gPSMutex, deletes the SamplerThread (which does a join).
+ //
+ // - profiler_start() runs on a different thread, locks gPSMutex,
+ // re-instantiates ActivePS, unlocks gPSMutex -- all before the join
+ // completes.
+ //
+ // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
+ // and continues as if the start/stop pair didn't occur. Also
+ // profiler_stop() is stuck, unable to finish.
+ //
+ // By checking ActivePS *and* the generation, we can avoid this scenario.
+ // sNextGeneration is used to track the next generation number; it is static
+ // because it must persist across different ActivePS instantiations.
+ const uint32_t mGeneration;
+ static uint32_t sNextGeneration;
+
+ // The maximum number of 8-byte entries in mProfileBuffer.
+ const PowerOfTwo32 mCapacity;
+
+ // The maximum duration of entries in mProfileBuffer, in seconds.
+ const Maybe<double> mDuration;
+
+ // The interval between samples, measured in milliseconds.
+ const double mInterval;
+
+ // The profile features that are enabled.
+ const uint32_t mFeatures;
+
+ // Substrings of names of threads we want to profile.
+ Vector<std::string> mFilters;
+
+ // The chunk manager used by `mProfileBuffer` below.
+ ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager;
+
+ // The buffer into which all samples are recorded.
+ ProfileBuffer mProfileBuffer;
+
+ // ProfiledThreadData objects for any threads that were profiled at any point
+ // during this run of the profiler:
+ // - mLiveProfiledThreads contains all threads that are still registered, and
+ // - mDeadProfiledThreads contains all threads that have already been
+ // unregistered but for which there is still data in the profile buffer.
+ Vector<LiveProfiledThreadData> mLiveProfiledThreads;
+ Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
+
+ // Info on all the dead pages.
+ // Registered pages are being moved to this array after unregistration.
+ // We are keeping them in case we need them in the profile data.
+ // We are removing them when we ensure that we won't need them anymore.
+ Vector<RefPtr<PageInformation>> mDeadProfiledPages;
+
+ // The current sampler thread. This class is not responsible for destroying
+ // the SamplerThread object; the Destroy() method returns it so the caller
+ // can destroy it.
+ SamplerThread* const mSamplerThread;
+
+ // Is the profiler fully paused?
+ bool mIsPaused;
+
+ // Is the profiler periodic sampling paused?
+ bool mIsSamplingPaused;
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+ // Used to record whether the sampler was paused just before forking. False
+ // at all times except just before/after forking.
+ bool mWasSamplingPaused;
+#endif
+
+ // An exit profile (JSON text) together with the buffer position at which it
+ // was gathered, which is what ClearExpiredExitProfiles() compares against.
+ struct ExitProfile {
+ std::string mJSON;
+ uint64_t mBufferPositionAtGatherTime;
+ };
+ Vector<ExitProfile> mExitProfiles;
+};
+
+ActivePS* ActivePS::sInstance = nullptr;
+uint32_t ActivePS::sNextGeneration = 0;
+
+#undef PS_GET
+#undef PS_GET_LOCKLESS
+#undef PS_GET_AND_SET
+
+Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
+
+// Publishes "profiler active" together with the given feature bits. The whole
+// word is overwritten, so any previously stored bits are replaced.
+/* static */
+void RacyFeatures::SetActive(uint32_t aFeatures) {
+  sActiveAndFeatures = aFeatures | Active;
+}
+
+/* static */
+void RacyFeatures::SetInactive() { sActiveAndFeatures = 0; }  // Clears every bit in the word.
+
+// Reports whether the Active bit is currently set.
+/* static */
+bool RacyFeatures::IsActive() {
+  const uint32_t snapshot = uint32_t(sActiveAndFeatures);
+  return (snapshot & Active) != 0;
+}
+
+/* static */
+void RacyFeatures::SetPaused() { sActiveAndFeatures |= Paused; }  // Sets the Paused bit; other bits are kept.
+
+/* static */
+void RacyFeatures::SetUnpaused() { sActiveAndFeatures &= ~Paused; }
+
+/* static */
+void RacyFeatures::SetSamplingPaused() { sActiveAndFeatures |= SamplingPaused; }
+
+/* static */
+void RacyFeatures::SetSamplingUnpaused() {
+ sActiveAndFeatures &= ~SamplingPaused;
+}
+
+/* static */
+bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) {
+ uint32_t af = sActiveAndFeatures; // copy it first
+ return (af & Active) && (af & aFeature);
+}
+
+/* static */
+bool RacyFeatures::IsActiveAndUnpaused() {
+ uint32_t af = sActiveAndFeatures; // copy it first
+ return (af & Active) && !(af & Paused);
+}
+
+/* static */
+bool RacyFeatures::IsActiveAndSamplingUnpaused() {
+ uint32_t af = sActiveAndFeatures; // copy it first
+ return (af & Active) && !(af & (Paused | SamplingPaused));
+}
+
+// Every live thread has a RegisteredThread, and a reference to it is kept in
+// thread-local storage. This class wraps that thread-local slot.
+class TLSRegisteredThread {
+ public:
+  // Initializes both thread-local slots (the RegisteredThread pointer and
+  // AutoProfilerLabel's ProfilingStack pointer). Both init() calls are always
+  // made; the result is true only if both succeeded.
+  static bool Init(PSLockRef) {
+    const bool threadSlotOk = sRegisteredThread.init();
+    const bool stackSlotOk = AutoProfilerLabel::sProfilingStack.init();
+    return threadSlotOk && stackSlotOk;
+  }
+
+  // Returns the whole RegisteredThread for the calling thread. Accesses are
+  // guarded by gPSMutex.
+  static class RegisteredThread* RegisteredThread(PSLockRef) {
+    return sRegisteredThread.get();
+  }
+
+  // Returns only the RacyRegisteredThread part, or nullptr if no
+  // RegisteredThread is stored. Accesses are not guarded by gPSMutex.
+  static class RacyRegisteredThread* RacyRegisteredThread() {
+    class RegisteredThread* const current = sRegisteredThread.get();
+    if (!current) {
+      return nullptr;
+    }
+    return &current->RacyRegisteredThread();
+  }
+
+  // Returns only the ProfilingStack. Accesses are not guarded by gPSMutex.
+  // The stack is also reachable through RacyRegisteredThread(), but that is
+  // marginally slower because it requires an extra pointer indirection.
+  static ProfilingStack* Stack() {
+    return AutoProfilerLabel::sProfilingStack.get();
+  }
+
+  // Stores aRegisteredThread (which may be null) and keeps the direct
+  // ProfilingStack pointer in sync with it.
+  static void SetRegisteredThread(PSLockRef,
+                                  class RegisteredThread* aRegisteredThread) {
+    sRegisteredThread.set(aRegisteredThread);
+
+    ProfilingStack* stack = nullptr;
+    if (aRegisteredThread) {
+      stack = &aRegisteredThread->RacyRegisteredThread().ProfilingStack();
+    }
+    AutoProfilerLabel::sProfilingStack.set(stack);
+  }
+
+ private:
+  // Non-owning reference to the RegisteredThread; the owning reference is
+  // CorePS::mRegisteredThreads. On thread deregistration, this reference is
+  // cleared and the RegisteredThread is destroyed.
+  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
+};
+
+MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;
+
+/* static */
+ProfilingStack* AutoProfilerLabel::GetProfilingStack() {
+  // Hand back whatever the calling thread's direct TLS pointer holds.
+  ProfilingStack* const currentStack = sProfilingStack.get();
+  return currentStack;
+}
+
+// A thread's ProfilingStack is reachable through
+// TLSRegisteredThread::sRegisteredThread, yet a second TLS pointer goes
+// straight to the ProfilingStack. The reasons:
+//
+// - AutoProfilerLabel needs to push to and pop from the ProfilingStack.
+//
+// - Those class functions are hot and must be defined in BaseProfiler.h so
+//   they can be inlined.
+//
+// - TLSRegisteredThread (and RegisteredThread) should not be exposed in
+//   BaseProfiler.h.
+//
+// The second pointer isn't ideal, but it satisfies those constraints.
+// TLSRegisteredThread is responsible for keeping it up to date.
+MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;
+
+// The name of the main thread (a string constant private to this file).
+static const char* const kMainThreadName = "GeckoMain";
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN sampling/unwinding code
+
+// The registers used for stack unwinding and a few other sampling purposes.
+// The ctor only nulls the fields; users are responsible for filling them in
+// with real values (see SyncPopulate() and the note on the fields below).
+class Registers {
+ public:
+  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}
+
+#if defined(HAVE_NATIVE_UNWIND)
+  // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
+  void SyncPopulate();
+#endif
+
+  // Zeroes every field of this object, including mContext where present.
+  void Clear() { memset(this, 0, sizeof(*this)); }
+
+  // These fields are filled in by
+  // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
+  // samples, and by SyncPopulate() for synchronous samples.
+  Address mPC;  // Instruction pointer.
+  Address mSP;  // Stack pointer.
+  Address mFP;  // Frame pointer.
+  Address mLR;  // ARM link register.
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+  // This contains all the registers, which means it duplicates the four fields
+  // above. This is ok.
+  // Default-initialized to null so that a freshly constructed Registers never
+  // carries an indeterminate pointer.
+  ucontext_t* mContext = nullptr;  // The context from the signal handler.
+#endif
+};
+
+// Capacity of a NativeStack. Setting MAX_NATIVE_FRAMES too high risks the
+// unwinder wasting a lot of time looping on corrupted stacks.
+static const size_t MAX_NATIVE_FRAMES = 1024;
+
+// Fixed-capacity pair of parallel arrays (program counters and stack
+// pointers) describing a native stack. A new object starts empty with all
+// slots null.
+struct NativeStack {
+  void* mPCs[MAX_NATIVE_FRAMES] = {};
+  void* mSPs[MAX_NATIVE_FRAMES] = {};
+  size_t mCount = 0;  // Number of frames filled.
+
+  NativeStack() {}
+};
+
+// Merges the profiling stack and native stack, outputting the details to
+// aCollector. Frames are emitted oldest-to-youngest; at each step the frame
+// with the greater stack address is emitted first. aFeatures and aRegs are
+// not read by this function's body.
+static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
+ const RegisteredThread& aRegisteredThread,
+ const Registers& aRegs, const NativeStack& aNativeStack,
+ ProfilerStackCollector& aCollector) {
+ // WARNING: this function runs within the profiler's "critical section".
+ // WARNING: this function might be called while the profiler is inactive, and
+ // cannot rely on ActivePS.
+
+ const ProfilingStack& profilingStack =
+ aRegisteredThread.RacyRegisteredThread().ProfilingStack();
+ const ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
+ uint32_t profilingStackFrameCount = profilingStack.stackSize();
+
+ // NOTE(review): samplePosInBuffer is assigned here but not read again in
+ // this function.
+ Maybe<uint64_t> samplePosInBuffer;
+ if (!aIsSynchronous) {
+ // aCollector.SamplePositionInBuffer() will return Nothing() when
+ // profiler_suspend_and_sample_thread is called from the background hang
+ // reporter.
+ samplePosInBuffer = aCollector.SamplePositionInBuffer();
+ }
+ // While the profiling stack array is ordered oldest-to-youngest, the native
+ // array is ordered youngest-to-oldest. We must add frames to aCollector
+ // oldest-to-youngest. Thus, iterate over the profiling stack forwards and
+ // the native array backwards. Note: this means the terminating condition
+ // for nativeIndex is being < 0.
+ uint32_t profilingStackIndex = 0;
+ int32_t nativeIndex = aNativeStack.mCount - 1;
+
+ // Stack address of the most recent label or sp-marker frame seen so far;
+ // frames of other kinds are positioned using this address.
+ uint8_t* lastLabelFrameStackAddr = nullptr;
+
+ // Iterate as long as there is at least one frame remaining.
+ while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) {
+ // There are 1 or 2 frames available (profiling stack and/or native). Find
+ // and add the oldest.
+ uint8_t* profilingStackAddr = nullptr;
+ uint8_t* nativeStackAddr = nullptr;
+
+ if (profilingStackIndex != profilingStackFrameCount) {
+ const ProfilingStackFrame& profilingStackFrame =
+ profilingStackFrames[profilingStackIndex];
+
+ if (profilingStackFrame.isLabelFrame() ||
+ profilingStackFrame.isSpMarkerFrame()) {
+ lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
+ }
+
+ // Skip any JS_OSR frames. Such frames are used when the JS interpreter
+ // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
+ // To avoid both the profiling stack frame and jit frame being recorded
+ // (and showing up twice), the interpreter marks the interpreter
+ // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
+ if (profilingStackFrame.isOSRFrame()) {
+ profilingStackIndex++;
+ continue;
+ }
+
+ MOZ_ASSERT(lastLabelFrameStackAddr);
+ profilingStackAddr = lastLabelFrameStackAddr;
+ }
+
+ if (nativeIndex >= 0) {
+ nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
+ }
+
+ // If there's a native stack frame which has the same SP as a profiling
+ // stack frame, pretend we didn't see the native stack frame. In effect
+ // this means profiling stack frames trump conflicting native frames.
+ if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) {
+ nativeStackAddr = nullptr;
+ nativeIndex--;
+ MOZ_ASSERT(profilingStackAddr);
+ }
+
+ // Sanity checks.
+ MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != nativeStackAddr);
+ MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr);
+
+ // Check to see if profiling stack frame is top-most.
+ if (profilingStackAddr > nativeStackAddr) {
+ MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
+ const ProfilingStackFrame& profilingStackFrame =
+ profilingStackFrames[profilingStackIndex];
+
+ // Sp marker frames are just annotations and should not be recorded in
+ // the profile.
+ if (!profilingStackFrame.isSpMarkerFrame()) {
+ aCollector.CollectProfilingStackFrame(profilingStackFrame);
+ }
+ profilingStackIndex++;
+ continue;
+ }
+
+ // If we reach here, there must be a native stack frame and it must be the
+ // greatest frame.
+ if (nativeStackAddr) {
+ MOZ_ASSERT(nativeIndex >= 0);
+ void* addr = (void*)aNativeStack.mPCs[nativeIndex];
+ aCollector.CollectNativeLeafAddr(addr);
+ }
+ // Advance past the native frame even when its stack address was null, so
+ // that the loop always makes progress.
+ if (nativeIndex >= 0) {
+ nativeIndex--;
+ }
+ }
+}
+
+#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
+// Forward declaration only; used by DoMozStackWalkBacktrace() below.
+static HANDLE GetThreadHandle(PlatformData* aData);
+#endif
+
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+// Stack-walker callback: appends one (PC, SP) pair to the NativeStack passed
+// through aClosure. aFrameNumber is ignored.
+static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
+                              void* aClosure) {
+  NativeStack& stack = *static_cast<NativeStack*>(aClosure);
+  MOZ_ASSERT(stack.mCount < MAX_NATIVE_FRAMES);
+  const size_t slot = stack.mCount++;
+  stack.mPCs[slot] = aPC;
+  stack.mSPs[slot] = aSP;
+}
+#endif
+
+#if defined(USE_FRAME_POINTER_STACK_WALK)
+// Fills aNativeStack by recording the current PC/SP from aRegs and then, if
+// the frame pointer looks plausible, walking the frame-pointer chain.
+static void DoFramePointerBacktrace(PSLockRef aLock,
+                                    const RegisteredThread& aRegisteredThread,
+                                    const Registers& aRegs,
+                                    NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  // cannot rely on ActivePS.
+
+  // The current function goes in first. Frame number 0 is used for it since
+  // FramePointerStackWalk() below numbers its frames 1..N; that is slightly
+  // odd but harmless, as StackWalkCallback() ignores the frame number.
+  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
+
+  // Only walk when the frame pointer lies within [SP, stack top].
+  const void* stackTop = aRegisteredThread.StackTop();
+  if (aRegs.mFP < aRegs.mSP || aRegs.mFP > stackTop) {
+    return;
+  }
+
+  const uint32_t remainingFrames =
+      uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
+  FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, remainingFrames,
+                        &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
+                        const_cast<void*>(stackTop));
+}
+#endif
+
+#if defined(USE_MOZ_STACK_WALK)
+// Fills aNativeStack by recording the current PC/SP from aRegs and then
+// letting MozStackWalkThread() walk the target thread.
+static void DoMozStackWalkBacktrace(PSLockRef aLock,
+                                    const RegisteredThread& aRegisteredThread,
+                                    const Registers& aRegs,
+                                    NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  // cannot rely on ActivePS.
+
+  // The current function goes in first. Frame number 0 is used for it since
+  // MozStackWalkThread() below numbers its frames 1..N; that is slightly odd
+  // but harmless, as StackWalkCallback() ignores the frame number.
+  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
+
+  const uint32_t remainingFrames =
+      uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
+
+  HANDLE threadHandle = GetThreadHandle(aRegisteredThread.GetPlatformData());
+  MOZ_ASSERT(threadHandle);
+  MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, remainingFrames,
+                     &aNativeStack, threadHandle, /* context */ nullptr);
+}
+#endif
+
+#ifdef USE_EHABI_STACKWALK
+// Fills aNativeStack using EHABIStackWalk(), starting from the machine context
+// in aRegs and bounded by the thread's stack top.
+static void DoEHABIBacktrace(PSLockRef aLock,
+                             const RegisteredThread& aRegisteredThread,
+                             const Registers& aRegs,
+                             NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  // cannot rely on ActivePS.
+
+  void* const stackTop = const_cast<void*>(aRegisteredThread.StackTop());
+  const auto& machineContext = aRegs.mContext->uc_mcontext;
+
+  // The walker writes SPs and PCs straight into the arrays and returns the
+  // value stored as the frame count.
+  aNativeStack.mCount =
+      EHABIStackWalk(machineContext, stackTop, aNativeStack.mSPs,
+                     aNativeStack.mPCs, MAX_NATIVE_FRAMES);
+}
+#endif
+
+#ifdef USE_LUL_STACKWALK
+
+// Byte-wise copy that ASAN does not instrument. See the comment at the
+// callsite for why this function is necessary.
+#  if defined(MOZ_HAVE_ASAN_BLACKLIST)
+MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
+                                           size_t aLen) {
+  // Calling memcpy() would be the obvious choice, but memcpy() itself remains
+  // instrumented by ASAN even though ASAN_memcpy() is not, so the false
+  // positive would still show up. The copy therefore has to be spelled out
+  // by hand inside this function.
+  char* out = static_cast<char*>(aDst);
+  const char* in = static_cast<const char*>(aSrc);
+  const char* const inEnd = in + aLen;
+
+  while (in != inEnd) {
+    *out++ = *in++;
+  }
+}
+#  endif
+
+// Fills aNativeStack using the LUL unwinder: seeds LUL's start registers from
+// the machine context in aRegs, copies a bounded image of the sampled thread's
+// stack, runs lul->Unwind() on that copy, and updates LUL's stats counters.
+static void DoLULBacktrace(PSLockRef aLock,
+ const RegisteredThread& aRegisteredThread,
+ const Registers& aRegs, NativeStack& aNativeStack) {
+ // WARNING: this function runs within the profiler's "critical section".
+ // WARNING: this function might be called while the profiler is inactive, and
+ // cannot rely on ActivePS.
+
+ const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
+
+ lul::UnwindRegs startRegs;
+ memset(&startRegs, 0, sizeof(startRegs));
+
+ // Per-platform: copy the registers LUL starts from out of the machine
+ // context.
+# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+ startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
+ startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
+ startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
+# elif defined(GP_PLAT_amd64_freebsd)
+ startRegs.xip = lul::TaggedUWord(mc->mc_rip);
+ startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
+ startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
+# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+ startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
+ startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
+ startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
+ startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
+ startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
+ startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
+# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+ startRegs.pc = lul::TaggedUWord(mc->pc);
+ startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
+ startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
+ startRegs.sp = lul::TaggedUWord(mc->sp);
+# elif defined(GP_PLAT_arm64_freebsd)
+ startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
+ startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
+ startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
+ startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
+# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+ startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
+ startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
+ startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
+# elif defined(GP_PLAT_mips64_linux)
+ startRegs.pc = lul::TaggedUWord(mc->pc);
+ startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
+ startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
+# else
+# error "Unknown plat"
+# endif
+
+ // Copy up to N_STACK_BYTES from (stack pointer - red zone) upwards, but not
+ // going past the stack's registered top point. Do some basic sanity checks
+ // too. This assumes that the TaggedUWord holding the stack pointer value is
+ // valid, but it should be, since it was constructed that way in the code
+ // just above.
+
+ // We could construct |stackImg| so that LUL reads directly from the stack in
+ // question, rather than from a copy of it. That would reduce overhead and
+ // space use a bit. However, it gives a problem with dynamic analysis tools
+ // (ASan, TSan, Valgrind) which is that such tools will report invalid or
+ // racing memory accesses, and such accesses will be reported deep inside LUL.
+ // By taking a copy here, we can either sanitise the copy (for Valgrind) or
+ // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have
+ // to try and suppress errors inside LUL.
+ //
+ // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
+ // observed in some minutes of testing, whilst keeping the size of this
+ // function (DoLULBacktrace)'s frame reasonable. Most stacks observed in
+ // practice are small, 4KB or less, and so the copy costs are insignificant
+ // compared to other profiler overhead.
+ //
+ // |stackImg| is allocated on this (the sampling thread's) stack. That
+ // implies that the frame for this function is at least N_STACK_BYTES large.
+ // In general it would be considered unacceptable to have such a large frame
+ // on a stack, but it only exists for the unwinder thread, and so is not
+ // expected to be a problem. Allocating it on the heap is troublesome because
+ // this function runs whilst the sampled thread is suspended, so any heap
+ // allocation risks deadlock. Allocating it as a global variable is not
+ // thread safe, which would be a problem if we ever allow multiple sampler
+ // threads. Hence allocating it on the stack seems to be the least-worst
+ // option.
+
+ lul::StackImage stackImg;
+
+ {
+ // Per-platform: the copy starts rEDZONE_SIZE bytes below the stack pointer
+ // (128 on amd64, 0 on the other platforms).
+# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
+ defined(GP_PLAT_amd64_freebsd)
+ uintptr_t rEDZONE_SIZE = 128;
+ uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
+ defined(GP_PLAT_arm64_freebsd)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+# elif defined(GP_PLAT_mips64_linux)
+ uintptr_t rEDZONE_SIZE = 0;
+ uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+# else
+# error "Unknown plat"
+# endif
+ uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
+ // Round both ends down to a multiple of the pointer size.
+ uintptr_t ws = sizeof(void*);
+ start &= ~(ws - 1);
+ end &= ~(ws - 1);
+ // Copy nothing unless start is below end; otherwise copy the range,
+ // clamped to N_STACK_BYTES.
+ uintptr_t nToCopy = 0;
+ if (start < end) {
+ nToCopy = end - start;
+ if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
+ }
+ MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+ stackImg.mLen = nToCopy;
+ stackImg.mStartAvma = start;
+ if (nToCopy > 0) {
+ // If this is a vanilla memcpy(), ASAN makes the following complaint:
+ //
+ // ERROR: AddressSanitizer: stack-buffer-underflow ...
+ // ...
+ // HINT: this may be a false positive if your program uses some custom
+ // stack unwind mechanism or swapcontext
+ //
+ // This code is very much a custom stack unwind mechanism! So we use an
+ // alternative memcpy() implementation that is ignored by ASAN.
+# if defined(MOZ_HAVE_ASAN_BLACKLIST)
+ ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+# else
+ memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+# endif
+ (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
+ }
+ }
+
+ size_t framePointerFramesAcquired = 0;
+ lul::LUL* lul = CorePS::Lul(aLock);
+ lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
+ reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
+ &aNativeStack.mCount, &framePointerFramesAcquired,
+ MAX_NATIVE_FRAMES, &startRegs, &stackImg);
+
+ // Update stats in the LUL stats object. Unfortunately this requires
+ // three global memory operations.
+ // NOTE(review): the mCFI increment is computed in unsigned arithmetic, so it
+ // wraps if Unwind() ever reports mCount == 0 -- confirm that Unwind() always
+ // yields at least one frame.
+ lul->mStats.mContext += 1;
+ lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
+ lul->mStats.mFP += framePointerFramesAcquired;
+}
+
+#endif
+
+#ifdef HAVE_NATIVE_UNWIND
+// Fills aNativeStack by forwarding all four arguments, unchanged, to exactly
+// one stackwalker chosen at compile time. A configuration with none of the
+// USE_* macros below defined fails to build.
+static void DoNativeBacktrace(PSLockRef aLock,
+ const RegisteredThread& aRegisteredThread,
+ const Registers& aRegs,
+ NativeStack& aNativeStack) {
+ // This method determines which stackwalker is used for periodic and
+ // synchronous samples. (Backtrace samples are treated differently, see
+ // profiler_suspend_and_sample_thread() for details). The only part of the
+ // ordering that matters is that LUL must precede FRAME_POINTER, because on
+ // Linux they can both be present.
+# if defined(USE_LUL_STACKWALK)
+ DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+# elif defined(USE_EHABI_STACKWALK)
+ DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+# elif defined(USE_FRAME_POINTER_STACK_WALK)
+ DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+# elif defined(USE_MOZ_STACK_WALK)
+ DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+# else
+# error "Invalid configuration"
+# endif
+}
+#endif
+
+// Writes the parts common to periodic and synchronous samples into aBuffer.
+// (This should only be called from DoSyncSample() and DoPeriodicSample().)
+//
+// When native unwinding is compiled in and the stack-walk feature is on, the
+// native stack is walked before merging. Otherwise the merge runs with an
+// empty native stack and, if the leaf feature is on, the sampled PC is
+// recorded as a single native leaf address.
+//
+// The grammar for entry sequences is in a comment above
+// ProfileBuffer::StreamSamplesToJSON.
+static inline void DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
+                                  RegisteredThread& aRegisteredThread,
+                                  const Registers& aRegs, uint64_t aSamplePos,
+                                  ProfileBuffer& aBuffer) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(!aBuffer.IsThreadSafe(),
+             "Mutexes cannot be used inside this critical section");
+
+  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
+
+  ProfileBufferCollector collector(aBuffer, aSamplePos);
+  NativeStack nativeStack;
+
+  bool walkedNativeStack = false;
+#if defined(HAVE_NATIVE_UNWIND)
+  if (ActivePS::FeatureStackWalk(aLock)) {
+    DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack);
+    walkedNativeStack = true;
+  }
+#endif
+
+  MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
+              aRegs, nativeStack, collector);
+
+  if (walkedNativeStack) {
+    return;
+  }
+
+  // We can't walk the whole native stack, but we can record the top frame.
+  if (ActivePS::FeatureLeaf(aLock)) {
+    aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
+  }
+}
+
+// Writes a synchronous sample to the given ProfileBuffer: a thread-id entry,
+// a time entry (milliseconds from process start to aNow), then the shared
+// sample components.
+static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
+                         const TimeStamp& aNow, const Registers& aRegs,
+                         ProfileBuffer& aBuffer) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  const uint64_t threadEntryPos =
+      aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
+
+  const TimeDuration sinceProcessStart = aNow - CorePS::ProcessStartTime();
+  const double sinceProcessStartMs = sinceProcessStart.ToMilliseconds();
+  aBuffer.AddEntry(ProfileBufferEntry::Time(sinceProcessStartMs));
+
+  DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs,
+                 threadEntryPos, aBuffer);
+}
+
+// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
+// The ThreadId entry is already written in the main ProfileBuffer, its location
+// is `aSamplePos`, we can write the rest to `aBuffer` (which may be different).
+// This simply forwards to DoSharedSample() with aIsSynchronous = false;
+// `aProfiledThreadData` is not used by the body.
+static void DoPeriodicSample(PSLockRef aLock,
+ RegisteredThread& aRegisteredThread,
+ ProfiledThreadData& aProfiledThreadData,
+ const Registers& aRegs, uint64_t aSamplePos,
+ ProfileBuffer& aBuffer) {
+ // WARNING: this function runs within the profiler's "critical section".
+
+ DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs,
+ aSamplePos, aBuffer);
+}
+
+// END sampling/unwinding code
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN saving/streaming code
+
+// Largest value SafeJSInteger() passes through unchanged (2^53 - 1).
+static const uint64_t kJS_MAX_SAFE_UINTEGER = 9007199254740991ULL;
+
+// Returns aValue as a signed integer when it does not exceed
+// kJS_MAX_SAFE_UINTEGER, and -1 otherwise.
+static int64_t SafeJSInteger(uint64_t aValue) {
+  if (aValue > kJS_MAX_SAFE_UINTEGER) {
+    return -1;
+  }
+  return static_cast<int64_t>(aValue);
+}
+
+// Writes one shared library as a JSON object element: its address range and
+// offset (each passed through SafeJSInteger()), followed by its name, path,
+// debug name, debug path, breakpad id and architecture.
+static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
+                                         const SharedLibrary& aLib) {
+  const int64_t startAddr = SafeJSInteger(aLib.GetStart());
+  const int64_t endAddr = SafeJSInteger(aLib.GetEnd());
+  const int64_t offset = SafeJSInteger(aLib.GetOffset());
+
+  aWriter.StartObjectElement();
+  {
+    aWriter.IntProperty("start", startAddr);
+    aWriter.IntProperty("end", endAddr);
+    aWriter.IntProperty("offset", offset);
+    aWriter.StringProperty("name", aLib.GetModuleName());
+    aWriter.StringProperty("path", aLib.GetModulePath());
+    aWriter.StringProperty("debugName", aLib.GetDebugName());
+    aWriter.StringProperty("debugPath", aLib.GetDebugPath());
+    aWriter.StringProperty("breakpadId", aLib.GetBreakpadId());
+    aWriter.StringProperty("arch", aLib.GetArch());
+  }
+  aWriter.EndObject();
+}
+
+// Streams every shared library of the current process to aWriter, sorted by
+// address, one JSON object element per library.
+void AppendSharedLibraries(JSONWriter& aWriter) {
+  SharedLibraryInfo libraries = SharedLibraryInfo::GetInfoForSelf();
+  libraries.SortByAddress();
+
+  const size_t libraryCount = libraries.GetSize();
+  for (size_t index = 0; index != libraryCount; ++index) {
+    AddSharedLibraryInfoToStream(aWriter, libraries.GetEntry(index));
+  }
+}
+
+// Streams the list of profiling categories (and their subcategories) to
+// aWriter by expanding MOZ_PROFILING_CATEGORY_LIST with three local macros.
+// The caller is expected to have opened the enclosing array.
+static void StreamCategories(SpliceableJSONWriter& aWriter) {
+ // Same order as ProfilingCategory. Format:
+ // [
+ // {
+ // name: "Idle",
+ // color: "transparent",
+ // subcategories: ["Other"],
+ // },
+ // {
+ // name: "Other",
+ // color: "grey",
+ // subcategories: [
+ // "JSM loading",
+ // "Subprocess launching",
+ // "DLL loading"
+ // ]
+ // },
+ // ...
+ // ]
+
+ // BEGIN_CATEGORY: starts one category entry (presumably Start() opens the
+ // object that END_CATEGORY's EndObject() closes -- TODO confirm), writes its
+ // name and color, and opens the "subcategories" array.
+#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
+ aWriter.Start(); \
+ aWriter.StringProperty("name", labelAsString); \
+ aWriter.StringProperty("color", color); \
+ aWriter.StartArrayProperty("subcategories");
+ // SUBCATEGORY: appends one subcategory label to the open array.
+#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
+ aWriter.StringElement(labelAsString);
+ // END_CATEGORY: closes the "subcategories" array and the category entry.
+#define CATEGORY_JSON_END_CATEGORY \
+ aWriter.EndArray(); \
+ aWriter.EndObject();
+
+ MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
+ CATEGORY_JSON_SUBCATEGORY,
+ CATEGORY_JSON_END_CATEGORY)
+
+ // Keep the helper macros local to this function.
+#undef CATEGORY_JSON_BEGIN_CATEGORY
+#undef CATEGORY_JSON_SUBCATEGORY
+#undef CATEGORY_JSON_END_CATEGORY
+}
+
+// Streams the display schema of every registered marker type to aWriter,
+// emitting each distinct marker type name only once.
+static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) {
+  // Array view over all registered marker-type-specific functions.
+  Span<const base_profiler_markers_detail::Streaming::MarkerTypeFunctions>
+      markerTypeFunctionsArray =
+          base_profiler_markers_detail::Streaming::MarkerTypeFunctionsArray();
+
+  // Names streamed so far. Duplicates may come from the same code potentially
+  // living in different libraries.
+  std::set<std::string> streamedNames;
+
+  for (const auto& functions : markerTypeFunctionsArray) {
+    auto name = functions.mMarkerTypeNameFunction();
+
+    // emplace() reports through `second` whether the name was newly added;
+    // when it was already present this marker type has been streamed before.
+    const bool isNewName =
+        streamedNames.emplace(name.data(), name.size()).second;
+    if (!isNewName) {
+      continue;
+    }
+
+    functions.mMarkerSchemaFunction().Stream(aWriter, name);
+  }
+}
+
+static int64_t MicrosecondsSince1970();
+
+// Writes the properties of the profile's "meta" object to aWriter. The
+// version, start/shutdown times, categories and marker schema are always
+// written; the remaining properties are written only on the main thread.
+static void StreamMetaJSCustomObject(PSLockRef aLock,
+                                     SpliceableJSONWriter& aWriter,
+                                     bool aIsShuttingDown) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  aWriter.IntProperty("version", 19);
+
+  // "startTime" is the number of milliseconds since midnight January 1, 1970
+  // GMT. CorePS::ProcessStartTime() is converted into that form by computing
+  // (Now - (Now - ProcessStartTime)).
+  const TimeDuration sinceProcessStart =
+      TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
+  const double nowSince1970Ms = MicrosecondsSince1970() / 1000.0;
+  aWriter.DoubleProperty("startTime",
+                         nowSince1970Ms - sinceProcessStart.ToMilliseconds());
+
+  // "shutdownTime", unlike "startTime", is not an absolute time stamp: it is
+  // relative to startTime, consistent with all other (non-"startTime") times
+  // anywhere in the profile JSON. It is null unless we are shutting down.
+  if (!aIsShuttingDown) {
+    aWriter.NullProperty("shutdownTime");
+  } else {
+    aWriter.DoubleProperty("shutdownTime", profiler_time());
+  }
+
+  aWriter.StartArrayProperty("categories");
+  StreamCategories(aWriter);
+  aWriter.EndArray();
+
+  aWriter.StartArrayProperty("markerSchema");
+  StreamMarkerSchema(aWriter);
+  aWriter.EndArray();
+
+  const bool onMainThread = profiler_is_main_thread();
+  if (!onMainThread) {
+    // Off the main thread the remaining properties are left out. At the
+    // moment, the only case in which this function is called on a background
+    // thread is if we're in a content process and are going to send this
+    // profile to the parent process. In that case, the parent process
+    // profile's "meta" object already has the rest of the properties, and the
+    // parent process profile is dumped on that process's main thread.
+    return;
+  }
+
+  aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
+  aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
+
+#ifdef DEBUG
+  const int64_t debugFlag = 1;
+#else
+  const int64_t debugFlag = 0;
+#endif
+  aWriter.IntProperty("debug", debugFlag);
+
+  // The following properties are always written as 0 here.
+  aWriter.IntProperty("gcpoison", 0);
+  aWriter.IntProperty("asyncstack", 0);
+  aWriter.IntProperty("processType", 0);
+}
+
+// Discards expired pages, then streams each remaining profiled page to
+// aWriter.
+static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  ActivePS::DiscardExpiredPages(aLock);
+
+  const auto& profiledPages = ActivePS::ProfiledPages(aLock);
+  for (const auto& profiledPage : profiledPages) {
+    profiledPage->StreamJSON(aWriter);
+  }
+}
+
+// Streams this process's profile to aWriter; the caller must hold the profiler
+// lock and the profiler must be active.
+//
+// With aOnlyThreads false, the output is: "libs", "meta", "pages", profiler
+// overhead, counters, the "threads" array and "pausedRanges". With
+// aOnlyThreads true, only the items of the threads array are written.
+// Afterwards the collection's start and end times are recorded in the buffer.
+static void locked_profiler_stream_json_for_this_process(
+    PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
+    bool aIsShuttingDown, bool aOnlyThreads = false) {
+  LOG("locked_profiler_stream_json_for_this_process");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  AUTO_PROFILER_STATS(base_locked_profiler_stream_json_for_this_process);
+
+  const double collectionStartMs = profiler_time();
+
+  ProfileBuffer& buffer = ActivePS::Buffer(aLock);
+
+  // When a "Window length" is set, drop data older than that window.
+  Maybe<double> windowLengthS = ActivePS::Duration(aLock);
+  if (windowLengthS.isSome()) {
+    const double windowStartMs = collectionStartMs - *windowLengthS * 1000;
+    buffer.DiscardSamplesBeforeTime(windowStartMs);
+  }
+
+  const bool writeFullProfile = !aOnlyThreads;
+
+  if (writeFullProfile) {
+    // Shared library info.
+    aWriter.StartArrayProperty("libs");
+    AppendSharedLibraries(aWriter);
+    aWriter.EndArray();
+
+    // Meta data.
+    aWriter.StartObjectProperty("meta");
+    StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown);
+    aWriter.EndObject();
+
+    // Page data.
+    aWriter.StartArrayProperty("pages");
+    StreamPages(aLock, aWriter);
+    aWriter.EndArray();
+
+    buffer.StreamProfilerOverheadToJSON(aWriter, CorePS::ProcessStartTime(),
+                                        aSinceTime);
+    buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(),
+                                aSinceTime);
+
+    // The samples for each thread profile go in this array.
+    aWriter.StartArrayProperty("threads");
+  }
+
+  // The threads array items; this is the only output when aOnlyThreads is
+  // true.
+  ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
+  {
+    Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> profiledThreads =
+        ActivePS::ProfiledThreads(aLock);
+    for (auto& threadEntry : profiledThreads) {
+      threadEntry.second->StreamJSON(
+          buffer, aWriter, CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock),
+          CorePS::ProcessStartTime(), aSinceTime);
+    }
+  }
+
+  if (writeFullProfile) {
+    // Closes "threads".
+    aWriter.EndArray();
+
+    aWriter.StartArrayProperty("pausedRanges");
+    buffer.StreamPausedRangesToJSON(aWriter, aSinceTime);
+    aWriter.EndArray();
+  }
+
+  const double collectionEndMs = profiler_time();
+
+  // Record the collection's timestamps in the buffer, so that consumers know
+  // why no samples were collected for its duration. These entries are added
+  // after the profile has been collected, so they'll be visible for the *next*
+  // profile collection (if they haven't been overwritten due to buffer
+  // wraparound by then).
+  buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
+  buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
+}
+
+// Takes the profiler lock and, if the profiler is active, streams this
+// process's profile to aWriter. Returns whether anything was streamed (false
+// when the profiler is inactive).
+bool profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
+                                           double aSinceTime,
+                                           bool aIsShuttingDown,
+                                           bool aOnlyThreads) {
+  LOG("profiler_stream_json_for_this_process");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  const bool profilerIsActive = ActivePS::Exists(lock);
+  if (profilerIsActive) {
+    locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime,
+                                                 aIsShuttingDown, aOnlyThreads);
+  }
+  return profilerIsActive;
+}
+
+// END saving/streaming code
+////////////////////////////////////////////////////////////////////////
+
+ // Maps a feature bit to the one-character tag shown in the usage text:
+ //   'D' / 'd'  in DefaultFeatures(), available / unavailable
+ //   'S' / 's'  in StartupExtraDefaultFeatures(), available / unavailable
+ //   '-' / 'x'  in neither default set, available / unavailable
+ // "Available" means the bit is present in AvailableFeatures(). The default
+ // set takes precedence over the startup-extra set.
+ static char FeatureCategory(uint32_t aFeature) {
+   const bool isDefault = (aFeature & DefaultFeatures()) != 0;
+   if (isDefault) {
+     return (aFeature & AvailableFeatures()) ? 'D' : 'd';
+   }
+
+   const bool isStartupExtra = (aFeature & StartupExtraDefaultFeatures()) != 0;
+   if (isStartupExtra) {
+     return (aFeature & AvailableFeatures()) ? 'S' : 's';
+   }
+
+   return (aFeature & AvailableFeatures()) ? '-' : 'x';
+ }
+
+ // Prints the BaseProfiler environment-variable help text to the console, then
+ // terminates the process with exit(aExitCode); this function does not return.
+ //
+ // The output has three parts:
+ //   1. the description of the MOZ_*PROFILER* variables, with the buffer-size
+ //      limits and defaults substituted in;
+ //   2. one line per feature from BASE_PROFILER_FOR_EACH_FEATURE, prefixed by
+ //      its FeatureCategory() character and its bitfield value;
+ //   3. the remaining variables and whether HAVE_NATIVE_UNWIND is defined.
+ static void PrintUsageThenExit(int aExitCode) {
+   PrintToConsole(
+       "\n"
+       "Profiler environment variable usage:\n"
+       "\n"
+       " MOZ_BASE_PROFILER_HELP\n"
+       " If set to any value, prints this message.\n"
+       " (Only BaseProfiler features are known here; Use MOZ_PROFILER_HELP\n"
+       " for Gecko Profiler help, with more features).\n"
+       "\n"
+       " MOZ_BASE_PROFILER_{,DEBUG_,VERBOSE}LOGGING\n"
+       " Enables BaseProfiler logging to stdout. The levels of logging\n"
+       " available are 'MOZ_BASE_PROFILER_LOGGING' (least verbose),\n"
+       " '..._DEBUG_LOGGING', '..._VERBOSE_LOGGING' (most verbose)\n"
+       "\n"
+       " MOZ_PROFILER_STARTUP\n"
+       " If set to any value other than '' or '0'/'N'/'n', starts the\n"
+       " profiler immediately on start-up.\n"
+       " Useful if you want to profile code that runs very early.\n"
+       "\n"
+       " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
+       " If MOZ_PROFILER_STARTUP is set, specifies the number of entries\n"
+       " per process in the profiler's circular buffer when the profiler is\n"
+       " first started.\n"
+       " If unset, the platform default is used:\n"
+       " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
+       " (%u bytes per entry -> %u or %u total bytes per process)\n"
+       "\n"
+       " MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
+       " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time\n"
+       " of entries in the profiler's circular buffer when the profiler\n"
+       " is first started, in seconds.\n"
+       " If unset, the life time of the entries will only be restricted by\n"
+       " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
+       " additional time duration restriction will be applied.\n"
+       "\n"
+       " MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n"
+       " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
+       " measured in milliseconds, when the profiler is first started.\n"
+       " If unset, the platform default is used.\n"
+       "\n"
+       " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
+       " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
+       " features, as the integer value of the features bitfield.\n"
+       " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
+       "\n"
+       " MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
+       " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
+       " features, as a comma-separated list of strings.\n"
+       " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
+       " If unset, the platform default is used.\n"
+       "\n"
+       " Features: (x=unavailable, D/d=default/unavailable,\n"
+       " S/s=MOZ_PROFILER_STARTUP extra "
+       "default/unavailable)\n",
+       // Arguments, in the order the %u placeholders appear above: allowed
+       // entry range, default and startup-default entry counts, bytes per
+       // entry, and the two defaults converted to bytes.
+       unsigned(ActivePS::scMinimumBufferEntries),
+       unsigned(ActivePS::scMaximumBufferEntries),
+       unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value()),
+       unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
+       unsigned(scBytesPerEntry),
+       unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
+       unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value() *
+                scBytesPerEntry));
+
+ // Prints one feature line: category character, bitfield value, name and
+ // description.
+ #define PRINT_FEATURE(n_, str_, Name_, desc_)             \
+   PrintToConsole(" %c %7u: \"%s\" (%s)\n",                \
+                  FeatureCategory(ProfilerFeature::Name_), \
+                  ProfilerFeature::Name_, str_, desc_);
+
+   BASE_PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
+
+ #undef PRINT_FEATURE
+
+   PrintToConsole(
+       " - \"default\" (All above D+S defaults)\n"
+       "\n"
+       " MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
+       " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as "
+       "a\n"
+       " comma-separated list of strings. A given thread will be sampled if\n"
+       " any of the filters is a case-insensitive substring of the thread\n"
+       " name. If unset, a default is used.\n"
+       "\n"
+       " MOZ_PROFILER_SHUTDOWN\n"
+       " If set, the profiler saves a profile to the named file on shutdown.\n"
+       "\n"
+       " MOZ_PROFILER_SYMBOLICATE\n"
+       " If set, the profiler will pre-symbolicate profiles.\n"
+       " *Note* This will add a significant pause when gathering data, and\n"
+       " is intended mainly for local development.\n"
+       "\n"
+       " MOZ_PROFILER_LUL_TEST\n"
+       " If set to any value, runs LUL unit tests at startup.\n"
+       "\n"
+       " This platform %s native unwinding.\n"
+       "\n",
+ #if defined(HAVE_NATIVE_UNWIND)
+       "supports"
+ #else
+       "does not support"
+ #endif
+   );
+
+   exit(aExitCode);
+ }
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+struct SigHandlerCoordinator;
+#endif
+
+// Sampler performs setup and teardown of the state required to sample with the
+// profiler. Sampler may exist when ActivePS is not present.
+//
+// SuspendAndSampleAndResumeThread must only be called from a single thread,
+// and must not sample the thread it is being called from. A separate Sampler
+// instance must be used for each thread which wants to capture samples.
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// With the exception of SamplerThread, all Sampler objects must be Disable-d
+// before releasing the lock which was used to create them. This avoids races
+// on linux with the SIGPROF signal handler.
+
+ class Sampler {
+ public:
+ // Sets up the profiler such that it can begin sampling.
+ explicit Sampler(PSLockRef aLock);
+
+ // Disable the sampler, restoring it to its previous state. This must be
+ // called once, and only once, before the Sampler is destroyed.
+ void Disable(PSLockRef aLock);
+
+ // This method suspends and resumes the samplee thread. It calls the passed-in
+ // function-like object aProcessRegs (passing it a populated |const
+ // Registers&| arg) while the samplee thread is suspended.
+ // Func must be a function-like object; SamplerThread::Run passes a lambda of
+ // type `void(const Registers&, const TimeStamp&)` (not a nullary `void()`).
+ template <typename Func>
+ void SuspendAndSampleAndResumeThread(
+ PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+ const TimeStamp& aNow, const Func& aProcessRegs);
+
+ private:
+ #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+ // Used to restore the SIGPROF handler when ours is removed.
+ struct sigaction mOldSigprofHandler;
+
+ // This process' ID. Needed as an argument for tgkill in
+ // SuspendAndSampleAndResumeThread.
+ int mMyPid;
+
+ // The sampler thread's ID. Used to assert that it is not sampling itself,
+ // which would lead to deadlock.
+ int mSamplerTid;
+
+ public:
+ // This is the one-and-only variable used to communicate between the sampler
+ // thread and the samplee thread's signal handler. It's static because the
+ // samplee thread's signal handler is static.
+ static struct SigHandlerCoordinator* sSigHandlerCoordinator;
+ #endif
+ };
+
+// END Sampler
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread
+
+// The sampler thread controls sampling and runs whenever the profiler is
+// active. It periodically runs through all registered threads, finds those
+// that should be sampled, then pauses and samples them.
+
+ class SamplerThread {
+ public:
+ // Creates a sampler thread, but doesn't start it.
+ SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+ double aIntervalMilliseconds);
+ ~SamplerThread();
+
+ // This runs on (is!) the sampler thread.
+ void Run();
+
+ // This runs on the main thread.
+ void Stop(PSLockRef aLock);
+
+ private:
+ // This suspends the calling thread for the given number of microseconds.
+ // Best effort timing.
+ void SleepMicro(uint32_t aMicroseconds);
+
+ // The sampler used to suspend and sample threads.
+ Sampler mSampler;
+
+ // The activity generation, for detecting when the sampler thread must stop.
+ const uint32_t mActivityGeneration;  // Compared with ActivePS::Generation() in Run().
+
+ // The interval between samples, measured in microseconds.
+ const int mIntervalMicroseconds;  // Note: the constructor parameter is in milliseconds.
+
+ // The OS-specific handle for the sampler thread.
+ #if defined(GP_OS_windows)
+ HANDLE mThread;
+ #elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
+ defined(GP_OS_android) || defined(GP_OS_freebsd)
+ pthread_t mThread;
+ #endif
+ // Not copyable.
+ SamplerThread(const SamplerThread&) = delete;
+ void operator=(const SamplerThread&) = delete;
+ };
+
+// This function is required because we need to create a SamplerThread within
+// ActivePS's constructor, but SamplerThread is defined after ActivePS. It
+// could probably be removed by moving some code around.
+ // Heap-allocates a SamplerThread from the given lock, activity generation
+ // and interval, and hands the raw pointer to the caller.
+ static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+                                        double aInterval) {
+   SamplerThread* const samplerThread =
+       new SamplerThread(aLock, aGeneration, aInterval);
+   return samplerThread;
+ }
+
+ // Main loop of the sampler thread, used for all targets. Each iteration samples
+ // under a PSAutoLock, then sleeps; it returns once ActivePS is gone or replaced.
+ void SamplerThread::Run() {
+ // TODO: If possible, name this thread later on, after NSPR becomes available.
+ // PR_SetCurrentThreadName("SamplerThread");
+
+ // Features won't change during this SamplerThread's lifetime, so we can read
+ // them once and store them locally.
+ const uint32_t features = []() -> uint32_t {
+ PSAutoLock lock;
+ if (!ActivePS::Exists(lock)) {
+ // If there is no active profiler, it doesn't matter what we return,
+ // because this thread will exit before any feature is used.
+ return 0;
+ }
+ return ActivePS::Features(lock);
+ }();
+
+ // Not *no*-stack-sampling means we do want stack sampling.
+ const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
+
+ // Use local BlocksRingBuffer&ProfileBuffer to capture the stack.
+ // (This is to avoid touching the CorePS::CoreBuffer lock while
+ // a thread is suspended, because that thread could be working with
+ // the CorePS::CoreBuffer as well.)
+ ProfileBufferChunkManagerSingle localChunkManager(
+ ProfileBufferChunkManager::scExpectedMaximumStackSize);
+ ProfileChunkedBuffer localBuffer(
+ ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
+ ProfileBuffer localProfileBuffer(localBuffer);
+
+ // Will be kept between collections, to know what each collection does.
+ auto previousState = localBuffer.GetState();
+
+ // This will be positive if we are running behind schedule (sampling less
+ // frequently than desired) and negative if we are ahead of schedule.
+ TimeDuration lastSleepOvershoot = 0;
+ TimeStamp sampleStart = TimeStamp::NowUnfuzzed();
+
+ while (true) {
+ // This scope is for |lock|. It ends before we sleep below.
+ {
+ PSAutoLock lock;
+ TimeStamp lockAcquired = TimeStamp::NowUnfuzzed();
+
+ if (!ActivePS::Exists(lock)) {
+ return;  // No active profiler: end the sampler thread.
+ }
+
+ // At this point profiler_stop() might have been called, and
+ // profiler_start() might have been called on another thread. If this
+ // happens the generation won't match.
+ if (ActivePS::Generation(lock) != mActivityGeneration) {
+ return;  // A different ActivePS generation is active: end this thread.
+ }
+
+ ActivePS::ClearExpiredExitProfiles(lock);
+
+ TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed();  // Taken right after ClearExpiredExitProfiles().
+
+ if (!ActivePS::IsSamplingPaused(lock)) {
+ TimeDuration delta = sampleStart - CorePS::ProcessStartTime();  // Sample start, relative to process start.
+ ProfileBuffer& buffer = ActivePS::Buffer(lock);
+
+ // handle per-process generic counters
+ const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
+ for (auto& counter : counters) {
+ // create Buffer entries for each counter
+ buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
+ buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+ // XXX support keyed maps of counts
+ // In the future, we'll support keyed counters - for example, counters
+ // with a key which is a thread ID. For "simple" counters we'll just
+ // use a key of 0.
+ int64_t count;
+ uint64_t number;
+ counter->Sample(count, number);
+ buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
+ buffer.AddEntry(ProfileBufferEntry::Count(count));
+ if (number) {  // A zero |number| is not recorded.
+ buffer.AddEntry(ProfileBufferEntry::Number(number));
+ }
+ }
+ TimeStamp countersSampled = TimeStamp::NowUnfuzzed();
+
+ if (stackSampling) {
+ const Vector<LiveProfiledThreadData>& liveThreads =
+ ActivePS::LiveProfiledThreads(lock);
+
+ for (auto& thread : liveThreads) {
+ RegisteredThread* registeredThread = thread.mRegisteredThread;
+ ProfiledThreadData* profiledThreadData =
+ thread.mProfiledThreadData.get();
+ RefPtr<ThreadInfo> info = registeredThread->Info();
+
+ // If the thread is asleep and has been sampled before in the same
+ // sleep episode, find and copy the previous sample, as that's
+ // cheaper than taking a new sample.
+ if (registeredThread->RacyRegisteredThread()
+ .CanDuplicateLastSampleDueToSleep()) {
+ bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
+ info->ThreadId(), CorePS::ProcessStartTime(),
+ profiledThreadData->LastSample());
+ if (dup_ok) {
+ continue;  // Previous sample duplicated; this thread is not suspended.
+ }
+ }
+
+ AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample);
+
+ TimeStamp now = TimeStamp::NowUnfuzzed();
+
+ // Add the thread ID now, so we know its position in the main
+ // buffer, which is used by some JS data. (DoPeriodicSample only
+ // knows about the temporary local buffer.)
+ uint64_t samplePos =
+ buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
+ profiledThreadData->LastSample() = Some(samplePos);
+
+ // Also add the time, so it's always there after the thread ID, as
+ // expected by the parser. (Other stack data is optional.)
+ TimeDuration delta = now - CorePS::ProcessStartTime();  // Shadows the outer |delta|.
+ buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+
+ mSampler.SuspendAndSampleAndResumeThread(
+ lock, *registeredThread, now,
+ [&](const Registers& aRegs, const TimeStamp& aNow) {  // aNow is not used by this callback.
+ DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
+ aRegs, samplePos, localProfileBuffer);
+ });
+
+ // If data is complete, copy it into the global buffer.
+ auto state = localBuffer.GetState();
+ if (state.mClearedBlockCount != previousState.mClearedBlockCount) {  // Local buffer dropped data: skip the copy.
+ LOG("Stack sample too big for local storage, needed %u bytes",
+ unsigned(state.mRangeEnd - previousState.mRangeEnd));
+ } else if (state.mRangeEnd - previousState.mRangeEnd >=
+ *CorePS::CoreBuffer().BufferLength()) {
+ LOG("Stack sample too big for profiler storage, needed %u bytes",
+ unsigned(state.mRangeEnd - previousState.mRangeEnd));
+ } else {
+ CorePS::CoreBuffer().AppendContents(localBuffer);
+ }
+
+ // Clean up for the next run.
+ localBuffer.Clear();
+ previousState = localBuffer.GetState();
+ }
+ }
+
+ #if defined(USE_LUL_STACKWALK)
+ // The LUL unwind object accumulates frame statistics. Periodically we
+ // should poke it to give it a chance to print those statistics. This
+ // involves doing I/O (fprintf, __android_log_print, etc.) and so
+ // can't safely be done from the critical section inside
+ // SuspendAndSampleAndResumeThread, which is why it is done here.
+ CorePS::Lul(lock)->MaybeShowStats();
+ #endif
+ TimeStamp threadsSampled = TimeStamp::NowUnfuzzed();
+
+ {
+ AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
+ ActivePS::FulfillChunkRequests(lock);
+ }
+ // Record how long each phase of this iteration took (outer |delta| is used).
+ buffer.CollectOverheadStats(delta, lockAcquired - sampleStart,
+ expiredMarkersCleaned - lockAcquired,
+ countersSampled - expiredMarkersCleaned,
+ threadsSampled - countersSampled);
+ }
+ }
+ // gPSMutex is not held after this point.
+
+ // Calculate how long a sleep to request. After the sleep, measure how
+ // long we actually slept and take the difference into account when
+ // calculating the sleep interval for the next iteration. This is an
+ // attempt to keep "to schedule" in the presence of inaccuracy of the
+ // actual sleep intervals.
+ TimeStamp targetSleepEndTime =
+ sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds);
+ TimeStamp beforeSleep = TimeStamp::NowUnfuzzed();
+ TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
+ double sleepTime = std::max(
+ 0.0, (targetSleepDuration - lastSleepOvershoot).ToMicroseconds());  // Clamped: never a negative sleep.
+ SleepMicro(static_cast<uint32_t>(sleepTime));
+ sampleStart = TimeStamp::NowUnfuzzed();
+ lastSleepOvershoot =
+ sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
+ }
+ }
+
+// Temporary closing namespaces from enclosing platform.cpp.
+} // namespace baseprofiler
+} // namespace mozilla
+
+// We #include these files directly because it means those files can use
+// declarations from this file trivially. These provide target-specific
+// implementations of all SamplerThread methods except Run().
+#if defined(GP_OS_windows)
+# include "platform-win32.cpp"
+#elif defined(GP_OS_darwin)
+# include "platform-macos.cpp"
+#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+# include "platform-linux-android.cpp"
+#else
+# error "bad platform"
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+ // Allocates a PlatformData for aThreadId and returns it wrapped in a
+ // UniquePlatformData.
+ UniquePlatformData AllocPlatformData(int aThreadId) {
+   PlatformData* const platformData = new PlatformData(aThreadId);
+   return UniquePlatformData(platformData);
+ }
+
+ // Deleter for PlatformData: frees aData with `delete`.
+ void PlatformDataDestructor::operator()(PlatformData* aData) {
+   delete aData;
+ }
+
+// END SamplerThread
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN externally visible functions
+
+ // Converts one feature name into its bitfield value.
+ //
+ // "default" yields DefaultFeatures() (plus StartupExtraDefaultFeatures() when
+ // aIsStartup is true), masked by AvailableFeatures(). Any name listed in
+ // BASE_PROFILER_FOR_EACH_FEATURE yields the matching ProfilerFeature bit.
+ //
+ // An unrecognized name is reported on the console and then
+ // PrintUsageThenExit(0) is called, which ends the process through exit();
+ // the final `return 0` is never reached in that case.
+ static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
+   if (!strcmp(aFeature, "default")) {
+     uint32_t defaults = DefaultFeatures();
+     if (aIsStartup) {
+       defaults |= StartupExtraDefaultFeatures();
+     }
+     return defaults & AvailableFeatures();
+   }
+
+ // Expands to one name comparison per known feature.
+ #define RETURN_IF_FEATURE_NAMED(n_, str_, Name_, desc_) \
+   if (!strcmp(aFeature, str_)) {                        \
+     return ProfilerFeature::Name_;                      \
+   }
+
+   BASE_PROFILER_FOR_EACH_FEATURE(RETURN_IF_FEATURE_NAMED)
+
+ #undef RETURN_IF_FEATURE_NAMED
+
+   PrintToConsole("\nUnrecognized feature \"%s\".\n\n", aFeature);
+   PrintUsageThenExit(0);
+   return 0;
+ }
+
+ // Parses each of the aFeatureCount names in aFeatures with ParseFeature()
+ // and returns the bitwise OR of the results (0 for an empty array).
+ uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+                                       uint32_t aFeatureCount,
+                                       bool aIsStartup /* = false */) {
+   uint32_t combined = 0;
+   const char** const end = aFeatures + aFeatureCount;
+   for (const char** name = aFeatures; name != end; ++name) {
+     combined |= ParseFeature(*name, aIsStartup);
+   }
+   return combined;
+ }
+
+// Find the RegisteredThread for the current thread. This should only be called
+// in places where TLSRegisteredThread can't be used.
+ // Linear search of CorePS::RegisteredThreads() for the entry whose thread id
+ // equals profiler_current_thread_id(). Returns that entry, or nullptr when
+ // the calling thread is not registered.
+ static RegisteredThread* FindCurrentThreadRegisteredThread(PSLockRef aLock) {
+   const int currentThreadId = profiler_current_thread_id();
+
+   RegisteredThread* match = nullptr;
+   for (const auto& candidate : CorePS::RegisteredThreads(aLock)) {
+     if (candidate->Info()->ThreadId() == currentThreadId) {
+       match = candidate.get();
+       break;
+     }
+   }
+   return match;
+ }
+
+ // Registers the calling thread with the profiler under the name aName.
+ //
+ // Creates a ThreadInfo and a RegisteredThread for the current thread, stores
+ // the RegisteredThread in TLSRegisteredThread, adds it to the live profiled
+ // threads when an ActivePS exists and wants this thread, and finally hands
+ // ownership to CorePS. Returns the thread's ProfilingStack, or nullptr if
+ // TLSRegisteredThread::Init() fails.
+ static ProfilingStack* locked_register_thread(PSLockRef aLock,
+                                               const char* aName,
+                                               void* aStackTop) {
+   MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+   // The current thread must not be registered already.
+   MOZ_ASSERT(!FindCurrentThreadRegisteredThread(aLock));
+
+   VTUNE_REGISTER_THREAD(aName);
+
+   if (!TLSRegisteredThread::Init(aLock)) {
+     return nullptr;
+   }
+
+   RefPtr<ThreadInfo> threadInfo = new ThreadInfo(
+       aName, profiler_current_thread_id(), profiler_is_main_thread());
+   UniquePtr<RegisteredThread> ownedThread =
+       MakeUnique<RegisteredThread>(threadInfo, aStackTop);
+   RegisteredThread* const rawThread = ownedThread.get();
+
+   TLSRegisteredThread::SetRegisteredThread(aLock, rawThread);
+
+   const bool shouldProfile = ActivePS::Exists(aLock) &&
+                              ActivePS::ShouldProfileThread(aLock, threadInfo);
+   if (shouldProfile) {
+     rawThread->RacyRegisteredThread().SetIsBeingProfiled(true);
+     ActivePS::AddLiveProfiledThread(aLock, rawThread,
+                                     MakeUnique<ProfiledThreadData>(threadInfo));
+   }
+
+   // Take the stack address before ownership moves into CorePS.
+   ProfilingStack* const stack =
+       &rawThread->RacyRegisteredThread().ProfilingStack();
+
+   CorePS::AppendRegisteredThread(aLock, std::move(ownedThread));
+
+   return stack;
+ }
+
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+ double aInterval, uint32_t aFeatures,
+ const char** aFilters, uint32_t aFilterCount,
+ const Maybe<double>& aDuration);
+
+ // Splits aString at every ',' and returns pointers to the pieces.
+ //
+ // aString is copied (terminator included) into a fresh buffer stored in
+ // aStorage; each comma in the copy is overwritten with '\0' and the returned
+ // pointers refer into that copy, so aStorage must outlive the result.
+ // Empty pieces are kept: "" yields one empty string, "a,,b" yields three
+ // pieces.
+ static Vector<const char*> SplitAtCommas(const char* aString,
+                                          UniquePtr<char[]>& aStorage) {
+   const size_t length = strlen(aString);
+   aStorage = MakeUnique<char[]>(length + 1);
+   PodCopy(aStorage.get(), aString, length + 1);
+
+   Vector<const char*> pieces;
+   size_t pieceStart = 0;
+   // Visit every character including the final terminator, which closes the
+   // last piece.
+   for (size_t pos = 0; pos <= length; ++pos) {
+     const bool atSeparator = aStorage[pos] == ',';
+     if (!atSeparator && aStorage[pos] != '\0') {
+       continue;
+     }
+     if (atSeparator) {
+       aStorage[pos] = '\0';
+     }
+     MOZ_RELEASE_ASSERT(pieces.append(&aStorage[pieceStart]));
+     pieceStart = pos + 1;
+   }
+   return pieces;
+ }
+
+ void profiler_init(void* aStackTop) {  // Base Profiler start-up; aStackTop is passed to the main-thread registration.
+ LOG("profiler_init");
+ // The calling thread's id is recorded as the profiler's main thread id.
+ scProfilerMainThreadId = profiler_current_thread_id();
+
+ VTUNE_INIT();
+
+ MOZ_RELEASE_ASSERT(!CorePS::Exists());
+
+ if (getenv("MOZ_BASE_PROFILER_HELP")) {
+ PrintUsageThenExit(0); // terminates execution
+ }
+
+ SharedLibraryInfo::Initialize();
+ // Defaults below are only used if MOZ_PROFILER_STARTUP requests a start.
+ uint32_t features = DefaultFeatures() & AvailableFeatures();
+
+ UniquePtr<char[]> filterStorage;  // Backs the strings in |filters| when split from the environment.
+
+ Vector<const char*> filters;
+ MOZ_RELEASE_ASSERT(filters.append(kMainThreadName));
+
+ PowerOfTwo32 capacity = BASE_PROFILER_DEFAULT_ENTRIES;
+ Maybe<double> duration = Nothing();
+ double interval = BASE_PROFILER_DEFAULT_INTERVAL;
+
+ {
+ PSAutoLock lock;
+
+ // We've passed the possible failure point. Instantiate CorePS, which
+ // indicates that the profiler has initialized successfully.
+ CorePS::Create(lock);
+
+ Unused << locked_register_thread(lock, kMainThreadName, aStackTop);
+
+ // Platform-specific initialization.
+ PlatformInit(lock);
+
+ // (Linux-only) We could create CorePS::mLul and read unwind info into it
+ // at this point. That would match the lifetime implied by destruction of
+ // it in profiler_shutdown() just below. However, that gives a big delay on
+ // startup, even if no profiling is actually to be done. So, instead, it is
+ // created on demand at the first call to PlatformStart().
+
+ const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
+ if (!startupEnv || startupEnv[0] == '\0' ||
+ ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
+ startupEnv[0] == 'n') &&
+ startupEnv[1] == '\0')) {
+ return;  // Unset, empty, "0", "N" or "n": CorePS exists but profiling is not started.
+ }
+
+ // Hidden option to stop Base Profiler, mostly due to Talos intermittents,
+ // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3
+ // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325.
+ if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) {
+ return;
+ }
+
+ LOG("- MOZ_PROFILER_STARTUP is set");
+
+ // Startup default capacity may be different.
+ capacity = BASE_PROFILER_DEFAULT_STARTUP_ENTRIES;
+
+ const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
+ if (startupCapacity && startupCapacity[0] != '\0') {
+ errno = 0;
+ long capacityLong = strtol(startupCapacity, nullptr, 10);
+ // `long` could be 32 or 64 bits, so we force a 64-bit comparison with
+ // the maximum 32-bit signed number (as more than that is clamped down to
+ // 2^31 anyway).
+ if (errno == 0 && capacityLong > 0 &&
+ static_cast<uint64_t>(capacityLong) <=
+ static_cast<uint64_t>(INT32_MAX)) {
+ capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries(
+ static_cast<uint32_t>(capacityLong)));
+ LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
+ } else {
+ PrintToConsole("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
+ startupCapacity);
+ PrintUsageThenExit(1);
+ }
+ }
+
+ const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
+ if (startupDuration && startupDuration[0] != '\0') {
+ // The duration is a floating point number. Use StringToDouble rather than
+ // strtod, so that "." is used as the decimal separator regardless of OS
+ // locale.
+ auto durationVal = StringToDouble(std::string(startupDuration));
+ if (durationVal && *durationVal >= 0.0) {
+ if (*durationVal > 0.0) {  // Exactly 0 is accepted but leaves |duration| as Nothing().
+ duration = Some(*durationVal);
+ }
+ LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", *durationVal);
+ } else {
+ PrintToConsole("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
+ startupDuration);
+ PrintUsageThenExit(1);
+ }
+ }
+
+ const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
+ if (startupInterval && startupInterval[0] != '\0') {
+ // The interval is a floating point number. Use StringToDouble rather than
+ // strtod, so that "." is used as the decimal separator regardless of OS
+ // locale.
+ auto intervalValue = StringToDouble(MakeStringSpan(startupInterval));
+ if (intervalValue && *intervalValue > 0.0 && *intervalValue <= 1000.0) {
+ interval = *intervalValue;
+ LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
+ } else {
+ PrintToConsole("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
+ startupInterval);
+ PrintUsageThenExit(1);
+ }
+ }
+
+ features |= StartupExtraDefaultFeatures() & AvailableFeatures();
+
+ const char* startupFeaturesBitfield =
+ getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
+ if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
+ errno = 0;
+ features = strtol(startupFeaturesBitfield, nullptr, 10);  // NOTE(review): a `long` is stored into a uint32_t here.
+ if (errno == 0 && features != 0) {
+ LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
+ } else {
+ PrintToConsole(
+ "- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
+ startupFeaturesBitfield);
+ PrintUsageThenExit(1);
+ }
+ } else {
+ const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
+ if (startupFeatures && startupFeatures[0] != '\0') {
+ // Interpret startupFeatures as a list of feature strings, separated by
+ // commas.
+ UniquePtr<char[]> featureStringStorage;
+ Vector<const char*> featureStringArray =
+ SplitAtCommas(startupFeatures, featureStringStorage);
+ features = ParseFeaturesFromStringArray(featureStringArray.begin(),  // Replaces the defaults computed above.
+ featureStringArray.length(),
+ /* aIsStartup */ true);
+ LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
+ }
+ }
+
+ const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
+ if (startupFilters && startupFilters[0] != '\0') {
+ filters = SplitAtCommas(startupFilters, filterStorage);  // Replaces the default main-thread filter.
+ LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
+ }
+
+ locked_profiler_start(lock, capacity, interval, features, filters.begin(),
+ filters.length(), duration);
+ }
+
+ // TODO: Install memory counter if it is possible from mozglue.
+ // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+ // // start counting memory allocations (outside of lock because this may
+ // call
+ // // profiler_add_sampled_counter which would attempt to take the lock.)
+ // mozilla::profiler::install_memory_counter(true);
+ // #endif
+ }
+
+static void locked_profiler_save_profile_to_file(PSLockRef aLock,
+ const char* aFilename,
+ bool aIsShuttingDown);
+
+static SamplerThread* locked_profiler_stop(PSLockRef aLock);
+
+ // Shuts the Base Profiler down. Must run on the profiler's main thread with
+ // CorePS present (both are release-asserted).
+ //
+ // Under the lock: if a profile is being collected, it is first saved to the
+ // file named by MOZ_PROFILER_SHUTDOWN (when that variable is set) and the
+ // profiler is stopped; then CorePS is destroyed and this thread's
+ // TLSRegisteredThread is cleared. The SamplerThread returned by
+ // locked_profiler_stop() is deleted only after the lock has been released.
+ void profiler_shutdown() {
+   LOG("profiler_shutdown");
+
+   VTUNE_SHUTDOWN();
+
+   MOZ_RELEASE_ASSERT(profiler_is_main_thread());
+   MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+   // Fetched before ActivePS goes away so that it can be deleted afterwards.
+   SamplerThread* stoppedSampler = nullptr;
+   {
+     PSAutoLock lock;
+
+     if (ActivePS::Exists(lock)) {
+       if (const char* shutdownFile = getenv("MOZ_PROFILER_SHUTDOWN")) {
+         locked_profiler_save_profile_to_file(lock, shutdownFile,
+                                              /* aIsShuttingDown */ true);
+       }
+
+       stoppedSampler = locked_profiler_stop(lock);
+     }
+
+     CorePS::Destroy(lock);
+
+     // CorePS and the ThreadInfos it held are gone, so drop this thread's
+     // TLSRegisteredThread as well.
+     TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
+   }
+
+   // Done with gPSMutex unlocked (see the comments in profiler_stop()).
+   // Deleting a null pointer is a no-op.
+   delete stoppedSampler;
+ }
+
+ // Writes this process's profile into aWriter.
+ //
+ // With aOnlyThreads set, the output is a bare list holding only the threads
+ // array items. Otherwise it is a complete JSON object that also contains an
+ // empty "processes" array (profiles from other processes are not included
+ // because this function is synchronous).
+ //
+ // Returns false as soon as profiler_stream_json_for_this_process() reports
+ // that nothing was streamed; in that case the writer is left unfinished.
+ static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
+                                      double aSinceTime, bool aIsShuttingDown,
+                                      bool aOnlyThreads = false) {
+   LOG("WriteProfileToJSONWriter");
+
+   MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+   if (aOnlyThreads) {
+     aWriter.StartBareList();
+     if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
+                                                aIsShuttingDown, aOnlyThreads)) {
+       return false;
+     }
+     aWriter.EndBareList();
+     return true;
+   }
+
+   aWriter.Start();
+   if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
+                                              aIsShuttingDown, aOnlyThreads)) {
+     return false;
+   }
+   aWriter.StartArrayProperty("processes");
+   aWriter.EndArray();
+   aWriter.End();
+   return true;
+ }
+
+ // Stores aProcessName in CorePS and, when aETLDplus1 is non-null, stores the
+ // string it points to as well. Both updates happen under one PSAutoLock.
+ void profiler_set_process_name(const std::string& aProcessName,
+                                const std::string* aETLDplus1) {
+   LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.c_str(),
+       aETLDplus1 ? aETLDplus1->c_str() : "<none>");
+
+   PSAutoLock lock;
+   CorePS::SetProcessName(lock, aProcessName);
+   if (!aETLDplus1) {
+     return;
+   }
+   CorePS::SetETLDplus1(lock, *aETLDplus1);
+ }
+
+ // Serializes the profile with WriteProfileToJSONWriter() and returns a copy
+ // of the written data, or nullptr when nothing was written.
+ UniquePtr<char[]> profiler_get_profile(double aSinceTime, bool aIsShuttingDown,
+                                        bool aOnlyThreads) {
+   LOG("profiler_get_profile");
+
+   SpliceableChunkedJSONWriter writer;
+   const bool written = WriteProfileToJSONWriter(writer, aSinceTime,
+                                                 aIsShuttingDown, aOnlyThreads);
+   if (!written) {
+     return nullptr;
+   }
+   return writer.ChunkedWriteFunc().CopyData();
+ }
+
+ // Serializes the whole profile (never the threads-only form) and, if that
+ // succeeds, copies it into a buffer obtained through aAllocator. Nothing is
+ // copied, and aAllocator is not used by this function, when serialization
+ // fails.
+ void profiler_get_profile_json_into_lazily_allocated_buffer(
+     const std::function<char*(size_t)>& aAllocator, double aSinceTime,
+     bool aIsShuttingDown) {
+   LOG("profiler_get_profile_json_into_lazily_allocated_buffer");
+
+   SpliceableChunkedJSONWriter writer;
+   if (WriteProfileToJSONWriter(writer, aSinceTime, aIsShuttingDown)) {
+     writer.ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer(aAllocator);
+   }
+ }
+
+ // Reports the parameters the profiler is currently running with.
+ //
+ // Does nothing if any out-parameter is null. When no ActivePS exists, the
+ // outputs are reset (0 / Nothing() / empty). Otherwise they receive the
+ // active capacity, duration, interval, features and filters; the filter
+ // pointers are the c_str() of the strings held by ActivePS, not copies.
+ void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
+                                double* aInterval, uint32_t* aFeatures,
+                                Vector<const char*>* aFilters) {
+   MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+   const bool haveAllOutParams =
+       aCapacity && aDuration && aInterval && aFeatures && aFilters;
+   if (!haveAllOutParams) {
+     return;
+   }
+
+   PSAutoLock lock;
+
+   if (ActivePS::Exists(lock)) {
+     *aCapacity = ActivePS::Capacity(lock).Value();
+     *aDuration = ActivePS::Duration(lock);
+     *aInterval = ActivePS::Interval(lock);
+     *aFeatures = ActivePS::Features(lock);
+
+     const Vector<std::string>& activeFilters = ActivePS::Filters(lock);
+     MOZ_ALWAYS_TRUE(aFilters->resize(activeFilters.length()));
+     for (uint32_t index = 0; index < activeFilters.length(); ++index) {
+       (*aFilters)[index] = activeFilters[index].c_str();
+     }
+     return;
+   }
+
+   // Profiler not running: hand back neutral values.
+   *aCapacity = 0;
+   *aDuration = Nothing();
+   *aInterval = 0;
+   *aFeatures = 0;
+   aFilters->clear();
+ }
+
+ // Reports, through aSetEnv, the MOZ_PROFILER_STARTUP* environment variables
+ // that describe this process's current profiler settings.
+ //
+ // If no ActivePS exists, only MOZ_PROFILER_STARTUP is reported, with an empty
+ // value. Otherwise MOZ_PROFILER_STARTUP is "1" and the entries, interval,
+ // features bitfield and comma-joined filters are reported as well.
+ void GetProfilerEnvVarsForChildProcess(
+     std::function<void(const char* key, const char* value)>&& aSetEnv) {
+   MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+   PSAutoLock lock;
+
+   if (!ActivePS::Exists(lock)) {
+     aSetEnv("MOZ_PROFILER_STARTUP", "");
+     return;
+   }
+
+   aSetEnv("MOZ_PROFILER_STARTUP", "1");
+   auto capacityString =
+       Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
+   aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
+
+   // The interval must be written with '.' as the decimal separator, because
+   // profiler_init() parses MOZ_PROFILER_STARTUP_INTERVAL with StringToDouble.
+   // std::to_string(double) formats like "%f", whose separator follows the
+   // current C locale, so build the six-decimal text from integers instead:
+   // integer conversions never emit a locale-dependent separator.
+   const double intervalMs = ActivePS::Interval(lock);
+   std::string intervalString;
+   if (intervalMs >= 0.0 && intervalMs < 1.0e9) {
+     // Round to the nearest millionth, matching the six decimals of "%f".
+     const uint64_t millionths =
+         static_cast<uint64_t>(intervalMs * 1000000.0 + 0.5);
+     auto fixedPoint = Smprintf("%u.%06u", unsigned(millionths / 1000000),
+                                unsigned(millionths % 1000000));
+     if (fixedPoint) {
+       intervalString = fixedPoint.get();
+     }
+   }
+   if (intervalString.empty()) {
+     // Negative, NaN or huge interval, or formatting failed: keep the previous
+     // best-effort (locale-dependent) formatting rather than sending nothing.
+     intervalString = std::to_string(intervalMs);
+   }
+   aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.c_str());
+
+   auto featuresString = Smprintf("%d", ActivePS::Features(lock));
+   aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
+
+   // Join the filters with commas, without a trailing comma.
+   std::string filtersString;
+   const Vector<std::string>& filters = ActivePS::Filters(lock);
+   for (uint32_t i = 0; i < filters.length(); ++i) {
+     filtersString += filters[i];
+     if (i != filters.length() - 1) {
+       filtersString += ",";
+     }
+   }
+   aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
+ }
+
+// Stores an exit profile in the active profiling session. The profile is
+// dropped when the profiler is not running.
+void profiler_received_exit_profile(const std::string& aExitProfile) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    ActivePS::AddExitProfile(lock, aExitProfile);
+  }
+}
+
+// Hands the exit profiles held by the active session over to the caller.
+// Returns an empty vector when the profiler is not running.
+Vector<std::string> profiler_move_exit_profiles() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (!ActivePS::Exists(lock)) {
+    return Vector<std::string>();
+  }
+  return ActivePS::MoveExitProfiles(lock);
+}
+
+// Writes the profile of this process, followed by a "processes" array with
+// the non-empty exit profiles held by the active session, as JSON to
+// aFilename. Requires an active profiling session. If the file cannot be
+// opened, nothing is written and the failure is logged.
+static void locked_profiler_save_profile_to_file(PSLockRef aLock,
+                                                 const char* aFilename,
+                                                 bool aIsShuttingDown = false) {
+  LOG("locked_profiler_save_profile_to_file(%s)", aFilename);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  std::ofstream stream;
+  stream.open(aFilename);
+  if (!stream.is_open()) {
+    // Do not fail silently: without this, a bad path gives no hint as to why
+    // no profile was saved.
+    LOG("locked_profiler_save_profile_to_file(%s) - could not open file",
+        aFilename);
+    return;
+  }
+
+  SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
+  w.Start();
+  {
+    locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0,
+                                                 aIsShuttingDown);
+
+    // Exit profiles are moved out of the session, so they are written once.
+    w.StartArrayProperty("processes");
+    Vector<std::string> exitProfiles = ActivePS::MoveExitProfiles(aLock);
+    for (auto& exitProfile : exitProfiles) {
+      if (!exitProfile.empty()) {
+        w.Splice(exitProfile);
+      }
+    }
+    w.EndArray();
+  }
+  w.End();
+
+  stream.close();
+}
+
+// Saves the current profile to aFilename. Does nothing when the profiler is
+// not running.
+void profiler_save_profile_to_file(const char* aFilename) {
+  LOG("profiler_save_profile_to_file(%s)", aFilename);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    locked_profiler_save_profile_to_file(lock, aFilename);
+  }
+}
+
+// Returns the feature bitfield computed by AvailableFeatures().
+uint32_t profiler_get_available_features() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  const uint32_t availableFeatures = AvailableFeatures();
+  return availableFeatures;
+}
+
+// Returns information about the active session's buffer, or Nothing() when
+// the profiler is not running.
+Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
+  }
+  return Nothing();
+}
+
+// Label-enter hook registered with mozglue; mirrors what AutoProfilerLabel's
+// constructor does. Pushes a label frame (category OTHER) on this thread's
+// profiling stack, if there is one, and returns that stack (or null) so that
+// it can be handed back to MozGlueBaseLabelExit.
+static void* MozGlueBaseLabelEnter(const char* aLabel,
+                                   const char* aDynamicString, void* aSp) {
+  ProfilingStack* stack = AutoProfilerLabel::sProfilingStack.get();
+  if (!stack) {
+    return nullptr;
+  }
+
+  stack->pushLabelFrame(aLabel, aDynamicString, aSp,
+                        ProfilingCategoryPair::OTHER);
+  return stack;
+}
+
+// Label-exit hook registered with mozglue; mirrors what AutoProfilerLabel's
+// destructor does. sProfilingStack is the value returned by
+// MozGlueBaseLabelEnter; a null value means no frame was pushed.
+static void MozGlueBaseLabelExit(void* sProfilingStack) {
+  if (!sProfilingStack) {
+    return;
+  }
+  static_cast<ProfilingStack*>(sProfilingStack)->pop();
+}
+
+// Starts a new profiling session. Takes the profiler lock reference (aLock)
+// and release-asserts that CorePS exists and that no session is active.
+//
+// Unreasonable settings fall back to defaults: a capacity too small to hold
+// one full stack becomes BASE_PROFILER_DEFAULT_ENTRIES, a non-positive
+// interval becomes BASE_PROFILER_DEFAULT_INTERVAL, and a non-positive
+// duration becomes Nothing().
+//
+// Every registered thread accepted by ActivePS::ShouldProfileThread() is
+// marked as being profiled and added to the session's live profiled threads.
+// The mozglue label hooks are then registered, and RacyFeatures is activated
+// last.
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+ double aInterval, uint32_t aFeatures,
+ const char** aFilters, uint32_t aFilterCount,
+ const Maybe<double>& aDuration) {
+ // Dump the requested (not yet sanitized) settings when logging is enabled.
+ if (LOG_TEST) {
+ LOG("locked_profiler_start");
+ LOG("- capacity = %d", int(aCapacity.Value()));
+ LOG("- duration = %.2f", aDuration ? *aDuration : -1);
+ LOG("- interval = %.2f", aInterval);
+
+ // Expanded once per known feature: logs the feature's name if it is set in
+ // aFeatures.
+#define LOG_FEATURE(n_, str_, Name_, desc_) \
+ if (ProfilerFeature::Has##Name_(aFeatures)) { \
+ LOG("- feature = %s", str_); \
+ }
+
+ BASE_PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)
+
+#undef LOG_FEATURE
+
+ for (uint32_t i = 0; i < aFilterCount; i++) {
+ LOG("- threads = %s", aFilters[i]);
+ }
+ }
+
+ MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));
+
+#if defined(GP_PLAT_amd64_windows)
+ InitializeWin64ProfilerHooks();
+#endif
+
+ // Fall back to the default values if the passed-in values are unreasonable.
+ // We want to be able to store at least one full stack.
+ // TODO: Review magic numbers.
+ PowerOfTwo32 capacity =
+ (aCapacity.Value() >=
+ ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry)
+ ? aCapacity
+ : BASE_PROFILER_DEFAULT_ENTRIES;
+ Maybe<double> duration = aDuration;
+
+ // A zero or negative duration is treated as "no duration given".
+ if (aDuration && *aDuration <= 0) {
+ duration = Nothing();
+ }
+ double interval = aInterval > 0 ? aInterval : BASE_PROFILER_DEFAULT_INTERVAL;
+
+ ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters, aFilterCount,
+ duration);
+
+ // Set up profiling for each registered thread, if appropriate.
+ const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
+ CorePS::RegisteredThreads(aLock);
+ for (auto& registeredThread : registeredThreads) {
+ RefPtr<ThreadInfo> info = registeredThread->Info();
+
+ if (ActivePS::ShouldProfileThread(aLock, info)) {
+ registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
+ ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(),
+ MakeUnique<ProfiledThreadData>(info));
+ registeredThread->RacyRegisteredThread().ReinitializeOnResume();
+ }
+ }
+
+ // Setup support for pushing/popping labels in mozglue.
+ RegisterProfilerLabelEnterExit(MozGlueBaseLabelEnter, MozGlueBaseLabelExit);
+
+ // At the very end, set up RacyFeatures.
+ RacyFeatures::SetActive(ActivePS::Features(aLock));
+}
+
+// Starts the profiler with the given settings, initializing it first if
+// necessary. If a session is already running it is stopped and replaced.
+void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
+                    uint32_t aFeatures, const char** aFilters,
+                    uint32_t aFilterCount, const Maybe<double>& aDuration) {
+  LOG("profiler_start");
+
+  // Sampler thread of the session being replaced, if any.
+  SamplerThread* previousSampler = nullptr;
+  {
+    PSAutoLock lock;
+
+    // Initialize if necessary.
+    if (!CorePS::Exists()) {
+      profiler_init(nullptr);
+    }
+
+    // Reset the current state if the profiler is running.
+    if (ActivePS::Exists(lock)) {
+      previousSampler = locked_profiler_stop(lock);
+    }
+
+    locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                          aFilterCount, aDuration);
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+
+  // The old sampler thread is deleted with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why. Deleting a null pointer is a no-op, so no
+  // check is needed when there was no previous session.
+  delete previousSampler;
+}
+
+// Makes sure the profiler is running with exactly the given settings,
+// initializing it first if necessary. A running session whose settings
+// already match is left untouched; a running session with different settings
+// is stopped and replaced.
+void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
+                             uint32_t aFeatures, const char** aFilters,
+                             uint32_t aFilterCount,
+                             const Maybe<double>& aDuration) {
+  LOG("profiler_ensure_started");
+
+  // Sampler thread of the session being replaced, if any.
+  SamplerThread* replacedSampler = nullptr;
+  {
+    PSAutoLock lock;
+
+    // Initialize if necessary.
+    if (!CorePS::Exists()) {
+      profiler_init(nullptr);
+    }
+
+    // A (re)start is needed unless a session with identical settings is
+    // already running.
+    bool needsStart = true;
+    if (ActivePS::Exists(lock)) {
+      if (ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
+                           aFilters, aFilterCount)) {
+        needsStart = false;
+      } else {
+        // Stop and restart with different settings.
+        replacedSampler = locked_profiler_stop(lock);
+      }
+    }
+
+    if (needsStart) {
+      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                            aFilterCount, aDuration);
+    }
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue (only when
+  // this call actually started the profiler).
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+
+  // The old sampler thread is deleted with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why. Deleting a null pointer is a no-op.
+  delete replacedSampler;
+}
+
+// Tears down the active profiling session and returns its SamplerThread.
+// Release-asserts that CorePS exists and that a session is active. The
+// returned thread has been told to Stop() but not destroyed; the callers in
+// this file delete it after releasing the profiler lock, hence [[nodiscard]].
+[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
+ LOG("locked_profiler_stop");
+
+ MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+ // At the very start, clear RacyFeatures.
+ RacyFeatures::SetInactive();
+
+ // TODO: Uninstall memory counter if it is possible from mozglue.
+ // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+ // mozilla::profiler::install_memory_counter(false);
+ // #endif
+
+ // Remove support for pushing/popping labels in mozglue.
+ RegisterProfilerLabelEnterExit(nullptr, nullptr);
+
+ // Stop sampling live threads.
+ const Vector<LiveProfiledThreadData>& liveProfiledThreads =
+ ActivePS::LiveProfiledThreads(aLock);
+ for (auto& thread : liveProfiledThreads) {
+ RegisteredThread* registeredThread = thread.mRegisteredThread;
+ registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false);
+ }
+
+ // The Stop() call doesn't actually stop Run(); that happens in this
+ // function's caller when the sampler thread is destroyed. Stop() just gives
+ // the SamplerThread a chance to do some cleanup with gPSMutex locked.
+ SamplerThread* samplerThread = ActivePS::Destroy(aLock);
+ samplerThread->Stop(aLock);
+
+ return samplerThread;
+}
+
+// Stops the profiler. Does nothing when it is not running.
+void profiler_stop() {
+  LOG("profiler_stop");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Stays null when there was no active session to stop.
+  SamplerThread* stoppedSampler = nullptr;
+  {
+    PSAutoLock lock;
+
+    if (ActivePS::Exists(lock)) {
+      stoppedSampler = locked_profiler_stop(lock);
+    }
+  }
+
+  // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
+  // would be waiting here with gPSMutex locked for SamplerThread::Run() to
+  // return so the join operation within the destructor can complete, but Run()
+  // needs to lock gPSMutex to return.
+  //
+  // Because this call occurs with gPSMutex unlocked, it -- including the final
+  // iteration of Run()'s loop -- must be able to detect deactivation and
+  // return in a way that's safe with respect to other gPSMutex-locking
+  // operations that may have occurred in the meantime.
+  //
+  // Deleting a null pointer is a no-op, which covers the "not running" case.
+  delete stoppedSampler;
+}
+
+// Returns whether the active session is paused; false when the profiler is
+// not running.
+bool profiler_is_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    return ActivePS::IsPaused(lock);
+  }
+  return false;
+}
+
+// Pauses the active session and records a Pause entry, stamped with the
+// current profiler time, in its buffer. Does nothing when the profiler is not
+// running.
+void profiler_pause() {
+  LOG("profiler_pause");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    // The paused flags are set before the Pause entry is added.
+    RacyFeatures::SetPaused();
+    ActivePS::SetIsPaused(lock, true);
+    ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
+  }
+}
+
+// Resumes the active session after recording a Resume entry, stamped with the
+// current profiler time, in its buffer. Does nothing when the profiler is not
+// running.
+void profiler_resume() {
+  LOG("profiler_resume");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    // Mirror image of profiler_pause(): the entry is added first, then the
+    // paused flags are cleared.
+    ActivePS::Buffer(lock).AddEntry(
+        ProfileBufferEntry::Resume(profiler_time()));
+    ActivePS::SetIsPaused(lock, false);
+    RacyFeatures::SetUnpaused();
+  }
+}
+
+// Returns whether sampling is paused in the active session; false when the
+// profiler is not running.
+bool profiler_is_sampling_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    return ActivePS::IsSamplingPaused(lock);
+  }
+  return false;
+}
+
+// Pauses sampling in the active session and records a PauseSampling entry,
+// stamped with the current profiler time, in its buffer. Does nothing when
+// the profiler is not running.
+void profiler_pause_sampling() {
+  LOG("profiler_pause_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    // The sampling-paused flags are set before the entry is added.
+    RacyFeatures::SetSamplingPaused();
+    ActivePS::SetIsSamplingPaused(lock, true);
+    ActivePS::Buffer(lock).AddEntry(
+        ProfileBufferEntry::PauseSampling(profiler_time()));
+  }
+}
+
+// Resumes sampling in the active session after recording a ResumeSampling
+// entry, stamped with the current profiler time, in its buffer. Does nothing
+// when the profiler is not running.
+void profiler_resume_sampling() {
+  LOG("profiler_resume_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    // Mirror image of profiler_pause_sampling(): entry first, flags after.
+    ActivePS::Buffer(lock).AddEntry(
+        ProfileBufferEntry::ResumeSampling(profiler_time()));
+    ActivePS::SetIsSamplingPaused(lock, false);
+    RacyFeatures::SetSamplingUnpaused();
+  }
+}
+
+// Returns the result of RacyFeatures::IsActiveWithFeature(aFeature).
+// This function runs both on and off the main thread.
+bool profiler_feature_active(uint32_t aFeature) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Hot path: RacyFeatures is consulted instead of ActivePS, so the profiler
+  // lock is not taken here.
+  const bool activeWithFeature = RacyFeatures::IsActiveWithFeature(aFeature);
+  return activeWithFeature;
+}
+
+// Appends aCounter to the counters kept by CorePS, under the profiler lock.
+// aCounter must be non-null: its label is read for the debug log.
+void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
+ DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
+ PSAutoLock lock;
+ CorePS::AppendCounter(lock, aCounter);
+}
+
+// Removes aCounter from the counters kept by CorePS, under the profiler lock.
+// aCounter must be non-null: its label is read for the debug log.
+void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
+ DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
+ PSAutoLock lock;
+ // Note: we don't enforce a final sample, though we could do so if the
+ // profiler was active
+ CorePS::RemoveCounter(lock, aCounter);
+}
+
+// Registers the calling thread under aName and returns its ProfilingStack.
+// If the thread is already registered, the existing registration (and name)
+// is kept, a text marker describing the attempt is recorded, and the existing
+// ProfilingStack is returned.
+ProfilingStack* profiler_register_thread(const char* aName,
+                                         void* aGuessStackTop) {
+  DEBUG_LOG("profiler_register_thread(%s)", aName);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  RegisteredThread* alreadyRegistered = FindCurrentThreadRegisteredThread(lock);
+  if (!alreadyRegistered) {
+    // Normal case: first registration of this thread.
+    void* stackTop = GetStackTop(aGuessStackTop);
+    return locked_register_thread(lock, aName, stackTop);
+  }
+
+  LOG("profiler_register_thread(%s) - thread %d already registered as %s",
+      aName, profiler_current_thread_id(), alreadyRegistered->Info()->Name());
+  // TODO: Use new name. This is currently not possible because the
+  // RegisteredThread's ThreadInfo cannot be changed.
+  // In the meantime, we record a marker that could be used in the frontend.
+  std::string text = std::string("Thread ") +
+                     std::to_string(profiler_current_thread_id()) + " \"" +
+                     alreadyRegistered->Info()->Name() +
+                     "\" attempted to re-register as \"" + aName + "\"";
+  BASE_PROFILER_MARKER_TEXT("profiler_register_thread again", OTHER_Profiling,
+                            MarkerThreadId::MainThread(), text);
+
+  return &alreadyRegistered->RacyRegisteredThread().ProfilingStack();
+}
+
+// Unregisters the calling thread from the profiler. Returns immediately if
+// CorePS no longer exists. If the thread is registered, it is removed from
+// the active session (if any), its TLS pointer is cleared, and its
+// RegisteredThread is removed from CorePS. If it is not registered, the event
+// is logged and, unless this is the main thread, a text marker is recorded
+// for the main thread.
+void profiler_unregister_thread() {
+ if (!CorePS::Exists()) {
+ // This function can be called after the main thread has already shut down.
+ return;
+ }
+
+ PSAutoLock lock;
+
+ RegisteredThread* registeredThread = FindCurrentThreadRegisteredThread(lock);
+ // The list lookup and the TLS pointer must agree, whether or not the thread
+ // is registered.
+ MOZ_RELEASE_ASSERT(registeredThread ==
+ TLSRegisteredThread::RegisteredThread(lock));
+ if (registeredThread) {
+ RefPtr<ThreadInfo> info = registeredThread->Info();
+
+ DEBUG_LOG("profiler_unregister_thread: %s", info->Name());
+
+ if (ActivePS::Exists(lock)) {
+ ActivePS::UnregisterThread(lock, registeredThread);
+ }
+
+ // Clear the pointer to the RegisteredThread object that we're about to
+ // destroy.
+ TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
+
+ // Remove the thread from the list of registered threads. This deletes the
+ // registeredThread object.
+ CorePS::RemoveRegisteredThread(lock, registeredThread);
+ } else {
+ LOG("profiler_unregister_thread() - thread %d already unregistered",
+ profiler_current_thread_id());
+ // We cannot record a marker on this thread because it was already
+ // unregistered. Send it to the main thread (unless this *is* already the
+ // main thread, which has been unregistered); this may be useful to catch
+ // mismatched register/unregister pairs in Firefox.
+ if (int tid = profiler_current_thread_id();
+ tid != profiler_main_thread_id()) {
+ BASE_PROFILER_MARKER_TEXT("profiler_unregister_thread again",
+ OTHER_Profiling, MarkerThreadId::MainThread(),
+ std::to_string(profiler_current_thread_id()));
+ }
+ // There are two ways FindCurrentThreadRegisteredThread() might have failed.
+ //
+ // - TLSRegisteredThread::Init() failed in locked_register_thread().
+ //
+ // - We've already called profiler_unregister_thread() for this thread.
+ // (Whether or not it should, this does happen in practice.)
+ //
+ // Either way, TLSRegisteredThread should be empty.
+ MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock));
+ }
+}
+
+// Records a page (browsing context id, inner window id, URL and embedder
+// inner window id) with CorePS. If a profiling session is active, expired
+// pages are discarded afterwards.
+void profiler_register_page(uint64_t aBrowsingContextID,
+                            uint64_t aInnerWindowID, const std::string& aUrl,
+                            uint64_t aEmbedderInnerWindowID) {
+  DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")",
+            aBrowsingContextID, aInnerWindowID, aUrl.c_str(),
+            aEmbedderInnerWindowID);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  // When a Browsing context is first loaded, the first url loaded in it will be
+  // about:blank. Because of that, this call keeps the first non-about:blank
+  // registration of window and discards the previous one.
+  RefPtr<PageInformation> newPage = new PageInformation(
+      aBrowsingContextID, aInnerWindowID, aUrl, aEmbedderInnerWindowID);
+  CorePS::AppendRegisteredPage(lock, std::move(newPage));
+
+  // Now that the page has been appended, drop any expired pages. This is only
+  // done while a session is active.
+  if (ActivePS::Exists(lock)) {
+    ActivePS::DiscardExpiredPages(lock);
+  }
+}
+
+// Unregisters the page with the given inner window id. Returns immediately if
+// CorePS no longer exists.
+void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
+  if (!CorePS::Exists()) {
+    // This function can be called after the main thread has already shut down.
+    return;
+  }
+
+  PSAutoLock lock;
+
+  // If the profiler is not active, we have no reason to keep the page
+  // information because there can't be any marker associated with it, so the
+  // page is removed outright.
+  if (!ActivePS::Exists(lock)) {
+    CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
+    return;
+  }
+
+  // While the profiler is active, we have to keep the page information since
+  // there may be some markers associated with the given page; the active
+  // session handles the unregistration.
+  ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
+}
+
+// Clears all registered pages and, if a session is active, the session's
+// unregistered pages as well. Returns immediately if CorePS no longer exists.
+void profiler_clear_all_pages() {
+  if (!CorePS::Exists()) {
+    // This function can be called after the main thread has already shut down.
+    return;
+  }
+
+  PSAutoLock lock;
+
+  CorePS::ClearRegisteredPages(lock);
+  if (ActivePS::Exists(lock)) {
+    ActivePS::ClearUnregisteredPages(lock);
+  }
+}
+
+// Marks the calling thread as sleeping. Does nothing if the thread has no
+// RacyRegisteredThread in TLS.
+// This function runs both on and off the main thread.
+void profiler_thread_sleep() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (RacyRegisteredThread* currentThread =
+          TLSRegisteredThread::RacyRegisteredThread()) {
+    currentThread->SetSleeping();
+  }
+}
+
+// Marks the calling thread as awake. Does nothing if the thread has no
+// RacyRegisteredThread in TLS.
+// This function runs both on and off the main thread.
+void profiler_thread_wake() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (RacyRegisteredThread* currentThread =
+          TLSRegisteredThread::RacyRegisteredThread()) {
+    currentThread->SetAwake();
+  }
+}
+
+// Returns true if the calling thread has a RacyRegisteredThread in TLS and
+// that object reports the thread as being profiled.
+bool detail::IsThreadBeingProfiled() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (const RacyRegisteredThread* currentThread =
+          TLSRegisteredThread::RacyRegisteredThread()) {
+    return currentThread->IsBeingProfiled();
+  }
+  return false;
+}
+
+// Returns whether the calling thread is marked as sleeping; false if it has
+// no RacyRegisteredThread in TLS. Must be called on the main thread.
+bool profiler_thread_is_sleeping() {
+  MOZ_RELEASE_ASSERT(profiler_is_main_thread());
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  RacyRegisteredThread* currentThread =
+      TLSRegisteredThread::RacyRegisteredThread();
+  return currentThread && currentThread->IsSleeping();
+}
+
+// Returns the time elapsed since CorePS::ProcessStartTime(), in milliseconds.
+double profiler_time() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  const TimeDuration sinceProcessStart =
+      TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
+  return sinceProcessStart.ToMilliseconds();
+}
+
+// Takes a synchronous sample of the calling thread and stores it in
+// aChunkedBuffer. Returns false, without sampling, when the profiler is not
+// active or when the calling thread is not registered; returns true
+// otherwise.
+bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer) {
+ MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+ PSAutoLock lock;
+
+ if (!ActivePS::Exists(lock)) {
+ return false;
+ }
+
+ RegisteredThread* registeredThread =
+ TLSRegisteredThread::RegisteredThread(lock);
+ if (!registeredThread) {
+ // An unregistered thread asserts in debug builds, and is reported as a
+ // plain failure otherwise.
+ MOZ_ASSERT(registeredThread);
+ return false;
+ }
+
+ // Wrap the caller's chunked buffer so the sample is written into it.
+ ProfileBuffer profileBuffer(aChunkedBuffer);
+
+ // Registers are only populated when native unwinding is available;
+ // otherwise they are cleared.
+ Registers regs;
+#if defined(HAVE_NATIVE_UNWIND)
+ regs.SyncPopulate();
+#else
+ regs.Clear();
+#endif
+
+ DoSyncSample(lock, *registeredThread, TimeStamp::NowUnfuzzed(), regs,
+ profileBuffer);
+
+ return true;
+}
+
+// Captures a backtrace of the calling thread into a newly allocated
+// single-chunk buffer. Returns null when the profiler is not active or when
+// the capture fails.
+UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Quick is-active check before allocating a buffer.
+  if (!profiler_is_active()) {
+    return nullptr;
+  }
+
+  auto stackBuffer = MakeUnique<ProfileChunkedBuffer>(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+      MakeUnique<ProfileBufferChunkManagerSingle>(
+          ProfileBufferChunkManager::scExpectedMaximumStackSize));
+
+  if (profiler_capture_backtrace_into(*stackBuffer)) {
+    return stackBuffer;
+  }
+  return nullptr;
+}
+
+// Captures a backtrace of the calling thread and wraps it in a
+// ProfilerBacktrace named "SyncProfile". Returns null if no backtrace could
+// be captured.
+UniqueProfilerBacktrace profiler_get_backtrace() {
+  UniquePtr<ProfileChunkedBuffer> capturedStack = profiler_capture_backtrace();
+
+  if (capturedStack) {
+    return UniqueProfilerBacktrace(
+        new ProfilerBacktrace("SyncProfile", std::move(capturedStack)));
+  }
+  return nullptr;
+}
+
+// Deleter used by UniqueProfilerBacktrace.
+// NOTE(review): presumably defined out of line so that clients of the header
+// do not need the complete ProfilerBacktrace type; confirm.
+void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
+ delete aBacktrace;
+}
+
+// Returns true if the calling thread currently holds a profiler lock.
+//
+// This helps users avoid calling `profiler_...` functions while the profiler
+// may already have a lock in place, which would prevent a 2nd recursive lock
+// (resulting in a crash or a never-ending wait). Two locks count:
+// - the main profiler mutex, used by most functions, and
+// - the buffer mutex, used directly in some functions without locking the
+//   main mutex, e.g., marker-related functions.
+bool profiler_is_locked_on_current_thread() {
+  if (PSAutoLock::IsLockedOnCurrentThread()) {
+    return true;
+  }
+  return CorePS::CoreBuffer().IsThreadSafeAndLockedOnCurrentThread();
+}
+
+// This is a simplified version of profiler_add_marker that can be easily passed
+// into the JS engine.
+// Records a text marker in the JS category with default marker options ({}).
+// Both strings are wrapped as null-terminated string views rather than
+// copied here, so both must be valid null-terminated strings.
+void profiler_add_js_marker(const char* aMarkerName, const char* aMarkerText) {
+ BASE_PROFILER_MARKER_TEXT(
+ ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, {},
+ ProfilerString8View::WrapNullTerminatedString(aMarkerText));
+}
+
+// Suspends the registered thread whose id is aThreadId, samples its stack
+// into aCollector, and resumes it. Does nothing if no registered thread has
+// that id. aFeatures is passed on to MergeStacks() and decides whether a
+// native leaf address is collected when no native stack walk is done.
+// aSampleNative only has an effect when HAVE_FASTINIT_NATIVE_UNWIND is
+// defined.
+//
+// NOTE: aCollector's methods will be called while the target thread is paused.
+// Doing things in those methods like allocating -- which may try to claim
+// locks -- is a surefire way to deadlock.
+void profiler_suspend_and_sample_thread(int aThreadId, uint32_t aFeatures,
+ ProfilerStackCollector& aCollector,
+ bool aSampleNative /* = true */) {
+ // Lock the profiler mutex
+ PSAutoLock lock;
+
+ const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
+ CorePS::RegisteredThreads(lock);
+ for (auto& thread : registeredThreads) {
+ RefPtr<ThreadInfo> info = thread->Info();
+ RegisteredThread& registeredThread = *thread.get();
+
+ if (info->ThreadId() == aThreadId) {
+ if (info->IsMainThread()) {
+ aCollector.SetIsMainThread();
+ }
+
+ // Allocate the space for the native stack
+ NativeStack nativeStack;
+
+ // Suspend, sample, and then resume the target thread.
+ Sampler sampler(lock);
+ TimeStamp now = TimeStamp::NowUnfuzzed();
+ sampler.SuspendAndSampleAndResumeThread(
+ lock, registeredThread, now,
+ [&](const Registers& aRegs, const TimeStamp& aNow) {
+ // The target thread is now suspended. Collect a native
+ // backtrace, and call the callback.
+ bool isSynchronous = false;
+#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
+ if (aSampleNative) {
+ // We can only use FramePointerStackWalk or MozStackWalk from
+ // suspend_and_sample_thread as other stackwalking methods may not be
+ // initialized.
+# if defined(USE_FRAME_POINTER_STACK_WALK)
+ DoFramePointerBacktrace(lock, registeredThread, aRegs,
+ nativeStack);
+# elif defined(USE_MOZ_STACK_WALK)
+ DoMozStackWalkBacktrace(lock, registeredThread, aRegs,
+ nativeStack);
+# else
+# error "Invalid configuration"
+# endif
+
+ MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
+ nativeStack, aCollector);
+ } else
+#endif
+ {
+ // No native stack walk: nativeStack is passed on as constructed, and
+ // only the leaf PC is reported if the Leaf feature is requested.
+ MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
+ nativeStack, aCollector);
+
+ if (ProfilerFeature::HasLeaf(aFeatures)) {
+ aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
+ }
+ }
+ });
+
+ // NOTE: Make sure to disable the sampler before it is destroyed, in case
+ // the profiler is running at the same time.
+ sampler.Disable(lock);
+ // Only the first thread with a matching id is sampled.
+ break;
+ }
+ }
+}
+
+// END externally visible functions
+////////////////////////////////////////////////////////////////////////
+
+} // namespace baseprofiler
+} // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform.h b/mozglue/baseprofiler/core/platform.h
new file mode 100644
index 0000000000..1913a0def6
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform.h
@@ -0,0 +1,132 @@
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google, Inc. nor the names of its contributors
+// may be used to endorse or promote products derived from this
+// software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#ifndef TOOLS_PLATFORM_H_
+#define TOOLS_PLATFORM_H_
+
+#include "PlatformMacros.h"
+
+#include "BaseProfiler.h"
+
+#include "mozilla/Logging.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+
+#include <functional>
+#include <stdint.h>
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+// Gate used by the LOG_TEST / DEBUG_LOG_TEST / VERBOSE_LOG_TEST macros, which
+// call it with levels 3, 4 and 5 respectively.
+// NOTE(review): presumably returns true when logging at aLevelToTest is
+// enabled; the definition is not in this header -- confirm.
+bool LogTest(int aLevelToTest);
+// printf-style output function used by the LOG / DEBUG_LOG / VERBOSE_LOG
+// macros; the format string is checked via MOZ_FORMAT_PRINTF.
+void PrintToConsole(const char* aFmt, ...) MOZ_FORMAT_PRINTF(1, 2);
+} // namespace baseprofiler
+} // namespace mozilla
+
+// These are for MOZ_BASE_PROFILER_LOGGING and above. It's the default logging
+// level for the profiler, and should be used sparingly.
+// Each message is prefixed with "[I <process id>/<thread id>] " and followed
+// by a newline. `arg` must be a string literal, because it is concatenated
+// with the prefix at compile time.
+#define LOG_TEST ::mozilla::baseprofiler::LogTest(3)
+#define LOG(arg, ...) \
+ do { \
+ if (LOG_TEST) { \
+ ::mozilla::baseprofiler::PrintToConsole( \
+ "[I %d/%d] " arg "\n", profiler_current_process_id(), \
+ profiler_current_thread_id(), ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+// These are for MOZ_BASE_PROFILER_DEBUG_LOGGING. It should be used for logging
+// that is somewhat more verbose than LOG.
+// Each message is prefixed with "[D <process id>/<thread id>] " and followed
+// by a newline. `arg` must be a string literal, because it is concatenated
+// with the prefix at compile time.
+#define DEBUG_LOG_TEST ::mozilla::baseprofiler::LogTest(4)
+#define DEBUG_LOG(arg, ...) \
+ do { \
+ if (DEBUG_LOG_TEST) { \
+ ::mozilla::baseprofiler::PrintToConsole( \
+ "[D %d/%d] " arg "\n", profiler_current_process_id(), \
+ profiler_current_thread_id(), ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+// These are for MOZ_BASE_PROFILER_VERBOSE_LOGGING. It should be used for
+// logging that is somewhat more verbose than DEBUG_LOG.
+// Each message is prefixed with "[V <process id>/<thread id>] " and followed
+// by a newline. `arg` must be a string literal, because it is concatenated
+// with the prefix at compile time.
+#define VERBOSE_LOG_TEST ::mozilla::baseprofiler::LogTest(5)
+#define VERBOSE_LOG(arg, ...) \
+ do { \
+ if (VERBOSE_LOG_TEST) { \
+ ::mozilla::baseprofiler::PrintToConsole( \
+ "[V %d/%d] " arg "\n", profiler_current_process_id(), \
+ profiler_current_thread_id(), ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+namespace mozilla {
+
+class JSONWriter;
+
+namespace baseprofiler {
+
+// Pointer to a byte in memory.
+using Address = uint8_t*;
+
+class PlatformData;
+
+// PlatformData is only forward-declared here, and an incomplete type cannot
+// be new'ed/deleted safely (-Wdelete-incomplete). This deleter and
+// AllocPlatformData() hide the details from clients.
+struct PlatformDataDestructor {
+  void operator()(PlatformData*);
+};
+
+using UniquePlatformData = UniquePtr<PlatformData, PlatformDataDestructor>;
+UniquePlatformData AllocPlatformData(int aThreadId);
+
+// Converts an array of feature-name strings to a feature bitfield.
+uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+                                      uint32_t aFeatureCount,
+                                      bool aIsStartup = false);
+
+void profiler_get_profile_json_into_lazily_allocated_buffer(
+    const std::function<char*(size_t)>& aAllocator, double aSinceTime,
+    bool aIsShuttingDown);
+
+// Bit flags to conveniently track various JS instrumentations.
+enum class JSInstrumentationFlags {
+  StackSampling = 0x1,
+  TraceLogging = 0x2,
+  Allocations = 0x4,
+};
+
+// Record an exit profile from a child process.
+void profiler_received_exit_profile(const std::string& aExitProfile);
+
+// Extract all received exit profiles that have not yet expired (i.e., they
+// still intersect with this process' buffer range).
+Vector<std::string> profiler_move_exit_profiles();
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* ndef TOOLS_PLATFORM_H_ */
diff --git a/mozglue/baseprofiler/core/shared-libraries-linux.cc b/mozglue/baseprofiler/core/shared-libraries-linux.cc
new file mode 100644
index 0000000000..c38e72378a
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-linux.cc
@@ -0,0 +1,835 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilerSharedLibraries.h"
+
+#define PATH_MAX_TOSTRING(x) #x
+#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x)
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fstream>
+#include "platform.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/Unused.h"
+
+#include <algorithm>
+#include <arpa/inet.h>
+#include <dlfcn.h>
+#include <elf.h>
+#include <fcntl.h>
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+# include <features.h>
+#endif
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <vector>
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+# include <link.h> // dl_phdr_info, ElfW()
+#else
+# error "Unexpected configuration"
+#endif
+
+#if defined(GP_OS_android)
+extern "C" MOZ_EXPORT __attribute__((weak)) int dl_iterate_phdr(
+ int (*callback)(struct dl_phdr_info* info, size_t size, void* data),
+ void* data);
+#endif
+
+#if defined(GP_OS_freebsd) && !defined(ElfW)
+# define ElfW(type) Elf_##type
+#endif
+
+// ----------------------------------------------------------------------------
+// Starting imports from toolkit/crashreporter/google-breakpad/, as needed by
+// this file when moved to mozglue.
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/memory_range.h.
+// A lightweight wrapper with a pointer and a length to encapsulate a contiguous
+// range of memory. It provides helper methods for checked access of a subrange
+// of the memory. Its implementation does not allocate memory or call into
+// libc functions, and is thus safer to use in a crashed environment.
+class MemoryRange {
+ public:
+  MemoryRange() : data_(nullptr), length_(0) {}
+
+  MemoryRange(const void* data, size_t length) { Set(data, length); }
+
+  // Tells whether the range holds no bytes at all.
+  bool IsEmpty() const {
+    // A null |data_| always comes with a zero |length_|; see Set().
+    return 0 == length_;
+  }
+
+  // Turns this object back into an empty range.
+  void Reset() {
+    // Set() stores a zero length whenever the pointer is null.
+    Set(nullptr, 0);
+  }
+
+  // Points this range at the |length| bytes that start at |data|.
+  void Set(const void* data, size_t length) {
+    data_ = static_cast<const uint8_t*>(data);
+    // A null pointer can only describe an empty range.
+    length_ = (data_ != nullptr) ? length : 0;
+  }
+
+  // Tells whether the |sub_length| bytes starting |sub_offset| bytes into
+  // this range all lie inside it.
+  bool Covers(size_t sub_offset, size_t sub_length) const {
+    // The start has to be within [ 0 .. length_ - 1 ].
+    if (sub_offset >= length_) return false;
+    // The end has to be within [ sub_offset .. length_ ]; the lower bound
+    // catches a sum that wrapped around.
+    const size_t sub_end = sub_offset + sub_length;
+    return sub_end >= sub_offset && sub_end <= length_;
+  }
+
+  // Gives the address of the |sub_length| bytes found |sub_offset| bytes
+  // into this range, or NULL when they do not all lie inside the range.
+  const void* GetData(size_t sub_offset, size_t sub_length) const {
+    if (!Covers(sub_offset, sub_length)) return nullptr;
+    return data_ + sub_offset;
+  }
+
+  // Typed flavour of GetData(): the subrange is sizeof(DataType) bytes long
+  // and the result is handed back as a |DataType| pointer.
+  template <typename DataType>
+  const DataType* GetData(size_t sub_offset) const {
+    const void* raw = GetData(sub_offset, sizeof(DataType));
+    return static_cast<const DataType*>(raw);
+  }
+
+  // Treats the bytes from |element_offset| on as an array of elements that
+  // are |element_size| bytes each, and gives the address of element number
+  // |element_index|, or NULL when that element is not inside this range.
+  const void* GetArrayElement(size_t element_offset, size_t element_size,
+                              unsigned element_index) const {
+    const size_t skipped = element_index * element_size;
+    return GetData(element_offset + skipped, element_size);
+  }
+
+  // Typed flavour of GetArrayElement(): elements are sizeof(ElementType)
+  // bytes each and the result is handed back as an |ElementType| pointer.
+  template <typename ElementType>
+  const ElementType* GetArrayElement(size_t element_offset,
+                                     unsigned element_index) const {
+    const void* raw =
+        GetArrayElement(element_offset, sizeof(ElementType), element_index);
+    return static_cast<const ElementType*>(raw);
+  }
+
+  // Builds the range made of the |sub_length| bytes found |sub_offset|
+  // bytes into this one. The result is an empty range when those bytes do
+  // not all lie inside this range.
+  MemoryRange Subrange(size_t sub_offset, size_t sub_length) const {
+    if (!Covers(sub_offset, sub_length)) return MemoryRange();
+    return MemoryRange(data_ + sub_offset, sub_length);
+  }
+
+  // Address of the first byte of the range.
+  const uint8_t* data() const { return data_; }
+
+  // Number of bytes in the range.
+  size_t length() const { return length_; }
+
+ private:
+  // Address of the first byte of the range.
+  const uint8_t* data_;
+
+  // Number of bytes in the range.
+  size_t length_;
+};
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.h
+// and inlined .cc.
+// A utility class for mapping a file into memory for read-only access of the
+// file content. NOTE(review): this inlined copy calls open, fstat, close, mmap
+// and munmap through libc rather than making the system calls directly.
+class MemoryMappedFile {
+ public:
+  MemoryMappedFile() {}
+
+  // Constructor that calls Map() to map a file at |path| into memory.
+  // If Map() fails, the object behaves as if it is default constructed.
+  MemoryMappedFile(const char* path, size_t offset) { Map(path, offset); }
+
+  MemoryMappedFile(const MemoryMappedFile&) = delete;
+  MemoryMappedFile& operator=(const MemoryMappedFile&) = delete;
+  // Releases the mapping (if any) so that it does not outlive this object.
+  ~MemoryMappedFile() { Unmap(); }
+
+  // Maps a file at |path| into memory, which can then be accessed via
+  // content() as a MemoryRange object or via data(), and returns true on
+  // success. Mapping an empty file will succeed but with data() and size()
+  // returning NULL and 0, respectively. An existing mapping is unmapped
+  // before a new mapping is created.
+  bool Map(const char* path, size_t offset) {
+    Unmap();
+
+    int fd = open(path, O_RDONLY, 0);
+    if (fd == -1) {
+      return false;
+    }
+
+#if defined(__x86_64__) || defined(__aarch64__) || \
+    (defined(__mips__) && _MIPS_SIM == _ABI64) || \
+    !(defined(GP_OS_linux) || defined(GP_OS_android))
+
+    struct stat st;
+    if (fstat(fd, &st) == -1 || st.st_size < 0) {
+#else
+    struct stat64 st;
+    if (fstat64(fd, &st) == -1 || st.st_size < 0) {
+#endif
+      close(fd);
+      return false;
+    }
+
+    // Strangely file size can be negative, but we check above that it is not.
+    size_t file_len = static_cast<size_t>(st.st_size);
+    // If the file does not extend beyond the offset, simply use an empty
+    // MemoryRange and return true. Don't bother to call mmap()
+    // even though mmap() can handle an empty file on some platforms.
+    if (offset >= file_len) {
+      close(fd);
+      return true;
+    }
+    // Map just the bytes past |offset| so Unmap() releases exactly this range.
+    const size_t map_len = file_len - offset;
+    void* data = mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, offset);
+    close(fd);
+    if (data == MAP_FAILED) {
+      return false;
+    }
+    content_.Set(data, map_len);
+    return true;
+  }
+
+  // Unmaps the memory for the mapped file. It's a no-op if no file is
+  // mapped.
+  void Unmap() {
+    if (content_.data()) {
+      munmap(const_cast<uint8_t*>(content_.data()), content_.length());
+      content_.Set(NULL, 0);
+    }
+  }
+
+  // Returns a MemoryRange object that covers the memory for the mapped
+  // file. The MemoryRange object is empty if no file is mapped.
+  const MemoryRange& content() const { return content_; }
+
+  // Returns a pointer to the beginning of the memory for the mapped file.
+  // or NULL if no file is mapped or the mapped file is empty.
+  const void* data() const { return content_.data(); }
+
+  // Returns the size in bytes of the mapped file, or zero if no file
+  // is mapped.
+  size_t size() const { return content_.length(); }
+
+ private:
+  // Mapped file content as a MemoryRange object.
+  MemoryRange content_;
+};
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/linux/file_id.h and inlined
+// .cc.
+// GNU binutils' ld defaults to 'sha1', which is 160 bits == 20 bytes,
+// so this is enough to fit that, which most binaries will use.
+// This is just a sensible default for vectors so most callers can get away with
+// stack allocation.
+static const size_t kDefaultBuildIdSize = 20;
+
+// Used in a few places for backwards-compatibility.
+typedef struct {
+ uint32_t data1;
+ uint16_t data2;
+ uint16_t data3;
+ uint8_t data4[8];
+} MDGUID; /* GUID */
+
+const size_t kMDGUIDSize = sizeof(MDGUID);
+
+class FileID {
+ public:
+ explicit FileID(const char* path) : path_(path) {}
+ ~FileID() {}
+
+ // Load the identifier for the elf file path specified in the constructor into
+ // |identifier|.
+ //
+ // The current implementation will look for a .note.gnu.build-id
+ // section and use that as the file id, otherwise it falls back to
+ // XORing the first 4096 bytes of the .text section to generate an identifier.
+ bool ElfFileIdentifier(std::vector<uint8_t>& identifier) {
+   MemoryMappedFile mapped_file(path_.c_str(), 0);
+   // A file shorter than the smallest (32-bit) ELF header cannot be an ELF.
+   if (!mapped_file.data() || mapped_file.size() < sizeof(Elf32_Ehdr))
+     return false;
+   return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
+ }
+
+ // Traits classes so consumers can write templatized code to deal
+ // with specific ELF bits.
+ struct ElfClass32 {
+   using Addr = Elf32_Addr;
+   using Ehdr = Elf32_Ehdr;
+   using Nhdr = Elf32_Nhdr;
+   using Phdr = Elf32_Phdr;
+   using Shdr = Elf32_Shdr;
+   using Half = Elf32_Half;
+   using Off = Elf32_Off;
+   using Sym = Elf32_Sym;
+   using Word = Elf32_Word;
+
+   static const int kClass = ELFCLASS32;
+   static const uint16_t kMachine = EM_386;
+   static const size_t kAddrSize = sizeof(Elf32_Addr);
+   static constexpr const char* kMachineName = "x86";
+ };
+
+ struct ElfClass64 {
+   using Addr = Elf64_Addr;
+   using Ehdr = Elf64_Ehdr;
+   using Nhdr = Elf64_Nhdr;
+   using Phdr = Elf64_Phdr;
+   using Shdr = Elf64_Shdr;
+   using Half = Elf64_Half;
+   using Off = Elf64_Off;
+   using Sym = Elf64_Sym;
+   using Word = Elf64_Word;
+
+   static const int kClass = ELFCLASS64;
+   static const uint16_t kMachine = EM_X86_64;
+   static const size_t kAddrSize = sizeof(Elf64_Addr);
+   static constexpr const char* kMachineName = "x86_64";
+ };
+
+ // Scans the |nsection| headers in |sections| for one whose type is
+ // |section_type| and whose name, read from the string table spanning
+ // [section_names, names_end), equals |name|. Returns NULL if none matches.
+ template <typename ElfClass>
+ static const typename ElfClass::Shdr* FindElfSectionByName(
+     const char* name, typename ElfClass::Word section_type,
+     const typename ElfClass::Shdr* sections, const char* section_names,
+     const char* names_end, int nsection) {
+   if (name == NULL || sections == NULL || nsection == 0) return NULL;
+
+   const int wanted_len = strlen(name);
+   if (wanted_len == 0) return NULL;
+
+   for (int idx = 0; idx < nsection; ++idx) {
+     const typename ElfClass::Shdr& header = sections[idx];
+     const char* candidate = section_names + header.sh_name;
+     if (header.sh_type != section_type) continue;
+     // The candidate plus its terminating NUL must end before |names_end|.
+     if (names_end - candidate < wanted_len + 1) continue;
+     if (strcmp(name, candidate) == 0) return &header;
+   }
+
+   return NULL;
+ }
+
+ struct ElfSegment {
+ const void* start;
+ size_t size;
+ };
+
+ // Translates |offset|, a file offset taken from an ELF header, into a
+ // pointer inside the mapping that starts at |elf_header|. The caller picks
+ // the pointee type through the extra template parameter T, so no further
+ // cast is needed on the result.
+ template <typename ElfClass, typename T>
+ static const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                           typename ElfClass::Off offset) {
+   const uintptr_t base = reinterpret_cast<uintptr_t>(elf_header);
+   return reinterpret_cast<const T*>(base + offset);
+ }
+
+// ELF note name and desc are padded to 32-bit words: round |a| up to 4.
+#define NOTE_PADDING(a) (((a) + 3) & ~3)
+
+ static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length,
+                                           std::vector<uint8_t>& identifier) {
+   static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
+                 "Elf32_Nhdr and Elf64_Nhdr should be the same");
+   typedef typename ElfClass32::Nhdr Nhdr;
+
+   // Walk the notes by offset, checking every size against |length| before
+   // use, so that a truncated or corrupt note fails the lookup cleanly.
+   const uint8_t* base = reinterpret_cast<const uint8_t*>(section);
+   size_t pos = 0;
+   while (pos < length && length - pos >= sizeof(Nhdr)) {
+     const Nhdr* note = reinterpret_cast<const Nhdr*>(base + pos);
+     const size_t room = length - pos - sizeof(Nhdr);
+     if (note->n_namesz > room) return false;
+     const size_t name_len = NOTE_PADDING(static_cast<size_t>(note->n_namesz));
+     if (name_len > room || note->n_descsz > room - name_len) return false;
+     if (note->n_type == NT_GNU_BUILD_ID) {
+       if (note->n_descsz == 0) return false;
+       const uint8_t* build_id = base + pos + sizeof(Nhdr) + name_len;
+       identifier.insert(identifier.end(), build_id,
+                         build_id + note->n_descsz);
+       return true;
+     }
+     pos += sizeof(Nhdr) + name_len +
+            NOTE_PADDING(static_cast<size_t>(note->n_descsz));
+   }
+   return false;
+ }
+
+ template <typename ElfClass>
+ static bool FindElfClassSection(const char* elf_base,
+ const char* section_name,
+ typename ElfClass::Word section_type,
+ const void** section_start,
+ size_t* section_size) {
+ typedef typename ElfClass::Ehdr Ehdr;
+ typedef typename ElfClass::Shdr Shdr;
+
+ if (!elf_base || !section_start || !section_size) {
+ return false;
+ }
+ // Bail out unless the image starts with the ELF magic bytes.
+ if (strncmp(elf_base, ELFMAG, SELFMAG) != 0) {
+ return false;
+ }
+ // Only handle images whose EI_CLASS matches the ElfClass parameter.
+ const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+ if (elf_header->e_ident[EI_CLASS] != ElfClass::kClass) {
+ return false;
+ }
+ // NOTE(review): e_shoff, e_shstrndx and sh_offset are used unchecked below.
+ const Shdr* sections =
+ GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+ const Shdr* section_names = sections + elf_header->e_shstrndx;
+ const char* names =
+ GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
+ const char* names_end = names + section_names->sh_size;
+
+ const Shdr* section =
+ FindElfSectionByName<ElfClass>(section_name, section_type, sections,
+ names, names_end, elf_header->e_shnum);
+ // The outputs are only written when a non-empty matching section exists.
+ if (section != NULL && section->sh_size > 0) {
+ *section_start = elf_base + section->sh_offset;
+ *section_size = section->sh_size;
+ }
+ // True means the headers were accepted, not that the section was found.
+ return true;
+ }
+
+ template <typename ElfClass>
+ static bool FindElfClassSegment(const char* elf_base,
+ typename ElfClass::Word segment_type,
+ std::vector<ElfSegment>* segments) {
+ typedef typename ElfClass::Ehdr Ehdr;
+ typedef typename ElfClass::Phdr Phdr;
+
+ if (!elf_base || !segments) {
+ return false;
+ }
+ // Bail out unless the image starts with the ELF magic bytes.
+ if (strncmp(elf_base, ELFMAG, SELFMAG) != 0) {
+ return false;
+ }
+ // Only handle images whose EI_CLASS matches the ElfClass parameter.
+ const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+ if (elf_header->e_ident[EI_CLASS] != ElfClass::kClass) {
+ return false;
+ }
+ // NOTE(review): e_phoff, p_offset and p_filesz are used unchecked below.
+ const Phdr* phdrs =
+ GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+ // Append the file range of every program header of the requested type.
+ for (int i = 0; i < elf_header->e_phnum; ++i) {
+ if (phdrs[i].p_type == segment_type) {
+ ElfSegment seg = {};
+ seg.start = elf_base + phdrs[i].p_offset;
+ seg.size = phdrs[i].p_filesz;
+ segments->push_back(seg);
+ }
+ }
+ // True even when no segment of |segment_type| was appended.
+ return true;
+ }
+
+ static bool IsValidElf(const void* elf_base) {
+ return strncmp(reinterpret_cast<const char*>(elf_base), ELFMAG, SELFMAG) ==
+ 0;
+ }
+
+ static int ElfClass(const void* elf_base) {
+ const ElfW(Ehdr)* elf_header =
+ reinterpret_cast<const ElfW(Ehdr)*>(elf_base);
+
+ return elf_header->e_ident[EI_CLASS];
+ }
+
+ static bool FindElfSection(const void* elf_mapped_base,
+                            const char* section_name, uint32_t section_type,
+                            const void** section_start, size_t* section_size) {
+   if (!elf_mapped_base || !section_start || !section_size) return false;
+
+   // Start from "not found" so that the outputs are always defined.
+   *section_start = NULL;
+   *section_size = 0;
+
+   if (!IsValidElf(elf_mapped_base)) return false;
+
+   const char* elf_base = static_cast<const char*>(elf_mapped_base);
+   bool parsed;
+   switch (ElfClass(elf_mapped_base)) {
+     case ELFCLASS32:
+       parsed = FindElfClassSection<ElfClass32>(
+           elf_base, section_name, section_type, section_start, section_size);
+       break;
+     case ELFCLASS64:
+       parsed = FindElfClassSection<ElfClass64>(
+           elf_base, section_name, section_type, section_start, section_size);
+       break;
+     default:
+       // Neither a 32-bit nor a 64-bit image.
+       return false;
+   }
+   // Success additionally requires that a section was actually located.
+   return parsed && *section_start != NULL;
+ }
+
+ static bool FindElfSegments(const void* elf_mapped_base,
+                             uint32_t segment_type,
+                             std::vector<ElfSegment>* segments) {
+   // Both the image and the output vector are required.
+   if (elf_mapped_base == NULL || segments == NULL) return false;
+   if (!IsValidElf(elf_mapped_base)) return false;
+
+   const char* elf_base = static_cast<const char*>(elf_mapped_base);
+   // Dispatch on the ELF class stored in the image's identification bytes.
+   switch (ElfClass(elf_mapped_base)) {
+     case ELFCLASS32:
+       return FindElfClassSegment<ElfClass32>(elf_base, segment_type,
+                                              segments);
+     case ELFCLASS64:
+       return FindElfClassSegment<ElfClass64>(elf_base, segment_type,
+                                              segments);
+     default:
+       return false;
+   }
+ }
+
+ // Looks for a GNU build-id note, first in the PT_NOTE segments and then in
+ // the .note.gnu.build-id section, and appends its bytes to |identifier|.
+ static bool FindElfBuildIDNote(const void* elf_mapped_base,
+                                std::vector<uint8_t>& identifier) {
+   // lld normally creates 2 PT_NOTEs, gold normally creates 1.
+   std::vector<ElfSegment> note_segments;
+   if (FindElfSegments(elf_mapped_base, PT_NOTE, &note_segments)) {
+     for (size_t i = 0; i < note_segments.size(); ++i) {
+       const ElfSegment& seg = note_segments[i];
+       if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
+         return true;
+       }
+     }
+   }
+   // No usable note in the segments: fall back to the section headers.
+   const void* note_section;
+   size_t note_size;
+   if (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
+                       &note_section, &note_size)) {
+     return false;
+   }
+   return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
+ }
+
+ // Attempt to locate the .text section of an ELF binary and generate
+ // a simple hash by XORing the first page worth of bytes into |identifier|.
+ static bool HashElfTextSection(const void* elf_mapped_base,
+ std::vector<uint8_t>& identifier) {
+ identifier.resize(kMDGUIDSize);
+ // NOTE(review): |identifier| stays resized even when false is returned below.
+ void* text_section;
+ size_t text_size;
+ if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
+ (const void**)&text_section, &text_size) ||
+ text_size == 0) {
+ return false;
+ }
+ // NOTE(review): each pass reads kMDGUIDSize bytes; the last may pass ptr_end.
+ // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
+ // function backwards-compatible.
+ memset(&identifier[0], 0, kMDGUIDSize);
+ const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
+ const uint8_t* ptr_end =
+ ptr + std::min(text_size, static_cast<size_t>(4096));
+ while (ptr < ptr_end) {
+ for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i];
+ ptr += kMDGUIDSize;
+ }
+ return true;
+ }
+
+ // Fills |identifier| for the ELF image mapped at |base|, preferring the
+ // build id note over the text-section hash. Returns false if both fail.
+ static bool ElfFileIdentifierFromMappedFile(
+     const void* base, std::vector<uint8_t>& identifier) {
+   // Preferred source: the build id note.
+   const bool has_note = FindElfBuildIDNote(base, identifier);
+   if (has_note) return true;
+
+   // Fallback: hash the first page of the text section.
+   return HashElfTextSection(base, identifier);
+ }
+
+ // These three functions are not ever called in an unsafe context, so it's OK
+ // to allocate memory and use libc.
+ static std::string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
+   static const char kHexDigits[] = "0123456789ABCDEF";  // same as "%02X"
+   std::string result;
+   for (unsigned int idx = 0; idx < count; ++idx) {
+     result += kHexDigits[bytes[idx] >> 4];
+     result += kHexDigits[bytes[idx] & 0xF];
+   }
+   return result;
+ }
+
+ // Convert the |identifier| data to a string. The string will
+ // be formatted as a UUID in all uppercase without dashes.
+ // (e.g., 22F065BBFC9C49F780FE26A7CEBD7BCE). Missing bytes are taken as 0.
+ static std::string ConvertIdentifierToUUIDString(
+     const std::vector<uint8_t>& identifier) {
+   uint8_t identifier_swapped[kMDGUIDSize] = {0};
+   if (!identifier.empty()) {
+     memcpy(identifier_swapped, identifier.data(),
+            std::min(kMDGUIDSize, identifier.size()));
+   }
+   // Endian-ness swap to match dump processor expectation.
+   uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
+   *data1 = htonl(*data1);
+   uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
+   *data2 = htons(*data2);
+   uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
+   *data3 = htons(*data3);
+   return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
+ }
+
+ // Convert the entire |identifier| data to a hex string (empty in, empty out).
+ static std::string ConvertIdentifierToString(
+     const std::vector<uint8_t>& identifier) {
+   return bytes_to_hex_string(identifier.data(), identifier.size());
+ }
+
+ private:
+ // Storage for the path specified
+ std::string path_;
+};
+
+// End of imports from toolkit/crashreporter/google-breakpad/.
+// ----------------------------------------------------------------------------
+
+struct LoadedLibraryInfo {
+ LoadedLibraryInfo(const char* aName, unsigned long aBaseAddress,
+ unsigned long aFirstMappingStart,
+ unsigned long aLastMappingEnd)
+ : mName(aName),
+ mBaseAddress(aBaseAddress),
+ mFirstMappingStart(aFirstMappingStart),
+ mLastMappingEnd(aLastMappingEnd) {}
+
+ std::string mName;
+ unsigned long mBaseAddress;
+ unsigned long mFirstMappingStart;
+ unsigned long mLastMappingEnd;
+};
+
+static std::string IDtoUUIDString(const std::vector<uint8_t>& aIdentifier) {
+ std::string uuid = FileID::ConvertIdentifierToUUIDString(aIdentifier);
+ // This is '0', not '\0', since it represents the breakpad id age.
+ uuid += '0';
+ return uuid;
+}
+
+// Get the breakpad Id for the binary file pointed by bin_name
+static std::string getId(const char* bin_name) {
+ std::vector<uint8_t> identifier;
+ identifier.reserve(kDefaultBuildIdSize);
+
+ FileID file_id(bin_name);
+ if (file_id.ElfFileIdentifier(identifier)) {
+ return IDtoUUIDString(identifier);
+ }
+
+ return {};
+}
+
+static SharedLibrary SharedLibraryAtPath(const char* path,
+                                         unsigned long libStart,
+                                         unsigned long libEnd,
+                                         unsigned long offset = 0) {
+  std::string pathStr = path;
+  // Paths here are '/'-separated: the name is whatever follows the last '/'.
+  size_t pos = pathStr.rfind('/');
+  std::string nameStr =
+      (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr;
+
+  return SharedLibrary(libStart, libEnd, offset, getId(path), nameStr, pathStr,
+                       nameStr, pathStr, std::string{}, "");
+}
+
+static int dl_iterate_callback(struct dl_phdr_info* dl_info, size_t size,
+                               void* data) {
+  // |data| is the std::vector<LoadedLibraryInfo> handed to dl_iterate_phdr.
+  auto* libraries = static_cast<std::vector<LoadedLibraryInfo>*>(data);
+
+  // Nothing to record for an object without program headers.
+  if (dl_info->dlpi_phnum <= 0) return 0;
+
+  // Track the span covered by the PT_LOAD segments, starting from an empty
+  // span (the lowest start begins at the largest unsigned long).
+  const unsigned long loadBias = dl_info->dlpi_addr;
+  unsigned long lowest = -1;
+  unsigned long highest = 0;
+
+  for (size_t idx = 0; idx < dl_info->dlpi_phnum; ++idx) {
+    const auto& phdr = dl_info->dlpi_phdr[idx];
+    if (phdr.p_type == PT_LOAD) {
+      const unsigned long segStart = loadBias + phdr.p_vaddr;
+      const unsigned long segEnd = segStart + phdr.p_memsz;
+      lowest = std::min(lowest, segStart);
+      highest = std::max(highest, segEnd);
+    }
+  }
+
+  libraries->push_back(
+      LoadedLibraryInfo(dl_info->dlpi_name, loadBias, lowest, highest));
+
+  return 0;
+}
+
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+ SharedLibraryInfo info;
+
+#if defined(GP_OS_linux)
+ // We need to find the name of the executable (exeName, exeNameLen) and the
+ // address of its executable section (exeExeAddr) in the running image.
+ char exeName[PATH_MAX];
+ memset(exeName, 0, sizeof(exeName));
+
+ ssize_t exeNameLen = readlink("/proc/self/exe", exeName, sizeof(exeName) - 1);
+ if (exeNameLen == -1) {
+ // readlink failed for whatever reason. Note this, but keep going.
+ exeName[0] = '\0';
+ exeNameLen = 0;
+ // LOG("SharedLibraryInfo::GetInfoForSelf(): readlink failed");
+ } else {
+ // Assert no buffer overflow.
+ MOZ_RELEASE_ASSERT(exeNameLen >= 0 &&
+ exeNameLen < static_cast<ssize_t>(sizeof(exeName)));
+ }
+
+ unsigned long exeExeAddr = 0;
+#endif
+
+#if defined(GP_OS_android)
+ // If dl_iterate_phdr doesn't exist, we give up immediately.
+ if (!dl_iterate_phdr) {
+ // On ARM Android, dl_iterate_phdr is provided by the custom linker.
+ // So if libxul was loaded by the system linker (e.g. as part of
+ // xpcshell when running tests), it won't be available and we should
+ // not call it.
+ return info;
+ }
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+ // Read info from /proc/self/maps. We ignore most of it.
+ pid_t pid = mozilla::baseprofiler::profiler_current_process_id();
+ char path[PATH_MAX];
+ SprintfLiteral(path, "/proc/%d/maps", pid);
+ std::ifstream maps(path);
+ std::string line;
+ while (std::getline(maps, line)) {
+ int ret;
+ unsigned long start;
+ unsigned long end;
+ char perm[6 + 1] = "";
+ unsigned long offset;
+ char modulePath[PATH_MAX + 1] = "";
+ ret = sscanf(line.c_str(),
+ "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n",
+ &start, &end, perm, &offset, modulePath);
+ if (!strchr(perm, 'x')) {
+ // Ignore non executable entries
+ continue;
+ }
+ if (ret != 5 && ret != 4) {
+ // LOG("SharedLibraryInfo::GetInfoForSelf(): "
+ // "reading /proc/self/maps failed");
+ continue;
+ }
+ // (|perm| starts out as "", so lines that failed before it were skipped.)
+# if defined(GP_OS_linux)
+ // Try to establish the main executable's load address.
+ if (exeNameLen > 0 && strcmp(modulePath, exeName) == 0) {
+ exeExeAddr = start;
+ }
+# elif defined(GP_OS_android)
+ // Use /proc/pid/maps to get the dalvik-jit section since it has no
+ // associated phdrs.
+ if (0 == strcmp(modulePath, "/dev/ashmem/dalvik-jit-code-cache")) {
+ info.AddSharedLibrary(
+ SharedLibraryAtPath(modulePath, start, end, offset));
+ if (info.GetSize() > 10000) {
+ // LOG("SharedLibraryInfo::GetInfoForSelf(): "
+ // "implausibly large number of mappings acquired");
+ break;
+ }
+ }
+# endif
+ }
+#endif
+
+ std::vector<LoadedLibraryInfo> libInfoList;
+
+ // We collect the bulk of the library info using dl_iterate_phdr.
+ dl_iterate_phdr(dl_iterate_callback, &libInfoList);
+ // The offset passed below is the first mapping's distance from the base.
+ for (const auto& libInfo : libInfoList) {
+ info.AddSharedLibrary(
+ SharedLibraryAtPath(libInfo.mName.c_str(), libInfo.mFirstMappingStart,
+ libInfo.mLastMappingEnd,
+ libInfo.mFirstMappingStart - libInfo.mBaseAddress));
+ }
+
+#if defined(GP_OS_linux)
+ // Make another pass over the information we just harvested from
+ // dl_iterate_phdr. If we see a nameless object mapped at what we earlier
+ // established to be the main executable's load address, attach the
+ // executable's name to that entry.
+ for (size_t i = 0; i < info.GetSize(); i++) {
+ SharedLibrary& lib = info.GetMutableEntry(i);
+ if (lib.GetStart() <= exeExeAddr && exeExeAddr <= lib.GetEnd() &&
+ lib.GetDebugPath().empty()) {
+ lib = SharedLibraryAtPath(exeName, lib.GetStart(), lib.GetEnd(),
+ lib.GetOffset());
+
+ // We only expect to see one such entry.
+ break;
+ }
+ }
+#endif
+
+ return info;
+}
+
+void SharedLibraryInfo::Initialize() { /* do nothing */
+}
diff --git a/mozglue/baseprofiler/core/shared-libraries-macos.cc b/mozglue/baseprofiler/core/shared-libraries-macos.cc
new file mode 100644
index 0000000000..13e66f9f26
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-macos.cc
@@ -0,0 +1,182 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilerSharedLibraries.h"
+
+#include "platform.h"
+
+#include "mozilla/Unused.h"
+#include <AvailabilityMacros.h>
+
+#include <dlfcn.h>
+#include <mach-o/arch.h>
+#include <mach-o/dyld_images.h>
+#include <mach-o/dyld.h>
+#include <mach-o/loader.h>
+#include <mach/mach_init.h>
+#include <mach/mach_traps.h>
+#include <mach/task_info.h>
+#include <mach/task.h>
+#include <sstream>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+
+// Architecture specific abstraction.
+#if defined(GP_ARCH_x86)
+typedef mach_header platform_mach_header;
+typedef segment_command mach_segment_command_type;
+# define MACHO_MAGIC_NUMBER MH_MAGIC
+# define CMD_SEGMENT LC_SEGMENT
+# define seg_size uint32_t
+#else
+typedef mach_header_64 platform_mach_header;
+typedef segment_command_64 mach_segment_command_type;
+# define MACHO_MAGIC_NUMBER MH_MAGIC_64
+# define CMD_SEGMENT LC_SEGMENT_64
+# define seg_size uint64_t
+#endif
+
+struct NativeSharedLibrary {
+ const platform_mach_header* header;
+ std::string path;
+};
+static std::vector<NativeSharedLibrary>* sSharedLibrariesList = nullptr;
+
+class MOZ_RAII SharedLibrariesLock {
+ public:
+ SharedLibrariesLock() { sSharedLibrariesMutex.Lock(); }
+
+ ~SharedLibrariesLock() { sSharedLibrariesMutex.Unlock(); }
+
+ SharedLibrariesLock(const SharedLibrariesLock&) = delete;
+ void operator=(const SharedLibrariesLock&) = delete;
+
+ private:
+ static mozilla::baseprofiler::detail::BaseProfilerMutex sSharedLibrariesMutex;
+};
+
+mozilla::baseprofiler::detail::BaseProfilerMutex
+ SharedLibrariesLock::sSharedLibrariesMutex;
+
+static void SharedLibraryAddImage(const struct mach_header* mh,
+                                  intptr_t vmaddr_slide) {
+  // NOTE: The parameter is declared as a mach_header even on 64-bit, where
+  // it ought to be a mach_header_64 (presumably for backwards-compatibility
+  // reasons), so cast it to the platform's header type first.
+  const auto* header = reinterpret_cast<const platform_mach_header*>(mh);
+
+  Dl_info imageInfo;
+  if (dladdr(header, &imageInfo) == 0) {
+    return;
+  }
+
+  SharedLibrariesLock lock;
+  if (sSharedLibrariesList == nullptr) {
+    return;
+  }
+
+  sSharedLibrariesList->push_back(
+      NativeSharedLibrary{header, imageInfo.dli_fname});
+}
+
+static void SharedLibraryRemoveImage(const struct mach_header* mh,
+                                     intptr_t vmaddr_slide) {
+  // NOTE: The parameter is declared as a mach_header even on 64-bit, where
+  // it ought to be a mach_header_64 (presumably for backwards-compatibility
+  // reasons), so cast it to the platform's header type first.
+  const auto* header = reinterpret_cast<const platform_mach_header*>(mh);
+
+  SharedLibrariesLock lock;
+  if (sSharedLibrariesList == nullptr) {
+    return;
+  }
+
+  for (auto it = sSharedLibrariesList->begin();
+       it != sSharedLibrariesList->end(); ++it) {
+    if (it->header == header) {
+      sSharedLibrariesList->erase(it);
+      return;
+    }
+  }
+}
+
+void SharedLibraryInfo::Initialize() {
+ // NOTE: We intentionally leak this memory here. We're allocating dynamically
+ // in order to avoid static initializers.
+ sSharedLibrariesList = new std::vector<NativeSharedLibrary>();
+
+ _dyld_register_func_for_add_image(SharedLibraryAddImage);
+ _dyld_register_func_for_remove_image(SharedLibraryRemoveImage);
+}
+
+static void addSharedLibrary(const platform_mach_header* header,
+                             const char* path, SharedLibraryInfo& info) {
+  const struct load_command* cmd =
+      reinterpret_cast<const struct load_command*>(header + 1);
+
+  seg_size size = 0;
+  unsigned long long start = reinterpret_cast<unsigned long long>(header);
+  // Walk the load commands until both the size of the __TEXT segment and the
+  // LC_UUID bytes are known, or until the commands run out.
+  const uint8_t* uuid_bytes = nullptr;
+  for (unsigned int i = 0;
+       cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0);
+       ++i) {
+    if (cmd->cmd == CMD_SEGMENT) {
+      const mach_segment_command_type* seg =
+          reinterpret_cast<const mach_segment_command_type*>(cmd);
+
+      if (!strcmp(seg->segname, "__TEXT")) {
+        size = seg->vmsize;
+      }
+    } else if (cmd->cmd == LC_UUID) {
+      const uuid_command* ucmd = reinterpret_cast<const uuid_command*>(cmd);
+      uuid_bytes = ucmd->uuid;
+    }
+
+    cmd = reinterpret_cast<const struct load_command*>(
+        reinterpret_cast<const char*>(cmd) + cmd->cmdsize);
+  }
+  // The id is all 16 UUID bytes as uppercase hex, followed by the age digit.
+  std::string uuid;
+  if (uuid_bytes != nullptr) {
+    static constexpr char digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
+                                        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+    for (int i = 0; i < 16; ++i) {
+      uint8_t byte = uuid_bytes[i];
+      uuid += digits[byte >> 4];
+      uuid += digits[byte & 0xFu];
+    }
+    // breakpad id age.
+    uuid += '0';
+  }
+
+  std::string pathStr = path;
+  // Paths here are '/'-separated: the name is whatever follows the last '/'.
+  size_t pos = pathStr.rfind('/');
+  std::string nameStr =
+      (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr;
+
+  const NXArchInfo* archInfo =
+      NXGetArchInfoFromCpuType(header->cputype, header->cpusubtype);
+
+  info.AddSharedLibrary(SharedLibrary(start, start + size, 0, uuid, nameStr,
+                                      pathStr, nameStr, pathStr, std::string{},
+                                      archInfo ? archInfo->name : ""));
+}
+
+// Translate the statically stored sSharedLibrariesList information into a
+// SharedLibraryInfo object. The result is empty if the list does not exist.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibrariesLock lock;
+  SharedLibraryInfo sharedLibraryInfo;
+  if (sSharedLibrariesList) {
+    for (auto& info : *sSharedLibrariesList) {
+      addSharedLibrary(info.header, info.path.c_str(), sharedLibraryInfo);
+    }
+  }
+  return sharedLibraryInfo;
+}
diff --git a/mozglue/baseprofiler/core/shared-libraries-win32.cc b/mozglue/baseprofiler/core/shared-libraries-win32.cc
new file mode 100644
index 0000000000..5bf7408193
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-win32.cc
@@ -0,0 +1,277 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+#include <dbghelp.h>
+#include <sstream>
+#include <psapi.h>
+
+#include "BaseProfilerSharedLibraries.h"
+
+#include "mozilla/glue/WindowsUnicode.h"
+#include "mozilla/Unused.h"
+#include "mozilla/WindowsVersion.h"
+
+#include <cctype>
+#include <string>
+
+#define CV_SIGNATURE 0x53445352 // 'SDSR'
+
+struct CodeViewRecord70 {  // Layout of the CodeView debug record that GetPdbInfo() reads out of a mapped module.
+ uint32_t signature;  // Compared against CV_SIGNATURE before the other fields are used.
+ GUID pdbSignature;  // Hex-formatted by GetPdbInfo() into the signature string.
+ uint32_t pdbAge;  // Handed back through GetPdbInfo()'s aAge out-parameter.
+ // A UTF-8 string, according to
+ // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/locator.cpp#L785
+ char pdbFileName[1];  // Consumed as a NUL-terminated string via char*; the length of 1 is presumably only a placeholder.
+};
+
+static constexpr char digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
+// Appends every byte in [aBegin, aEnd) to aOut as two hexadecimal characters
+// taken from the `digits` table, high nibble first.
+static void AppendHex(const unsigned char* aBegin, const unsigned char* aEnd,
+                      std::string& aOut) {
+  const unsigned char* cursor = aBegin;
+  while (cursor < aEnd) {
+    const unsigned char byte = *cursor++;
+    aOut += digits[byte >> 4];    // high nibble
+    aOut += digits[byte & 0xFu];  // low nibble
+  }
+}
+
+static constexpr bool WITH_PADDING = true;
+static constexpr bool WITHOUT_PADDING = false;
+// Appends aValue to aOut in hexadecimal, most significant nibble first.
+// With aWithPadding == true all sizeof(T) * 2 nibbles are written; otherwise
+// leading zero nibbles are dropped, but at least one digit is always emitted.
+template <typename T>
+static void AppendHex(T aValue, std::string& aOut, bool aWithPadding) {
+  constexpr int kNibbleCount = sizeof(T) * 2;
+  // Becomes true as soon as a digit has been written, so that zeroes inside
+  // the number are kept.
+  bool keepZeroes = aWithPadding;
+  for (int index = kNibbleCount - 1; index >= 0; --index) {
+    const unsigned nibble = (aValue >> (index * 4)) & 0xFu;
+    const bool isLastNibble = (index == 0);
+    if (!keepZeroes && !isLastNibble && nibble == 0) {
+      // Still inside the run of leading zeroes.
+      continue;
+    }
+    keepZeroes = true;
+    aOut += digits[nibble];
+  }
+}
+
+// Extracts the PDB identification data from the PE image mapped at aStart.
+//
+//   aStart     - base address of a mapped module; 0 is rejected.
+//   aSignature - the PDB GUID is appended to it as hexadecimal digits.
+//   aAge       - receives the PDB age.
+//   aPdbName   - receives a pointer to the PDB file name stored inside the
+//                image. The string is not copied, so the pointer is only
+//                usable while the module stays mapped.
+//
+// Returns false, without touching the out-parameters, when the image has no
+// CodeView record carrying the CV_SIGNATURE magic.
+//
+// The debug data directory is an array of IMAGE_DEBUG_DIRECTORY entries and
+// the CodeView entry is not required to be the first one, so all entries
+// described by the directory size are examined.
+static bool GetPdbInfo(uintptr_t aStart, std::string& aSignature,
+                       uint32_t& aAge, char** aPdbName) {
+  if (!aStart) {
+    return false;
+  }
+
+  PIMAGE_DOS_HEADER dosHeader = reinterpret_cast<PIMAGE_DOS_HEADER>(aStart);
+  if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+    return false;
+  }
+
+  PIMAGE_NT_HEADERS ntHeaders =
+      reinterpret_cast<PIMAGE_NT_HEADERS>(aStart + dosHeader->e_lfanew);
+  if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) {
+    return false;
+  }
+
+  const IMAGE_DATA_DIRECTORY& debugData =
+      ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG];
+  if (!debugData.VirtualAddress) {
+    return false;
+  }
+
+  PIMAGE_DEBUG_DIRECTORY debugDirectory =
+      reinterpret_cast<PIMAGE_DEBUG_DIRECTORY>(aStart +
+                                               debugData.VirtualAddress);
+
+  // Number of entries according to the header. If the size is missing or the
+  // array would run past the end of the image, fall back to looking at the
+  // first entry only, which is what this function has always done.
+  size_t entryCount = debugData.Size / sizeof(IMAGE_DEBUG_DIRECTORY);
+  const uint64_t directoryEnd =
+      static_cast<uint64_t>(debugData.VirtualAddress) + debugData.Size;
+  if (entryCount == 0 ||
+      directoryEnd > ntHeaders->OptionalHeader.SizeOfImage) {
+    entryCount = 1;
+  }
+
+  CodeViewRecord70* debugInfo = nullptr;
+  for (size_t i = 0; i < entryCount; ++i) {
+    const IMAGE_DEBUG_DIRECTORY& entry = debugDirectory[i];
+    // AddressOfRawData is 0 when the debug data is not mapped into memory.
+    if (entry.Type != IMAGE_DEBUG_TYPE_CODEVIEW || !entry.AddressOfRawData) {
+      continue;
+    }
+    CodeViewRecord70* candidate =
+        reinterpret_cast<CodeViewRecord70*>(aStart + entry.AddressOfRawData);
+    if (candidate->signature == CV_SIGNATURE) {
+      debugInfo = candidate;
+      break;
+    }
+  }
+  if (!debugInfo) {
+    return false;
+  }
+
+  aAge = debugInfo->pdbAge;
+  GUID& pdbSignature = debugInfo->pdbSignature;
+  AppendHex(pdbSignature.Data1, aSignature, WITH_PADDING);
+  AppendHex(pdbSignature.Data2, aSignature, WITH_PADDING);
+  AppendHex(pdbSignature.Data3, aSignature, WITH_PADDING);
+  AppendHex(reinterpret_cast<const unsigned char*>(&pdbSignature.Data4),
+            reinterpret_cast<const unsigned char*>(&pdbSignature.Data4) +
+                sizeof(pdbSignature.Data4),
+            aSignature);
+
+  // The PDB file name could be different from module filename, so report both
+  // e.g. The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb
+  *aPdbName = debugInfo->pdbFileName;
+
+  return true;
+}
+
+// Returns the file version stored in dllPath's version resource, formatted
+// as the four 16-bit halves of dwFileVersionMS / dwFileVersionLS (high word
+// first) joined with '.'. Returns an empty string when the version
+// information cannot be read.
+static std::string GetVersion(wchar_t* dllPath) {
+  DWORD infoSize = GetFileVersionInfoSizeW(dllPath, nullptr);
+  if (infoSize == 0) {
+    return {};
+  }
+
+  mozilla::UniquePtr<unsigned char[]> infoData =
+      mozilla::MakeUnique<unsigned char[]>(infoSize);
+  if (!GetFileVersionInfoW(dllPath, 0, infoSize, infoData.get())) {
+    return {};
+  }
+
+  VS_FIXEDFILEINFO* vInfo = nullptr;
+  UINT vInfoLen = 0;
+  if (!VerQueryValueW(infoData.get(), L"\\",
+                      reinterpret_cast<LPVOID*>(&vInfo), &vInfoLen)) {
+    return {};
+  }
+  // For the root block the reported length is the size of the structure in
+  // bytes; a zero or short length means there is no complete
+  // VS_FIXEDFILEINFO to read.
+  if (!vInfo || vInfoLen < sizeof(VS_FIXEDFILEINFO)) {
+    return {};
+  }
+
+  return std::to_string(vInfo->dwFileVersionMS >> 16) + '.' +
+         std::to_string(vInfo->dwFileVersionMS & 0xFFFF) + '.' +
+         std::to_string(vInfo->dwFileVersionLS >> 16) + '.' +
+         std::to_string(vInfo->dwFileVersionLS & 0xFFFF);
+}
+
+// Enumerates the modules loaded in the current process and returns a
+// SharedLibraryInfo with one SharedLibrary per module for which a path and
+// module information could be obtained. Modules whose PDB data cannot be
+// read are still added, with an empty breakpad id and empty PDB name/path.
+// If the module list itself cannot be enumerated, the result is empty.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibraryInfo sharedLibraryInfo;
+
+  HANDLE hProcess = GetCurrentProcess();
+  mozilla::UniquePtr<HMODULE[]> hMods;
+  size_t modulesNum = 0;
+  if (hProcess != NULL) {
+    DWORD modulesSize;
+    // First call: only ask how many bytes the module list needs.
+    if (!EnumProcessModules(hProcess, nullptr, 0, &modulesSize)) {
+      return sharedLibraryInfo;
+    }
+    modulesNum = modulesSize / sizeof(HMODULE);
+    hMods = mozilla::MakeUnique<HMODULE[]>(modulesNum);
+    if (!EnumProcessModules(hProcess, hMods.get(), modulesNum * sizeof(HMODULE),
+                            &modulesSize)) {
+      return sharedLibraryInfo;
+    }
+    // The list may have shrunk between calls
+    if (modulesSize / sizeof(HMODULE) < modulesNum) {
+      modulesNum = modulesSize / sizeof(HMODULE);
+    }
+  }
+
+  for (unsigned int i = 0; i < modulesNum; i++) {
+    wchar_t modulePath[MAX_PATH + 1];
+    if (!GetModuleFileNameExW(hProcess, hMods[i], modulePath,
+                              std::size(modulePath))) {
+      continue;
+    }
+    mozilla::UniquePtr<char[]> utf8ModulePath(
+        mozilla::glue::WideToUTF8(modulePath));
+    if (!utf8ModulePath) {
+      continue;
+    }
+
+    MODULEINFO module = {0};
+    if (!GetModuleInformation(hProcess, hMods[i], &module,
+                              sizeof(MODULEINFO))) {
+      continue;
+    }
+
+    std::string modulePathStr(utf8ModulePath.get());
+    size_t pos = modulePathStr.find_last_of("\\/");
+    std::string moduleNameStr = (pos != std::string::npos)
+                                    ? modulePathStr.substr(pos + 1)
+                                    : modulePathStr;
+
+    // Hackaround for Bug 1607574.  Nvidia's shim driver nvd3d9wrap[x].dll
+    // detours LoadLibraryExW when it's loaded and the detour function causes
+    // AV when the code tries to access data pointing to an address within
+    // unloaded nvinit[x].dll.
+    // The crashing code is executed when a given parameter is "detoured.dll"
+    // and OS version is older than 6.2.  We hit that crash at the following
+    // call to LoadLibraryEx even if we specify LOAD_LIBRARY_AS_DATAFILE.
+    // We work around it by skipping LoadLibraryEx, and add a library info with
+    // a dummy breakpad id instead.
+#if !defined(_M_ARM64)
+#  if defined(_M_AMD64)
+    LPCWSTR kNvidiaShimDriver = L"nvd3d9wrapx.dll";
+    LPCWSTR kNvidiaInitDriver = L"nvinitx.dll";
+#  elif defined(_M_IX86)
+    LPCWSTR kNvidiaShimDriver = L"nvd3d9wrap.dll";
+    LPCWSTR kNvidiaInitDriver = L"nvinit.dll";
+#  endif
+    constexpr std::string_view detoured_dll = "detoured.dll";
+    if (std::equal(moduleNameStr.cbegin(), moduleNameStr.cend(),
+                   detoured_dll.cbegin(), detoured_dll.cend(),
+                   [](char aModuleChar, char aDetouredChar) {
+                     // moduleNameStr is UTF-8, so bytes >= 0x80 can occur.
+                     // std::tolower requires a value representable as
+                     // unsigned char; passing a negative char is undefined.
+                     return std::tolower(static_cast<unsigned char>(
+                                aModuleChar)) == aDetouredChar;
+                   }) &&
+        !mozilla::IsWin8OrLater() && ::GetModuleHandleW(kNvidiaShimDriver) &&
+        !::GetModuleHandleW(kNvidiaInitDriver)) {
+      const std::string pdbNameStr = "detoured.pdb";
+      SharedLibrary shlib((uintptr_t)module.lpBaseOfDll,
+                          (uintptr_t)module.lpBaseOfDll + module.SizeOfImage,
+                          0,  // DLLs are always mapped at offset 0 on Windows
+                          "000000000000000000000000000000000", moduleNameStr,
+                          modulePathStr, pdbNameStr, pdbNameStr, "", "");
+      sharedLibraryInfo.AddSharedLibrary(shlib);
+      continue;
+    }
+#endif  // !defined(_M_ARM64)
+
+    std::string breakpadId;
+    // Load the module again to make sure that its handle will remain
+    // valid as we attempt to read the PDB information from it.  We load the
+    // DLL as a datafile so that if the module actually gets unloaded between
+    // the call to EnumProcessModules and the following LoadLibraryEx, we
+    // don't end up running the now newly loaded module's DllMain function. If
+    // the module is already loaded, LoadLibraryEx just increments its
+    // refcount.
+    //
+    // Note that because of the race condition above, merely loading the DLL
+    // again is not safe enough, therefore we also need to make sure that we
+    // can read the memory mapped at the base address before we can safely
+    // proceed to actually access those pages.
+    HMODULE handleLock =
+        LoadLibraryExW(modulePath, NULL, LOAD_LIBRARY_AS_DATAFILE);
+    MEMORY_BASIC_INFORMATION vmemInfo = {0};
+    std::string pdbSig;
+    // Only meaningful after GetPdbInfo() succeeds; initialised so that it
+    // never holds an indeterminate value.
+    uint32_t pdbAge = 0;
+    std::string pdbPathStr;
+    std::string pdbNameStr;
+    char* pdbName = nullptr;
+    if (handleLock &&
+        sizeof(vmemInfo) ==
+            VirtualQuery(module.lpBaseOfDll, &vmemInfo, sizeof(vmemInfo)) &&
+        vmemInfo.State == MEM_COMMIT &&
+        GetPdbInfo((uintptr_t)module.lpBaseOfDll, pdbSig, pdbAge, &pdbName)) {
+      MOZ_ASSERT(breakpadId.empty());
+      breakpadId += pdbSig;
+      AppendHex(pdbAge, breakpadId, WITHOUT_PADDING);
+
+      pdbPathStr = pdbName;
+      // Named differently from the module-path `pos` above to avoid
+      // shadowing it.
+      size_t pdbPos = pdbPathStr.find_last_of("\\/");
+      pdbNameStr = (pdbPos != std::string::npos)
+                       ? pdbPathStr.substr(pdbPos + 1)
+                       : pdbPathStr;
+    }
+
+    SharedLibrary shlib((uintptr_t)module.lpBaseOfDll,
+                        (uintptr_t)module.lpBaseOfDll + module.SizeOfImage,
+                        0,  // DLLs are always mapped at offset 0 on Windows
+                        breakpadId, moduleNameStr, modulePathStr, pdbNameStr,
+                        pdbPathStr, GetVersion(modulePath), "");
+    sharedLibraryInfo.AddSharedLibrary(shlib);
+
+    FreeLibrary(handleLock);  // ok to free null handles
+  }
+
+  return sharedLibraryInfo;
+}
+
+// Intentionally a no-op in this implementation.
+void SharedLibraryInfo::Initialize() {}
diff --git a/mozglue/baseprofiler/core/vtune/ittnotify.h b/mozglue/baseprofiler/core/vtune/ittnotify.h
new file mode 100644
index 0000000000..04adf9eb5e
--- /dev/null
+++ b/mozglue/baseprofiler/core/vtune/ittnotify.h
@@ -0,0 +1,4127 @@
+// clang-format off
+
+/* <copyright>
+ This file is provided under a dual BSD/GPLv2 license. When using or
+ redistributing this file, you may do so under either license.
+
+ GPL LICENSE SUMMARY
+
+ Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of version 2 of the GNU General Public License as
+ published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ The full GNU General Public License is included in this distribution
+ in the file called LICENSE.GPL.
+
+ Contact Information:
+ http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
+
+ BSD LICENSE
+
+ Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+</copyright> */
+#ifndef _ITTNOTIFY_H_
+# define _ITTNOTIFY_H_
+
+/**
+@file
+@brief Public User API functions and types
+@mainpage
+
+The ITT API is used to annotate a user's program with additional information
+that can be used by correctness and performance tools. The user inserts
+calls in their program. Those calls generate information that is collected
+at runtime, and used by Intel(R) Threading Tools.
+
+@section API Concepts
+The following general concepts are used throughout the API.
+
+@subsection Unicode Support
+Many API functions take character string arguments. On Windows, there
+are two versions of each such function. The function name is suffixed
+by W if Unicode support is enabled, and by A otherwise. Any API function
+that takes a character string argument adheres to this convention.
+
+@subsection Conditional Compilation
+Many users prefer having an option to modify ITT API code when linking it
+inside their runtimes. ITT API header file provides a mechanism to replace
+ITT API function names inside your code with empty strings. To do this,
+define the macros INTEL_NO_ITTNOTIFY_API during compilation and remove the
+static library from the linker script.
+
+@subsection Domains
+[see domains]
+Domains provide a way to separate notification for different modules or
+libraries in a program. Domains are specified by dotted character strings,
+e.g. TBB.Internal.Control.
+
+A mechanism (to be specified) is provided to enable and disable
+domains. By default, all domains are enabled.
+@subsection Named Entities and Instances
+Named entities (frames, regions, tasks, and markers) communicate
+information about the program to the analysis tools. A named entity often
+refers to a section of program code, or to some set of logical concepts
+that the programmer wants to group together.
+
+Named entities relate to the programmer's static view of the program. When
+the program actually executes, many instances of a given named entity
+may be created.
+
+The API annotations denote instances of named entities. The actual
+named entities are displayed using the analysis tools. In other words,
+the named entities come into existence when instances are created.
+
+Instances of named entities may have instance identifiers (IDs). Some
+API calls use instance identifiers to create relationships between
+different instances of named entities. Other API calls associate data
+with instances of named entities.
+
+Some named entities must always have instance IDs. In particular, regions
+and frames always have IDs. Task and markers need IDs only if the ID is
+needed in another API call (such as adding a relation or metadata).
+
+The lifetime of instance IDs is distinct from the lifetime of
+instances. This allows various relationships to be specified separate
+from the actual execution of instances. This flexibility comes at the
+expense of extra API calls.
+
+The same ID may not be reused for different instances, unless a previous
+[ref] __itt_id_destroy call for that ID has been issued.
+*/
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN /* Numeric OS identifiers; each one is only defined here if the includer has not defined it. */
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS_FREEBSD
+# define ITT_OS_FREEBSD 4
+#endif /* ITT_OS_FREEBSD */
+
+#ifndef ITT_OS /* Select the target OS from compiler-provided macros unless ITT_OS was set already. */
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# elif defined( __FreeBSD__ )
+# define ITT_OS ITT_OS_FREEBSD
+# else /* anything not matched above is treated as Linux */
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN /* Numeric platform identifiers; each one is only defined here if the includer has not defined it. */
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM_FREEBSD
+# define ITT_PLATFORM_FREEBSD 4
+#endif /* ITT_PLATFORM_FREEBSD */
+
+#ifndef ITT_PLATFORM /* Derived from ITT_OS unless ITT_PLATFORM was set already. */
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# elif ITT_OS==ITT_OS_FREEBSD
+# define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+# else /* ITT_OS_LINUX and any other ITT_OS value map to POSIX */
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef ITTAPI_CDECL /* cdecl calling-convention decoration; may be pre-defined by the includer. */
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define ITTAPI_CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define ITTAPI_CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+# define ITTAPI_CDECL /* expands to nothing: actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* ITTAPI_CDECL */
+
+#ifndef STDCALL /* stdcall calling-convention decoration; may be pre-defined by the includer. */
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* expands to nothing: supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI ITTAPI_CDECL /* decoration placed on the ITT API declarations that follow */
+#define LIBITTAPI ITTAPI_CDECL
+
+/* TODO: Temporary for compatibility! Aliases of ITTAPI_CDECL under the *_CALL names. */
+#define ITTAPI_CALL ITTAPI_CDECL
+#define LIBITTAPI_CALL ITTAPI_CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro")
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro"
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# include "vtune/legacy/ittnotify.h"
+#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n /* raw token paste; p and n are not macro-expanded at this level */
+#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) /* extra level so that p and n are macro-expanded before pasting */
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR 3
+#define ITT_MINOR 0
+
+/* Standard versioning of a token with major and minor version numbers: x becomes x_<ITT_MAJOR>_<ITT_MINOR>, i.e. x_3_0 with the values above */
+#define ITT_VERSIONIZE(x) \
+ ITT_JOIN(x, \
+ ITT_JOIN(_, \
+ ITT_JOIN(ITT_MAJOR, \
+ ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+# define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+# define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) /* with the default prefix/postfix: n -> __itt_<n>_ptr__3_0 */
+
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) /* no outer parentheses: the call-site argument list attaches to the trailing ITTNOTIFY_NAME(n) */
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) /* same shape as ITTNOTIFY_VOID, yielding 0 when the pointer is null */
+
+#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
+#ifdef ITT_STUB /* drop any earlier definitions so that the ones below take effect */
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args) \
+ typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
+ extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); /* declares the function-pointer type <ITTNOTIFY_NAME(name)>_t and an extern pointer variable of that type */
+#define ITT_STUB ITT_STUBV /* here both spellings expand to the same declarations */
+/** @endcond */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup public Public API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup control Collection Control
+ * @ingroup public
+ * General behavior: application continues to run, but no profiling information is being collected
+ *
+ * Pausing occurs not only for the current thread but for the whole process as well as spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ * - Does not analyze or report errors that involve memory access.
+ * - Other errors are reported as usual. Pausing data collection in
+ * Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ * only pauses tracing and analyzing memory access.
+ * It does not pause tracing or analyzing threading APIs.
+ * .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Does continue to record when new threads are started.
+ * .
+ * - Other effects:
+ * - Possible reduction of runtime overhead.
+ * .
+ * @{
+ */
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+/** @brief Detach collection */
+void ITTAPI __itt_detach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* API enabled: each name expands to a null-checked call through its stub pointer */
+ITT_STUBV(ITTAPI, void, pause, (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+ITT_STUBV(ITTAPI, void, detach, (void))
+#define __itt_pause ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
+#define __itt_resume ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#define __itt_detach ITTNOTIFY_VOID(detach)
+#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
+#else /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing and the _ptr names to 0 */
+#define __itt_pause()
+#define __itt_pause_ptr 0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#define __itt_detach()
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0 */
+#define __itt_pause_ptr 0
+#define __itt_resume_ptr 0
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} control group */
+/** @endcond */
+
+/**
+ * @defgroup threads Threads
+ * @ingroup public
+ * Give names to threads
+ * @{
+ */
+/**
+ * @brief Sets thread name of calling thread
+ * @param[in] name - name of thread
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN /* Windows: separate A (char) and W (wchar_t) entry points */
+void ITTAPI __itt_thread_set_nameA(const char *name);
+void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE) /* the unsuffixed name maps to the W variant in Unicode builds */
+# define __itt_thread_set_name __itt_thread_set_nameW
+# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
+#else /* UNICODE */
+# define __itt_thread_set_name __itt_thread_set_nameA
+# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_thread_set_name(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* API enabled: each name expands to a null-checked call through its stub pointer */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name))
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA)
+#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA)
+#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW)
+#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name)
+#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing and the _ptr names to 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA(name)
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW(name)
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name(name)
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr names are defined, as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void ITTAPI __itt_thread_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* API enabled: the name expands to a null-checked call through its stub pointer */
+ITT_STUBV(ITTAPI, void, thread_ignore, (void))
+#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore)
+#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr name to 0 */
+#define __itt_thread_ignore()
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0 */
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} threads group */
+
+/**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* API enabled: the name expands to a null-checked call through its stub pointer */
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr name to 0 */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0 */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* API enabled: the name expands to a null-checked call through its stub pointer */
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing and the _ptr name to 0 */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr name is defined, as 0 */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_suppress_mode
+ * @brief Mode argument of the range functions: unsuppress or suppress errors for a memory range
+ */
+typedef enum __itt_suppress_mode {
+ __itt_unsuppress_range,
+ __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+/* The empty stub must take the same four arguments as the real function, otherwise call sites stop compiling when the API is compiled out. */
+#define __itt_suppress_mark_range(mode, mask, address, size)
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
+ * call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+/* The empty stub must take the same four arguments as the real function, otherwise call sites stop compiling when the API is compiled out. */
+#define __itt_suppress_clear_range(mode, mask, address, size)
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+/** @} suppress group */
+
+/**
+ * @defgroup sync Synchronization
+ * @ingroup public
+ * Indicate user-written synchronization code
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief Value for the attribute argument of __itt_sync_create: the sync object is a barrier
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief Value for the attribute argument of __itt_sync_create: the sync object is a mutex
+ */
+#define __itt_attr_mutex 2
+
+/**
+@brief Name a synchronization object
+@param[in] addr Handle for the synchronization object. You should
+use a real address to uniquely identify the synchronization object.
+@param[in] objtype null-terminated object type string. If NULL is
+passed, the name will be "User Synchronization".
+@param[in] objname null-terminated object name string. If NULL,
+no name will be assigned to the object.
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex]
+
+On Windows there is a narrow-character (A) and a wide-character (W)
+variant; __itt_sync_create is an alias for the W variant when UNICODE or
+_UNICODE is defined and for the A variant otherwise.
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute);
+void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_create __itt_sync_createW
+# define __itt_sync_create_ptr __itt_sync_createW_ptr
+#else /* UNICODE */
+# define __itt_sync_create __itt_sync_createA
+# define __itt_sync_create_ptr __itt_sync_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/* Three build configurations are handled below:
+ *  - default: each API name expands to ITTNOTIFY_VOID(...) and each _ptr
+ *    name to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing and _ptr names are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr names are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA)
+#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA)
+#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW)
+#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create ITTNOTIFY_VOID(sync_create)
+#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA(addr, objtype, objname, attribute)
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW(addr, objtype, objname, attribute)
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create(addr, objtype, objname, attribute)
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+@brief Rename a synchronization object
+
+You can use the rename call to assign or reassign a name to a given
+synchronization object.
+@param[in] addr handle for the synchronization object.
+@param[in] name null-terminated object name string.
+
+On Windows there is a narrow-character (A) and a wide-character (W)
+variant; __itt_sync_rename is an alias for the W variant when UNICODE or
+_UNICODE is defined and for the A variant otherwise.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_renameA(void *addr, const char *name);
+void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_rename __itt_sync_renameW
+# define __itt_sync_rename_ptr __itt_sync_renameW_ptr
+#else /* UNICODE */
+# define __itt_sync_rename __itt_sync_renameA
+# define __itt_sync_rename_ptr __itt_sync_renameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_rename(void *addr, const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/* Three build configurations are handled below:
+ *  - default: each API name expands to ITTNOTIFY_VOID(...) and each _ptr
+ *    name to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing and _ptr names are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr names are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name))
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA)
+#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA)
+#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW)
+#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename)
+#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA(addr, name)
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW(addr, name)
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename(addr, name)
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ @brief Destroy a synchronization object.
+ @param addr Handle for the synchronization object.
+ */
+void ITTAPI __itt_sync_destroy(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(sync_destroy) and the
+ * _ptr name to ITTNOTIFY_NAME(sync_destroy). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
+#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy)
+#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_destroy(addr)
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/*****************************************************************//**
+ * @name group of functions is used for performance measurement tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @brief Enter spin loop on user-defined sync object
+ * @param[in] addr handle identifying the sync object
+ */
+void ITTAPI __itt_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(sync_prepare) and the
+ * _ptr name to ITTNOTIFY_NAME(sync_prepare). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
+#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare)
+#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_prepare(addr)
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Quit spin loop without acquiring spin object
+ * @param[in] addr handle identifying the sync object
+ */
+void ITTAPI __itt_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(sync_cancel) and the
+ * _ptr name to ITTNOTIFY_NAME(sync_cancel). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
+#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel)
+#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_cancel(addr)
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Successful spin loop completion (sync object acquired)
+ * @param[in] addr handle identifying the sync object
+ */
+void ITTAPI __itt_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(sync_acquired) and the
+ * _ptr name to ITTNOTIFY_NAME(sync_acquired). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
+#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired)
+#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_acquired(addr)
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Start sync object releasing code. Is called before the lock release call.
+ * @param[in] addr handle identifying the sync object
+ */
+void ITTAPI __itt_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(sync_releasing) and the
+ * _ptr name to ITTNOTIFY_NAME(sync_releasing). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
+#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing)
+#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_releasing(addr)
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/** @} sync group */
+
+/**************************************************************//**
+ * @name group of functions is used for correctness checking tools
+ ******************************************************************/
+/** @{ */
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_prepare(void* addr);
+ */
+void ITTAPI __itt_fsync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(fsync_prepare) and the
+ * _ptr name to ITTNOTIFY_NAME(fsync_prepare). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
+#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare)
+#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_prepare(addr)
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_cancel(void *addr);
+ */
+void ITTAPI __itt_fsync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(fsync_cancel) and the
+ * _ptr name to ITTNOTIFY_NAME(fsync_cancel). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
+#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel)
+#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_cancel(addr)
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_acquired(void *addr);
+ */
+void ITTAPI __itt_fsync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(fsync_acquired) and the
+ * _ptr name to ITTNOTIFY_NAME(fsync_acquired). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
+#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired)
+#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_acquired(addr)
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in static library and does not have corresponding function
+ * in dynamic library.
+ * @see void __itt_sync_releasing(void* addr);
+ */
+void ITTAPI __itt_fsync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(fsync_releasing) and the
+ * _ptr name to ITTNOTIFY_NAME(fsync_releasing). With INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
+#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing)
+#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_releasing(addr)
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/**
+ * @defgroup model Modeling by Intel(R) Parallel Advisor
+ * @ingroup public
+ * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
+ * This API is called ONLY using annotate.h, by "Annotation" macros
+ * the user places in their sources during the parallelism modeling steps.
+ *
+ * site_begin/end and task_begin/end take the address of handle variables,
+ * which are writeable by the API. Handles must be 0 initialized prior
+ * to the first call to begin, or may cause a run-time failure.
+ * The handles are initialized in a multi-thread safe way by the API if
+ * the handle is 0. The commonly expected idiom is one static handle to
+ * identify a site or task. If a site or task of the same name has already
+ * been started during this collection, the same handle MAY be returned,
+ * but is not required to be - it is unspecified if data merging is done
+ * based on name. These routines also take an instance variable. Like
+ * the lexical instance, these must be 0 initialized. Unlike the lexical
+ * instance, this is used to track a single dynamic instance.
+ *
+ * API used by the Intel(R) Parallel Advisor to describe potential concurrency
+ * and related activities. User-added source annotations expand to calls
+ * to these procedures to enable modeling of a hypothetical concurrent
+ * execution serially.
+ * @{
+ */
+/* These types are skipped when _ADVISOR_ANNOTATE_H_ is defined without
+ * ANNOTATE_EXPAND_NULL -- presumably because annotate.h then supplies
+ * them itself; TODO confirm against annotate.h. */
+#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
+
+typedef void* __itt_model_site; /*!< @brief handle for lexical site */
+typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */
+typedef void* __itt_model_task; /*!< @brief handle for lexical task */
+typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ *
+ * Passed to __itt_model_disable_push to select what is disabled.
+ */
+typedef enum {
+ __itt_model_disable_observation, /*!< disable observations */
+ __itt_model_disable_collection /*!< disable collection */
+} __itt_model_disable;
+
+#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
+
+/**
+ * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
+ *
+ * site_begin/end model a potential concurrency site.
+ * site instances may be recursively nested with themselves.
+ * site_end exits the most recently started but unended site for the current
+ * thread. The handle passed to end may be used to validate structure.
+ * Instances of a site encountered on different threads concurrently
+ * are considered completely distinct. If the site name for two different
+ * lexical sites match, it is unspecified whether they are treated as the
+ * same or different for data presentation.
+ *
+ * Besides the handle-based begin/end pair, name-only variants are declared:
+ * beginA (const char*), beginW (const wchar_t*, Windows only), beginAL
+ * (const char* plus an explicit name length) and end_2 (no arguments).
+ */
+void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_site_beginW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_site_beginA(const char *name);
+void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
+void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
+
+/** @cond exclude_from_documentation */
+/* Three build configurations are handled below:
+ *  - default: each API name expands to ITTNOTIFY_VOID(...) and each _ptr
+ *    name to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing and _ptr names are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr names are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen))
+ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void))
+#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin)
+#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW)
+#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW)
+#endif
+#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA)
+#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL)
+#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL)
+#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end)
+#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_site_begin(site, instance, name)
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW(name)
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL(name, siteNameLen)
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end(site, instance)
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support
+ *
+ * task_begin/end model a potential task, which is contained within the most
+ * closely enclosing dynamic site. task_end exits the most recently started
+ * but unended task. The handle passed to end may be used to validate
+ * structure. It is unspecified if bad dynamic nesting is detected. If it
+ * is, it should be encoded in the resulting data collection. The collector
+ * should not fail due to construct nesting issues, nor attempt to directly
+ * indicate the problem.
+ *
+ * Besides the handle-based begin/end pair, name-only variants are declared:
+ * task_beginA / iteration_taskA (const char*), task_beginW / iteration_taskW
+ * (const wchar_t*, Windows only), task_beginAL / iteration_taskAL
+ * (const char* plus an explicit name length) and task_end_2 (no arguments).
+ */
+void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_task_beginA(const char *name);
+void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
+
+/** @cond exclude_from_documentation */
+/* Three build configurations are handled below:
+ *  - default: each API name expands to ITTNOTIFY_VOID(...) and each _ptr
+ *    name to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing and _ptr names are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr names are defined (as 0).
+ * Every function declared above must have an entry in each branch. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void))
+#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin)
+#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW)
+#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
+#endif
+#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
+#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL)
+#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
+#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end)
+#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_task_begin(task, instance, name)
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW(name)
+#define __itt_model_task_beginW_ptr 0
+/* iteration_taskW needs a no-op stub too; without it a call would resolve
+ * to the bare prototype declared above. */
+#define __itt_model_iteration_taskW(name)
+#define __itt_model_iteration_taskW_ptr 0
+#endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL(name, taskNameLen)
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL(name, taskNameLen)
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end(task, instance)
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW_ptr 0
+#define __itt_model_iteration_taskW_ptr 0
+#endif
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support
+ *
+ * lock_acquire/release model a potential lock for both lockset and
+ * performance modeling. Each unique address is modeled as a separate
+ * lock, with invalid addresses being valid lock IDs. Specifically:
+ * no storage is accessed by the API at the specified address - it is only
+ * used for lock identification. Lock acquires may be self-nested and are
+ * unlocked by a corresponding number of releases.
+ * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing,
+ * but may not have identical semantics.)
+ *
+ * The _2 variants have the same signatures; how they differ from the
+ * unsuffixed functions is not described in this header section.
+ */
+void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
+void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
+
+/** @cond exclude_from_documentation */
+/* Three build configurations are handled below:
+ *  - default: each API name expands to ITTNOTIFY_VOID(...) and each _ptr
+ *    name to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing and _ptr names are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr names are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
+#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire)
+#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
+#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release)
+#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_lock_acquire(lock)
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release(lock)
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support
+ *
+ * record_allocation/deallocation describe user-defined memory allocator
+ * behavior, which may be required for correctness modeling to understand
+ * when storage is not expected to be actually reused across threads.
+ *
+ * record_allocation takes the address and size of the storage;
+ * record_deallocation takes only the address.
+ */
+void ITTAPI __itt_model_record_allocation (void *addr, size_t size);
+void ITTAPI __itt_model_record_deallocation(void *addr);
+
+/** @cond exclude_from_documentation */
+/* Three build configurations are handled below:
+ *  - default: each API name expands to ITTNOTIFY_VOID(...) and each _ptr
+ *    name to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing and _ptr names are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr names are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size))
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr))
+#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation)
+#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation)
+#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation)
+#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_record_allocation(addr, size)
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation(addr)
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_INDUCTION_USES support
+ *
+ * Note particular storage is inductive through the end of the current site
+ * @param[in] addr address of the storage
+ * @param[in] size size of the storage
+ */
+void ITTAPI __itt_model_induction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(model_induction_uses)
+ * and the _ptr name to ITTNOTIFY_NAME(model_induction_uses). With
+ * INTEL_NO_ITTNOTIFY_API the call expands to nothing; with
+ * INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size))
+#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses)
+#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_induction_uses(addr, size)
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_REDUCTION_USES support
+ *
+ * Note particular storage is used for reduction through the end
+ * of the current site
+ * @param[in] addr address of the storage
+ * @param[in] size size of the storage
+ */
+void ITTAPI __itt_model_reduction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(model_reduction_uses)
+ * and the _ptr name to ITTNOTIFY_NAME(model_reduction_uses). With
+ * INTEL_NO_ITTNOTIFY_API the call expands to nothing; with
+ * INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size))
+#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses)
+#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_reduction_uses(addr, size)
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_OBSERVE_USES support
+ *
+ * Have correctness modeling record observations about uses of storage
+ * through the end of the current site
+ * @param[in] addr address of the storage
+ * @param[in] size size of the storage
+ */
+void ITTAPI __itt_model_observe_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(model_observe_uses)
+ * and the _ptr name to ITTNOTIFY_NAME(model_observe_uses). With
+ * INTEL_NO_ITTNOTIFY_API the call expands to nothing; with
+ * INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size))
+#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses)
+#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_observe_uses(addr, size)
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_CLEAR_USES support
+ *
+ * Clear the special handling of a piece of storage related to induction,
+ * reduction or observe_uses
+ * @param[in] addr address of the storage
+ */
+void ITTAPI __itt_model_clear_uses(void* addr);
+
+/** @cond exclude_from_documentation */
+/* Default build: the name expands to ITTNOTIFY_VOID(model_clear_uses)
+ * and the _ptr name to ITTNOTIFY_NAME(model_clear_uses). With
+ * INTEL_NO_ITTNOTIFY_API the call expands to nothing; with
+ * INTEL_NO_MACRO_BODY only _ptr (0) is defined. */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr))
+#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses)
+#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_clear_uses(addr)
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support
+ *
+ * disable_push/disable_pop push and pop disabling based on a parameter.
+ * Disabling observations stops processing of memory references during
+ * correctness modeling, and all annotations that occur in the disabled
+ * region. This allows description of code that is expected to be handled
+ * specially during conversion to parallelism or that is not recognized
+ * by tools (e.g. some kinds of synchronization operations.)
+ * This mechanism causes all annotations in the disabled region, other
+ * than disable_push and disable_pop, to be ignored. (For example, this
+ * might validly be used to disable an entire parallel site and the contained
+ * tasks and locking in it for data collection purposes.)
+ * The disable for collection is a more expensive operation, but reduces
+ * collector overhead significantly. This applies to BOTH correctness data
+ * collection and performance data collection. For example, a site
+ * containing a task might only enable data collection for the first 10
+ * iterations. Both performance and correctness data should reflect this,
+ * and the program should run as close to full speed as possible when
+ * collection is disabled.
+ *
+ * NOTE(review): __itt_model_aggregate_task is declared alongside these but
+ * is not described by the text above; its semantics are not visible here.
+ */
+void ITTAPI __itt_model_disable_push(__itt_model_disable x);
+void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
+
+/** @cond exclude_from_documentation */
+/* Three build configurations are handled below:
+ *  - default: each API name expands to ITTNOTIFY_VOID(...) and each _ptr
+ *    name to ITTNOTIFY_NAME(...);
+ *  - INTEL_NO_ITTNOTIFY_API: calls expand to nothing and _ptr names are 0;
+ *  - INTEL_NO_MACRO_BODY: only the _ptr names are defined (as 0). */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
+#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push)
+#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
+#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop)
+#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_disable_push(x)
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop()
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} model group */
+
+/**
+ * @defgroup heap Heap
+ * @ingroup public
+ * Heap group
+ * @{
+ */
+
+typedef void* __itt_heap_function; /* Opaque heap-function identifier; a null value means "no identifier" */
+
+/**
+ * @brief Create an identification for heap function
+ *
+ * On Windows two entry points are declared, __itt_heap_function_createA
+ * (char strings) and __itt_heap_function_createW (wchar_t strings), and
+ * __itt_heap_function_create maps to the W variant when UNICODE or _UNICODE
+ * is defined and to the A variant otherwise. On other platforms only the
+ * char-string __itt_heap_function_create exists.
+ *
+ * @param[in] name   Name string for the heap function.
+ * @param[in] domain Domain string. NOTE(review): this is a plain string, not
+ *                   an __itt_domain pointer; its exact meaning is not visible
+ *                   in this section -- TODO confirm.
+ * @return non-zero identifier or NULL
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain);
+__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_heap_function_create __itt_heap_function_createW
+# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr
+#else
+# define __itt_heap_function_create __itt_heap_function_createA
+# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain))
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA)
+#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA)
+#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW)
+#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create ITTNOTIFY_DATA(heap_function_create)
+#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls evaluate to a null __itt_heap_function, the _ptr macros to 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation begin occurrence.
+ * @param[in] h           Heap function identifier (an __itt_heap_function).
+ * @param[in] size        Size associated with the allocation (presumably the
+ *                        requested size in bytes -- TODO confirm).
+ * @param[in] initialized Integer flag (presumably non-zero when the allocated
+ *                        memory is initialized -- TODO confirm).
+ */
+void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
+#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin)
+#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_allocate_begin(h, size, initialized)
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation end occurrence.
+ * @param[in] h           Heap function identifier (an __itt_heap_function).
+ * @param[in] addr        Pointer to a void* (presumably the variable holding
+ *                        the newly allocated address -- TODO confirm).
+ * @param[in] size        Size associated with the allocation (presumably in
+ *                        bytes -- TODO confirm).
+ * @param[in] initialized Integer flag (presumably non-zero when the allocated
+ *                        memory is initialized -- TODO confirm).
+ */
+void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
+#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end)
+#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_allocate_end(h, addr, size, initialized)
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free begin occurrence.
+ * @param[in] h    Heap function identifier (an __itt_heap_function).
+ * @param[in] addr Address passed to the free operation (presumably the block
+ *                 about to be released -- TODO confirm).
+ */
+void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin)
+#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_free_begin(h, addr)
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free end occurrence.
+ * @param[in] h    Heap function identifier (an __itt_heap_function).
+ * @param[in] addr Address passed to the free operation (presumably the same
+ *                 value given to __itt_heap_free_begin -- TODO confirm).
+ */
+void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end)
+#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_free_end(h, addr)
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation begin occurrence.
+ * @param[in] h           Heap function identifier (an __itt_heap_function).
+ * @param[in] addr        Address passed to the reallocation (presumably the
+ *                        existing block -- TODO confirm).
+ * @param[in] new_size    New size (presumably in bytes -- TODO confirm).
+ * @param[in] initialized Integer flag (presumably non-zero when the memory is
+ *                        initialized -- TODO confirm).
+ */
+void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin)
+#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation end occurrence.
+ * @param[in] h           Heap function identifier (an __itt_heap_function).
+ * @param[in] addr        Address passed to the reallocation (presumably the
+ *                        original block -- TODO confirm).
+ * @param[in] new_addr    Pointer to a void* (presumably the variable holding
+ *                        the reallocated address -- TODO confirm).
+ * @param[in] new_size    New size (presumably in bytes -- TODO confirm).
+ * @param[in] initialized Integer flag (presumably non-zero when the memory is
+ *                        initialized -- TODO confirm).
+ */
+void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end)
+#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief internal access begin
+ *
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_internal_access_end. NOTE(review): what counts as "internal
+ * access" is not described in this section -- TODO confirm.
+ */
+void ITTAPI __itt_heap_internal_access_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void))
+#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin)
+#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_internal_access_begin()
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief internal access end
+ *
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_internal_access_begin.
+ */
+void ITTAPI __itt_heap_internal_access_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
+#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end)
+#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_internal_access_end()
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief record memory growth begin
+ *
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_record_memory_growth_end.
+ */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void))
+#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_record_memory_growth_begin()
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief record memory growth end
+ *
+ * Takes no arguments and returns nothing; paired by name with
+ * __itt_heap_record_memory_growth_begin.
+ */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_heap_record_memory_growth_end()
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify.
+ *
+ * The two values defined below are distinct single bits (0x1 and 0x2), so
+ * they can be OR-ed together. Presumably they are the values accepted by the
+ * mask arguments of __itt_heap_reset_detection and __itt_heap_record --
+ * TODO confirm.
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks.
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth.
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/**
+ * @brief heap reset detection
+ * @param[in] reset_mask Unsigned bit mask selecting what to reset (presumably
+ *                       a combination of __itt_heap_leaks and
+ *                       __itt_heap_growth -- TODO confirm).
+ */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask))
+#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+/* The no-op stub must take the same single argument as the real function;
+ * a zero-parameter macro would reject every call that passes a mask. */
+#define __itt_heap_reset_detection(reset_mask)
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief report
+ * @param[in] record_mask Unsigned bit mask selecting what to report
+ *                        (presumably a combination of __itt_heap_leaks and
+ *                        __itt_heap_growth -- TODO confirm).
+ */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+/* The no-op stub must take the same single argument as the real function;
+ * a zero-parameter macro would reject every call that passes a mask. */
+#define __itt_heap_record(record_mask)
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} heap group */
+/** @endcond */
+/* ========================================================================== */
+
+/**
+ * @defgroup domains Domains
+ * @ingroup public
+ * Domains group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_domain /* Domain record; packed with 8-byte alignment by the surrounding pragma pack pair */
+{
+ volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW; /*!< Untyped stand-in for the wide name when UNICODE/_UNICODE is not defined; the member stays a pointer */
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_domain* next; /*!< Link to another domain record (presumably the next entry of a runtime-owned list -- TODO confirm) */
+} __itt_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup domains
+ * @brief Create a domain.
+ * Create domain using some domain name: the URI naming style is recommended.
+ * Because the set of domains is expected to be static over the application's
+ * execution time, there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of
+ * which thread created the domain. This call is thread-safe.
+ *
+ * On Windows two entry points are declared, __itt_domain_createA (char name)
+ * and __itt_domain_createW (wchar_t name); __itt_domain_create maps to the W
+ * variant when UNICODE or _UNICODE is defined and to the A variant otherwise.
+ * @param[in] name name of domain
+ * @return Pointer to the __itt_domain. When the API is compiled out with
+ *         INTEL_NO_ITTNOTIFY_API the call evaluates to a null pointer.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_domain* ITTAPI __itt_domain_createA(const char *name);
+__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_domain_create __itt_domain_createW
+# define __itt_domain_create_ptr __itt_domain_createW_ptr
+#else /* UNICODE */
+# define __itt_domain_create __itt_domain_createA
+# define __itt_domain_create_ptr __itt_domain_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_domain* ITTAPI __itt_domain_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA)
+#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA)
+#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW)
+#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create ITTNOTIFY_DATA(domain_create)
+#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls evaluate to a null __itt_domain*, the _ptr macros to 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA(name) (__itt_domain*)0
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW(name) (__itt_domain*)0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create(name) (__itt_domain*)0
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} domains group */
+
+/**
+ * @defgroup ids IDs
+ * @ingroup public
+ * IDs group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+/* Identifier made of three 64-bit words. __itt_id_make stores the object
+ * address in d1 and the caller's extra value in d2; d3 is reserved and set
+ * to zero. */
+typedef struct ___itt_id
+{
+ unsigned long long d1, d2, d3;
+} __itt_id;
+
+#pragma pack(pop)
+/** @endcond */
+
+/* The all-zero ID. It is `static` so that each translation unit including
+ * this header gets its own private copy: in C a non-static const object at
+ * file scope has external linkage, and defining it in a header would produce
+ * duplicate-symbol errors at link time. In C++ namespace-scope const objects
+ * already have internal linkage, so `static` changes nothing there. */
+static const __itt_id __itt_null = { 0, 0, 0 };
+
+/**
+ * @ingroup ids
+ * @brief Convenience helper that fills in an __itt_id without domain control.
+ * It only initializes the structure and does not affect the collector runtime
+ * in any way. After making the ID with this function you still must create it
+ * with the __itt_id_create function before using the ID to identify a named
+ * entity.
+ * @param[in] addr The address of object; high QWORD of the ID value.
+ * @param[in] extra The extra data to unique identify object; low QWORD of the ID value.
+ * @return The ID with d1 taken from addr, d2 from extra and d3 set to zero.
+ */
+
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+{
+ const uintptr_t addr_bits = (uintptr_t)addr;
+ __itt_id made = __itt_null;
+
+ made.d3 = 0ULL; /* Reserved. Must be zero */
+ made.d2 = extra;
+ made.d1 = (unsigned long long)addr_bits;
+ return made;
+}
+
+/**
+ * @ingroup ids
+ * @brief Create an instance of identifier.
+ * This establishes the beginning of the lifetime of an instance of
+ * the given ID in the trace. Once this lifetime starts, the ID
+ * can be used to tag named entity instances in calls such as
+ * __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * Instance IDs are not domain specific!
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x)
+#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_id_create(domain,id)
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup ids
+ * @brief Destroy an instance of identifier.
+ * This ends the lifetime of the current instance of the given ID value in the trace.
+ * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x)
+#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_id_destroy(domain,id)
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} ids group */
+
+/**
+ * @defgroup handles String Handles
+ * @ingroup public
+ * String Handles group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_string_handle /* String handle record; packed with 8-byte alignment by the surrounding pragma pack pair */
+{
+ const char* strA; /*!< Copy of original string in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* strW; /*!< Copy of original string in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* strW; /*!< Untyped stand-in for the wide string when UNICODE/_UNICODE is not defined; the member stays a pointer */
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_string_handle* next; /*!< Link to another handle record (presumably the next entry of a runtime-owned list -- TODO confirm) */
+} __itt_string_handle;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup handles
+ * @brief Create a string handle.
+ * Create and return handle value that can be associated with a string.
+ * Consecutive calls to __itt_string_handle_create with the same name
+ * return the same value. Because the set of string handles is expected to remain
+ * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created
+ * the string handle. This call is thread-safe.
+ *
+ * On Windows two entry points are declared, __itt_string_handle_createA (char
+ * string) and __itt_string_handle_createW (wchar_t string);
+ * __itt_string_handle_create maps to the W variant when UNICODE or _UNICODE
+ * is defined and to the A variant otherwise.
+ * @param[in] name The input string
+ * @return Pointer to the __itt_string_handle. When the API is compiled out
+ *         with INTEL_NO_ITTNOTIFY_API the call evaluates to a null pointer.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name);
+__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_string_handle_create __itt_string_handle_createW
+# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr
+#else /* UNICODE */
+# define __itt_string_handle_create __itt_string_handle_createA
+# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA)
+#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA)
+#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW)
+#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create)
+#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls evaluate to a null __itt_string_handle*, the _ptr macros to 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA(name) (__itt_string_handle*)0
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW(name) (__itt_string_handle*)0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create(name) (__itt_string_handle*)0
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} handles group */
+
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp; /* Unsigned timestamp value; values are comparable with "<" */
+/** @endcond */
+
+#define __itt_timestamp_none ((__itt_timestamp)-1LL) /* -1 converted to the unsigned type, i.e. the largest __itt_timestamp; a sentinel, not a real time */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to the current moment.
+ * This returns the timestamp in the format that is the most relevant for the current
+ * host or platform (RDTSC, QPC, and others). You can use the "<" operator to
+ * compare __itt_timestamp values.
+ * @return The current timestamp. When the API is compiled out with
+ *         INTEL_NO_ITTNOTIFY_API the call is the constant (__itt_timestamp)0.
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp)
+#else /* INTEL_NO_ITTNOTIFY_API: the call is a constant, the _ptr macro is 0 */
+/* The function returns a value, so the disabled stub must still be an
+ * expression; an empty expansion would break `t = __itt_get_timestamp();`.
+ * A typed zero follows the other value-returning stubs in this header. */
+#define __itt_get_timestamp() ((__itt_timestamp)0)
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @defgroup regions Regions
+ * @ingroup public
+ * Regions group
+ * @{
+ */
+/**
+ * @ingroup regions
+ * @brief Begin of region instance.
+ * Successive calls to __itt_region_begin with the same ID are ignored
+ * until a call to __itt_region_end with the same ID
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance. Must not be __itt_null
+ * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null
+ * @param[in] name The name of this region
+ */
+void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup regions
+ * @brief End of region instance.
+ * The first call to __itt_region_end with a given ID ends the
+ * region. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance
+ */
+void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id))
+#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z)
+#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin)
+#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x)
+#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end)
+#else /* INTEL_NO_ITTNOTIFY_API: the calls expand to nothing, the _ptr macros to 0 */
+#define __itt_region_begin(d,x,y,z)
+#define __itt_region_begin_ptr 0
+#define __itt_region_end(d,x)
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_region_begin_ptr 0
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} regions group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup public
+ * Frames are similar to regions, but are intended to be easier to use and to implement.
+ * In particular:
+ * - Frames always represent periods of elapsed time
+ * - By default, frames have no nesting relationships
+ * @{
+ */
+
+/**
+ * @ingroup frames
+ * @brief Begin a frame instance.
+ * Successive calls to __itt_frame_begin with the
+ * same ID are ignored until a call to __itt_frame_end with the same ID.
+ * (The function is declared here under the name __itt_frame_begin_v3.)
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ */
+void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief End a frame instance.
+ * The first call to __itt_frame_end with a given ID
+ * ends the frame. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_frame_begin call.
+ * (The function is declared here under the name __itt_frame_end_v3.)
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL for current
+ */
+void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief Submits a frame instance.
+ * Successive calls to __itt_frame_begin or __itt_frame_submit with the
+ * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
+ * with the same ID.
+ * Passing special __itt_timestamp_none value as "end" argument means
+ * take the current timestamp as the end timestamp.
+ * (The function is declared here under the name __itt_frame_submit_v3.)
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+ __itt_timestamp begin, __itt_timestamp end);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
+#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
+#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3)
+#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
+#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3)
+#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3)
+#else /* INTEL_NO_ITTNOTIFY_API: the calls expand to nothing, the _ptr macros to 0 */
+#define __itt_frame_begin_v3(domain,id)
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3(domain,id)
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3(domain,id,begin,end)
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} frames group */
+/** @endcond */
+
+/**
+ * @defgroup taskgroup Task Group
+ * @ingroup public
+ * Task Group
+ * @{
+ */
+/**
+ * @ingroup taskgroup
+ * @brief Denotes a task_group instance.
+ * Successive calls to __itt_task_group with the same ID are ignored.
+ * @param[in] domain The domain for this task_group instance
+ * @param[in] id The instance ID for this task_group instance. Must not be __itt_null.
+ * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null.
+ * @param[in] name The name of this task_group
+ */
+void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
+#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_group(d,x,y,z)
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} taskgroup group */
+
+/**
+ * @defgroup tasks Tasks
+ * @ingroup public
+ * A task instance represents a piece of work performed by a particular
+ * thread for a period of time. A call to __itt_task_begin creates a
+ * task instance. This becomes the current instance for that task on that
+ * thread. A following call to __itt_task_end on the same thread ends the
+ * instance. There may be multiple simultaneous instances of tasks with the
+ * same name on different threads. If an ID is specified, the task instance
+ * receives that ID. Nested tasks are allowed.
+ *
+ * Note: The task is defined by the bracketing of __itt_task_begin and
+ * __itt_task_end on the same thread. If some scheduling mechanism causes
+ * task switching (the thread executes a different user task) or task
+ * migration (the user task moves to a different thread), then this breaks
+ * the notion of the current instance. Additional API calls are required to
+ * deal with that possibility.
+ * @{
+ */
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance for a traced function (given by pointer rather than by name).
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup tasks
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ */
+void ITTAPI __itt_task_end(const __itt_domain *domain);
+
+/**
+ * @ingroup tasks
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup tasks
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain))
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid))
+#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
+#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin)
+#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
+#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn)
+#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d)
+#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end)
+#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
+#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped)
+#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
+#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin(domain,id,parentid,name)
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn(domain,id,parentid,fn)
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end(domain)
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped(domain,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped(domain,taskid)
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} tasks group */
+
+
+/**
+ * @defgroup markers Markers
+ * Markers represent a single discrete event in time. Markers have a scope,
+ * described by an enumerated type __itt_scope. Markers are created by
+ * the API call __itt_marker. A marker instance can be given an ID for use in
+ * adding metadata.
+ * @{
+ */
+
+/**
+ * @brief Describes the scope of an event object in the trace; passed as the scope argument of __itt_marker and of the metadata calls ending in _with_scope.
+ */
+typedef enum
+{
+ __itt_scope_unknown = 0, /**< Zero value; scope not specified (aliased as __itt_marker_scope_unknown) */
+ __itt_scope_global, /**< Aliased as __itt_marker_scope_global */
+ __itt_scope_track_group, /**< Aliased as __itt_marker_scope_process */
+ __itt_scope_track, /**< Aliased as __itt_marker_scope_thread */
+ __itt_scope_task, /**< Aliased as __itt_marker_scope_task */
+ __itt_scope_marker /**< Has no __itt_marker_scope_ alias among the macros that follow this enum */
+} __itt_scope;
+
+/** @cond exclude_from_documentation */
+#define __itt_marker_scope_unknown __itt_scope_unknown
+#define __itt_marker_scope_global __itt_scope_global
+#define __itt_marker_scope_process __itt_scope_track_group
+#define __itt_marker_scope_thread __itt_scope_track
+#define __itt_marker_scope_task __itt_scope_task
+/** @endcond */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance
+ * @param[in] domain The domain for this marker
+ * @param[in] id The instance ID for this marker or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
+#define __itt_marker_ptr ITTNOTIFY_NAME(marker)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker(domain,id,name,scope)
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} markers group */
+
+/**
+ * @defgroup metadata Metadata
+ * The metadata API is used to attach extra information to named
+ * entities. Metadata can be attached to an identified named entity by ID,
+ * or to the current entity (which is always a task).
+ *
+ * Conceptually metadata has a type (what kind of metadata), a key (the
+ * name of the metadata), and a value (the actual data). The encoding of
+ * the value depends on the type of the metadata.
+ *
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
+ * @{
+ */
+
+/**
+ * @ingroup parameters
+ * @brief describes the type of metadata
+ */
+typedef enum { /* Element type of the data passed to the metadata-add calls (see their type and count parameters) */
+ __itt_metadata_unknown = 0, /**< Zero value; metadata type not specified */
+ __itt_metadata_u64, /**< Unsigned 64-bit integer */
+ __itt_metadata_s64, /**< Signed 64-bit integer */
+ __itt_metadata_u32, /**< Unsigned 32-bit integer */
+ __itt_metadata_s32, /**< Signed 32-bit integer */
+ __itt_metadata_u16, /**< Unsigned 16-bit integer */
+ __itt_metadata_s16, /**< Signed 16-bit integer */
+ __itt_metadata_float, /**< 32-bit floating-point */
+ __itt_metadata_double /**< 64-bit floating-point */
+} __itt_metadata_type;
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b)
+#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add(d,x,y,z,a,b)
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or (size_t)-1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add __itt_metadata_str_addW
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add __itt_metadata_str_addA
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA)
+#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a)
+#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW(d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added.
+ *                  Unlike __itt_metadata_add, this function takes no instance ID;
+ *                  the instance is designated by its scope instead.
+ *
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added.
+ *                  Unlike __itt_metadata_str_add, this function takes no instance ID;
+ *                  the instance is designated by its scope instead.
+ *
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or (size_t)-1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA)
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} metadata group */
+
+/**
+ * @defgroup relations Relations
+ * Instances of named entities can be explicitly associated with other
+ * instances using instance IDs and the relationship API calls.
+ *
+ * @{
+ */
+
+/**
+ * @ingroup relations
+ * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation.
+ * Relations between instances can be added with an API call. The relation
+ * API uses instance IDs. Relations can be added before or after the actual
+ * instances are created and persist independently of the instances. This
+ * is the motivation for having different lifetimes for instance IDs and
+ * the actual instances.
+ */
+typedef enum /* Kind of relation between a head instance A and a tail instance B */
+{
+ __itt_relation_is_unknown = 0, /**< Zero value; relation kind not specified */
+ __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */
+ __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */
+ __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */
+ __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */
+ __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
+ __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
+ __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
+} __itt_relation;
+
+/**
+ * @ingroup relations
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup relations
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y)
+#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current)
+#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z)
+#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current(d,x,y)
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add(d,x,y,z)
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} relations group */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_info /* Clock description passed by pointer to a __itt_get_clock_info_fn callback */
+{
+ unsigned long long clock_freq; /*!< Clock domain frequency (units not stated here; presumably ticks per second, TODO confirm) */
+ unsigned long long clock_base; /*!< Clock domain base timestamp */
+} __itt_clock_info;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); /* User callback type registered through __itt_clock_domain_create; receives a __itt_clock_info pointer (presumably to fill in, TODO confirm) and the caller-supplied data argument */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_domain /* Clock domain object; a pointer to it is returned by __itt_clock_domain_create */
+{
+ __itt_clock_info info; /*!< Most recent clock domain info */
+ __itt_get_clock_info_fn fn; /*!< Callback function pointer (same type as the fn argument of __itt_clock_domain_create) */
+ void* fn_data; /*!< Input argument for the callback function */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_clock_domain* next; /*!< Pointer to another clock domain; presumably links the domains into a list (TODO confirm) */
+} __itt_clock_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Create a clock domain.
+ * Certain applications require the capability to trace their application using
+ * a clock domain different than the CPU, for instance the instrumentation of events
+ * that occur on a GPU.
+ * Because the set of domains is expected to be static over the application's execution time,
+ * there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of which thread created
+ * the domain. This call is thread-safe.
+ * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps
+ * @param[in] fn_data Argument for a callback function; may be NULL
+ */
+__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data))
+#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create)
+#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Recalculate clock domain frequencies and clock base timestamps.
+ */
+void ITTAPI __itt_clock_domain_reset(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void))
+#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset)
+#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_reset()
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Create an instance of identifier. This establishes the beginning of the lifetime of
+ * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to
+ * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/**
+ * @ingroup clockdomain
+ * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the
+ * given ID value in the trace. Any relationships that are established after this lifetime ends are
+ * invalid. This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z)
+#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex)
+#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z)
+#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, or __itt_null
+ * @param[in] parentid The parent of this task, or __itt_null
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup clockdomain
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ */
+void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp))
+#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b)
+#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex)
+#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b)
+#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex)
+#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y)
+#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name)
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn)
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex(domain,clock_domain,timestamp)
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup counters Counters
+ * @ingroup public
+ * Counters are user-defined objects with a monotonically increasing
+ * value. Counter values are 64-bit unsigned integers.
+ * Counters have names that can be displayed in
+ * the tools.
+ * @{
+ */
+
+/**
+ * @brief Opaque handle used to identify a counter.
+ *
+ * It is a pointer to struct ___itt_counter. Handles are returned by
+ * __itt_counter_create() / __itt_counter_create_typed() and passed to the
+ * other __itt_counter_* functions, including __itt_counter_destroy().
+ */
+/** @cond exclude_from_documentation */
+
+typedef struct ___itt_counter* __itt_counter;
+
+/**
+ * @brief Create an unsigned 64 bits integer counter with given name/domain
+ *
+ * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer
+ *
+ * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64)
+ *
+ * @param[in] name The name of the counter
+ * @param[in] domain The domain of the counter
+ * @return The handle of the created counter. When INTEL_NO_ITTNOTIFY_API is
+ * defined, the call macros evaluate to a null handle, (__itt_counter)0, so
+ * that code assigning the result still compiles (same convention as the
+ * __itt_track_create and __itt_event_create stubs in this header).
+ *
+ * On Windows the A (char) and W (wchar_t) variants are declared and
+ * __itt_counter_create maps to the W variant when UNICODE or _UNICODE is
+ * defined, otherwise to the A variant.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain);
+__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_counter_create __itt_counter_createW
+# define __itt_counter_create_ptr __itt_counter_createW_ptr
+#else /* UNICODE */
+# define __itt_counter_create __itt_counter_createA
+# define __itt_counter_create_ptr __itt_counter_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain))
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA)
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA)
+#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW)
+#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create ITTNOTIFY_DATA(counter_create)
+#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls yield a null counter handle, _ptr macros are 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA(name, domain) (__itt_counter)0
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW(name, domain) (__itt_counter)0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create(name, domain) (__itt_counter)0
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined here, each as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Increment the unsigned 64-bit integer counter value.
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer
+ * counter has no effect.
+ *
+ * @param[in] id The counter to increment
+ */
+void ITTAPI __itt_counter_inc(__itt_counter id);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id))
+#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc)
+#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_counter_inc(id)
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Increment the unsigned 64-bit integer counter value by the given amount.
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer
+ * counter has no effect.
+ *
+ * @param[in] id The counter to increment
+ * @param[in] value The amount by which to increment the counter
+ */
+void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta)
+#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_counter_inc_delta(id, value)
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value.
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer
+ * counter has no effect.
+ *
+ * @param[in] id The counter to decrement
+ */
+void ITTAPI __itt_counter_dec(__itt_counter id);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id))
+#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec)
+#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_counter_dec(id)
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value by the given amount.
+ *
+ * Calling this function on a counter that is not an unsigned 64-bit integer
+ * counter has no effect.
+ *
+ * @param[in] id The counter to decrement
+ * @param[in] value The amount by which to decrement the counter
+ */
+void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta)
+#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_counter_dec_delta(id, value)
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls increment the counter value.
+ * Unlike __itt_counter_inc(), the counter is identified by a string handle
+ * rather than by an __itt_counter handle.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to increment the counter
+ */
+void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x)
+#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3)
+#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y)
+#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3)
+#else /* INTEL_NO_ITTNOTIFY_API: the call macros expand to nothing and the _ptr macros to 0 */
+#define __itt_counter_inc_v3(domain,name)
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3(domain,name,delta)
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined here, each as 0 */
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls decrement the counter value.
+ * Unlike __itt_counter_dec(), the counter is identified by a string handle
+ * rather than by an __itt_counter handle.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to decrement the counter
+ */
+void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x)
+#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3)
+#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y)
+#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3)
+#else /* INTEL_NO_ITTNOTIFY_API: the call macros expand to nothing and the _ptr macros to 0 */
+#define __itt_counter_dec_v3(domain,name)
+#define __itt_counter_dec_v3_ptr 0
+#define __itt_counter_dec_delta_v3(domain,name,delta)
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined here, each as 0 */
+#define __itt_counter_dec_v3_ptr 0
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/* The counters group continues below; it is closed after __itt_counter_destroy. */
+
+
+/**
+ * @brief Set the counter value
+ *
+ * @param[in] id The counter whose value is set
+ * @param[in] value_ptr Pointer to the new value
+ */
+void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr))
+#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value)
+#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_counter_set_value(id, value_ptr)
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the counter value, supplying a clock domain and a timestamp
+ *
+ * @param[in] id The counter whose value is set
+ * @param[in] clock_domain The clock domain for this call
+ * @param[in] timestamp The user defined timestamp
+ * @param[in] value_ptr Pointer to the new value
+ */
+void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr))
+#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex)
+#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create a typed counter with given name/domain
+ *
+ * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter
+ *
+ * @param[in] name The name of the counter
+ * @param[in] domain The domain of the counter
+ * @param[in] type The value type of the counter, as an __itt_metadata_type
+ * @return The handle of the created counter. When INTEL_NO_ITTNOTIFY_API is
+ * defined, the call macros evaluate to a null handle, (__itt_counter)0, so
+ * that code assigning the result still compiles (same convention as the
+ * __itt_track_create and __itt_event_create stubs in this header).
+ *
+ * On Windows the A (char) and W (wchar_t) variants are declared and
+ * __itt_counter_create_typed maps to the W variant when UNICODE or _UNICODE
+ * is defined, otherwise to the A variant.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type);
+__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_counter_create_typed __itt_counter_create_typedW
+# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr
+#else /* UNICODE */
+# define __itt_counter_create_typed __itt_counter_create_typedA
+# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type))
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA)
+#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA)
+#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW)
+#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed)
+#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls yield a null counter handle, _ptr macros are 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typed_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined here, each as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or
+ * __itt_counter_create_typed()
+ *
+ * @param[in] id The counter handle to destroy
+ */
+void ITTAPI __itt_counter_destroy(__itt_counter id);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id))
+#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy)
+#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_counter_destroy(id)
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} counters group */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance, supplying a clock domain and a timestamp.
+ * @param[in] domain The domain for this marker
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The instance ID for this marker, or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b)
+#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope)
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a)
+#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex)
+#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
+#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex)
+#else /* INTEL_NO_ITTNOTIFY_API: the call macros expand to nothing and the _ptr macros to 0 */
+#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail)
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined here, each as 0 */
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum ___itt_track_group_type
+{
+ __itt_track_group_type_normal = 0 /* the only track group type declared in this enum */
+} __itt_track_group_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8) /* saved packing is restored by the matching pop after the struct */
+
+typedef struct ___itt_track_group
+{
+ __itt_string_handle* name; /*!< Name of the track group */
+ struct ___itt_track* track; /*!< List of child tracks */
+ __itt_track_group_type tgtype; /*!< Type of the track group */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track_group* next; /*!< Pointer to another track group (presumably the next list entry; confirm) */
+} __itt_track_group;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Placeholder for custom track types. Currently, "normal" custom track
+ * is the only available track type.
+ * An additional __itt_track_type_queue value is declared only when
+ * INTEL_ITTNOTIFY_API_PRIVATE is defined.
+ */
+typedef enum ___itt_track_type
+{
+ __itt_track_type_normal = 0
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+ , __itt_track_type_queue
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+} __itt_track_type;
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8) /* saved packing is restored by the matching pop after the struct */
+
+typedef struct ___itt_track
+{
+ __itt_string_handle* name; /*!< Name of the track */
+ __itt_track_group* group; /*!< Parent group to a track */
+ __itt_track_type ttype; /*!< Type of the track */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track* next; /*!< Pointer to another track (presumably the next list entry; confirm) */
+} __itt_track;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create logical track group.
+ *
+ * @param[in] name The name of the track group
+ * @param[in] track_group_type The type of the track group
+ * @return Pointer to the created track group. When INTEL_NO_ITTNOTIFY_API is
+ * defined, the call macro takes the same two arguments and evaluates to a
+ * null __itt_track_group pointer.
+ */
+__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type))
+#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create)
+#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create)
+#else /* INTEL_NO_ITTNOTIFY_API: the call yields a null pointer, the _ptr macro is 0 */
+#define __itt_track_group_create(name,track_group_type) (__itt_track_group*)0
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create logical track.
+ *
+ * @param[in] track_group The track group passed for the new track
+ * @param[in] name The name of the track
+ * @param[in] track_type The type of the track
+ * @return Pointer to the created track. When INTEL_NO_ITTNOTIFY_API is
+ * defined, the call macro evaluates to a null __itt_track pointer.
+ */
+__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type))
+#define __itt_track_create ITTNOTIFY_DATA(track_create)
+#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create)
+#else /* INTEL_NO_ITTNOTIFY_API: the call yields a null pointer, the _ptr macro is 0 */
+#define __itt_track_create(track_group,name,track_type) (__itt_track*)0
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the logical track.
+ *
+ * @param[in] track The track to set
+ */
+void ITTAPI __itt_set_track(__itt_track* track);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track))
+#define __itt_set_track ITTNOTIFY_VOID(set_track)
+#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_set_track(track)
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/* ========================================================================== */
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup events Events
+ * @ingroup public
+ * Events group
+ * @{
+ */
+/** @brief User event type: the int identifier returned by the __itt_event_create functions and passed to __itt_event_start() / __itt_event_end(). */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @note Failure cases: name or namelen is null, name and namelen do not match,
+ * or the user event feature is not enabled
+ * @param[in] name The name of the event
+ * @param[in] namelen The length of name
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ *
+ * On Windows the A (char) and W (wchar_t) variants are declared and
+ * __itt_event_create maps to the W variant when UNICODE or _UNICODE is
+ * defined, otherwise to the A variant.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_event_create __itt_event_createW
+# define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+# define __itt_event_create __itt_event_createA
+# define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls evaluate to (__itt_event)0, _ptr macros are 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen) (__itt_event)0
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined here, each as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event occurrence.
+ * @param[in] event The event identifier
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else /* INTEL_NO_ITTNOTIFY_API: the call evaluates to (int)0, the _ptr macro is 0 */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event end occurrence.
+ * @note It is optional if events do not have durations.
+ * @param[in] event The event identifier
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else /* INTEL_NO_ITTNOTIFY_API: the call evaluates to (int)0, the _ptr macro is 0 */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} events group */
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines types of arrays data (for C/C++ intrinsic types)
+ *
+ * __itt_e_first and __itt_e_last alias the first (__itt_e_char) and last
+ * (__itt_e_double) element types.
+ */
+typedef enum
+{
+ __itt_e_first = 0, /* same value as __itt_e_char */
+ __itt_e_char = 0, /* 1-byte integer */
+ __itt_e_uchar, /* 1-byte unsigned integer */
+ __itt_e_int16, /* 2-byte integer */
+ __itt_e_uint16, /* 2-byte unsigned integer */
+ __itt_e_int32, /* 4-byte integer */
+ __itt_e_uint32, /* 4-byte unsigned integer */
+ __itt_e_int64, /* 8-byte integer */
+ __itt_e_uint64, /* 8-byte unsigned integer */
+ __itt_e_float, /* 4-byte floating */
+ __itt_e_double, /* 8-byte floating */
+ __itt_e_last = __itt_e_double /* same value as __itt_e_double */
+} __itt_av_data_type;
+
+/**
+ * @brief Save an array data to a file.
+ * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in the linear memory.
+ * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C).
+ *
+ * When INTEL_NO_ITTNOTIFY_API is defined, the call macros accept the same six
+ * arguments as the functions and evaluate to (int)0, matching the other
+ * int-returning stubs in this header (e.g. __itt_event_start).
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_av_save __itt_av_saveW
+# define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+# define __itt_av_save __itt_av_saveA
+# define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API: calls evaluate to (int)0, _ptr macros are 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined here, each as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing, the _ptr macro to 0 */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined here, as 0 */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+/**
+ * @brief Module load info
+ * This API is used to report necessary information in case of module relocation
+ * @param[in] start_addr - relocated module start address
+ * @param[in] end_addr - relocated module end address
+ * @param[in] path - file system path to the module
+ * @note On Windows a char (A) and a wchar_t (W) variant are declared;
+ * __itt_module_load resolves to the W variant when UNICODE or _UNICODE is
+ * defined and to the A variant otherwise.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path);
+void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_module_load __itt_module_loadW
+# define __itt_module_load_ptr __itt_module_loadW_ptr
+#else /* UNICODE */
+# define __itt_module_load __itt_module_loadA
+# define __itt_module_load_ptr __itt_module_loadA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default calls are routed through ITTNOTIFY_VOID() and
+ * the *_ptr names through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * three-argument call expands to nothing; with INTEL_NO_MACRO_BODY only the
+ * *_ptr names are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path))
+ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA)
+#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA)
+#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW)
+#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load ITTNOTIFY_VOID(module_load)
+#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA(start_addr, end_addr, path)
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW(start_addr, end_addr, path)
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load(start_addr, end_addr, path)
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_H_ */
+
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+
+#ifndef _ITTNOTIFY_PRIVATE_
+#define _ITTNOTIFY_PRIVATE_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros for the two *_overlapped_ex entry points: by default the
+ * calls are routed through ITTNOTIFY_VOID_D5() / ITTNOTIFY_VOID_D3() with the
+ * domain as the first forwarded argument, and the *_ptr names through
+ * ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the calls expand to nothing;
+ * with INTEL_NO_MACRO_BODY only *_ptr names are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid))
+#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b)
+#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex)
+#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z)
+#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ex_ptr 0
+#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid)
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_overlapped_ex_ptr 0
+/* NOTE(review): the next name lacks the _ex suffix and has no counterpart in
+ * the branches above; it looks like a leftover belonging to the non-_ex API.
+ * Kept unchanged - confirm whether it is still needed here. */
+#define __itt_task_end_overlapped_ptr 0
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup makrs_internal Marks
+ * @ingroup internal
+ * Marks group
+ * @warning Internal API:
+ * - It is not shipped to outside of Intel
+ * - It is delivered to internal Intel teams using e-mail or SVN access only
+ * @{
+ */
+/**
+ * @brief User mark type: an integer handle returned by the
+ * __itt_mark_create family and passed to the __itt_mark, __itt_mark_global,
+ * __itt_mark_off and __itt_mark_global_off calls.
+ */
+typedef int __itt_mark_type;
+
+/**
+ * @brief Creates a user mark type with the specified name using char or Unicode string.
+ * @param[in] name - name of mark to create
+ * @return Returns a handle to the mark type
+ * @note On Windows a char (A) and a wchar_t (W) variant are declared;
+ * __itt_mark_create resolves to the W variant when UNICODE or _UNICODE is
+ * defined and to the A variant otherwise.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_mark_type ITTAPI __itt_mark_createA(const char *name);
+__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_create __itt_mark_createW
+# define __itt_mark_create_ptr __itt_mark_createW_ptr
+#else /* UNICODE */
+# define __itt_mark_create __itt_mark_createA
+# define __itt_mark_create_ptr __itt_mark_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_mark_type ITTAPI __itt_mark_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default calls are routed through ITTNOTIFY_DATA() and
+ * the *_ptr names through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to the constant (__itt_mark_type)0; with INTEL_NO_MACRO_BODY
+ * only the *_ptr names are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA)
+#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
+#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW)
+#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create ITTNOTIFY_DATA(mark_create)
+#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA(name) (__itt_mark_type)0
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW(name) (__itt_mark_type)0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create(name) (__itt_mark_type)0
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates a "discrete" user mark of the specified type with an optional parameter, using a char or Unicode string.
+ *
+ * - The mark of "discrete" type is placed into the collection results in case of success. It appears in overtime view(s) as a special tick sign.
+ * - The call is "synchronous" - the function returns after the mark is actually added to the results.
+ * - This function is useful, for example, to mark different phases of an application
+ * (the beginning of the next mark automatically means the end of the current region).
+ * - Can be used together with "continuous" marks (see below) in the same collection session
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @param[in] parameter - string parameter of mark
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark __itt_markW
+# define __itt_mark_ptr __itt_markW_ptr
+#else /* UNICODE */
+# define __itt_mark __itt_markA
+# define __itt_mark_ptr __itt_markA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default calls are routed through ITTNOTIFY_DATA() and
+ * the *_ptr names through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to the constant (int)0; with INTEL_NO_MACRO_BODY only the
+ * *_ptr names are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA ITTNOTIFY_DATA(markA)
+#define __itt_markA_ptr ITTNOTIFY_NAME(markA)
+#define __itt_markW ITTNOTIFY_DATA(markW)
+#define __itt_markW_ptr ITTNOTIFY_NAME(markW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark ITTNOTIFY_DATA(mark)
+#define __itt_mark_ptr ITTNOTIFY_NAME(mark)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA(mt, parameter) (int)0
+#define __itt_markA_ptr 0
+#define __itt_markW(mt, parameter) (int)0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark(mt, parameter) (int)0
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA_ptr 0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create a "discrete" user event type (mark) for the process
+ * rather than for one thread
+ * @param[in] mt - mark type handle (__itt_mark_type)
+ * @param[in] parameter - string parameter of mark
+ * @see int __itt_mark(__itt_mark_type mt, const char* parameter);
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_global __itt_mark_globalW
+# define __itt_mark_global_ptr __itt_mark_globalW_ptr
+#else /* UNICODE */
+# define __itt_mark_global __itt_mark_globalA
+# define __itt_mark_global_ptr __itt_mark_globalA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default calls are routed through ITTNOTIFY_DATA() and
+ * the *_ptr names through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to the constant (int)0; with INTEL_NO_MACRO_BODY only the
+ * *_ptr names are defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA)
+#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA)
+#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW)
+#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global ITTNOTIFY_DATA(mark_global)
+#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA(mt, parameter) (int)0
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW(mt, parameter) (int)0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global(mt, parameter) (int)0
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates an "end" point for a "continuous" mark with the specified name.
+ *
+ * - Returns zero value in case of success, non-zero value otherwise.
+ * Also returns a non-zero value when the preceding "begin" point for the
+ * mark with the same name failed to be created or was not created.
+ * - The mark of "continuous" type is placed into the collection results in
+ * case of success. It appears in overtime view(s) as a special tick
+ * sign (different from a "discrete" mark) together with a line from the
+ * corresponding "begin" mark to the "end" mark.
+ * @note Continuous marks can overlap and be nested inside each other.
+ * A discrete mark can be nested inside a marked region
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+int ITTAPI __itt_mark_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default the call is routed through ITTNOTIFY_DATA()
+ * and the _ptr name through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to the constant (int)0; with INTEL_NO_MACRO_BODY only the
+ * _ptr name is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt))
+#define __itt_mark_off ITTNOTIFY_DATA(mark_off)
+#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_off(mt) (int)0
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create an "end" point for a mark of the process
+ * @param[in] mt - mark type handle (__itt_mark_type)
+ * @see int __itt_mark_off(__itt_mark_type mt);
+ */
+int ITTAPI __itt_mark_global_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default the call is routed through ITTNOTIFY_DATA()
+ * and the _ptr name through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to the constant (int)0; with INTEL_NO_MACRO_BODY only the
+ * _ptr name is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt))
+#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off)
+#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_global_off(mt) (int)0
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} marks group */
+
+/**
+ * @defgroup counters_internal Counters
+ * @ingroup internal
+ * Counters group
+ * @{
+ */
+
+
+/**
+ * @defgroup stitch Stack Stitching
+ * @ingroup internal
+ * Stack Stitching group
+ * @{
+ */
+/**
+ * @brief Opaque handle identifying a stitch point. It is returned by
+ * __itt_stack_caller_create() and passed to __itt_stack_caller_destroy(),
+ * __itt_stack_callee_enter() and __itt_stack_callee_leave().
+ */
+typedef struct ___itt_caller *__itt_caller;
+
+/**
+ * @brief Create the stitch point, i.e. a point in the call stack where other stacks should be stitched to.
+ * The function returns a unique identifier which is used to match the cut points with corresponding stitch points.
+ * @return Identifier of the newly created stitch point
+ */
+__itt_caller ITTAPI __itt_stack_caller_create(void);
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default the call is routed through ITTNOTIFY_DATA()
+ * and the _ptr name through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to the constant (__itt_caller)0; with INTEL_NO_MACRO_BODY only
+ * the _ptr name is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
+#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create)
+#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_create() (__itt_caller)0
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the information about the stitch point identified by the pointer previously returned by __itt_stack_caller_create()
+ * @param[in] id - stitch point identifier returned by __itt_stack_caller_create()
+ */
+void ITTAPI __itt_stack_caller_destroy(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default the call is routed through ITTNOTIFY_VOID()
+ * and the _ptr name through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only the _ptr name is
+ * defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id))
+#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy)
+#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_destroy(id)
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Sets the cut point. The stack from each event which occurs after this call will be cut
+ * at the same stack level the function was called and stitched to the corresponding stitch point.
+ * @param[in] id - stitch point identifier (__itt_caller); presumably the value
+ * returned by __itt_stack_caller_create() - confirm against the ITT API reference
+ */
+void ITTAPI __itt_stack_callee_enter(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default the call is routed through ITTNOTIFY_VOID()
+ * and the _ptr name through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only the _ptr name is
+ * defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id))
+#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter)
+#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_enter(id)
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief This function eliminates the cut point which was set by the latest __itt_stack_callee_enter().
+ * @param[in] id - stitch point identifier (__itt_caller); presumably the same
+ * value that was passed to __itt_stack_callee_enter() - confirm against the
+ * ITT API reference
+ */
+void ITTAPI __itt_stack_callee_leave(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+/*
+ * Call-site macros: by default the call is routed through ITTNOTIFY_VOID()
+ * and the _ptr name through ITTNOTIFY_NAME(); with INTEL_NO_ITTNOTIFY_API the
+ * call expands to nothing; with INTEL_NO_MACRO_BODY only the _ptr name is
+ * defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id))
+#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave)
+#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_leave(id)
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} stitch group */
+
+/* ***************************************************************************************************************************** */
+
+#include <stdarg.h>
+
+/** @cond exclude_from_documentation */
+/*
+ * Error codes handed to an __itt_error_handler_t callback. The comment that
+ * follows each enumerator lists positional, printf-style arguments for that
+ * code; presumably these are the values delivered through the handler's
+ * va_list - confirm against the implementation.
+ */
+typedef enum __itt_error_code
+{
+ __itt_error_success = 0, /*!< no error */
+ __itt_error_no_module = 1, /*!< module can't be loaded */
+ /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */
+ __itt_error_no_symbol = 2, /*!< symbol not found */
+ /* %1$s -- library name, %2$s -- symbol name. */
+ __itt_error_unknown_group = 3, /*!< unknown group specified */
+ /* %1$s -- env var name, %2$s -- group name. */
+ __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */
+ /* %1$s -- env var name, %2$d -- system error. */
+ __itt_error_env_too_long = 5, /*!< variable value too long */
+ /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */
+ __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */
+ /* %1$s -- function name, %2$d -- errno. */
+} __itt_error_code;
+
+/* Signature of an error callback: receives the error code and a va_list. */
+typedef void (__itt_error_handler_t)(__itt_error_code code, va_list);
+/*
+ * Takes a pointer to an error callback and returns a pointer to one.
+ * NOTE(review): presumably installs the given handler and returns the one
+ * previously installed - confirm against the implementation.
+ */
+__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*);
+
+/* Returns a C string; judging by the name, the API version - TODO confirm. */
+const char* ITTAPI __itt_api_version(void);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+/*
+ * Error-handler and api_version plumbing.
+ *  - Default build: __itt_error_handler is renamed via
+ *    ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) before it is declared,
+ *    __itt_err aliases the extern const int ITTNOTIFY_NAME(err), and
+ *    __itt_api_version is routed through ITTNOTIFY_DATA().
+ *  - INTEL_NO_ITTNOTIFY_API: __itt_api_version() expands to the constant
+ *    (const char*)0 and __itt_api_version_ptr to 0; __itt_error_handler and
+ *    __itt_err are not defined in this branch.
+ *  - INTEL_NO_MACRO_BODY: only __itt_api_version_ptr is defined (as 0).
+ */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler)
+void __itt_error_handler(__itt_error_code code, va_list args);
+extern const int ITTNOTIFY_NAME(err);
+#define __itt_err ITTNOTIFY_NAME(err)
+ITT_STUB(ITTAPI, const char*, api_version, (void))
+#define __itt_api_version ITTNOTIFY_DATA(api_version)
+#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_api_version() (const char*)0
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_PRIVATE_ */
+
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+
+// clang-format on