diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /mozglue/baseprofiler | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'mozglue/baseprofiler')
74 files changed, 35423 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.cpp b/mozglue/baseprofiler/core/EHABIStackWalk.cpp new file mode 100644 index 0000000000..0c2c855c9b --- /dev/null +++ b/mozglue/baseprofiler/core/EHABIStackWalk.cpp @@ -0,0 +1,592 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI, as described in: + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf + * + * This handles only the ARM-defined "personality routines" (chapter + * 9), and don't track the value of FP registers, because profiling + * needs only chain of PC/SP values. + * + * Because the exception handling info may not be accurate for all + * possible places where an async signal could occur (e.g., in a + * prologue or epilogue), this bounds-checks all stack accesses. + * + * This file uses "struct" for structures in the exception tables and + * "class" otherwise. We should avoid violating the C++11 + * standard-layout rules in the former. 
+ */ + +#include "BaseProfiler.h" + +#include "EHABIStackWalk.h" + +#include "BaseProfilerSharedLibraries.h" +#include "platform.h" + +#include "mozilla/Atomics.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/EndianUtils.h" + +#include <algorithm> +#include <elf.h> +#include <stdint.h> +#include <vector> +#include <string> + +#ifndef PT_ARM_EXIDX +# define PT_ARM_EXIDX 0x70000001 +#endif + +namespace mozilla { +namespace baseprofiler { + +struct PRel31 { + uint32_t mBits; + bool topBit() const { return mBits & 0x80000000; } + uint32_t value() const { return mBits & 0x7fffffff; } + int32_t offset() const { return (static_cast<int32_t>(mBits) << 1) >> 1; } + const void* compute() const { + return reinterpret_cast<const char*>(this) + offset(); + } + + private: + PRel31(const PRel31& copied) = delete; + PRel31() = delete; +}; + +struct EHEntry { + PRel31 startPC; + PRel31 exidx; + + private: + EHEntry(const EHEntry& copied) = delete; + EHEntry() = delete; +}; + +class EHState { + // Note that any core register can be used as a "frame pointer" to + // influence the unwinding process, so this must track all of them. 
+ uint32_t mRegs[16]; + + public: + bool unwind(const EHEntry* aEntry, const void* stackBase); + uint32_t& operator[](int i) { return mRegs[i]; } + const uint32_t& operator[](int i) const { return mRegs[i]; } + explicit EHState(const mcontext_t&); +}; + +enum { R_SP = 13, R_LR = 14, R_PC = 15 }; + +class EHTable { + uint32_t mStartPC; + uint32_t mEndPC; + uint32_t mBaseAddress; + const EHEntry* mEntriesBegin; + const EHEntry* mEntriesEnd; + std::string mName; + + public: + EHTable(const void* aELF, size_t aSize, const std::string& aName); + const EHEntry* lookup(uint32_t aPC) const; + bool isValid() const { return mEntriesEnd != mEntriesBegin; } + const std::string& name() const { return mName; } + uint32_t startPC() const { return mStartPC; } + uint32_t endPC() const { return mEndPC; } + uint32_t baseAddress() const { return mBaseAddress; } +}; + +class EHAddrSpace { + std::vector<uint32_t> mStarts; + std::vector<EHTable> mTables; + static Atomic<const EHAddrSpace*> sCurrent; + + public: + explicit EHAddrSpace(const std::vector<EHTable>& aTables); + const EHTable* lookup(uint32_t aPC) const; + static void Update(); + static const EHAddrSpace* Get(); +}; + +void EHABIStackWalkInit() { EHAddrSpace::Update(); } + +size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs, + void** aPCs, const size_t aNumFrames) { + const EHAddrSpace* space = EHAddrSpace::Get(); + EHState state(aContext); + size_t count = 0; + + while (count < aNumFrames) { + uint32_t pc = state[R_PC], sp = state[R_SP]; + aPCs[count] = reinterpret_cast<void*>(pc); + aSPs[count] = reinterpret_cast<void*>(sp); + count++; + + if (!space) break; + // TODO: cache these lookups. Binary-searching libxul is + // expensive (possibly more expensive than doing the actual + // unwind), and even a small cache should help. 
+ const EHTable* table = space->lookup(pc); + if (!table) break; + const EHEntry* entry = table->lookup(pc); + if (!entry) break; + if (!state.unwind(entry, stackBase)) break; + } + + return count; +} + +class EHInterp { + public: + // Note that stackLimit is exclusive and stackBase is inclusive + // (i.e, stackLimit < SP <= stackBase), following the convention + // set by the AAPCS spec. + EHInterp(EHState& aState, const EHEntry* aEntry, uint32_t aStackLimit, + uint32_t aStackBase) + : mState(aState), + mStackLimit(aStackLimit), + mStackBase(aStackBase), + mNextWord(0), + mWordsLeft(0), + mFailed(false) { + const PRel31& exidx = aEntry->exidx; + uint32_t firstWord; + + if (exidx.mBits == 1) { // EXIDX_CANTUNWIND + mFailed = true; + return; + } + if (exidx.topBit()) { + firstWord = exidx.mBits; + } else { + mNextWord = reinterpret_cast<const uint32_t*>(exidx.compute()); + firstWord = *mNextWord++; + } + + switch (firstWord >> 24) { + case 0x80: // short + mWord = firstWord << 8; + mBytesLeft = 3; + break; + case 0x81: + case 0x82: // long; catch descriptor size ignored + mWord = firstWord << 16; + mBytesLeft = 2; + mWordsLeft = (firstWord >> 16) & 0xff; + break; + default: + // unknown personality + mFailed = true; + } + } + + bool unwind(); + + private: + // TODO: GCC has been observed not CSEing repeated reads of + // mState[R_SP] with writes to mFailed between them, suggesting that + // it hasn't determined that they can't alias and is thus missing + // optimization opportunities. So, we may want to flatten EHState + // into this class; this may also make the code simpler. 
+ EHState& mState; + uint32_t mStackLimit; + uint32_t mStackBase; + const uint32_t* mNextWord; + uint32_t mWord; + uint8_t mWordsLeft; + uint8_t mBytesLeft; + bool mFailed; + + enum { + I_ADDSP = 0x00, // 0sxxxxxx (subtract if s) + M_ADDSP = 0x80, + I_POPMASK = 0x80, // 1000iiii iiiiiiii (if any i set) + M_POPMASK = 0xf0, + I_MOVSP = 0x90, // 1001nnnn + M_MOVSP = 0xf0, + I_POPN = 0xa0, // 1010lnnn + M_POPN = 0xf0, + I_FINISH = 0xb0, // 10110000 + I_POPLO = 0xb1, // 10110001 0000iiii (if any i set) + I_ADDSPBIG = 0xb2, // 10110010 uleb128 + I_POPFDX = 0xb3, // 10110011 sssscccc + I_POPFDX8 = 0xb8, // 10111nnn + M_POPFDX8 = 0xf8, + // "Intel Wireless MMX" extensions omitted. + I_POPFDD = 0xc8, // 1100100h sssscccc + M_POPFDD = 0xfe, + I_POPFDD8 = 0xd0, // 11010nnn + M_POPFDD8 = 0xf8 + }; + + uint8_t next() { + if (mBytesLeft == 0) { + if (mWordsLeft == 0) { + return I_FINISH; + } + mWordsLeft--; + mWord = *mNextWord++; + mBytesLeft = 4; + } + mBytesLeft--; + mWord = (mWord << 8) | (mWord >> 24); // rotate + return mWord; + } + + uint32_t& vSP() { return mState[R_SP]; } + uint32_t* ptrSP() { return reinterpret_cast<uint32_t*>(vSP()); } + + void checkStackBase() { + if (vSP() > mStackBase) mFailed = true; + } + void checkStackLimit() { + if (vSP() <= mStackLimit) mFailed = true; + } + void checkStackAlign() { + if ((vSP() & 3) != 0) mFailed = true; + } + void checkStack() { + checkStackBase(); + checkStackLimit(); + checkStackAlign(); + } + + void popRange(uint8_t first, uint8_t last, uint16_t mask) { + bool hasSP = false; + uint32_t tmpSP; + if (mask == 0) mFailed = true; + for (uint8_t r = first; r <= last; ++r) { + if (mask & 1) { + if (r == R_SP) { + hasSP = true; + tmpSP = *ptrSP(); + } else + mState[r] = *ptrSP(); + vSP() += 4; + checkStackBase(); + if (mFailed) return; + } + mask >>= 1; + } + if (hasSP) { + vSP() = tmpSP; + checkStack(); + } + } +}; + +bool EHState::unwind(const EHEntry* aEntry, const void* stackBasePtr) { + // The unwinding program cannot set 
SP to less than the initial value. + uint32_t stackLimit = mRegs[R_SP] - 4; + uint32_t stackBase = reinterpret_cast<uint32_t>(stackBasePtr); + EHInterp interp(*this, aEntry, stackLimit, stackBase); + return interp.unwind(); +} + +bool EHInterp::unwind() { + mState[R_PC] = 0; + checkStack(); + while (!mFailed) { + uint8_t insn = next(); +#if DEBUG_EHABI_UNWIND + LOG("unwind insn = %02x", (unsigned)insn); +#endif + // Try to put the common cases first. + + // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4 + // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4 + if ((insn & M_ADDSP) == I_ADDSP) { + uint32_t offset = ((insn & 0x3f) << 2) + 4; + if (insn & 0x40) { + vSP() -= offset; + checkStackLimit(); + } else { + vSP() += offset; + checkStackBase(); + } + continue; + } + + // 10100nnn: Pop r4-r[4+nnn] + // 10101nnn: Pop r4-r[4+nnn], r14 + if ((insn & M_POPN) == I_POPN) { + uint8_t n = (insn & 0x07) + 1; + bool lr = insn & 0x08; + uint32_t* ptr = ptrSP(); + vSP() += (n + (lr ? 1 : 0)) * 4; + checkStackBase(); + for (uint8_t r = 4; r < 4 + n; ++r) mState[r] = *ptr++; + if (lr) mState[R_LR] = *ptr++; + continue; + } + + // 1011000: Finish + if (insn == I_FINISH) { + if (mState[R_PC] == 0) { + mState[R_PC] = mState[R_LR]; + // Non-standard change (bug 916106): Prevent the caller from + // re-using LR. Since the caller is by definition not a leaf + // routine, it will have to restore LR from somewhere to + // return to its own caller, so we can safely zero it here. + // This makes a difference only if an error in unwinding + // (e.g., caused by starting from within a prologue/epilogue) + // causes us to load a pointer to a leaf routine as LR; if we + // don't do something, we'll go into an infinite loop of + // "returning" to that same function. 
+ mState[R_LR] = 0; + } + return true; + } + + // 1001nnnn: Set vsp = r[nnnn] + if ((insn & M_MOVSP) == I_MOVSP) { + vSP() = mState[insn & 0x0f]; + checkStack(); + continue; + } + + // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD) + // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD) + if ((insn & M_POPFDD) == I_POPFDD) { + uint8_t n = (next() & 0x0f) + 1; + // Note: if the 16+ssss+cccc > 31, the encoding is reserved. + // As the space is currently unused, we don't try to check. + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD) + if ((insn & M_POPFDD8) == I_POPFDD8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2) + if (insn == I_ADDSPBIG) { + uint32_t acc = 0; + uint8_t shift = 0; + uint8_t byte; + do { + if (shift >= 32) return false; + byte = next(); + acc |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + uint32_t offset = 0x204 + (acc << 2); + // The calculations above could have overflowed. 
+ // But the one we care about is this: + if (vSP() + offset < vSP()) mFailed = true; + vSP() += offset; + // ...so that this is the only other check needed: + checkStackBase(); + continue; + } + + // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4} + if ((insn & M_POPMASK) == I_POPMASK) { + popRange(4, 15, ((insn & 0x0f) << 8) | next()); + continue; + } + + // 1011001 0000iiii (i not all 0): Pop under mask {r3-r0} + if (insn == I_POPLO) { + popRange(0, 3, next() & 0x0f); + continue; + } + + // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX) + if (insn == I_POPFDX) { + uint8_t n = (next() & 0x0f) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX) + if ((insn & M_POPFDX8) == I_POPFDX8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // unhandled instruction +#ifdef DEBUG_EHABI_UNWIND + LOG("Unhandled EHABI instruction 0x%02x", insn); +#endif + mFailed = true; + } + return false; +} + +bool operator<(const EHTable& lhs, const EHTable& rhs) { + return lhs.startPC() < rhs.startPC(); +} + +// Async signal unsafe. 
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables) + : mTables(aTables) { + std::sort(mTables.begin(), mTables.end()); + DebugOnly<uint32_t> lastEnd = 0; + for (std::vector<EHTable>::iterator i = mTables.begin(); i != mTables.end(); + ++i) { + MOZ_ASSERT(i->startPC() >= lastEnd); + mStarts.push_back(i->startPC()); + lastEnd = i->endPC(); + } +} + +const EHTable* EHAddrSpace::lookup(uint32_t aPC) const { + ptrdiff_t i = (std::upper_bound(mStarts.begin(), mStarts.end(), aPC) - + mStarts.begin()) - + 1; + + if (i < 0 || aPC >= mTables[i].endPC()) return 0; + return &mTables[i]; +} + +const EHEntry* EHTable::lookup(uint32_t aPC) const { + MOZ_ASSERT(aPC >= mStartPC); + if (aPC >= mEndPC) return nullptr; + + const EHEntry* begin = mEntriesBegin; + const EHEntry* end = mEntriesEnd; + MOZ_ASSERT(begin < end); + if (aPC < reinterpret_cast<uint32_t>(begin->startPC.compute())) + return nullptr; + + while (end - begin > 1) { +#ifdef EHABI_UNWIND_MORE_ASSERTS + if ((end - 1)->startPC.compute() < begin->startPC.compute()) { + MOZ_CRASH("unsorted exidx"); + } +#endif + const EHEntry* mid = begin + (end - begin) / 2; + if (aPC < reinterpret_cast<uint32_t>(mid->startPC.compute())) + end = mid; + else + begin = mid; + } + return begin; +} + +#if MOZ_LITTLE_ENDIAN() +static const unsigned char hostEndian = ELFDATA2LSB; +#elif MOZ_BIG_ENDIAN() +static const unsigned char hostEndian = ELFDATA2MSB; +#else +# error "No endian?" +#endif + +// Async signal unsafe: std::vector::reserve, std::string copy ctor. 
+EHTable::EHTable(const void* aELF, size_t aSize, const std::string& aName) + : mStartPC(~0), // largest uint32_t + mEndPC(0), + mEntriesBegin(nullptr), + mEntriesEnd(nullptr), + mName(aName) { + const uint32_t fileHeaderAddr = reinterpret_cast<uint32_t>(aELF); + + if (aSize < sizeof(Elf32_Ehdr)) return; + + const Elf32_Ehdr& file = *(reinterpret_cast<Elf32_Ehdr*>(fileHeaderAddr)); + if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 || + file.e_ident[EI_CLASS] != ELFCLASS32 || + file.e_ident[EI_DATA] != hostEndian || + file.e_ident[EI_VERSION] != EV_CURRENT || file.e_machine != EM_ARM || + file.e_version != EV_CURRENT) + // e_flags? + return; + + MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize); + const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0; + for (unsigned i = 0; i < file.e_phnum; ++i) { + const Elf32_Phdr& phdr = *(reinterpret_cast<Elf32_Phdr*>( + fileHeaderAddr + file.e_phoff + i * file.e_phentsize)); + if (phdr.p_type == PT_ARM_EXIDX) { + exidxHdr = &phdr; + } else if (phdr.p_type == PT_LOAD) { + if (phdr.p_offset == 0) { + zeroHdr = &phdr; + } + if (phdr.p_flags & PF_X) { + mStartPC = std::min(mStartPC, phdr.p_vaddr); + mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz); + } + } + } + if (!exidxHdr) return; + if (!zeroHdr) return; + mBaseAddress = fileHeaderAddr - zeroHdr->p_vaddr; + mStartPC += mBaseAddress; + mEndPC += mBaseAddress; + mEntriesBegin = + reinterpret_cast<const EHEntry*>(mBaseAddress + exidxHdr->p_vaddr); + mEntriesEnd = reinterpret_cast<const EHEntry*>( + mBaseAddress + exidxHdr->p_vaddr + exidxHdr->p_memsz); +} + +Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr); + +// Async signal safe; can fail if Update() hasn't returned yet. +const EHAddrSpace* EHAddrSpace::Get() { return sCurrent; } + +// Collect unwinding information from loaded objects. Calls after the +// first have no effect. Async signal unsafe. 
+void EHAddrSpace::Update() { + const EHAddrSpace* space = sCurrent; + if (space) return; + + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + std::vector<EHTable> tables; + + for (size_t i = 0; i < info.GetSize(); ++i) { + const SharedLibrary& lib = info.GetEntry(i); + // FIXME: This isn't correct if the start address isn't p_offset 0, because + // the start address will not point at the file header. But this is worked + // around by magic number checks in the EHTable constructor. + EHTable tab(reinterpret_cast<const void*>(lib.GetStart()), + lib.GetEnd() - lib.GetStart(), lib.GetDebugPath()); + if (tab.isValid()) tables.push_back(tab); + } + space = new EHAddrSpace(tables); + + if (!sCurrent.compareExchange(nullptr, space)) { + delete space; + space = sCurrent; + } +} + +EHState::EHState(const mcontext_t& context) { +#ifdef linux + mRegs[0] = context.arm_r0; + mRegs[1] = context.arm_r1; + mRegs[2] = context.arm_r2; + mRegs[3] = context.arm_r3; + mRegs[4] = context.arm_r4; + mRegs[5] = context.arm_r5; + mRegs[6] = context.arm_r6; + mRegs[7] = context.arm_r7; + mRegs[8] = context.arm_r8; + mRegs[9] = context.arm_r9; + mRegs[10] = context.arm_r10; + mRegs[11] = context.arm_fp; + mRegs[12] = context.arm_ip; + mRegs[13] = context.arm_sp; + mRegs[14] = context.arm_lr; + mRegs[15] = context.arm_pc; +#else +# error "Unhandled OS for ARM EHABI unwinding" +#endif +} + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.h b/mozglue/baseprofiler/core/EHABIStackWalk.h new file mode 100644 index 0000000000..d5f4edc0d7 --- /dev/null +++ b/mozglue/baseprofiler/core/EHABIStackWalk.h @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI; see the comment at the top of + * the .cpp file for details. + */ + +#ifndef mozilla_EHABIStackWalk_h__ +#define mozilla_EHABIStackWalk_h__ + +#include <stddef.h> +#include <ucontext.h> + +namespace mozilla { +namespace baseprofiler { + +void EHABIStackWalkInit(); + +size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs, + void** aPCs, size_t aNumFrames); + +} // namespace baseprofiler +} // namespace mozilla + +#endif diff --git a/mozglue/baseprofiler/core/PageInformation.cpp b/mozglue/baseprofiler/core/PageInformation.cpp new file mode 100644 index 0000000000..7ce47e86d7 --- /dev/null +++ b/mozglue/baseprofiler/core/PageInformation.cpp @@ -0,0 +1,50 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "PageInformation.h" + +#include "BaseProfiler.h" + +#include "mozilla/BaseProfileJSONWriter.h" + +namespace mozilla { +namespace baseprofiler { + +PageInformation::PageInformation(uint64_t aBrowsingContextID, + uint64_t aInnerWindowID, + const std::string& aUrl, + uint64_t aEmbedderInnerWindowID) + : mBrowsingContextID(aBrowsingContextID), + mInnerWindowID(aInnerWindowID), + mUrl(aUrl), + mEmbedderInnerWindowID(aEmbedderInnerWindowID), + mRefCnt(0) {} + +bool PageInformation::Equals(PageInformation* aOtherPageInfo) const { + // It's enough to check inner window IDs because they are unique for each + // page. Therefore, we don't have to check browsing context ID or url. 
+ return InnerWindowID() == aOtherPageInfo->InnerWindowID(); +} + +void PageInformation::StreamJSON(SpliceableJSONWriter& aWriter) const { + aWriter.StartObjectElement(); + // Here, we are converting uint64_t to double. Both Browsing Context and Inner + // Window IDs are creating using `nsContentUtils::GenerateProcessSpecificId`, + // which is specifically designed to only use 53 of the 64 bits to be lossless + // when passed into and out of JS as a double. + aWriter.DoubleProperty("browsingContextID", BrowsingContextID()); + aWriter.DoubleProperty("innerWindowID", InnerWindowID()); + aWriter.StringProperty("url", Url()); + aWriter.DoubleProperty("embedderInnerWindowID", EmbedderInnerWindowID()); + aWriter.EndObject(); +} + +size_t PageInformation::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this); +} + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/PageInformation.h b/mozglue/baseprofiler/core/PageInformation.h new file mode 100644 index 0000000000..158b172bdf --- /dev/null +++ b/mozglue/baseprofiler/core/PageInformation.h @@ -0,0 +1,77 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PageInformation_h +#define PageInformation_h + +#include "mozilla/Atomics.h" +#include "mozilla/Maybe.h" +#include "mozilla/MemoryReporting.h" + +#include <string> + +namespace mozilla { +namespace baseprofiler { + +class SpliceableJSONWriter; + +// This class contains information that's relevant to a single page only +// while the page information is important and registered with the profiler, +// but regardless of whether the profiler is running. All accesses to it are +// protected by the profiler state lock. 
+// When the page gets unregistered, we keep the profiler buffer position +// to determine if we are still using this page. If not, we unregister +// it in the next page registration. +class PageInformation final { + public: + PageInformation(uint64_t aBrowsingContextID, uint64_t aInnerWindowID, + const std::string& aUrl, uint64_t aEmbedderInnerWindowID); + + // Using hand-rolled ref-counting, because RefCounted.h macros don't produce + // the same code between mozglue and libxul, see bug 1536656. + MFBT_API void AddRef() const { ++mRefCnt; } + MFBT_API void Release() const { + MOZ_ASSERT(int32_t(mRefCnt) > 0); + if (--mRefCnt == 0) { // Only the release of the last reference destroys us. + delete this; + } + } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const; + bool Equals(PageInformation* aOtherPageInfo) const; + void StreamJSON(SpliceableJSONWriter& aWriter) const; + + uint64_t InnerWindowID() const { return mInnerWindowID; } + uint64_t BrowsingContextID() const { return mBrowsingContextID; } + const std::string& Url() const { return mUrl; } + uint64_t EmbedderInnerWindowID() const { return mEmbedderInnerWindowID; } + + Maybe<uint64_t> BufferPositionWhenUnregistered() const { + return mBufferPositionWhenUnregistered; + } + + void NotifyUnregistered(uint64_t aBufferPosition) { + mBufferPositionWhenUnregistered = Some(aBufferPosition); + } + + private: + const uint64_t mBrowsingContextID; + const uint64_t mInnerWindowID; + const std::string mUrl; + const uint64_t mEmbedderInnerWindowID; + + // Holds the buffer position when page is unregistered. + // It's used to determine if we still use this page in the profiler or + // not. 
+ Maybe<uint64_t> mBufferPositionWhenUnregistered; + + mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt; +}; + +} // namespace baseprofiler +} // namespace mozilla + +#endif // PageInformation_h diff --git a/mozglue/baseprofiler/core/PlatformMacros.h b/mozglue/baseprofiler/core/PlatformMacros.h new file mode 100644 index 0000000000..c72e94c128 --- /dev/null +++ b/mozglue/baseprofiler/core/PlatformMacros.h @@ -0,0 +1,130 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PLATFORM_MACROS_H +#define PLATFORM_MACROS_H + +// Define platform selection macros in a consistent way. Don't add anything +// else to this file, so it can remain freestanding. The primary factorisation +// is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined +// too, since they are sometimes convenient. +// +// Note: "GP" is short for "Gecko Profiler". + +#undef GP_PLAT_x86_android +#undef GP_PLAT_amd64_android +#undef GP_PLAT_arm_android +#undef GP_PLAT_arm64_android +#undef GP_PLAT_x86_linux +#undef GP_PLAT_amd64_linux +#undef GP_PLAT_arm_linux +#undef GP_PLAT_mips64_linux +#undef GP_PLAT_amd64_darwin +#undef GP_PLAT_arm64_darwin +#undef GP_PLAT_x86_windows +#undef GP_PLAT_amd64_windows +#undef GP_PLAT_arm64_windows + +#undef GP_ARCH_x86 +#undef GP_ARCH_amd64 +#undef GP_ARCH_arm +#undef GP_ARCH_arm64 +#undef GP_ARCH_mips64 + +#undef GP_OS_android +#undef GP_OS_linux +#undef GP_OS_darwin +#undef GP_OS_windows + +// We test __ANDROID__ before __linux__ because __linux__ is defined on both +// Android and Linux, whereas GP_OS_android is not defined on vanilla Linux. 
+ +#if defined(__ANDROID__) && defined(__i386__) +# define GP_PLAT_x86_android 1 +# define GP_ARCH_x86 1 +# define GP_OS_android 1 + +#elif defined(__ANDROID__) && defined(__x86_64__) +# define GP_PLAT_amd64_android 1 +# define GP_ARCH_amd64 1 +# define GP_OS_android 1 + +#elif defined(__ANDROID__) && defined(__arm__) +# define GP_PLAT_arm_android 1 +# define GP_ARCH_arm 1 +# define GP_OS_android 1 + +#elif defined(__ANDROID__) && defined(__aarch64__) +# define GP_PLAT_arm64_android 1 +# define GP_ARCH_arm64 1 +# define GP_OS_android 1 + +#elif defined(__linux__) && defined(__i386__) +# define GP_PLAT_x86_linux 1 +# define GP_ARCH_x86 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__x86_64__) +# define GP_PLAT_amd64_linux 1 +# define GP_ARCH_amd64 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__arm__) +# define GP_PLAT_arm_linux 1 +# define GP_ARCH_arm 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__aarch64__) +# define GP_PLAT_arm64_linux 1 +# define GP_ARCH_arm64 1 +# define GP_OS_linux 1 + +#elif defined(__linux__) && defined(__mips64) +# define GP_PLAT_mips64_linux 1 +# define GP_ARCH_mips64 1 +# define GP_OS_linux 1 + +#elif defined(__APPLE__) && defined(__aarch64__) +# define GP_PLAT_arm64_darwin 1 +# define GP_ARCH_arm64 1 +# define GP_OS_darwin 1 + +#elif defined(__APPLE__) && defined(__x86_64__) +# define GP_PLAT_amd64_darwin 1 +# define GP_ARCH_amd64 1 +# define GP_OS_darwin 1 + +#elif defined(__FreeBSD__) && defined(__x86_64__) +# define GP_PLAT_amd64_freebsd 1 +# define GP_ARCH_amd64 1 +# define GP_OS_freebsd 1 + +#elif defined(__FreeBSD__) && defined(__aarch64__) +# define GP_PLAT_arm64_freebsd 1 +# define GP_ARCH_arm64 1 +# define GP_OS_freebsd 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \ + (defined(_M_IX86) || defined(__i386__)) +# define GP_PLAT_x86_windows 1 +# define GP_ARCH_x86 1 +# define GP_OS_windows 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \ + 
(defined(_M_X64) || defined(__x86_64__)) +# define GP_PLAT_amd64_windows 1 +# define GP_ARCH_amd64 1 +# define GP_OS_windows 1 + +#elif defined(_MSC_VER) && defined(_M_ARM64) +# define GP_PLAT_arm64_windows 1 +# define GP_ARCH_arm64 1 +# define GP_OS_windows 1 + +#else +# error "Unsupported platform" +#endif + +#endif /* ndef PLATFORM_MACROS_H */ diff --git a/mozglue/baseprofiler/core/ProfileBuffer.cpp b/mozglue/baseprofiler/core/ProfileBuffer.cpp new file mode 100644 index 0000000000..f39244ee91 --- /dev/null +++ b/mozglue/baseprofiler/core/ProfileBuffer.cpp @@ -0,0 +1,210 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfileBuffer.h" + +#include "mozilla/MathAlgorithms.h" + +#include "BaseProfiler.h" + +namespace mozilla { +namespace baseprofiler { + +ProfileBuffer::ProfileBuffer(ProfileChunkedBuffer& aBuffer) + : mEntries(aBuffer) { + // Assume the given buffer is in-session. 
+ MOZ_ASSERT(mEntries.IsInSession()); +} + +/* static */ +ProfileBufferBlockIndex ProfileBuffer::AddEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, + const ProfileBufferEntry& aEntry) { + switch (aEntry.GetKind()) { +#define SWITCH_KIND(KIND, TYPE, SIZE) \ + case ProfileBufferEntry::Kind::KIND: { \ + return aProfileChunkedBuffer.PutFrom(&aEntry, 1 + (SIZE)); \ + break; \ + } + + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(SWITCH_KIND) + +#undef SWITCH_KIND + default: + MOZ_ASSERT(false, "Unhandled baseprofiler::ProfilerBuffer entry KIND"); + return ProfileBufferBlockIndex{}; + } +} + +// Called from signal, call only reentrant functions +uint64_t ProfileBuffer::AddEntry(const ProfileBufferEntry& aEntry) { + return AddEntry(mEntries, aEntry).ConvertToProfileBufferIndex(); +} + +/* static */ +ProfileBufferBlockIndex ProfileBuffer::AddThreadIdEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, int aThreadId) { + return AddEntry(aProfileChunkedBuffer, + ProfileBufferEntry::ThreadId(aThreadId)); +} + +uint64_t ProfileBuffer::AddThreadIdEntry(int aThreadId) { + return AddThreadIdEntry(mEntries, aThreadId).ConvertToProfileBufferIndex(); +} + +void ProfileBuffer::CollectCodeLocation( + const char* aLabel, const char* aStr, uint32_t aFrameFlags, + uint64_t aInnerWindowID, const Maybe<uint32_t>& aLineNumber, + const Maybe<uint32_t>& aColumnNumber, + const Maybe<ProfilingCategoryPair>& aCategoryPair) { + AddEntry(ProfileBufferEntry::Label(aLabel)); + AddEntry(ProfileBufferEntry::FrameFlags(uint64_t(aFrameFlags))); + + if (aStr) { + // Store the string using one or more DynamicStringFragment entries. + size_t strLen = strlen(aStr) + 1; // +1 for the null terminator + // If larger than the prescribed limit, we will cut the string and end it + // with an ellipsis. 
+ const bool tooBig = strLen > kMaxFrameKeyLength; + if (tooBig) { + strLen = kMaxFrameKeyLength; + } + char chars[ProfileBufferEntry::kNumChars]; + for (size_t j = 0;; j += ProfileBufferEntry::kNumChars) { + // Store up to kNumChars characters in the entry. + size_t len = ProfileBufferEntry::kNumChars; + const bool last = j + len >= strLen; + if (last) { + // Only the last entry may be smaller than kNumChars. + len = strLen - j; + if (tooBig) { + // That last entry is part of a too-big string, replace the end + // characters with an ellipsis "...". + len = std::max(len, size_t(4)); + chars[len - 4] = '.'; + chars[len - 3] = '.'; + chars[len - 2] = '.'; + chars[len - 1] = '\0'; + // Make sure the memcpy will not overwrite our ellipsis! + len -= 4; + } + } + memcpy(chars, &aStr[j], len); + AddEntry(ProfileBufferEntry::DynamicStringFragment(chars)); + if (last) { + break; + } + } + } + + if (aInnerWindowID) { + AddEntry(ProfileBufferEntry::InnerWindowID(aInnerWindowID)); + } + + if (aLineNumber) { + AddEntry(ProfileBufferEntry::LineNumber(*aLineNumber)); + } + + if (aColumnNumber) { + AddEntry(ProfileBufferEntry::ColumnNumber(*aColumnNumber)); + } + + if (aCategoryPair.isSome()) { + AddEntry(ProfileBufferEntry::CategoryPair(int(*aCategoryPair))); + } +} + +size_t ProfileBuffer::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - memory pointed to by the elements within mEntries + return mEntries.SizeOfExcludingThis(aMallocSizeOf); +} + +size_t ProfileBuffer::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); +} + +void ProfileBuffer::CollectOverheadStats(TimeDuration aSamplingTime, + TimeDuration aLocking, + TimeDuration aCleaning, + TimeDuration aCounters, + TimeDuration aThreads) { + double timeUs = aSamplingTime.ToMilliseconds() * 1000.0; + if (mFirstSamplingTimeUs == 0.0) { + 
mFirstSamplingTimeUs = timeUs; + } else { + // Note that we'll have 1 fewer interval than other numbers (because + // we need both ends of an interval to know its duration). The final + // difference should be insignificant over the expected many thousands + // of iterations. + mIntervalsUs.Count(timeUs - mLastSamplingTimeUs); + } + mLastSamplingTimeUs = timeUs; + // Time to take the lock before sampling. + double lockingUs = aLocking.ToMilliseconds() * 1000.0; + // Time to discard expired data. + double cleaningUs = aCleaning.ToMilliseconds() * 1000.0; + // Time to gather all counters. + double countersUs = aCounters.ToMilliseconds() * 1000.0; + // Time to sample all threads. + double threadsUs = aThreads.ToMilliseconds() * 1000.0; + + // Add to our gathered stats. + mOverheadsUs.Count(lockingUs + cleaningUs + countersUs + threadsUs); + mLockingsUs.Count(lockingUs); + mCleaningsUs.Count(cleaningUs); + mCountersUs.Count(countersUs); + mThreadsUs.Count(threadsUs); + + // Record details in buffer. + AddEntry(ProfileBufferEntry::ProfilerOverheadTime(timeUs)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(lockingUs)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(cleaningUs)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(countersUs)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(threadsUs)); +} + +ProfilerBufferInfo ProfileBuffer::GetProfilerBufferInfo() const { + return {BufferRangeStart(), + BufferRangeEnd(), + static_cast<uint32_t>(*mEntries.BufferLength() / + 8), // 8 bytes per entry. + mIntervalsUs, + mOverheadsUs, + mLockingsUs, + mCleaningsUs, + mCountersUs, + mThreadsUs}; +} + +/* ProfileBufferCollector */ + +void ProfileBufferCollector::CollectNativeLeafAddr(void* aAddr) { + mBuf.AddEntry(ProfileBufferEntry::NativeLeafAddr(aAddr)); +} + +void ProfileBufferCollector::CollectProfilingStackFrame( + const ProfilingStackFrame& aFrame) { + // WARNING: this function runs within the profiler's "critical section". 
+ + MOZ_ASSERT(aFrame.isLabelFrame() || + (aFrame.isJsFrame() && !aFrame.isOSRFrame())); + + const char* label = aFrame.label(); + const char* dynamicString = aFrame.dynamicString(); + Maybe<uint32_t> line; + Maybe<uint32_t> column; + + MOZ_ASSERT(aFrame.isLabelFrame()); + + mBuf.CollectCodeLocation(label, dynamicString, aFrame.flags(), + aFrame.realmID(), line, column, + Some(aFrame.categoryPair())); +} + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/ProfileBuffer.h b/mozglue/baseprofiler/core/ProfileBuffer.h new file mode 100644 index 0000000000..b7a0af5e93 --- /dev/null +++ b/mozglue/baseprofiler/core/ProfileBuffer.h @@ -0,0 +1,186 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PROFILE_BUFFER_H +#define MOZ_PROFILE_BUFFER_H + +#include "ProfileBufferEntry.h" + +#include "mozilla/Maybe.h" +#include "mozilla/PowerOfTwo.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileChunkedBuffer.h" + +namespace mozilla { +namespace baseprofiler { + +// Class storing most profiling data in a ProfileChunkedBuffer. +// +// This class is used as a queue of entries which, after construction, never +// allocates. This makes it safe to use in the profiler's "critical section". +class ProfileBuffer final { + public: + // ProfileBuffer constructor + // @param aBuffer The in-session ProfileChunkedBuffer to use as buffer + // manager. + explicit ProfileBuffer(ProfileChunkedBuffer& aBuffer); + + ProfileChunkedBuffer& UnderlyingChunkedBuffer() const { return mEntries; } + + bool IsThreadSafe() const { return mEntries.IsThreadSafe(); } + + // Add |aEntry| to the buffer, ignoring what kind of entry it is. + // Returns the position of the entry. 
+ uint64_t AddEntry(const ProfileBufferEntry& aEntry); + + // Add to the buffer a sample start (ThreadId) entry for aThreadId. + // Returns the position of the entry. + uint64_t AddThreadIdEntry(int aThreadId); + + void CollectCodeLocation(const char* aLabel, const char* aStr, + uint32_t aFrameFlags, uint64_t aInnerWindowID, + const Maybe<uint32_t>& aLineNumber, + const Maybe<uint32_t>& aColumnNumber, + const Maybe<ProfilingCategoryPair>& aCategoryPair); + + // Maximum size of a frameKey string that we'll handle. + static const size_t kMaxFrameKeyLength = 512; + + // Stream JSON for samples in the buffer to aWriter, using the supplied + // UniqueStacks object. + // Only streams samples for the given thread ID and which were taken at or + // after aSinceTime. If ID is 0, ignore the stored thread ID; this should only + // be used when the buffer contains only one sample. + // Return the thread ID of the streamed sample(s), or 0. + int StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId, + double aSinceTime, UniqueStacks& aUniqueStacks) const; + + void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId, + const TimeStamp& aProcessStartTime, + double aSinceTime, + UniqueStacks& aUniqueStacks) const; + void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter, + double aSinceTime) const; + void StreamProfilerOverheadToJSON(SpliceableJSONWriter& aWriter, + const TimeStamp& aProcessStartTime, + double aSinceTime) const; + void StreamCountersToJSON(SpliceableJSONWriter& aWriter, + const TimeStamp& aProcessStartTime, + double aSinceTime) const; + + // Find (via |aLastSample|) the most recent sample for the thread denoted by + // |aThreadId| and clone it, patching in the current time as appropriate. + // Mutate |aLastSample| to point to the newly inserted sample. + // Returns whether duplication was successful. 
+ bool DuplicateLastSample(int aThreadId, const TimeStamp& aProcessStartTime, + Maybe<uint64_t>& aLastSample); + + void DiscardSamplesBeforeTime(double aTime); + + size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const; + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const; + + void CollectOverheadStats(TimeDuration aSamplingTime, TimeDuration aLocking, + TimeDuration aCleaning, TimeDuration aCounters, + TimeDuration aThreads); + + ProfilerBufferInfo GetProfilerBufferInfo() const; + + private: + // Add |aEntry| to the provided ProfileChunkedBuffer. + // `static` because it may be used to add an entry to a `ProfileChunkedBuffer` + // that is not attached to a `ProfileBuffer`. + static ProfileBufferBlockIndex AddEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, + const ProfileBufferEntry& aEntry); + + // Add a sample start (ThreadId) entry for aThreadId to the provided + // ProfileChunkedBuffer. Returns the position of the entry. + // `static` because it may be used to add an entry to a `ProfileChunkedBuffer` + // that is not attached to a `ProfileBuffer`. + static ProfileBufferBlockIndex AddThreadIdEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, int aThreadId); + + // The storage in which this ProfileBuffer stores its entries. + ProfileChunkedBuffer& mEntries; + + public: + // `BufferRangeStart()` and `BufferRangeEnd()` return `uint64_t` values + // corresponding to the first entry and past the last entry stored in + // `mEntries`. + // + // The returned values are not guaranteed to be stable, because other threads + // may also be accessing the buffer concurrently. But they will always + // increase, and can therefore give an indication of how far these values have + // *at least* reached. In particular: + // - Entries whose index is strictly less than `BufferRangeStart()` have been + // discarded by now, so any related data may also be safely discarded.
+ // - It is safe to try and read entries at any index strictly less than + // `BufferRangeEnd()` -- but note that these reads may fail by the time you + // request them, as old entries get overwritten by new ones. + uint64_t BufferRangeStart() const { return mEntries.GetState().mRangeStart; } + uint64_t BufferRangeEnd() const { return mEntries.GetState().mRangeEnd; } + + private: + // Single pre-allocated chunk (to avoid spurious mallocs), used when: + // - Duplicating sleeping stacks (hence scExpectedMaximumStackSize). + // - Adding JIT info. + // - Streaming stacks to JSON. + // Mutable because it's accessed from non-multithreaded const methods. + mutable ProfileBufferChunkManagerSingle mWorkerChunkManager{ + ProfileBufferChunk::Create( + ProfileBufferChunk::SizeofChunkMetadata() + + ProfileBufferChunkManager::scExpectedMaximumStackSize)}; + + // Time from launch (us) when first sampling was recorded. + double mFirstSamplingTimeUs = 0.0; + // Time from launch (us) when last sampling was recorded. + double mLastSamplingTimeUs = 0.0; + // Sampling stats: Interval (us) between successive samplings. + ProfilerStats mIntervalsUs; + // Sampling stats: Total duration (us) of each sampling. (Split detail below.) + ProfilerStats mOverheadsUs; + // Sampling stats: Time (us) to acquire the lock before sampling. + ProfilerStats mLockingsUs; + // Sampling stats: Time (us) to discard expired data. + ProfilerStats mCleaningsUs; + // Sampling stats: Time (us) to collect counter data. + ProfilerStats mCountersUs; + // Sampling stats: Time (us) to sample thread stacks. + ProfilerStats mThreadsUs; +}; + +/** + * Helper type used to implement ProfilerStackCollector. This type is used as + * the collector for MergeStacks by ProfileBuffer. 
It holds a reference to the + * buffer, as well as additional feature flags which are needed to control the + * data collection strategy + */ +class ProfileBufferCollector final : public ProfilerStackCollector { + public: + ProfileBufferCollector(ProfileBuffer& aBuf, uint64_t aSamplePos) + : mBuf(aBuf), mSamplePositionInBuffer(aSamplePos) {} + + Maybe<uint64_t> SamplePositionInBuffer() override { + return Some(mSamplePositionInBuffer); + } + + Maybe<uint64_t> BufferRangeStart() override { + return Some(mBuf.BufferRangeStart()); + } + + virtual void CollectNativeLeafAddr(void* aAddr) override; + virtual void CollectProfilingStackFrame( + const ProfilingStackFrame& aFrame) override; + + private: + ProfileBuffer& mBuf; + uint64_t mSamplePositionInBuffer; +}; + +} // namespace baseprofiler +} // namespace mozilla + +#endif diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.cpp b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp new file mode 100644 index 0000000000..3d3f68b655 --- /dev/null +++ b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp @@ -0,0 +1,1337 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfileBufferEntry.h" + +#include <ostream> +#include <type_traits> + +#include "mozilla/Logging.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/Sprintf.h" +#include "mozilla/StackWalk.h" + +#include "BaseProfiler.h" +#include "mozilla/BaseProfilerMarkers.h" +#include "platform.h" +#include "ProfileBuffer.h" +#include "ProfilerBacktrace.h" + +namespace mozilla { +namespace baseprofiler { + +//////////////////////////////////////////////////////////////////////// +// BEGIN ProfileBufferEntry + +ProfileBufferEntry::ProfileBufferEntry() + : mKind(Kind::INVALID), mStorage{0, 0, 0, 0, 0, 0, 0, 0} {} + +// aString must be a static string. +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, const char* aString) + : mKind(aKind) { + memcpy(mStorage, &aString, sizeof(aString)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, char aChars[kNumChars]) + : mKind(aKind) { + memcpy(mStorage, aChars, kNumChars); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, void* aPtr) : mKind(aKind) { + memcpy(mStorage, &aPtr, sizeof(aPtr)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, double aDouble) + : mKind(aKind) { + memcpy(mStorage, &aDouble, sizeof(aDouble)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt) : mKind(aKind) { + memcpy(mStorage, &aInt, sizeof(aInt)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64) + : mKind(aKind) { + memcpy(mStorage, &aInt64, sizeof(aInt64)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64) + : mKind(aKind) { + memcpy(mStorage, &aUint64, sizeof(aUint64)); +} + +const char* ProfileBufferEntry::GetString() const { + const char* result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +void* ProfileBufferEntry::GetPtr() const { + void* result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +double ProfileBufferEntry::GetDouble() const { + double result; + memcpy(&result, mStorage, sizeof(result)); + 
return result; +} + +int ProfileBufferEntry::GetInt() const { + int result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +int64_t ProfileBufferEntry::GetInt64() const { + int64_t result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +uint64_t ProfileBufferEntry::GetUint64() const { + uint64_t result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +void ProfileBufferEntry::CopyCharsInto(char (&aOutArray)[kNumChars]) const { + memcpy(aOutArray, mStorage, kNumChars); +} + +// END ProfileBufferEntry +//////////////////////////////////////////////////////////////////////// + +// As mentioned in ProfileBufferEntry.h, the JSON format contains many +// arrays whose elements are laid out according to various schemas to help +// de-duplication. This RAII class helps write these arrays by keeping track of +// the last non-null element written and adding the appropriate number of null +// elements when writing new non-null elements. It also automatically opens and +// closes an array element on the given JSON writer. +// +// You grant the AutoArraySchemaWriter exclusive access to the JSONWriter and +// the UniqueJSONStrings objects for the lifetime of AutoArraySchemaWriter. Do +// not access them independently while the AutoArraySchemaWriter is alive. +// If you need to add complex objects, call FreeFormElement(), which will give +// you temporary access to the writer. +// +// Example usage: +// +// // Define the schema of elements in this type of array: [FOO, BAR, BAZ] +// enum Schema : uint32_t { +// FOO = 0, +// BAR = 1, +// BAZ = 2 +// }; +// +// AutoArraySchemaWriter writer(someJsonWriter, someUniqueStrings); +// if (shouldWriteFoo) { +// writer.IntElement(FOO, getFoo()); +// } +// ... etc ... +// +// The elements need to be added in-order. 
+class MOZ_RAII AutoArraySchemaWriter { + public: + explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter) + : mJSONWriter(aWriter), mNextFreeIndex(0) { + mJSONWriter.StartArrayElement(SpliceableJSONWriter::SingleLineStyle); + } + + ~AutoArraySchemaWriter() { mJSONWriter.EndArray(); } + + template <typename T> + void IntElement(uint32_t aIndex, T aValue) { + static_assert(!std::is_same_v<T, uint64_t>, + "Narrowing uint64 -> int64 conversion not allowed"); + FillUpTo(aIndex); + mJSONWriter.IntElement(static_cast<int64_t>(aValue)); + } + + void DoubleElement(uint32_t aIndex, double aValue) { + FillUpTo(aIndex); + mJSONWriter.DoubleElement(aValue); + } + + void BoolElement(uint32_t aIndex, bool aValue) { + FillUpTo(aIndex); + mJSONWriter.BoolElement(aValue); + } + + protected: + SpliceableJSONWriter& Writer() { return mJSONWriter; } + + void FillUpTo(uint32_t aIndex) { + MOZ_ASSERT(aIndex >= mNextFreeIndex); + mJSONWriter.NullElements(aIndex - mNextFreeIndex); + mNextFreeIndex = aIndex + 1; + } + + private: + SpliceableJSONWriter& mJSONWriter; + uint32_t mNextFreeIndex; +}; + +// Same as AutoArraySchemaWriter, but this can also write strings (output as +// indexes into the table of unique strings). 
+class MOZ_RAII AutoArraySchemaWithStringsWriter : public AutoArraySchemaWriter { + public: + AutoArraySchemaWithStringsWriter(SpliceableJSONWriter& aWriter, + UniqueJSONStrings& aStrings) + : AutoArraySchemaWriter(aWriter), mStrings(aStrings) {} + + void StringElement(uint32_t aIndex, const Span<const char>& aValue) { + FillUpTo(aIndex); + mStrings.WriteElement(Writer(), aValue); + } + + private: + UniqueJSONStrings& mStrings; +}; + +UniqueStacks::StackKey UniqueStacks::BeginStack(const FrameKey& aFrame) { + return StackKey(GetOrAddFrameIndex(aFrame)); +} + +UniqueStacks::StackKey UniqueStacks::AppendFrame(const StackKey& aStack, + const FrameKey& aFrame) { + return StackKey(aStack, GetOrAddStackIndex(aStack), + GetOrAddFrameIndex(aFrame)); +} + +bool UniqueStacks::FrameKey::NormalFrameData::operator==( + const NormalFrameData& aOther) const { + return mLocation == aOther.mLocation && + mRelevantForJS == aOther.mRelevantForJS && + mInnerWindowID == aOther.mInnerWindowID && mLine == aOther.mLine && + mColumn == aOther.mColumn && mCategoryPair == aOther.mCategoryPair; +} + +UniqueStacks::UniqueStacks() : mUniqueStrings(MakeUnique<UniqueJSONStrings>()) { + mFrameTableWriter.StartBareList(); + mStackTableWriter.StartBareList(); +} + +uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) { + uint32_t count = mStackToIndexMap.count(); + auto entry = mStackToIndexMap.lookupForAdd(aStack); + if (entry) { + MOZ_ASSERT(entry->value() < count); + return entry->value(); + } + + MOZ_RELEASE_ASSERT(mStackToIndexMap.add(entry, aStack, count)); + StreamStack(aStack); + return count; +} + +uint32_t UniqueStacks::GetOrAddFrameIndex(const FrameKey& aFrame) { + uint32_t count = mFrameToIndexMap.count(); + auto entry = mFrameToIndexMap.lookupForAdd(aFrame); + if (entry) { + MOZ_ASSERT(entry->value() < count); + return entry->value(); + } + + MOZ_RELEASE_ASSERT(mFrameToIndexMap.add(entry, aFrame, count)); + StreamNonJITFrame(aFrame); + return count; +} + +void 
UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) { + mFrameTableWriter.EndBareList(); + aWriter.TakeAndSplice(mFrameTableWriter.TakeChunkedWriteFunc()); +} + +void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) { + mStackTableWriter.EndBareList(); + aWriter.TakeAndSplice(mStackTableWriter.TakeChunkedWriteFunc()); +} + +void UniqueStacks::StreamStack(const StackKey& aStack) { + enum Schema : uint32_t { PREFIX = 0, FRAME = 1 }; + + AutoArraySchemaWriter writer(mStackTableWriter); + if (aStack.mPrefixStackIndex.isSome()) { + writer.IntElement(PREFIX, *aStack.mPrefixStackIndex); + } + writer.IntElement(FRAME, aStack.mFrameIndex); +} + +void UniqueStacks::StreamNonJITFrame(const FrameKey& aFrame) { + using NormalFrameData = FrameKey::NormalFrameData; + + enum Schema : uint32_t { + LOCATION = 0, + RELEVANT_FOR_JS = 1, + INNER_WINDOW_ID = 2, + IMPLEMENTATION = 3, + OPTIMIZATIONS = 4, + LINE = 5, + COLUMN = 6, + CATEGORY = 7, + SUBCATEGORY = 8 + }; + + AutoArraySchemaWithStringsWriter writer(mFrameTableWriter, *mUniqueStrings); + + const NormalFrameData& data = aFrame.mData.as<NormalFrameData>(); + writer.StringElement(LOCATION, data.mLocation); + writer.BoolElement(RELEVANT_FOR_JS, data.mRelevantForJS); + + // It's okay to convert uint64_t to double here because DOM always creates IDs + // that are convertible to double. 
+ writer.DoubleElement(INNER_WINDOW_ID, data.mInnerWindowID); + + if (data.mLine.isSome()) { + writer.IntElement(LINE, *data.mLine); + } + if (data.mColumn.isSome()) { + writer.IntElement(COLUMN, *data.mColumn); + } + if (data.mCategoryPair.isSome()) { + const ProfilingCategoryPairInfo& info = + GetProfilingCategoryPairInfo(*data.mCategoryPair); + writer.IntElement(CATEGORY, uint32_t(info.mCategory)); + writer.IntElement(SUBCATEGORY, info.mSubcategoryIndex); + } +} + +struct CStringWriteFunc : public JSONWriteFunc { + std::string& mBuffer; // The struct must not outlive this buffer + explicit CStringWriteFunc(std::string& aBuffer) : mBuffer(aBuffer) {} + + void Write(const Span<const char>& aStr) override { + mBuffer.append(aStr.data(), aStr.size()); + } +}; + +struct ProfileSample { + uint32_t mStack; + double mTime; + Maybe<double> mResponsiveness; +}; + +static void WriteSample(SpliceableJSONWriter& aWriter, + const ProfileSample& aSample) { + enum Schema : uint32_t { + STACK = 0, + TIME = 1, + EVENT_DELAY = 2, + }; + + AutoArraySchemaWriter writer(aWriter); + + writer.IntElement(STACK, aSample.mStack); + + writer.DoubleElement(TIME, aSample.mTime); + + if (aSample.mResponsiveness.isSome()) { + writer.DoubleElement(EVENT_DELAY, *aSample.mResponsiveness); + } +} + +class EntryGetter { + public: + explicit EntryGetter(ProfileChunkedBuffer::Reader& aReader, + uint64_t aInitialReadPos = 0) + : mBlockIt( + aReader.At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + aInitialReadPos))), + mBlockItEnd(aReader.end()) { + if (!ReadLegacyOrEnd()) { + // Find and read the next non-legacy entry. 
+ Next(); + } + } + + bool Has() const { return mBlockIt != mBlockItEnd; } + + const ProfileBufferEntry& Get() const { + MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Get()`"); + return mEntry; + } + + void Next() { + MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Next()`"); + for (;;) { + ++mBlockIt; + if (ReadLegacyOrEnd()) { + // Either we're at the end, or we could read a legacy entry -> Done. + break; + } + // Otherwise loop around until we hit the end or a legacy entry. + } + } + + ProfileBufferBlockIndex CurBlockIndex() const { + return mBlockIt.CurrentBlockIndex(); + } + + uint64_t CurPos() const { + return CurBlockIndex().ConvertToProfileBufferIndex(); + } + + private: + // Try to read the entry at the current `mBlockIt` position. + // * If we're at the end of the buffer, just return `true`. + // * If there is a "legacy" entry (containing a real `ProfileBufferEntry`), + // read it into `mEntry`, and return `true` as well. + // * Otherwise the entry contains a "modern" type that cannot be read into + // `mEntry`, return `false` (so `EntryGetter` can skip to another entry). + bool ReadLegacyOrEnd() { + if (!Has()) { + return true; + } + // Read the entry "kind", which is always at the start of all entries. + ProfileBufferEntryReader aER = *mBlockIt; + auto type = static_cast<ProfileBufferEntry::Kind>( + aER.ReadObject<ProfileBufferEntry::KindUnderlyingType>()); + MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) < + static_cast<ProfileBufferEntry::KindUnderlyingType>( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + if (type >= ProfileBufferEntry::Kind::LEGACY_LIMIT) { + aER.SetRemainingBytes(0); + return false; + } + // Here, we have a legacy item, we need to read it from the start. + // Because the above `ReadObject` moved the reader, we need to reset it to + // the start of the entry before reading the whole entry.
+ aER = *mBlockIt; + aER.ReadBytes(&mEntry, aER.RemainingBytes()); + return true; + } + + ProfileBufferEntry mEntry; + ProfileChunkedBuffer::BlockIterator mBlockIt; + const ProfileChunkedBuffer::BlockIterator mBlockItEnd; +}; + +// The following grammar shows legal sequences of profile buffer entries. +// The sequences beginning with a ThreadId entry are known as "samples". +// +// ( +// ( /* Samples */ +// ThreadId +// Time +// ( NativeLeafAddr +// | Label FrameFlags? DynamicStringFragment* LineNumber? CategoryPair? +// | JitReturnAddr +// )+ +// Responsiveness? +// ) +// | MarkerData +// | ( /* Counters */ +// CounterId +// Time +// ( +// CounterKey +// Count +// Number? +// )* +// ) +// | CollectionStart +// | CollectionEnd +// | Pause +// | Resume +// | ( ProfilerOverheadTime /* Sampling start timestamp */ +// ProfilerOverheadDuration /* Lock acquisition */ +// ProfilerOverheadDuration /* Expired data cleaning */ +// ProfilerOverheadDuration /* Counters */ +// ProfilerOverheadDuration /* Threads */ +// ) +// )* +// +// The most complicated part is the stack entry sequence that begins with +// Label. Here are some examples. 
+// +// - ProfilingStack frames without a dynamic string: +// +// Label("js::RunScript") +// CategoryPair(ProfilingCategoryPair::JS) +// +// Label("XREMain::XRE_main") +// LineNumber(4660) +// CategoryPair(ProfilingCategoryPair::OTHER) +// +// Label("ElementRestyler::ComputeStyleChangeFor") +// LineNumber(3003) +// CategoryPair(ProfilingCategoryPair::CSS) +// +// - ProfilingStack frames with a dynamic string: +// +// Label("nsObserverService::NotifyObservers") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("domwindo") +// DynamicStringFragment("wopened") +// LineNumber(291) +// CategoryPair(ProfilingCategoryPair::OTHER) +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME)) +// DynamicStringFragment("closeWin") +// DynamicStringFragment("dow (chr") +// DynamicStringFragment("ome://gl") +// DynamicStringFragment("obal/con") +// DynamicStringFragment("tent/glo") +// DynamicStringFragment("balOverl") +// DynamicStringFragment("ay.js:5)") +// DynamicStringFragment("") # this string holds the closing '\0' +// LineNumber(25) +// CategoryPair(ProfilingCategoryPair::JS) +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME)) +// DynamicStringFragment("bound (s") +// DynamicStringFragment("elf-host") +// DynamicStringFragment("ed:914)") +// LineNumber(945) +// CategoryPair(ProfilingCategoryPair::JS) +// +// - A profiling stack frame with an overly long dynamic string: +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("(too lon") +// DynamicStringFragment("g)") +// LineNumber(100) +// CategoryPair(ProfilingCategoryPair::NETWORK) +// +// - A wasm JIT frame: +// +// Label("") +// FrameFlags(uint64_t(0)) +// DynamicStringFragment("wasm-fun") +// DynamicStringFragment("ction[87") +// DynamicStringFragment("36] (blo") +// DynamicStringFragment("b:http:/") +// DynamicStringFragment("/webasse") +// 
DynamicStringFragment("mbly.org") +// DynamicStringFragment("/3dc5759") +// DynamicStringFragment("4-ce58-4") +// DynamicStringFragment("626-975b") +// DynamicStringFragment("-08ad116") +// DynamicStringFragment("30bc1:38") +// DynamicStringFragment("29856)") +// +// - A JS frame in a synchronous sample: +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("u (https") +// DynamicStringFragment("://perf-") +// DynamicStringFragment("html.io/") +// DynamicStringFragment("ac0da204") +// DynamicStringFragment("aaa44d75") +// DynamicStringFragment("a800.bun") +// DynamicStringFragment("dle.js:2") +// DynamicStringFragment("5)") + +// Because this is a format entirely internal to the Profiler, any parsing +// error indicates a bug in the ProfileBuffer writing or the parser itself, +// or possibly flaky hardware. +#define ERROR_AND_CONTINUE(msg) \ + { \ + fprintf(stderr, "ProfileBuffer parse error: %s", msg); \ + MOZ_ASSERT(false, msg); \ + continue; \ + } + +int ProfileBuffer::StreamSamplesToJSON(SpliceableJSONWriter& aWriter, + int aThreadId, double aSinceTime, + UniqueStacks& aUniqueStacks) const { + UniquePtr<char[]> dynStrBuf = MakeUnique<char[]>(kMaxFrameKeyLength); + + return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + int processedThreadId = 0; + + EntryGetter e(*aReader); + + for (;;) { + // This block skips entries until we find the start of the next sample. + // This is useful in three situations. + // + // - The circular buffer overwrites old entries, so when we start parsing + // we might be in the middle of a sample, and we must skip forward to + // the start of the next sample. + // + // - We skip samples that don't have an appropriate ThreadId or Time. + // + // - We skip range Pause, Resume, CollectionStart, Counter and + // CollectionEnd entries between samples. 
+ while (e.Has()) { + if (e.Get().IsThreadId()) { + break; + } + e.Next(); + } + + if (!e.Has()) { + break; + } + + // Due to the skip_to_next_sample block above, if we have an entry here it + // must be a ThreadId entry. + MOZ_ASSERT(e.Get().IsThreadId()); + + int threadId = e.Get().GetInt(); + e.Next(); + + // Ignore samples that are for the wrong thread. + if (threadId != aThreadId && aThreadId != 0) { + continue; + } + + MOZ_ASSERT(aThreadId != 0 || processedThreadId == 0, + "aThreadId==0 should only be used with 1-sample buffer"); + + ProfileSample sample; + + if (e.Has() && e.Get().IsTime()) { + sample.mTime = e.Get().GetDouble(); + e.Next(); + + // Ignore samples that are too old. + if (sample.mTime < aSinceTime) { + continue; + } + } else { + ERROR_AND_CONTINUE("expected a Time entry"); + } + + UniqueStacks::StackKey stack = + aUniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)")); + + int numFrames = 0; + while (e.Has()) { + if (e.Get().IsNativeLeafAddr()) { + numFrames++; + + void* pc = e.Get().GetPtr(); + e.Next(); + + static const uint32_t BUF_SIZE = 256; + char buf[BUF_SIZE]; + + // Bug 753041: We need a double cast here to tell GCC that we don't + // want to sign extend 32-bit addresses starting with 0xFXXXXXX. + unsigned long long pcULL = (unsigned long long)(uintptr_t)pc; + SprintfLiteral(buf, "%#llx", pcULL); + + // If the "MOZ_PROFILER_SYMBOLICATE" env-var is set, we add a local + // symbolication description to the PC address. This is off by + // default, and mainly intended for local development. + static const bool preSymbolicate = []() { + const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE"); + return symbolicate && symbolicate[0] != '\0'; + }(); + if (preSymbolicate) { + MozCodeAddressDetails details; + if (MozDescribeCodeAddress(pc, &details)) { + // Replace \0 terminator with space. + const uint32_t pcLen = strlen(buf); + buf[pcLen] = ' '; + // Add description after space. 
Note: Using a frame number of 0, + // as using `numFrames` wouldn't help here, and would prevent + // combining same function calls that happen at different depths. + // TODO: Remove unsightly "#00: " if too annoying. :-) + MozFormatCodeAddressDetails( + buf + pcLen + 1, BUF_SIZE - (pcLen + 1), 0, pc, &details); + } + } + + stack = aUniqueStacks.AppendFrame(stack, UniqueStacks::FrameKey(buf)); + + } else if (e.Get().IsLabel()) { + numFrames++; + + const char* label = e.Get().GetString(); + e.Next(); + + using FrameFlags = ProfilingStackFrame::Flags; + uint32_t frameFlags = 0; + if (e.Has() && e.Get().IsFrameFlags()) { + frameFlags = uint32_t(e.Get().GetUint64()); + e.Next(); + } + + bool relevantForJS = + frameFlags & uint32_t(FrameFlags::RELEVANT_FOR_JS); + + // Copy potential dynamic string fragments into dynStrBuf, so that + // dynStrBuf will then contain the entire dynamic string. + size_t i = 0; + dynStrBuf[0] = '\0'; + while (e.Has()) { + if (e.Get().IsDynamicStringFragment()) { + char chars[ProfileBufferEntry::kNumChars]; + e.Get().CopyCharsInto(chars); + for (char c : chars) { + if (i < kMaxFrameKeyLength) { + dynStrBuf[i] = c; + i++; + } + } + e.Next(); + } else { + break; + } + } + dynStrBuf[kMaxFrameKeyLength - 1] = '\0'; + bool hasDynamicString = (i != 0); + + std::string frameLabel; + if (label[0] != '\0' && hasDynamicString) { + if (frameFlags & uint32_t(FrameFlags::STRING_TEMPLATE_METHOD)) { + frameLabel += label; + frameLabel += '.'; + frameLabel += dynStrBuf.get(); + } else if (frameFlags & + uint32_t(FrameFlags::STRING_TEMPLATE_GETTER)) { + frameLabel += "get "; + frameLabel += label; + frameLabel += '.'; + frameLabel += dynStrBuf.get(); + } else if (frameFlags & + uint32_t(FrameFlags::STRING_TEMPLATE_SETTER)) { + frameLabel += "set "; + frameLabel += label; + frameLabel += '.'; + frameLabel += dynStrBuf.get(); + } else { + frameLabel += label; + frameLabel += ' '; + frameLabel += dynStrBuf.get(); + } + } else if (hasDynamicString) { + frameLabel 
+= dynStrBuf.get(); + } else { + frameLabel += label; + } + + uint64_t innerWindowID = 0; + if (e.Has() && e.Get().IsInnerWindowID()) { + innerWindowID = uint64_t(e.Get().GetUint64()); + e.Next(); + } + + Maybe<unsigned> line; + if (e.Has() && e.Get().IsLineNumber()) { + line = Some(unsigned(e.Get().GetInt())); + e.Next(); + } + + Maybe<unsigned> column; + if (e.Has() && e.Get().IsColumnNumber()) { + column = Some(unsigned(e.Get().GetInt())); + e.Next(); + } + + Maybe<ProfilingCategoryPair> categoryPair; + if (e.Has() && e.Get().IsCategoryPair()) { + categoryPair = + Some(ProfilingCategoryPair(uint32_t(e.Get().GetInt()))); + e.Next(); + } + + stack = aUniqueStacks.AppendFrame( + stack, UniqueStacks::FrameKey(std::move(frameLabel), + relevantForJS, innerWindowID, line, + column, categoryPair)); + + } else { + break; + } + } + + if (numFrames == 0) { + // It is possible to have empty stacks if native stackwalking is + // disabled. Skip samples with empty stacks. (See Bug 1497985). + // Thus, don't use ERROR_AND_CONTINUE, but just continue. 
+ continue; + } + + sample.mStack = aUniqueStacks.GetOrAddStackIndex(stack); + + if (e.Has() && e.Get().IsResponsiveness()) { + sample.mResponsiveness = Some(e.Get().GetDouble()); + e.Next(); + } + + WriteSample(aWriter, sample); + + processedThreadId = threadId; + } + + return processedThreadId; + }); +} + +void ProfileBuffer::StreamMarkersToJSON(SpliceableJSONWriter& aWriter, + int aThreadId, + const TimeStamp& aProcessStartTime, + double aSinceTime, + UniqueStacks& aUniqueStacks) const { + mEntries.ReadEach([&](ProfileBufferEntryReader& aER) { + auto type = static_cast<ProfileBufferEntry::Kind>( + aER.ReadObject<ProfileBufferEntry::KindUnderlyingType>()); + MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) < + static_cast<ProfileBufferEntry::KindUnderlyingType>( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + bool entryWasFullyRead = false; + + if (type == ProfileBufferEntry::Kind::Marker) { + entryWasFullyRead = ::mozilla::base_profiler_markers_detail:: + DeserializeAfterKindAndStream( + aER, aWriter, aThreadId, + [&](ProfileChunkedBuffer& aChunkedBuffer) { + ProfilerBacktrace backtrace("", &aChunkedBuffer); + backtrace.StreamJSON(aWriter, TimeStamp::ProcessCreation(), + aUniqueStacks); + }); + } + + if (!entryWasFullyRead) { + // Not a marker, or marker for another thread. + // We probably didn't read the whole entry, so we need to skip to the end. 
+ aER.SetRemainingBytes(0); + } + }); +} + +void ProfileBuffer::StreamProfilerOverheadToJSON( + SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime, + double aSinceTime) const { + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader); + + enum Schema : uint32_t { + TIME = 0, + LOCKING = 1, + MARKER_CLEANING = 2, + COUNTERS = 3, + THREADS = 4 + }; + + aWriter.StartObjectProperty("profilerOverhead"); + aWriter.StartObjectProperty("samples"); + // Stream all sampling overhead data. We skip other entries, because we + // process them in StreamSamplesToJSON()/etc. + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("time"); + schema.WriteField("locking"); + schema.WriteField("expiredMarkerCleaning"); + schema.WriteField("counters"); + schema.WriteField("threads"); + } + + aWriter.StartArrayProperty("data"); + double firstTime = 0.0; + double lastTime = 0.0; + ProfilerStats intervals, overheads, lockings, cleanings, counters, threads; + while (e.Has()) { + // valid sequence: ProfilerOverheadTime, ProfilerOverheadDuration * 4 + if (e.Get().IsProfilerOverheadTime()) { + double time = e.Get().GetDouble(); + if (time >= aSinceTime) { + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime"); + } + double locking = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime,ProfilerOverheadDuration"); + } + double cleaning = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime,ProfilerOverheadDuration*2"); + } + double counter = 
e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime,ProfilerOverheadDuration*3"); + } + double thread = e.Get().GetDouble(); + + if (firstTime == 0.0) { + firstTime = time; + } else { + // Note that we'll have 1 fewer interval than other numbers (because + // we need both ends of an interval to know its duration). The final + // difference should be insignificant over the expected many + // thousands of iterations. + intervals.Count(time - lastTime); + } + lastTime = time; + overheads.Count(locking + cleaning + counter + thread); + lockings.Count(locking); + cleanings.Count(cleaning); + counters.Count(counter); + threads.Count(thread); + + AutoArraySchemaWriter writer(aWriter); + writer.DoubleElement(TIME, time); + writer.DoubleElement(LOCKING, locking); + writer.DoubleElement(MARKER_CLEANING, cleaning); + writer.DoubleElement(COUNTERS, counter); + writer.DoubleElement(THREADS, thread); + } + } + e.Next(); + } + aWriter.EndArray(); // data + aWriter.EndObject(); // samples + + // Only output statistics if there is at least one full interval (and + // therefore at least two samplings.) 
+ if (intervals.n > 0) { + aWriter.StartObjectProperty("statistics"); + aWriter.DoubleProperty("profiledDuration", lastTime - firstTime); + aWriter.IntProperty("samplingCount", overheads.n); + aWriter.DoubleProperty("overheadDurations", overheads.sum); + aWriter.DoubleProperty("overheadPercentage", + overheads.sum / (lastTime - firstTime)); +#define PROFILER_STATS(name, var) \ + aWriter.DoubleProperty("mean" name, (var).sum / (var).n); \ + aWriter.DoubleProperty("min" name, (var).min); \ + aWriter.DoubleProperty("max" name, (var).max); + PROFILER_STATS("Interval", intervals); + PROFILER_STATS("Overhead", overheads); + PROFILER_STATS("Lockings", lockings); + PROFILER_STATS("Cleaning", cleanings); + PROFILER_STATS("Counter", counters); + PROFILER_STATS("Thread", threads); +#undef PROFILER_STATS + aWriter.EndObject(); // statistics + } + aWriter.EndObject(); // profilerOverhead + }); +} + +struct CounterKeyedSample { + double mTime; + uint64_t mNumber; + int64_t mCount; +}; + +using CounterKeyedSamples = Vector<CounterKeyedSample>; + +using CounterMap = HashMap<uint64_t, CounterKeyedSamples>; + +// HashMap lookup, if not found, a default value is inserted. +// Returns reference to (existing or new) value inside the HashMap. +template <typename HashM, typename Key> +static auto& LookupOrAdd(HashM& aMap, Key&& aKey) { + auto addPtr = aMap.lookupForAdd(aKey); + if (!addPtr) { + MOZ_RELEASE_ASSERT(aMap.add(addPtr, std::forward<Key>(aKey), + typename HashM::Entry::ValueType{})); + MOZ_ASSERT(!!addPtr); + } + return addPtr->value(); +} + +void ProfileBuffer::StreamCountersToJSON(SpliceableJSONWriter& aWriter, + const TimeStamp& aProcessStartTime, + double aSinceTime) const { + // Because this is a format entirely internal to the Profiler, any parsing + // error indicates a bug in the ProfileBuffer writing or the parser itself, + // or possibly flaky hardware. 
+ + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader); + + enum Schema : uint32_t { TIME = 0, NUMBER = 1, COUNT = 2 }; + + // Stream all counters. We skip other entries, because we process them in + // StreamSamplesToJSON()/etc. + // + // Valid sequence in the buffer: + // CounterID + // Time + // ( CounterKey Count Number? )* + // + // And the JSON (example): + // "counters": { + // "name": "malloc", + // "category": "Memory", + // "description": "Amount of allocated memory", + // "sample_groups": { + // "id": 0, + // "samples": { + // "schema": {"time": 0, "number": 1, "count": 2}, + // "data": [ + // [ + // 16117.033968000002, + // 2446216, + // 6801320 + // ], + // [ + // 16118.037638, + // 2446216, + // 6801320 + // ], + // ], + // } + // } + // }, + + // Build the map of counters and populate it + HashMap<void*, CounterMap> counters; + + while (e.Has()) { + // skip all non-Counters, including if we start in the middle of a counter + if (e.Get().IsCounterId()) { + void* id = e.Get().GetPtr(); + CounterMap& counter = LookupOrAdd(counters, id); + e.Next(); + if (!e.Has() || !e.Get().IsTime()) { + ERROR_AND_CONTINUE("expected a Time entry"); + } + double time = e.Get().GetDouble(); + if (time >= aSinceTime) { + e.Next(); + while (e.Has() && e.Get().IsCounterKey()) { + uint64_t key = e.Get().GetUint64(); + CounterKeyedSamples& data = LookupOrAdd(counter, key); + e.Next(); + if (!e.Has() || !e.Get().IsCount()) { + ERROR_AND_CONTINUE("expected a Count entry"); + } + int64_t count = e.Get().GetUint64(); + e.Next(); + uint64_t number; + if (!e.Has() || !e.Get().IsNumber()) { + number = 0; + } else { + number = e.Get().GetInt64(); + } + CounterKeyedSample sample = {time, number, count}; + MOZ_RELEASE_ASSERT(data.append(sample)); + } + } else { + // skip counter sample - only need to skip the initial counter + // id, then let the 
loop at the top skip the rest + } + } + e.Next(); + } + // we have a map of a map of counter entries; dump them to JSON + if (counters.count() == 0) { + return; + } + + aWriter.StartArrayProperty("counters"); + for (auto iter = counters.iter(); !iter.done(); iter.next()) { + CounterMap& counter = iter.get().value(); + const BaseProfilerCount* base_counter = + static_cast<const BaseProfilerCount*>(iter.get().key()); + + aWriter.Start(); + aWriter.StringProperty("name", MakeStringSpan(base_counter->mLabel)); + aWriter.StringProperty("category", + MakeStringSpan(base_counter->mCategory)); + aWriter.StringProperty("description", + MakeStringSpan(base_counter->mDescription)); + + aWriter.StartArrayProperty("sample_groups"); + for (auto counter_iter = counter.iter(); !counter_iter.done(); + counter_iter.next()) { + CounterKeyedSamples& samples = counter_iter.get().value(); + uint64_t key = counter_iter.get().key(); + + size_t size = samples.length(); + if (size == 0) { + continue; + } + + aWriter.StartObjectElement(); + { + aWriter.IntProperty("id", static_cast<int64_t>(key)); + aWriter.StartObjectProperty("samples"); + { + // XXX Can we assume a missing count means 0? 
+ JSONSchemaWriter schema(aWriter); + schema.WriteField("time"); + schema.WriteField("number"); + schema.WriteField("count"); + } + + aWriter.StartArrayProperty("data"); + uint64_t previousNumber = 0; + int64_t previousCount = 0; + for (size_t i = 0; i < size; i++) { + // Encode as deltas, and only encode if different than the last + // sample + if (i == 0 || samples[i].mNumber != previousNumber || + samples[i].mCount != previousCount) { + MOZ_ASSERT(i == 0 || samples[i].mTime >= samples[i - 1].mTime); + MOZ_ASSERT(samples[i].mNumber >= previousNumber); + MOZ_ASSERT(samples[i].mNumber - previousNumber <= + uint64_t(std::numeric_limits<int64_t>::max())); + + AutoArraySchemaWriter writer(aWriter); + writer.DoubleElement(TIME, samples[i].mTime); + writer.IntElement( + NUMBER, + static_cast<int64_t>(samples[i].mNumber - previousNumber)); + writer.IntElement(COUNT, samples[i].mCount - previousCount); + previousNumber = samples[i].mNumber; + previousCount = samples[i].mCount; + } + } + aWriter.EndArray(); // data + aWriter.EndObject(); // samples + } + aWriter.EndObject(); // sample_groups item + } + aWriter.EndArray(); // sample groups + aWriter.End(); // for each counter + } + aWriter.EndArray(); // counters + }); +} + +#undef ERROR_AND_CONTINUE + +static void AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason, + const Maybe<double>& aStartTime, + const Maybe<double>& aEndTime) { + aWriter.Start(); + if (aStartTime) { + aWriter.DoubleProperty("startTime", *aStartTime); + } else { + aWriter.NullProperty("startTime"); + } + if (aEndTime) { + aWriter.DoubleProperty("endTime", *aEndTime); + } else { + aWriter.NullProperty("endTime"); + } + aWriter.StringProperty("reason", MakeStringSpan(aReason)); + aWriter.End(); +} + +void ProfileBuffer::StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter, + double aSinceTime) const { + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when 
sampler is " + "running"); + + EntryGetter e(*aReader); + + Maybe<double> currentPauseStartTime; + Maybe<double> currentCollectionStartTime; + + while (e.Has()) { + if (e.Get().IsPause()) { + currentPauseStartTime = Some(e.Get().GetDouble()); + } else if (e.Get().IsResume()) { + AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime, + Some(e.Get().GetDouble())); + currentPauseStartTime = Nothing(); + } else if (e.Get().IsCollectionStart()) { + currentCollectionStartTime = Some(e.Get().GetDouble()); + } else if (e.Get().IsCollectionEnd()) { + AddPausedRange(aWriter, "collecting", currentCollectionStartTime, + Some(e.Get().GetDouble())); + currentCollectionStartTime = Nothing(); + } + e.Next(); + } + + if (currentPauseStartTime) { + AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime, + Nothing()); + } + if (currentCollectionStartTime) { + AddPausedRange(aWriter, "collecting", currentCollectionStartTime, + Nothing()); + } + }); +} + +bool ProfileBuffer::DuplicateLastSample(int aThreadId, + const TimeStamp& aProcessStartTime, + Maybe<uint64_t>& aLastSample) { + if (!aLastSample) { + return false; + } + + ProfileChunkedBuffer tempBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, mWorkerChunkManager); + + auto retrieveWorkerChunk = MakeScopeExit( + [&]() { mWorkerChunkManager.Reset(tempBuffer.GetAllChunks()); }); + + const bool ok = mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader, *aLastSample); + + if (e.CurPos() != *aLastSample) { + // The last sample is no longer within the buffer range, so we cannot + // use it. Reset the stored buffer position to Nothing(). + aLastSample.reset(); + return false; + } + + MOZ_RELEASE_ASSERT(e.Has() && e.Get().IsThreadId() && + e.Get().GetInt() == aThreadId); + + e.Next(); + + // Go through the whole entry and duplicate it, until we find the next + // one. 
+ while (e.Has()) { + switch (e.Get().GetKind()) { + case ProfileBufferEntry::Kind::Pause: + case ProfileBufferEntry::Kind::Resume: + case ProfileBufferEntry::Kind::PauseSampling: + case ProfileBufferEntry::Kind::ResumeSampling: + case ProfileBufferEntry::Kind::CollectionStart: + case ProfileBufferEntry::Kind::CollectionEnd: + case ProfileBufferEntry::Kind::ThreadId: + // We're done. + return true; + case ProfileBufferEntry::Kind::Time: + // Copy with new time + AddEntry(tempBuffer, + ProfileBufferEntry::Time( + (TimeStamp::NowUnfuzzed() - aProcessStartTime) + .ToMilliseconds())); + break; + case ProfileBufferEntry::Kind::CounterKey: + case ProfileBufferEntry::Kind::Number: + case ProfileBufferEntry::Kind::Count: + case ProfileBufferEntry::Kind::Responsiveness: + // Don't copy anything not part of a thread's stack sample + break; + case ProfileBufferEntry::Kind::CounterId: + // CounterId is normally followed by Time - if so, we'd like + // to skip it. If we duplicate Time, it won't hurt anything, just + // waste buffer space (and this can happen if the CounterId has + // fallen off the end of the buffer, but Time (and Number/Count) + // are still in the buffer). + e.Next(); + if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) { + // this would only happen if there was an invalid sequence + // in the buffer. Don't skip it. + continue; + } + // we've skipped Time + break; + case ProfileBufferEntry::Kind::ProfilerOverheadTime: + // ProfilerOverheadTime is normally followed by + // ProfilerOverheadDuration*4 - if so, we'd like to skip it. Don't + // duplicate, as we are in the middle of a sampling and will soon + // capture its own overhead. + e.Next(); + // A missing Time would only happen if there was an invalid + // sequence in the buffer. Don't skip unexpected entry. 
+ if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + // we've skipped ProfilerOverheadTime and + // ProfilerOverheadDuration*4. + break; + default: { + // Copy anything else we don't know about. + AddEntry(tempBuffer, e.Get()); + break; + } + } + e.Next(); + } + return true; + }); + + if (!ok) { + return false; + } + + // If the buffer was big enough, there won't be any cleared blocks. + if (tempBuffer.GetState().mClearedBlockCount != 0) { + // No need to try to read stack again as it won't fit. Reset the stored + // buffer position to Nothing(). + aLastSample.reset(); + return false; + } + + aLastSample = Some(AddThreadIdEntry(aThreadId)); + + tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, "tempBuffer cannot be out-of-session"); + + EntryGetter e(*aReader); + + while (e.Has()) { + AddEntry(e.Get()); + e.Next(); + } + }); + + return true; +} + +void ProfileBuffer::DiscardSamplesBeforeTime(double aTime) { + // This function does nothing! + // The duration limit will be removed from Firefox, see bug 1632365. 
+  Unused << aTime;
+}
+
+// END ProfileBuffer
+////////////////////////////////////////////////////////////////////////
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.h b/mozglue/baseprofiler/core/ProfileBufferEntry.h
new file mode 100644
index 0000000000..6422a34a85
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBufferEntry.h
@@ -0,0 +1,358 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntry_h
+#define ProfileBufferEntry_h
+
+#include "BaseProfilingCategory.h"
+#include "gtest/MozGtestFriend.h"
+#include "mozilla/BaseProfileJSONWriter.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferEntryKinds.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+
+#include <string>
+#include <utility>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class ProfileBufferEntry {
+ public:
+  using KindUnderlyingType = ::mozilla::ProfileBufferEntryKindUnderlyingType;
+  using Kind = ::mozilla::ProfileBufferEntryKind;
+
+  ProfileBufferEntry();
+
+  static constexpr size_t kNumChars = ::mozilla::ProfileBufferEntryNumChars;
+
+ private:
+  // aString must be a static string.
+ ProfileBufferEntry(Kind aKind, const char* aString); + ProfileBufferEntry(Kind aKind, char aChars[kNumChars]); + ProfileBufferEntry(Kind aKind, void* aPtr); + ProfileBufferEntry(Kind aKind, double aDouble); + ProfileBufferEntry(Kind aKind, int64_t aInt64); + ProfileBufferEntry(Kind aKind, uint64_t aUint64); + ProfileBufferEntry(Kind aKind, int aInt); + + public: +#define CTOR(KIND, TYPE, SIZE) \ + static ProfileBufferEntry KIND(TYPE aVal) { \ + return ProfileBufferEntry(Kind::KIND, aVal); \ + } + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR) +#undef CTOR + + Kind GetKind() const { return mKind; } + +#define IS_KIND(KIND, TYPE, SIZE) \ + bool Is##KIND() const { return mKind == Kind::KIND; } + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(IS_KIND) +#undef IS_KIND + + private: + FRIEND_TEST(ThreadProfile, InsertOneEntry); + FRIEND_TEST(ThreadProfile, InsertOneEntryWithTinyBuffer); + FRIEND_TEST(ThreadProfile, InsertEntriesNoWrap); + FRIEND_TEST(ThreadProfile, InsertEntriesWrap); + FRIEND_TEST(ThreadProfile, MemoryMeasure); + friend class ProfileBuffer; + + Kind mKind; + uint8_t mStorage[kNumChars]; + + const char* GetString() const; + void* GetPtr() const; + double GetDouble() const; + int GetInt() const; + int64_t GetInt64() const; + uint64_t GetUint64() const; + void CopyCharsInto(char (&aOutArray)[kNumChars]) const; +}; + +// Packed layout: 1 byte for the tag + 8 bytes for the value. 
+static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size");
+
+// Holds the frame -> index and stack -> index maps used to de-duplicate
+// frames and stack prefixes, together with the chunked JSON writers for the
+// frame table and the stack table and the unique-strings table they share.
+// (Member function bodies are defined outside this header.)
+class UniqueStacks {
+ public:
+  // Identity of one frame: a location string plus optional attributes.
+  struct FrameKey {
+    // Location-only key: not relevant for JS, inner window ID 0, no line, no
+    // column; the category pair is left value-initialized.
+    explicit FrameKey(const char* aLocation)
+        : mData(NormalFrameData{std::string(aLocation), false, 0, Nothing(),
+                                Nothing()}) {}
+
+    // Fully specified key. aLocation is taken by rvalue reference, but a
+    // named rvalue-reference parameter is an lvalue inside the function, so
+    // it has to be std::move'd explicitly; otherwise the string would be
+    // copied into the key.
+    FrameKey(std::string&& aLocation, bool aRelevantForJS,
+             uint64_t aInnerWindowID, const Maybe<unsigned>& aLine,
+             const Maybe<unsigned>& aColumn,
+             const Maybe<ProfilingCategoryPair>& aCategoryPair)
+        : mData(NormalFrameData{std::move(aLocation), aRelevantForJS,
+                                aInnerWindowID, aLine, aColumn,
+                                aCategoryPair}) {}
+
+    FrameKey(const FrameKey& aToCopy) = default;
+
+    uint32_t Hash() const;
+    bool operator==(const FrameKey& aOther) const {
+      return mData == aOther.mData;
+    }
+
+    struct NormalFrameData {
+      bool operator==(const NormalFrameData& aOther) const;
+
+      std::string mLocation;
+      bool mRelevantForJS;
+      uint64_t mInnerWindowID;
+      Maybe<unsigned> mLine;
+      Maybe<unsigned> mColumn;
+      Maybe<ProfilingCategoryPair> mCategoryPair;
+    };
+    Variant<NormalFrameData> mData;
+  };
+
+  // Hash policy for HashMap<FrameKey, ...>.
+  struct FrameKeyHasher {
+    using Lookup = FrameKey;
+
+    // Folds in the location (only when non-empty), the relevant-for-JS flag,
+    // the inner window ID, and each of line / column / category pair that is
+    // present.
+    static HashNumber hash(const FrameKey& aLookup) {
+      HashNumber hash = 0;
+      if (aLookup.mData.is<FrameKey::NormalFrameData>()) {
+        const FrameKey::NormalFrameData& data =
+            aLookup.mData.as<FrameKey::NormalFrameData>();
+        if (!data.mLocation.empty()) {
+          hash = AddToHash(hash, HashString(data.mLocation.c_str()));
+        }
+        hash = AddToHash(hash, data.mRelevantForJS);
+        hash = mozilla::AddToHash(hash, data.mInnerWindowID);
+        if (data.mLine.isSome()) {
+          hash = AddToHash(hash, *data.mLine);
+        }
+        if (data.mColumn.isSome()) {
+          hash = AddToHash(hash, *data.mColumn);
+        }
+        if (data.mCategoryPair.isSome()) {
+          hash = AddToHash(hash, static_cast<uint32_t>(*data.mCategoryPair));
+        }
+      }
+      return hash;
+    }
+
+    static bool match(const FrameKey& aKey, const FrameKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(FrameKey& aKey, const FrameKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  // Identity of one stack: an optional prefix stack index plus the index of
+  // the frame appended to that prefix. The hash is computed once, at
+  // construction, and is not part of the equality comparison.
+  struct StackKey {
+    Maybe<uint32_t> mPrefixStackIndex;
+    uint32_t mFrameIndex;
+
+    // Root stack: no prefix, hash derived from the frame index alone.
+    explicit StackKey(uint32_t aFrame)
+        : mFrameIndex(aFrame), mHash(HashGeneric(aFrame)) {}
+
+    // aPrefix extended by aFrame: the hash chains from the prefix's hash.
+    StackKey(const StackKey& aPrefix, uint32_t aPrefixStackIndex,
+             uint32_t aFrame)
+        : mPrefixStackIndex(Some(aPrefixStackIndex)),
+          mFrameIndex(aFrame),
+          mHash(AddToHash(aPrefix.mHash, aFrame)) {}
+
+    HashNumber Hash() const { return mHash; }
+
+    bool operator==(const StackKey& aOther) const {
+      return mPrefixStackIndex == aOther.mPrefixStackIndex &&
+             mFrameIndex == aOther.mFrameIndex;
+    }
+
+   private:
+    HashNumber mHash;
+  };
+
+  // Hash policy for HashMap<StackKey, ...>; returns the precomputed hash.
+  struct StackKeyHasher {
+    using Lookup = StackKey;
+
+    static HashNumber hash(const StackKey& aLookup) { return aLookup.Hash(); }
+
+    static bool match(const StackKey& aKey, const StackKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(StackKey& aKey, const StackKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  UniqueStacks();
+
+  // Return a StackKey for aFrame as the stack's root frame (no prefix).
+  [[nodiscard]] StackKey BeginStack(const FrameKey& aFrame);
+
+  // Return a new StackKey that is obtained by appending aFrame to aStack.
+  [[nodiscard]] StackKey AppendFrame(const StackKey& aStack,
+                                     const FrameKey& aFrame);
+
+  [[nodiscard]] uint32_t GetOrAddFrameIndex(const FrameKey& aFrame);
+  [[nodiscard]] uint32_t GetOrAddStackIndex(const StackKey& aStack);
+
+  void SpliceFrameTableElements(SpliceableJSONWriter& aWriter);
+  void SpliceStackTableElements(SpliceableJSONWriter& aWriter);
+
+ private:
+  void StreamNonJITFrame(const FrameKey& aFrame);
+  void StreamStack(const StackKey& aStack);
+
+ public:
+  UniquePtr<UniqueJSONStrings> mUniqueStrings;
+
+ private:
+  SpliceableChunkedJSONWriter mFrameTableWriter;
+  HashMap<FrameKey, uint32_t, FrameKeyHasher> mFrameToIndexMap;
+
+  SpliceableChunkedJSONWriter mStackTableWriter;
+  HashMap<StackKey, uint32_t, StackKeyHasher> mStackToIndexMap;
+};
+
+//
+// Thread profile JSON Format
+// --------------------------
+//
+// The profile contains much duplicate information. The output JSON of the
+// profile attempts to deduplicate strings, frames, and stack prefixes, to cut
+// down on size and to increase JSON streaming speed. Deduplicated values are
+// streamed as indices into their respective tables.
+//
+// Further, arrays of objects with the same set of properties (e.g., samples,
+// frames) are output as arrays according to a schema instead of an object
+// with property names. A property that is not present is represented in the
+// array as null or undefined.
+//
+// The format of the thread profile JSON is shown by the following example
+// with 1 sample and 1 marker:
+//
+// {
+//   "name": "Foo",
+//   "tid": 42,
+//   "samples":
+//   {
+//     "schema":
+//     {
+//       "stack": 0,          /* index into stackTable */
+//       "time": 1,           /* number */
+//       "eventDelay": 2      /* number */
+//     },
+//     "data":
+//     [
+//       [ 1, 0.0, 0.0 ]      /* { stack: 1, time: 0.0, eventDelay: 0.0 } */
+//     ]
+//   },
+//
+//   "markers":
+//   {
+//     "schema":
+//     {
+//       "name": 0,           /* index into stringTable */
+//       "startTime": 1,      /* number */
+//       "endTime": 2,
+//       "phase": 3,
+//       "category": 4,
+//       "data": 5            /* arbitrary JSON */
+//     },
+//     "data":
+//     [
+//       [ 3, 0.1 ]           /* { name: 'example marker', startTime: 0.1 } */
+//     ]
+//   },
+//
+//   "stackTable":
+//   {
+//     "schema":
+//     {
+//       "prefix": 0,         /* index into stackTable */
+//       "frame": 1           /* index into frameTable */
+//     },
+//     "data":
+//     [
+//       [ null, 0 ],         /* (root) */
+//       [ 0, 1 ]             /* (root) > foo.js */
+//     ]
+//   },
+//
+//   "frameTable":
+//   {
+//     "schema":
+//     {
+//       "location": 0,       /* index into stringTable */
+//       "relevantForJS": 1,  /* bool */
+//       "innerWindowID": 2,  /* inner window ID of global JS `window` object */
+//       "implementation": 3, /* index into stringTable */
+//       "optimizations": 4,  /* arbitrary JSON */
+//       "line": 5,           /* number */
+//       "column": 6,         /* number */
+//       "category": 7,       /* index into profile.meta.categories */
+//       "subcategory": 8     /* index into
+//                               profile.meta.categories[category].subcategories */
+//     },
+//     "data":
+//     [
+//       [ 0 ],               /* { location: '(root)' } */
+//       [ 1, null, null, 2 ] /* { location: 'foo.js',
+//                                 implementation: 'baseline' } */
+//     ]
+//   },
+//
+//   "stringTable":
+//   [
+//     "(root)",
+//     "foo.js",
+//     "baseline",
+//     "example marker"
+//   ]
+// }
+//
+// Process:
+// {
+//   "name": "Bar",
+//   "pid": 24,
+//   "threads":
+//   [
+//     <0-N threads from above>
+//   ],
+//   "counters": /* includes the memory counter */
+//   [
+//     {
+//       "name": "qwerty",
+//       "category": "uiop",
+//       "description": "this is qwerty uiop",
+//       "sample_groups":
+//       [
+//         {
+//           "id": 42, /* number (thread id,
or object identifier (tab), etc) */
+//           "samples":
+//           {
+//             "schema":
+//             {
+//               "time": 0,   /* number */
+//               "number": 1, /* number (of times the counter was touched) */
+//               "count": 2   /* number (total for the counter) */
+//             },
+//             "data":
+//             [
+//               [ 0.1, 1824,
+//                 454622 ]   /* { time: 0.1, number: 1824, count: 454622 } */
+//             ]
+//           },
+//         },
+//         /* more sample-group objects with different id's */
+//       ]
+//     },
+//     /* more counters */
+//   ],
+// }
+//
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* ndef ProfileBufferEntry_h */
diff --git a/mozglue/baseprofiler/core/ProfileJSONWriter.cpp b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
new file mode 100644
index 0000000000..966ff2f515
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "mozilla/BaseProfileJSONWriter.h" + +namespace mozilla::baseprofiler { + +UniqueJSONStrings::UniqueJSONStrings(JSONWriter::CollectionStyle aStyle) { + mStringTableWriter.StartBareList(aStyle); +} + +UniqueJSONStrings::UniqueJSONStrings(const UniqueJSONStrings& aOther, + JSONWriter::CollectionStyle aStyle) { + mStringTableWriter.StartBareList(aStyle); + uint32_t count = aOther.mStringHashToIndexMap.count(); + if (count != 0) { + MOZ_RELEASE_ASSERT(mStringHashToIndexMap.reserve(count)); + for (auto iter = aOther.mStringHashToIndexMap.iter(); !iter.done(); + iter.next()) { + mStringHashToIndexMap.putNewInfallible(iter.get().key(), + iter.get().value()); + } + mStringTableWriter.CopyAndSplice( + aOther.mStringTableWriter.ChunkedWriteFunc()); + } +} + +UniqueJSONStrings::~UniqueJSONStrings() = default; + +void UniqueJSONStrings::SpliceStringTableElements( + SpliceableJSONWriter& aWriter) { + aWriter.TakeAndSplice(mStringTableWriter.TakeChunkedWriteFunc()); +} + +uint32_t UniqueJSONStrings::GetOrAddIndex(const Span<const char>& aStr) { + uint32_t count = mStringHashToIndexMap.count(); + HashNumber hash = HashString(aStr.data(), aStr.size()); + auto entry = mStringHashToIndexMap.lookupForAdd(hash); + if (entry) { + MOZ_ASSERT(entry->value() < count); + return entry->value(); + } + + MOZ_RELEASE_ASSERT(mStringHashToIndexMap.add(entry, hash, count)); + mStringTableWriter.StringElement(aStr); + return count; +} + +} // namespace mozilla::baseprofiler diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.cpp b/mozglue/baseprofiler/core/ProfiledThreadData.cpp new file mode 100644 index 0000000000..4dc600d97c --- /dev/null +++ b/mozglue/baseprofiler/core/ProfiledThreadData.cpp @@ -0,0 +1,187 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfiledThreadData.h"
+
+#include "BaseProfiler.h"
+#include "ProfileBuffer.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+#if defined(GP_OS_darwin)
+#  include <pthread.h>
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Stores the given ThreadInfo pointer; nothing else is initialized here.
+ProfiledThreadData::ProfiledThreadData(ThreadInfo* aThreadInfo)
+    : mThreadInfo(aThreadInfo) {}
+
+ProfiledThreadData::~ProfiledThreadData() {}
+
+// Streams this thread as one JSON value: the per-thread properties, samples
+// and markers written by StreamSamplesAndMarkers(), followed by the
+// "stackTable", "frameTable" and "stringTable" built in a function-local
+// UniqueStacks.
+void ProfiledThreadData::StreamJSON(const ProfileBuffer& aBuffer,
+                                    SpliceableJSONWriter& aWriter,
+                                    const std::string& aProcessName,
+                                    const std::string& aETLDplus1,
+                                    const TimeStamp& aProcessStartTime,
+                                    double aSinceTime) {
+  UniqueStacks uniqueStacks;
+
+  // Point the writer at this thread's string table for the duration of the
+  // call; it is detached again by ResetUniqueStrings() at the end.
+  MOZ_ASSERT(uniqueStacks.mUniqueStrings);
+  aWriter.SetUniqueStrings(*uniqueStacks.mUniqueStrings);
+
+  aWriter.Start();
+  {
+    StreamSamplesAndMarkers(mThreadInfo->Name(), mThreadInfo->ThreadId(),
+                            aBuffer, aWriter, aProcessName, aETLDplus1,
+                            aProcessStartTime, mThreadInfo->RegisterTime(),
+                            mUnregisterTime, aSinceTime, uniqueStacks);
+
+    aWriter.StartObjectProperty("stackTable");
+    {
+      {
+        JSONSchemaWriter schema(aWriter);
+        schema.WriteField("prefix");
+        schema.WriteField("frame");
+      }
+
+      aWriter.StartArrayProperty("data");
+      { uniqueStacks.SpliceStackTableElements(aWriter); }
+      aWriter.EndArray();
+    }
+    aWriter.EndObject();
+
+    aWriter.StartObjectProperty("frameTable");
+    {
+      {
+        JSONSchemaWriter schema(aWriter);
+        schema.WriteField("location");
+        schema.WriteField("relevantForJS");
+        schema.WriteField("innerWindowID");
+        schema.WriteField("implementation");
+        schema.WriteField("optimizations");
+        schema.WriteField("line");
+        schema.WriteField("column");
+        schema.WriteField("category");
+        schema.WriteField("subcategory");
+      }
+
+      aWriter.StartArrayProperty("data");
+      { uniqueStacks.SpliceFrameTableElements(aWriter); }
+      aWriter.EndArray();
+    }
+    aWriter.EndObject();
+
+    // The string table is written last.
+    // NOTE(review): presumably because streaming the sections above is what
+    // adds strings to it -- confirm against UniqueStacks' implementation.
+    aWriter.StartArrayProperty("stringTable");
+    {
+      std::move(*uniqueStacks.mUniqueStrings)
+          .SpliceStringTableElements(aWriter);
+    }
+    aWriter.EndArray();
+  }
+  aWriter.End();
+
+  aWriter.ResetUniqueStrings();
+}
+
+// Writes the per-thread properties into the JSON object currently open in
+// aWriter, in this order: "processType", "name", optional "processName" and
+// "eTLD+1", "registerTime", "unregisterTime", "samples", "markers", "pid",
+// "tid".
+//
+// aName is used to construct a std::string, so it must not be null.
+// aProcessName / aETLDplus1 are only written when non-empty.
+// aRegisterTime / aUnregisterTime are written in milliseconds relative to
+// aProcessStartTime, or as JSON null when the TimeStamp tests false.
+//
+// Returns the value returned by aBuffer.StreamSamplesToJSON(); that value is
+// also what "tid" falls back to when aThreadId is 0.
+int StreamSamplesAndMarkers(
+    const char* aName, int aThreadId, const ProfileBuffer& aBuffer,
+    SpliceableJSONWriter& aWriter, const std::string& aProcessName,
+    const std::string& aETLDplus1, const TimeStamp& aProcessStartTime,
+    const TimeStamp& aRegisterTime, const TimeStamp& aUnregisterTime,
+    double aSinceTime, UniqueStacks& aUniqueStacks) {
+  int processedThreadId = 0;
+
+  // The process type is a fixed placeholder here; the commented-out
+  // expression shows the call it stands in for.
+  aWriter.StringProperty(
+      "processType",
+      "(unknown)" /* XRE_GeckoProcessTypeToString(XRE_GetProcessType()) */);
+
+  {
+    std::string name = aName;
+    // We currently need to distinguish threads output by Base Profiler from
+    // those in Gecko Profiler, as the frontend could get confused and lose
+    // tracks with the same name.
+    // TODO: As part of the profilers de-duplication, thread data from both
+    // profilers should end up in the same track, at which point this won't be
+    // necessary anymore. See meta bug 1557566.
+    name += " (pre-xul)";
+    aWriter.StringProperty("name", name);
+  }
+
+  // Use given process name (if any).
+  if (!aProcessName.empty()) {
+    aWriter.StringProperty("processName", aProcessName);
+  }
+  if (!aETLDplus1.empty()) {
+    aWriter.StringProperty("eTLD+1", aETLDplus1);
+  }
+
+  if (aRegisterTime) {
+    aWriter.DoubleProperty(
+        "registerTime", (aRegisterTime - aProcessStartTime).ToMilliseconds());
+  } else {
+    aWriter.NullProperty("registerTime");
+  }
+
+  if (aUnregisterTime) {
+    aWriter.DoubleProperty(
+        "unregisterTime",
+        (aUnregisterTime - aProcessStartTime).ToMilliseconds());
+  } else {
+    aWriter.NullProperty("unregisterTime");
+  }
+
+  // "samples": a schema followed by the rows streamed from the buffer.
+  aWriter.StartObjectProperty("samples");
+  {
+    {
+      JSONSchemaWriter schema(aWriter);
+      schema.WriteField("stack");
+      schema.WriteField("time");
+      schema.WriteField("eventDelay");
+    }
+
+    aWriter.StartArrayProperty("data");
+    {
+      processedThreadId = aBuffer.StreamSamplesToJSON(
+          aWriter, aThreadId, aSinceTime, aUniqueStacks);
+    }
+    aWriter.EndArray();
+  }
+  aWriter.EndObject();
+
+  // "markers": a schema followed by the rows streamed from the buffer.
+  aWriter.StartObjectProperty("markers");
+  {
+    {
+      JSONSchemaWriter schema(aWriter);
+      schema.WriteField("name");
+      schema.WriteField("startTime");
+      schema.WriteField("endTime");
+      schema.WriteField("phase");
+      schema.WriteField("category");
+      schema.WriteField("data");
+    }
+
+    aWriter.StartArrayProperty("data");
+    {
+      aBuffer.StreamMarkersToJSON(aWriter, aThreadId, aProcessStartTime,
+                                  aSinceTime, aUniqueStacks);
+    }
+    aWriter.EndArray();
+  }
+  aWriter.EndObject();
+
+  aWriter.IntProperty("pid",
+                      static_cast<int64_t>(profiler_current_process_id()));
+  // Prefer the caller-supplied thread id; when it is 0, use the id reported
+  // by the sample streaming above.
+  aWriter.IntProperty(
+      "tid",
+      static_cast<int64_t>(aThreadId != 0 ? aThreadId : processedThreadId));
+
+  return processedThreadId;
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.h b/mozglue/baseprofiler/core/ProfiledThreadData.h
new file mode 100644
index 0000000000..c45c02a7bb
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfiledThreadData.h
@@ -0,0 +1,119 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfiledThreadData_h
+#define ProfiledThreadData_h
+
+#include "BaseProfilingStack.h"
+#include "platform.h"
+#include "ProfileBufferEntry.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class ProfileBuffer;
+
+// This class contains information about a thread that is only relevant while
+// the profiler is running, for any threads (both alive and dead) whose thread
+// name matches the "thread filter" in the current profiler run.
+// ProfiledThreadData objects may be kept alive even after the thread is
+// unregistered, as long as there is still data for that thread in the profiler
+// buffer.
+//
+// Accesses to this class are protected by the profiler state lock.
+//
+// Created as soon as the following are true for the thread:
+//  - The profiler is running, and
+//  - the thread matches the profiler's thread filter, and
+//  - the thread is registered with the profiler.
+// So it gets created in response to either (1) the profiler being started (for
+// an existing registered thread) or (2) the thread being registered (if the
+// profiler is already running).
+// +// The thread may be unregistered during the lifetime of ProfiledThreadData. +// If that happens, NotifyUnregistered() is called. +// +// This class is the right place to store buffer positions. Profiler buffer +// positions become invalid if the profiler buffer is destroyed, which happens +// when the profiler is stopped. +class ProfiledThreadData final { + public: + explicit ProfiledThreadData(ThreadInfo* aThreadInfo); + ~ProfiledThreadData(); + + void NotifyUnregistered(uint64_t aBufferPosition) { + mLastSample = Nothing(); + MOZ_ASSERT(!mBufferPositionWhenReceivedJSContext, + "JSContext should have been cleared before the thread was " + "unregistered"); + mUnregisterTime = TimeStamp::NowUnfuzzed(); + mBufferPositionWhenUnregistered = Some(aBufferPosition); + } + Maybe<uint64_t> BufferPositionWhenUnregistered() { + return mBufferPositionWhenUnregistered; + } + + Maybe<uint64_t>& LastSample() { return mLastSample; } + + void StreamJSON(const ProfileBuffer& aBuffer, SpliceableJSONWriter& aWriter, + const std::string& aProcessName, + const std::string& aETLDplus1, + const TimeStamp& aProcessStartTime, double aSinceTime); + + const RefPtr<ThreadInfo> Info() const { return mThreadInfo; } + + void NotifyReceivedJSContext(uint64_t aCurrentBufferPosition) { + mBufferPositionWhenReceivedJSContext = Some(aCurrentBufferPosition); + } + + private: + // Group A: + // The following fields are interesting for the entire lifetime of a + // ProfiledThreadData object. + + // This thread's thread info. + const RefPtr<ThreadInfo> mThreadInfo; + + // Group B: + // The following fields are only used while this thread is alive and + // registered. They become Nothing() once the thread is unregistered. + + // When sampling, this holds the position in ActivePS::mBuffer of the most + // recent sample for this thread, or Nothing() if there is no sample for this + // thread in the buffer. + Maybe<uint64_t> mLastSample; + + // Only non-Nothing() if the thread currently has a JSContext. 
+ Maybe<uint64_t> mBufferPositionWhenReceivedJSContext; + + // Group C: + // The following fields are only used once this thread has been unregistered. + + Maybe<uint64_t> mBufferPositionWhenUnregistered; + TimeStamp mUnregisterTime; +}; + +// Stream all samples and markers from aBuffer with the given aThreadId (or 0 +// for everything, which is assumed to be a single backtrace sample.) +// Returns the thread id of the output sample(s), or 0 if none was present. +int StreamSamplesAndMarkers( + const char* aName, int aThreadId, const ProfileBuffer& aBuffer, + SpliceableJSONWriter& aWriter, const std::string& aProcessName, + const std::string& aETLDplus1, const TimeStamp& aProcessStartTime, + const TimeStamp& aRegisterTime, const TimeStamp& aUnregisterTime, + double aSinceTime, UniqueStacks& aUniqueStacks); + +} // namespace baseprofiler +} // namespace mozilla + +#endif // ProfiledThreadData_h diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.cpp b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp new file mode 100644 index 0000000000..166e72fd9c --- /dev/null +++ b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp @@ -0,0 +1,123 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfilerBacktrace.h" + +#include "BaseProfiler.h" +#include "ProfileBuffer.h" +#include "ProfiledThreadData.h" +#include "ThreadInfo.h" + +#include "mozilla/BaseProfileJSONWriter.h" + +namespace mozilla { +namespace baseprofiler { + +ProfilerBacktrace::ProfilerBacktrace( + const char* aName, + UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage, + UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull /* = nullptr */) + : mName(aName), + mOptionalProfileChunkedBufferStorage( + std::move(aProfileChunkedBufferStorage)), + mProfileChunkedBuffer(mOptionalProfileChunkedBufferStorage.get()), + mOptionalProfileBufferStorage(std::move(aProfileBufferStorageOrNull)), + mProfileBuffer(mOptionalProfileBufferStorage.get()) { + if (mProfileBuffer) { + MOZ_RELEASE_ASSERT(mProfileChunkedBuffer, + "If we take ownership of a ProfileBuffer, we must also " + "receive ownership of a ProfileChunkedBuffer"); + MOZ_RELEASE_ASSERT( + mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(), + "If we take ownership of a ProfileBuffer, we must also receive " + "ownership of its ProfileChunkedBuffer"); + } + MOZ_ASSERT( + !mProfileChunkedBuffer || !mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe ProfileChunkedBuffer"); +} + +ProfilerBacktrace::ProfilerBacktrace( + const char* aName, + ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull /* = nullptr */, + ProfileBuffer* aExternalProfileBufferOrNull /* = nullptr */) + : mName(aName), + mProfileChunkedBuffer(aExternalProfileChunkedBufferOrNull), + mProfileBuffer(aExternalProfileBufferOrNull) { + if (!mProfileChunkedBuffer) { + if (mProfileBuffer) { + // We don't have a ProfileChunkedBuffer but we have a ProfileBuffer, use + // the latter's ProfileChunkedBuffer. 
+ mProfileChunkedBuffer = &mProfileBuffer->UnderlyingChunkedBuffer(); + MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe " + "ProfileChunkedBuffer"); + } + } else { + if (mProfileBuffer) { + MOZ_RELEASE_ASSERT( + mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(), + "If we reference both ProfileChunkedBuffer and ProfileBuffer, they " + "must already be connected"); + } + MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe " + "ProfileChunkedBuffer"); + } +} + +ProfilerBacktrace::~ProfilerBacktrace() {} + +int ProfilerBacktrace::StreamJSON(SpliceableJSONWriter& aWriter, + const TimeStamp& aProcessStartTime, + UniqueStacks& aUniqueStacks) { + int processedThreadId = 0; + + // Unlike ProfiledThreadData::StreamJSON, we don't need to call + // ProfileBuffer::AddJITInfoForRange because ProfileBuffer does not contain + // any JitReturnAddr entries. For synchronous samples, JIT frames get expanded + // at sample time. + if (mProfileBuffer) { + processedThreadId = StreamSamplesAndMarkers( + mName.c_str(), 0, *mProfileBuffer, aWriter, "", "", aProcessStartTime, + /* aRegisterTime */ TimeStamp(), + /* aUnregisterTime */ TimeStamp(), + /* aSinceTime */ 0, aUniqueStacks); + } else if (mProfileChunkedBuffer) { + ProfileBuffer profileBuffer(*mProfileChunkedBuffer); + processedThreadId = StreamSamplesAndMarkers( + mName.c_str(), 0, profileBuffer, aWriter, "", "", aProcessStartTime, + /* aRegisterTime */ TimeStamp(), + /* aUnregisterTime */ TimeStamp(), + /* aSinceTime */ 0, aUniqueStacks); + } + // If there are no buffers, the backtrace is empty and nothing is streamed. 
+ + return processedThreadId; +} + +} // namespace baseprofiler + +// static +template <typename Destructor> +UniquePtr<baseprofiler::ProfilerBacktrace, Destructor> +ProfileBufferEntryReader:: + Deserializer<UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>>::Read( + ProfileBufferEntryReader& aER) { + auto profileChunkedBuffer = aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>(); + if (!profileChunkedBuffer) { + return nullptr; + } + MOZ_ASSERT( + !profileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only stores non-thread-safe ProfileChunkedBuffers"); + std::string name = aER.ReadObject<std::string>(); + return UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>{ + new baseprofiler::ProfilerBacktrace(name.c_str(), + std::move(profileChunkedBuffer))}; +}; + +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.h b/mozglue/baseprofiler/core/ProfilerBacktrace.h new file mode 100644 index 0000000000..b5365cd4ac --- /dev/null +++ b/mozglue/baseprofiler/core/ProfilerBacktrace.h @@ -0,0 +1,162 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __PROFILER_BACKTRACE_H +#define __PROFILER_BACKTRACE_H + +#include "mozilla/UniquePtrExtensions.h" + +#include <string> + +namespace mozilla { + +class ProfileChunkedBuffer; +class TimeStamp; + +namespace baseprofiler { + +class ProfileBuffer; +class SpliceableJSONWriter; +class ThreadInfo; +class UniqueStacks; + +// ProfilerBacktrace encapsulates a synchronous sample. +// It can work with a ProfileBuffer and/or a ProfileChunkedBuffer (if both, they +// must already be linked together). 
The ProfileChunkedBuffer contains all the +// data; the ProfileBuffer is not strictly needed, only provide it if it is +// already available at the call site. +// And these buffers can either be: +// - owned here (so that the ProfilerBacktrace object can be kept for later +// use), OR +// - referenced through pointers (in cases where the backtrace is immediately +// streamed out, so we only need temporary references to external buffers); +// these pointers may be null for empty backtraces. +class ProfilerBacktrace { + public: + // Take ownership of external buffers and use them to keep, and to stream a + // backtrace. If a ProfileBuffer is given, its underlying chunked buffer must + // be provided as well. + explicit ProfilerBacktrace( + const char* aName, + UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage, + UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull = nullptr); + + // Take pointers to external buffers and use them to stream a backtrace. + // If null, the backtrace is effectively empty. + // If both are provided, they must already be connected. + explicit ProfilerBacktrace( + const char* aName, + ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull = nullptr, + ProfileBuffer* aExternalProfileBufferOrNull = nullptr); + + ~ProfilerBacktrace(); + + [[nodiscard]] bool IsEmpty() const { + return !mProfileChunkedBuffer || + ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>::Bytes( + *mProfileChunkedBuffer) <= ULEB128Size(0u); + } + + // ProfilerBacktraces' stacks are deduplicated in the context of the + // profile that contains the backtrace as a marker payload. + // + // That is, markers that contain backtraces should not need their own stack, + // frame, and string tables. They should instead reuse their parent + // profile's tables. + int StreamJSON(SpliceableJSONWriter& aWriter, + const TimeStamp& aProcessStartTime, + UniqueStacks& aUniqueStacks); + + private: + // Used to de/serialize a ProfilerBacktrace. 
+ friend ProfileBufferEntryWriter::Serializer<ProfilerBacktrace>; + friend ProfileBufferEntryReader::Deserializer<ProfilerBacktrace>; + + std::string mName; + + // `ProfileChunkedBuffer` in which `mProfileBuffer` stores its data; must be + // located before `mProfileBuffer` so that it's destroyed after. + UniquePtr<ProfileChunkedBuffer> mOptionalProfileChunkedBufferStorage; + // If null, there is no need to check mProfileBuffer's (if present) underlying + // buffer because this is done when constructed. + ProfileChunkedBuffer* mProfileChunkedBuffer; + + UniquePtr<ProfileBuffer> mOptionalProfileBufferStorage; + ProfileBuffer* mProfileBuffer; +}; + +} // namespace baseprofiler + +// Format: [ UniquePtr<ProfileChunkedBuffer> | name ] +// Initial len==0 marks a nullptr or empty backtrace. +template <> +struct ProfileBufferEntryWriter::Serializer<baseprofiler::ProfilerBacktrace> { + static Length Bytes(const baseprofiler::ProfilerBacktrace& aBacktrace) { + if (!aBacktrace.mProfileChunkedBuffer) { + // No buffer. + return ULEB128Size(0u); + } + auto bufferBytes = SumBytes(*aBacktrace.mProfileChunkedBuffer); + if (bufferBytes <= ULEB128Size(0u)) { + // Empty buffer. + return ULEB128Size(0u); + } + return bufferBytes + SumBytes(aBacktrace.mName); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const baseprofiler::ProfilerBacktrace& aBacktrace) { + if (!aBacktrace.mProfileChunkedBuffer || + SumBytes(*aBacktrace.mProfileChunkedBuffer) <= ULEB128Size(0u)) { + // No buffer, or empty buffer. + aEW.WriteULEB128(0u); + return; + } + aEW.WriteObject(*aBacktrace.mProfileChunkedBuffer); + aEW.WriteObject(aBacktrace.mName); + } +}; + +template <typename Destructor> +struct ProfileBufferEntryWriter::Serializer< + UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> { + static Length Bytes(const UniquePtr<baseprofiler::ProfilerBacktrace, + Destructor>& aBacktrace) { + if (!aBacktrace) { + // Null backtrace pointer (treated like an empty backtrace). 
+ return ULEB128Size(0u); + } + return SumBytes(*aBacktrace); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const UniquePtr<baseprofiler::ProfilerBacktrace, + Destructor>& aBacktrace) { + if (!aBacktrace) { + // Null backtrace pointer (treated like an empty backtrace). + aEW.WriteULEB128(0u); + return; + } + aEW.WriteObject(*aBacktrace); + } +}; + +template <typename Destructor> +struct ProfileBufferEntryReader::Deserializer< + UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> { + static void ReadInto( + ProfileBufferEntryReader& aER, + UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>& aBacktrace) { + aBacktrace = Read(aER); + } + + static UniquePtr<baseprofiler::ProfilerBacktrace, Destructor> Read( + ProfileBufferEntryReader& aER); +}; + +} // namespace mozilla + +#endif // __PROFILER_BACKTRACE_H diff --git a/mozglue/baseprofiler/core/ProfilerMarkers.cpp b/mozglue/baseprofiler/core/ProfilerMarkers.cpp new file mode 100644 index 0000000000..bff2a9ebdd --- /dev/null +++ b/mozglue/baseprofiler/core/ProfilerMarkers.cpp @@ -0,0 +1,221 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/BaseProfilerMarkers.h" + +#include "mozilla/Likely.h" + +#include <limits> + +namespace mozilla { +namespace base_profiler_markers_detail { + +// We need an atomic type that can hold a `DeserializerTag`. (Atomic doesn't +// work with too-small types.) +using DeserializerTagAtomic = unsigned; + +// Number of currently-registered deserializers and other marker type functions. +static Atomic<DeserializerTagAtomic, MemoryOrdering::Relaxed> + sDeserializerCount{0}; + +// This needs to be big enough to handle all possible marker types. 
If one day +// this needs to be higher, the underlying DeserializerTag type will have to be +// changed. +static constexpr DeserializerTagAtomic DeserializerMax = 250; + +static_assert( + DeserializerMax <= std::numeric_limits<Streaming::DeserializerTag>::max(), + "The maximum number of deserializers must fit in the DeserializerTag type"); + +// Array of marker type functions. +// 1-based, i.e.: [0] -> tag 1, [DeserializerMax - 1] -> tag DeserializerMax. +// Elements are added at the next available atomically-incremented +// `sDeserializerCount` (minus 1) whenever a new marker type is used in a +// Firefox session; the content is kept between profiler runs in that session. +// There is theoretically a race between the increment and the time the entry is +// fully written, but in practice all new elements are written (during +// profiling, using a marker type for the first time) long before they are read +// (after profiling is paused). +static Streaming::MarkerTypeFunctions + sMarkerTypeFunctions1Based[DeserializerMax]; + +/* static */ Streaming::DeserializerTag Streaming::TagForMarkerTypeFunctions( + Streaming::MarkerDataDeserializer aDeserializer, + Streaming::MarkerTypeNameFunction aMarkerTypeNameFunction, + Streaming::MarkerSchemaFunction aMarkerSchemaFunction) { + MOZ_RELEASE_ASSERT(!!aDeserializer); + MOZ_RELEASE_ASSERT(!!aMarkerTypeNameFunction); + MOZ_RELEASE_ASSERT(!!aMarkerSchemaFunction); + + DeserializerTagAtomic tag = ++sDeserializerCount; + MOZ_RELEASE_ASSERT( + tag <= DeserializerMax, + "Too many deserializers, consider increasing DeserializerMax. 
" + "Or is a deserializer stored again and again?"); + sMarkerTypeFunctions1Based[tag - 1] = {aDeserializer, aMarkerTypeNameFunction, + aMarkerSchemaFunction}; + + return static_cast<DeserializerTag>(tag); +} + +/* static */ Streaming::MarkerDataDeserializer Streaming::DeserializerForTag( + Streaming::DeserializerTag aTag) { + MOZ_RELEASE_ASSERT( + aTag > 0 && static_cast<DeserializerTagAtomic>(aTag) <= + static_cast<DeserializerTagAtomic>(sDeserializerCount), + "Out-of-range tag value"); + return sMarkerTypeFunctions1Based[aTag - 1].mMarkerDataDeserializer; +} + +/* static */ Span<const Streaming::MarkerTypeFunctions> +Streaming::MarkerTypeFunctionsArray() { + return {sMarkerTypeFunctions1Based, sDeserializerCount}; +} + +} // namespace base_profiler_markers_detail + +void MarkerSchema::Stream(JSONWriter& aWriter, + const Span<const char>& aName) && { + // The caller should have started a JSON array, in which we can add an object + // that defines a marker schema. + + if (mLocations.empty()) { + // SpecialFrontendLocation case, don't output anything for this type. 
+ return; + } + + aWriter.StartObjectElement(); + { + aWriter.StringProperty("name", aName); + + if (!mChartLabel.empty()) { + aWriter.StringProperty("chartLabel", mChartLabel); + } + + if (!mTooltipLabel.empty()) { + aWriter.StringProperty("tooltipLabel", mTooltipLabel); + } + + if (!mTableLabel.empty()) { + aWriter.StringProperty("tableLabel", mTableLabel); + } + + aWriter.StartArrayProperty("display"); + { + for (Location location : mLocations) { + aWriter.StringElement(LocationToStringSpan(location)); + } + } + aWriter.EndArray(); + + aWriter.StartArrayProperty("data"); + { + for (const DataRow& row : mData) { + aWriter.StartObjectElement(); + { + row.match( + [&aWriter](const DynamicData& aData) { + aWriter.StringProperty("key", aData.mKey); + if (aData.mLabel) { + aWriter.StringProperty("label", *aData.mLabel); + } + aWriter.StringProperty("format", + FormatToStringSpan(aData.mFormat)); + if (aData.mSearchable) { + aWriter.BoolProperty( + "searchable", + *aData.mSearchable == Searchable::searchable); + } + }, + [&aWriter](const StaticData& aStaticData) { + aWriter.StringProperty("label", aStaticData.mLabel); + aWriter.StringProperty("value", aStaticData.mValue); + }); + } + aWriter.EndObject(); + } + } + aWriter.EndArray(); + } + aWriter.EndObject(); +} + +/* static */ +Span<const char> MarkerSchema::LocationToStringSpan( + MarkerSchema::Location aLocation) { + switch (aLocation) { + case Location::markerChart: + return mozilla::MakeStringSpan("marker-chart"); + case Location::markerTable: + return mozilla::MakeStringSpan("marker-table"); + case Location::timelineOverview: + return mozilla::MakeStringSpan("timeline-overview"); + case Location::timelineMemory: + return mozilla::MakeStringSpan("timeline-memory"); + case Location::timelineIPC: + return mozilla::MakeStringSpan("timeline-ipc"); + case Location::timelineFileIO: + return mozilla::MakeStringSpan("timeline-fileio"); + case Location::stackChart: + return mozilla::MakeStringSpan("stack-chart"); + 
default: + MOZ_CRASH("Unexpected Location enum"); + return {}; + } +} + +/* static */ +Span<const char> MarkerSchema::FormatToStringSpan( + MarkerSchema::Format aFormat) { + switch (aFormat) { + case Format::url: + return mozilla::MakeStringSpan("url"); + case Format::filePath: + return mozilla::MakeStringSpan("file-path"); + case Format::string: + return mozilla::MakeStringSpan("string"); + case Format::duration: + return mozilla::MakeStringSpan("duration"); + case Format::time: + return mozilla::MakeStringSpan("time"); + case Format::seconds: + return mozilla::MakeStringSpan("seconds"); + case Format::milliseconds: + return mozilla::MakeStringSpan("milliseconds"); + case Format::microseconds: + return mozilla::MakeStringSpan("microseconds"); + case Format::nanoseconds: + return mozilla::MakeStringSpan("nanoseconds"); + case Format::bytes: + return mozilla::MakeStringSpan("bytes"); + case Format::percentage: + return mozilla::MakeStringSpan("percentage"); + case Format::integer: + return mozilla::MakeStringSpan("integer"); + case Format::decimal: + return mozilla::MakeStringSpan("decimal"); + default: + MOZ_CRASH("Unexpected Format enum"); + return {}; + } +} + +} // namespace mozilla + +namespace mozilla::baseprofiler { +template MFBT_API ProfileBufferBlockIndex AddMarker(const ProfilerString8View&, + const MarkerCategory&, + MarkerOptions&&, + markers::TextMarker, + const std::string&); + +template MFBT_API ProfileBufferBlockIndex +AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&, + const MarkerCategory&, MarkerOptions&&, markers::NoPayload); + +template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer( + ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&, + MarkerOptions&&, markers::TextMarker, const std::string&); +} // namespace mozilla::baseprofiler diff --git a/mozglue/baseprofiler/core/ProfilingCategory.cpp b/mozglue/baseprofiler/core/ProfilingCategory.cpp new file mode 100644 index 0000000000..8ff2b15555 --- 
/dev/null +++ b/mozglue/baseprofiler/core/ProfilingCategory.cpp @@ -0,0 +1,71 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "BaseProfilingCategory.h" + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" + +#include "BaseProfiler.h" + +namespace mozilla { +namespace baseprofiler { + +// clang-format off + +// ProfilingSubcategory_X: +// One enum for each category X, listing that category's subcategories. This +// allows the sProfilingCategoryPairInfo macro construction below to look up a +// per-category index for a subcategory. +#define SUBCATEGORY_ENUMS_BEGIN_CATEGORY(name, labelAsString, color) \ + enum class ProfilingSubcategory_##name : uint32_t { +#define SUBCATEGORY_ENUMS_SUBCATEGORY(category, name, labelAsString) \ + name, +#define SUBCATEGORY_ENUMS_END_CATEGORY \ + }; +MOZ_PROFILING_CATEGORY_LIST(SUBCATEGORY_ENUMS_BEGIN_CATEGORY, + SUBCATEGORY_ENUMS_SUBCATEGORY, + SUBCATEGORY_ENUMS_END_CATEGORY) +#undef SUBCATEGORY_ENUMS_BEGIN_CATEGORY +#undef SUBCATEGORY_ENUMS_SUBCATEGORY +#undef SUBCATEGORY_ENUMS_END_CATEGORY + +// sProfilingCategoryPairInfo: +// A list of ProfilingCategoryPairInfos with the same order as +// ProfilingCategoryPair, which can be used to map a ProfilingCategoryPair to +// its information. 
+#define CATEGORY_INFO_BEGIN_CATEGORY(name, labelAsString, color) +#define CATEGORY_INFO_SUBCATEGORY(category, name, labelAsString) \ + {ProfilingCategory::category, \ + uint32_t(ProfilingSubcategory_##category::name), labelAsString}, +#define CATEGORY_INFO_END_CATEGORY +const ProfilingCategoryPairInfo sProfilingCategoryPairInfo[] = { + MOZ_PROFILING_CATEGORY_LIST(CATEGORY_INFO_BEGIN_CATEGORY, + CATEGORY_INFO_SUBCATEGORY, + CATEGORY_INFO_END_CATEGORY) +}; +#undef CATEGORY_INFO_BEGIN_CATEGORY +#undef CATEGORY_INFO_SUBCATEGORY +#undef CATEGORY_INFO_END_CATEGORY + +// clang-format on + +const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo( + ProfilingCategoryPair aCategoryPair) { + static_assert( + MOZ_ARRAY_LENGTH(sProfilingCategoryPairInfo) == + uint32_t(ProfilingCategoryPair::COUNT), + "sProfilingCategoryPairInfo and ProfilingCategory need to have the " + "same order and the same length"); + + uint32_t categoryPairIndex = uint32_t(aCategoryPair); + MOZ_RELEASE_ASSERT(categoryPairIndex <= + uint32_t(ProfilingCategoryPair::LAST)); + return sProfilingCategoryPairInfo[categoryPairIndex]; +} + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/ProfilingStack.cpp b/mozglue/baseprofiler/core/ProfilingStack.cpp new file mode 100644 index 0000000000..f5cd2ddd04 --- /dev/null +++ b/mozglue/baseprofiler/core/ProfilingStack.cpp @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "BaseProfilingStack.h" + +#include <algorithm> + +#include "mozilla/IntegerRange.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/UniquePtrExtensions.h" + +#include "BaseProfiler.h" + +namespace mozilla { +namespace baseprofiler { + +ProfilingStack::~ProfilingStack() { + // The label macros keep a reference to the ProfilingStack to avoid a TLS + // access. If these are somehow not all cleared we will get a + // use-after-free so better to crash now. + MOZ_RELEASE_ASSERT(stackPointer == 0); + + delete[] frames; +} + +void ProfilingStack::ensureCapacitySlow() { + MOZ_ASSERT(stackPointer >= capacity); + const uint32_t kInitialCapacity = 128; + + uint32_t sp = stackPointer; + auto newCapacity = + std::max(sp + 1, capacity ? capacity * 2 : kInitialCapacity); + + auto* newFrames = new ProfilingStackFrame[newCapacity]; + + // It's important that `frames` / `capacity` / `stackPointer` remain + // consistent here at all times. + for (auto i : IntegerRange(capacity)) { + newFrames[i] = frames[i]; + } + + ProfilingStackFrame* oldFrames = frames; + frames = newFrames; + capacity = newCapacity; + delete[] oldFrames; +} + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/RegisteredThread.cpp b/mozglue/baseprofiler/core/RegisteredThread.cpp new file mode 100644 index 0000000000..85a7fc2c6d --- /dev/null +++ b/mozglue/baseprofiler/core/RegisteredThread.cpp @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "RegisteredThread.h" + +#include "BaseProfiler.h" + +namespace mozilla { +namespace baseprofiler { + +RegisteredThread::RegisteredThread(ThreadInfo* aInfo, void* aStackTop) + : mRacyRegisteredThread(aInfo->ThreadId()), + mPlatformData(AllocPlatformData(aInfo->ThreadId())), + mStackTop(aStackTop), + mThreadInfo(aInfo) { + // We don't have to guess on mac +#if defined(GP_OS_darwin) + pthread_t self = pthread_self(); + mStackTop = pthread_get_stackaddr_np(self); +#endif +} + +RegisteredThread::~RegisteredThread() {} + +size_t RegisteredThread::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mPlatformData + // + // The following members are not measured: + // - mThreadInfo: because it is non-owning + + return n; +} + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/RegisteredThread.h b/mozglue/baseprofiler/core/RegisteredThread.h new file mode 100644 index 0000000000..6ae12b823f --- /dev/null +++ b/mozglue/baseprofiler/core/RegisteredThread.h @@ -0,0 +1,166 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef RegisteredThread_h +#define RegisteredThread_h + +#include "platform.h" +#include "ThreadInfo.h" + +#include "mozilla/UniquePtr.h" + +namespace mozilla { +namespace baseprofiler { + +// This class contains the state for a single thread that is accessible without +// protection from gPSMutex in platform.cpp. Because there is no external +// protection against data races, it must provide internal protection. Hence +// the "Racy" prefix. 
+// +class RacyRegisteredThread final { + public: + explicit RacyRegisteredThread(int aThreadId) + : mThreadId(aThreadId), mSleep(AWAKE), mIsBeingProfiled(false) {} + + ~RacyRegisteredThread() {} + + void SetIsBeingProfiled(bool aIsBeingProfiled) { + mIsBeingProfiled = aIsBeingProfiled; + } + + bool IsBeingProfiled() const { return mIsBeingProfiled; } + + // This is called on every profiler restart. Put things that should happen at + // that time here. + void ReinitializeOnResume() { + // This is needed to cause an initial sample to be taken from sleeping + // threads that had been observed prior to the profiler stopping and + // restarting. Otherwise sleeping threads would not have any samples to + // copy forward while sleeping. + (void)mSleep.compareExchange(SLEEPING_OBSERVED, SLEEPING_NOT_OBSERVED); + } + + // This returns true for the second and subsequent calls in each sleep cycle. + bool CanDuplicateLastSampleDueToSleep() { + if (mSleep == AWAKE) { + return false; + } + + if (mSleep.compareExchange(SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED)) { + return false; + } + + return true; + } + + // Call this whenever the current thread sleeps. Calling it twice in a row + // without an intervening SetAwake() call is an error. + void SetSleeping() { + MOZ_ASSERT(mSleep == AWAKE); + mSleep = SLEEPING_NOT_OBSERVED; + } + + // Call this whenever the current thread wakes. Calling it twice in a row + // without an intervening SetSleeping() call is an error. + void SetAwake() { + MOZ_ASSERT(mSleep != AWAKE); + mSleep = AWAKE; + } + + bool IsSleeping() { return mSleep != AWAKE; } + + int ThreadId() const { return mThreadId; } + + class ProfilingStack& ProfilingStack() { + return mProfilingStack; + } + const class ProfilingStack& ProfilingStack() const { return mProfilingStack; } + + private: + class ProfilingStack mProfilingStack; + + // mThreadId contains the thread ID of the current thread. 
It is safe to read + // this from multiple threads concurrently, as it will never be mutated. + const int mThreadId; + + // mSleep tracks whether the thread is sleeping, and if so, whether it has + // been previously observed. This is used for an optimization: in some cases, + // when a thread is asleep, we duplicate the previous sample, which is + // cheaper than taking a new sample. + // + // mSleep is atomic because it is accessed from multiple threads. + // + // - It is written only by this thread, via setSleeping() and setAwake(). + // + // - It is read by SamplerThread::Run(). + // + // There are two cases where racing between threads can cause an issue. + // + // - If CanDuplicateLastSampleDueToSleep() returns false but that result is + // invalidated before being acted upon, we will take a full sample + // unnecessarily. This is additional work but won't cause any correctness + // issues. (In actual fact, this case is impossible. In order to go from + // CanDuplicateLastSampleDueToSleep() returning false to it returning true + // requires an intermediate call to it in order for mSleep to go from + // SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.) + // + // - If CanDuplicateLastSampleDueToSleep() returns true but that result is + // invalidated before being acted upon -- i.e. the thread wakes up before + // DuplicateLastSample() is called -- we will duplicate the previous + // sample. This is inaccurate, but only slightly... we will effectively + // treat the thread as having slept a tiny bit longer than it really did. + // + // This latter inaccuracy could be avoided by moving the + // CanDuplicateLastSampleDueToSleep() check within the thread-freezing code, + // e.g. the section where Tick() is called. But that would reduce the + // effectiveness of the optimization because more code would have to be run + // before we can tell that duplication is allowed. 
+ // + static const int AWAKE = 0; + static const int SLEEPING_NOT_OBSERVED = 1; + static const int SLEEPING_OBSERVED = 2; + Atomic<int> mSleep; + + // Is this thread being profiled? (e.g., should markers be recorded?) + Atomic<bool, MemoryOrdering::Relaxed> mIsBeingProfiled; +}; + +// This class contains information that's relevant to a single thread only +// while that thread is running and registered with the profiler, but +// regardless of whether the profiler is running. All accesses to it are +// protected by the profiler state lock. +class RegisteredThread final { + public: + RegisteredThread(ThreadInfo* aInfo, void* aStackTop); + ~RegisteredThread(); + + class RacyRegisteredThread& RacyRegisteredThread() { + return mRacyRegisteredThread; + } + const class RacyRegisteredThread& RacyRegisteredThread() const { + return mRacyRegisteredThread; + } + + PlatformData* GetPlatformData() const { return mPlatformData.get(); } + const void* StackTop() const { return mStackTop; } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const; + + const RefPtr<ThreadInfo> Info() const { return mThreadInfo; } + + private: + class RacyRegisteredThread mRacyRegisteredThread; + + const UniquePlatformData mPlatformData; + const void* mStackTop; + + const RefPtr<ThreadInfo> mThreadInfo; +}; + +} // namespace baseprofiler +} // namespace mozilla + +#endif // RegisteredThread_h diff --git a/mozglue/baseprofiler/core/ThreadInfo.h b/mozglue/baseprofiler/core/ThreadInfo.h new file mode 100644 index 0000000000..4be84a45a9 --- /dev/null +++ b/mozglue/baseprofiler/core/ThreadInfo.h @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ThreadInfo_h +#define ThreadInfo_h + +#include "mozilla/Atomics.h" +#include "mozilla/TimeStamp.h" + +namespace mozilla { +namespace baseprofiler { + +// This class contains information about a thread which needs to be stored +// across restarts of the profiler and which can be useful even after the +// thread has stopped running. +// It uses threadsafe refcounting and only contains immutable data. +class ThreadInfo final { + public: + ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, + const TimeStamp& aRegisterTime = TimeStamp::NowUnfuzzed()) + : mName(aName), + mRegisterTime(aRegisterTime), + mThreadId(aThreadId), + mIsMainThread(aIsMainThread), + mRefCnt(0) { + // I don't know if we can assert this. But we should warn. + MOZ_ASSERT(aThreadId >= 0, "native thread ID is < 0"); + MOZ_ASSERT(aThreadId <= INT32_MAX, "native thread ID is > INT32_MAX"); + } + + // Using hand-rolled ref-counting, because RefCounted.h macros don't produce + // the same code between mozglue and libxul, see bug 1536656. 
+ MFBT_API void AddRef() const { ++mRefCnt; } + MFBT_API void Release() const { + MOZ_ASSERT(int32_t(mRefCnt) > 0); + if (--mRefCnt == 0) { + delete this; + } + } + + const char* Name() const { return mName.c_str(); } + TimeStamp RegisterTime() const { return mRegisterTime; } + int ThreadId() const { return mThreadId; } + bool IsMainThread() const { return mIsMainThread; } + + private: + const std::string mName; + const TimeStamp mRegisterTime; + const int mThreadId; + const bool mIsMainThread; + + mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt; +}; + +} // namespace baseprofiler +} // namespace mozilla + +#endif // ThreadInfo_h diff --git a/mozglue/baseprofiler/core/VTuneProfiler.cpp b/mozglue/baseprofiler/core/VTuneProfiler.cpp new file mode 100644 index 0000000000..2911c39f08 --- /dev/null +++ b/mozglue/baseprofiler/core/VTuneProfiler.cpp @@ -0,0 +1,92 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef XP_WIN +# undef UNICODE +# undef _UNICODE +#endif + +#include "VTuneProfiler.h" + +#include <memory> + +#include "BaseProfiler.h" + +namespace mozilla { +namespace baseprofiler { + +VTuneProfiler* VTuneProfiler::mInstance = nullptr; + +void VTuneProfiler::Initialize() { + // This is just a 'dirty trick' to find out if the ittnotify DLL was found. + // If it wasn't this function always returns 0, otherwise it returns + // incrementing numbers, if the library was found this wastes 2 events but + // that should be okay. 
+ // TODO re-implement here if vtune is needed + // __itt_event testEvent = + // __itt_event_create("Test event", strlen("Test event")); + // testEvent = __itt_event_create("Test event 2", strlen("Test event 2")); + + // if (testEvent) { + // mInstance = new VTuneProfiler(); + // } +} + +void VTuneProfiler::Shutdown() {} + +void VTuneProfiler::TraceInternal(const char* aName, TracingKind aKind) { + // TODO re-implement here if vtune is needed + // std::string str(aName); + + // auto iter = mStrings.find(str); + + // __itt_event event; + // if (iter != mStrings.end()) { + // event = iter->second; + // } else { + // event = __itt_event_create(aName, str.length()); + // mStrings.insert({str, event}); + // } + + // if (aKind == TRACING_INTERVAL_START || aKind == TRACING_EVENT) { + // // VTune will consider starts not matched with an end to be single point + // in + // // time events. + // __itt_event_start(event); + // } else { + // __itt_event_end(event); + // } +} + +void VTuneProfiler::RegisterThreadInternal(const char* aName) { + // TODO re-implement here if vtune is needed + // std::string str(aName); + + // if (!str.compare("Main Thread (Base Profiler)")) { + // // Process main thread. 
+ // switch (XRE_GetProcessType()) { + // case GeckoProcessType::GeckoProcessType_Default: + // __itt_thread_set_name("Main Process"); + // break; + // case GeckoProcessType::GeckoProcessType_Content: + // __itt_thread_set_name("Content Process"); + // break; + // case GeckoProcessType::GeckoProcessType_GMPlugin: + // __itt_thread_set_name("Plugin Process"); + // break; + // case GeckoProcessType::GeckoProcessType_GPU: + // __itt_thread_set_name("GPU Process"); + // break; + // default: + // __itt_thread_set_name("Unknown Process"); + // } + // return; + // } + // __itt_thread_set_name(aName); +} + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/VTuneProfiler.h b/mozglue/baseprofiler/core/VTuneProfiler.h new file mode 100644 index 0000000000..cf94ab7242 --- /dev/null +++ b/mozglue/baseprofiler/core/VTuneProfiler.h @@ -0,0 +1,84 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef VTuneProfiler_h +#define VTuneProfiler_h + +// The intent here is to add 0 overhead for regular users. In order to build +// the VTune profiler code at all --enable-vtune-instrumentation needs to be +// set as a build option. Even then, when none of the environment variables +// is specified that allow us to find the ittnotify DLL, these functions +// should be minimal overhead. When starting Firefox under VTune, these +// env vars will be automatically defined, otherwise INTEL_LIBITTNOTIFY32/64 +// should be set to point at the ittnotify DLL. 
+#ifndef MOZ_VTUNE_INSTRUMENTATION + +# define VTUNE_INIT() +# define VTUNE_SHUTDOWN() + +# define VTUNE_TRACING(name, kind) +# define VTUNE_REGISTER_THREAD(name) + +#else + +# include "BaseProfiler.h" + +// This is the regular Intel header, these functions are actually defined for +// us inside js/src/vtune by an intel C file which actually dynamically resolves +// them to the correct DLL. Through libxul these will 'magically' resolve. +# include "vtune/ittnotify.h" + +# include <stddef.h> +# include <unordered_map> +# include <string> + +namespace mozilla { +namespace baseprofiler { + +class VTuneProfiler { + public: + static void Initialize(); + static void Shutdown(); + + enum TracingKind { + TRACING_EVENT, + TRACING_INTERVAL_START, + TRACING_INTERVAL_END, + }; + + static void Trace(const char* aName, TracingKind aKind) { + if (mInstance) { + mInstance->TraceInternal(aName, aKind); + } + } + static void RegisterThread(const char* aName) { + if (mInstance) { + mInstance->RegisterThreadInternal(aName); + } + } + + private: + void TraceInternal(const char* aName, TracingKind aKind); + void RegisterThreadInternal(const char* aName); + + // This is null when the ittnotify DLL could not be found. 
+ static VTuneProfiler* mInstance; + + std::unordered_map<std::string, __itt_event> mStrings; +}; + +# define VTUNE_INIT() VTuneProfiler::Initialize() +# define VTUNE_SHUTDOWN() VTuneProfiler::Shutdown() + +# define VTUNE_TRACING(name, kind) VTuneProfiler::Trace(name, kind) +# define VTUNE_REGISTER_THREAD(name) VTuneProfiler::RegisterThread(name) + +} // namespace baseprofiler +} // namespace mozilla + +#endif + +#endif /* VTuneProfiler_h */ diff --git a/mozglue/baseprofiler/core/platform-linux-android.cpp b/mozglue/baseprofiler/core/platform-linux-android.cpp new file mode 100644 index 0000000000..210bc4dd31 --- /dev/null +++ b/mozglue/baseprofiler/core/platform-linux-android.cpp @@ -0,0 +1,550 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// This file is used for both Linux and Android. + +#include <stdio.h> +#include <math.h> + +#include <pthread.h> +#if defined(GP_OS_freebsd) +# include <sys/thr.h> +#endif +#include <semaphore.h> +#include <signal.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <stdlib.h> +#include <sched.h> +#include <ucontext.h> +// Ubuntu Dapper requires memory pages to be marked as +// executable. Otherwise, OS raises an exception when executing code +// in that page. +#include <sys/types.h> // mmap & munmap +#include <sys/mman.h> // mmap & munmap +#include <sys/stat.h> // open +#include <fcntl.h> // open +#include <unistd.h> // sysconf +#include <semaphore.h> +#ifdef __GLIBC__ +# include <execinfo.h> // backtrace, backtrace_symbols +#endif // def __GLIBC__ +#include <strings.h> // index +#include <errno.h> +#include <stdarg.h> + +#include "prenv.h" +#include "mozilla/PodOperations.h" +#include "mozilla/DebugOnly.h" + +#include <string.h> +#include <list> + +using namespace mozilla; + +namespace mozilla { +namespace baseprofiler { + +int profiler_current_process_id() { return getpid(); } + +int profiler_current_thread_id() { +#if defined(GP_OS_linux) || defined(GP_OS_android) + // glibc doesn't provide a wrapper for gettid(). 
+ return static_cast<int>(static_cast<pid_t>(syscall(SYS_gettid))); +#elif defined(GP_OS_freebsd) + long id; + (void)thr_self(&id); + return static_cast<int>(id); +#else +# error "bad platform" +#endif +} + +static int64_t MicrosecondsSince1970() { + struct timeval tv; + gettimeofday(&tv, NULL); + return int64_t(tv.tv_sec) * 1000000 + int64_t(tv.tv_usec); +} + +void* GetStackTop(void* aGuess) { return aGuess; } + +static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) { + aRegs.mContext = aContext; + mcontext_t& mcontext = aContext->uc_mcontext; + + // Extracting the sample from the context is extremely machine dependent. +#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]); + aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]); + aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]); + aRegs.mLR = 0; +#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]); + aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]); + aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]); + aRegs.mLR = 0; +#elif defined(GP_PLAT_amd64_freebsd) + aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip); + aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp); + aRegs.mLR = 0; +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc); + aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp); + aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr); +#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.pc); + aRegs.mSP = reinterpret_cast<Address>(mcontext.sp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]); + aRegs.mLR = 
reinterpret_cast<Address>(mcontext.regs[30]); +#elif defined(GP_PLAT_arm64_freebsd) + aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr); + aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]); + aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr); +#elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.pc); + aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]); + aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]); + +#else +# error "bad platform" +#endif +} + +#if defined(GP_OS_android) +# define SYS_tgkill __NR_tgkill +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) +int tgkill(pid_t tgid, pid_t tid, int signalno) { + return syscall(SYS_tgkill, tgid, tid, signalno); +} +#endif + +#if defined(GP_OS_freebsd) +# define tgkill thr_kill2 +#endif + +class PlatformData { + public: + explicit PlatformData(int aThreadId) {} + + ~PlatformData() {} +}; + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +// The only way to reliably interrupt a Linux thread and inspect its register +// and stack state is by sending a signal to it, and doing the work inside the +// signal handler. But we don't want to run much code inside the signal +// handler, since POSIX severely restricts what we can do in signal handlers. +// So we use a system of semaphores to suspend the thread and allow the +// sampler thread to do all the work of unwinding and copying out whatever +// data it wants. 
//
// A four-message protocol is used to reliably suspend and later resume the
// thread to be sampled (the samplee):
//
// Sampler (signal sender) thread              Samplee (thread to be sampled)
//
// Prepare the SigHandlerCoordinator
// and point sSigHandlerCoordinator at it
//
// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
// wait(mMessage2)                             Copy register state
//                                               into sSigHandlerCoordinator
//                         <------ MSG 2 ----- post(mMessage2)
// Samplee is now suspended.                   wait(mMessage3)
//   Examine its stack/register
//   state at leisure
//
// Release samplee:
//   post(mMessage3)       ------- MSG 3 ----->
// wait(mMessage4)                             Samplee now resumes. Tell
//                                               the sampler that we are done.
//                         <------ MSG 4 ------ post(mMessage4)
// Now we know the samplee's signal            (leave signal handler)
//   handler has finished using
//   sSigHandlerCoordinator. We can
//   safely reuse it for some other thread.
//

// A type used to coordinate between the sampler (signal sending) thread and
// the thread currently being sampled (the samplee, which receives the
// signals).
//
// The first message is sent using a SIGPROF signal delivery. The subsequent
// three are sent using sem_wait/sem_post pairs. They are named accordingly
// in the following struct.
+struct SigHandlerCoordinator { + SigHandlerCoordinator() { + PodZero(&mUContext); + int r = sem_init(&mMessage2, /* pshared */ 0, 0); + r |= sem_init(&mMessage3, /* pshared */ 0, 0); + r |= sem_init(&mMessage4, /* pshared */ 0, 0); + MOZ_ASSERT(r == 0); + } + + ~SigHandlerCoordinator() { + int r = sem_destroy(&mMessage2); + r |= sem_destroy(&mMessage3); + r |= sem_destroy(&mMessage4); + MOZ_ASSERT(r == 0); + } + + sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator" + sem_t mMessage3; // To samplee: "resume" + sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator" + ucontext_t mUContext; // Context at signal +}; + +struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr; + +static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) { + // Avoid TSan warning about clobbering errno. + int savedErrno = errno; + + MOZ_ASSERT(aSignal == SIGPROF); + MOZ_ASSERT(Sampler::sSigHandlerCoordinator); + + // By sending us this signal, the sampler thread has sent us message 1 in + // the comment above, with the meaning "|sSigHandlerCoordinator| is ready + // for use, please copy your register context into it." + Sampler::sSigHandlerCoordinator->mUContext = + *static_cast<ucontext_t*>(aContext); + + // Send message 2: tell the sampler thread that the context has been copied + // into |sSigHandlerCoordinator->mUContext|. sem_post can never fail by + // being interrupted by a signal, so there's no loop around this call. + int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2); + MOZ_ASSERT(r == 0); + + // At this point, the sampler thread assumes we are suspended, so we must + // not touch any global state here. + + // Wait for message 3: the sampler thread tells us to resume. + while (true) { + r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3); + if (r == -1 && errno == EINTR) { + // Interrupted by a signal. Try again. 
+ continue; + } + // We don't expect any other kind of failure + MOZ_ASSERT(r == 0); + break; + } + + // Send message 4: tell the sampler thread that we are finished accessing + // |sSigHandlerCoordinator|. After this point it is not safe to touch + // |sSigHandlerCoordinator|. + r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4); + MOZ_ASSERT(r == 0); + + errno = savedErrno; +} + +Sampler::Sampler(PSLockRef aLock) + : mMyPid(profiler_current_process_id()) + // We don't know what the sampler thread's ID will be until it runs, so + // set mSamplerTid to a dummy value and fill it in for real in + // SuspendAndSampleAndResumeThread(). + , + mSamplerTid(-1) { +#if defined(USE_EHABI_STACKWALK) + EHABIStackWalkInit(); +#endif + + // NOTE: We don't initialize LUL here, instead initializing it in + // SamplerThread's constructor. This is because with the + // profiler_suspend_and_sample_thread entry point, we want to be able to + // sample without waiting for LUL to be initialized. + + // Request profiling signals. + struct sigaction sa; + sa.sa_sigaction = SigprofHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) { + MOZ_CRASH("Error installing SIGPROF handler in the profiler"); + } +} + +void Sampler::Disable(PSLockRef aLock) { + // Restore old signal handler. This is global state so it's important that + // we do it now, while gPSMutex is locked. + sigaction(SIGPROF, &mOldSigprofHandler, 0); +} + +template <typename Func> +void Sampler::SuspendAndSampleAndResumeThread( + PSLockRef aLock, const RegisteredThread& aRegisteredThread, + const TimeStamp& aNow, const Func& aProcessRegs) { + // Only one sampler thread can be sampling at once. So we expect to have + // complete control over |sSigHandlerCoordinator|. 
+ MOZ_ASSERT(!sSigHandlerCoordinator); + + if (mSamplerTid == -1) { + mSamplerTid = profiler_current_thread_id(); + } + int sampleeTid = aRegisteredThread.Info()->ThreadId(); + MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid); + + //----------------------------------------------------------------// + // Suspend the samplee thread and get its context. + + SigHandlerCoordinator coord; // on sampler thread's stack + sSigHandlerCoordinator = &coord; + + // Send message 1 to the samplee (the thread to be sampled), by + // signalling at it. + // This could fail if the thread doesn't exist anymore. + int r = tgkill(mMyPid, sampleeTid, SIGPROF); + if (r == 0) { + // Wait for message 2 from the samplee, indicating that the context + // is available and that the thread is suspended. + while (true) { + r = sem_wait(&sSigHandlerCoordinator->mMessage2); + if (r == -1 && errno == EINTR) { + // Interrupted by a signal. Try again. + continue; + } + // We don't expect any other kind of failure. + MOZ_ASSERT(r == 0); + break; + } + + //----------------------------------------------------------------// + // Sample the target thread. + + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + // + // The profiler's "critical section" begins here. In the critical section, + // we must not do any dynamic memory allocation, nor try to acquire any lock + // or any other unshareable resource. This is because the thread to be + // sampled has been suspended at some entirely arbitrary point, and we have + // no idea which unsharable resources (locks, essentially) it holds. So any + // attempt to acquire any lock, including the implied locks used by the + // malloc implementation, risks deadlock. This includes TimeStamp::Now(), + // which gets a lock on Windows. + + // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is + // valid. We can poke around in it and unwind its stack as we like. + + // Extract the current register values. 
+ Registers regs; + PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext); + aProcessRegs(regs, aNow); + + //----------------------------------------------------------------// + // Resume the target thread. + + // Send message 3 to the samplee, which tells it to resume. + r = sem_post(&sSigHandlerCoordinator->mMessage3); + MOZ_ASSERT(r == 0); + + // Wait for message 4 from the samplee, which tells us that it has + // finished with |sSigHandlerCoordinator|. + while (true) { + r = sem_wait(&sSigHandlerCoordinator->mMessage4); + if (r == -1 && errno == EINTR) { + continue; + } + MOZ_ASSERT(r == 0); + break; + } + + // The profiler's critical section ends here. After this point, none of the + // critical section limitations documented above apply. + // + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + } + + // This isn't strictly necessary, but doing so does help pick up anomalies + // in which the signal handler is running when it shouldn't be. + sSigHandlerCoordinator = nullptr; +} + +// END Sampler target specifics +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread target specifics + +static void* ThreadEntry(void* aArg) { + auto thread = static_cast<SamplerThread*>(aArg); + thread->Run(); + return nullptr; +} + +SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds) + : mSampler(aLock), + mActivityGeneration(aActivityGeneration), + mIntervalMicroseconds( + std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) { +#if defined(USE_LUL_STACKWALK) + lul::LUL* lul = CorePS::Lul(aLock); + if (!lul) { + CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL)); + // Read all the unwind info currently available. + lul = CorePS::Lul(aLock); + read_procmaps(lul); + + // Switch into unwind mode. 
After this point, we can't add or remove any + // unwind info to/from this LUL instance. The only thing we can do with + // it is Unwind() calls. + lul->EnableUnwinding(); + + // Has a test been requested? + if (getenv("MOZ_PROFILER_LUL_TEST")) { + int nTests = 0, nTestsPassed = 0; + RunLulUnitTests(&nTests, &nTestsPassed, lul); + } + } +#endif + + // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending + // the signal ourselves instead of relying on itimer provides much better + // accuracy. + if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) { + MOZ_CRASH("pthread_create failed"); + } +} + +SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); } + +void SamplerThread::SleepMicro(uint32_t aMicroseconds) { + if (aMicroseconds >= 1000000) { + // Use usleep for larger intervals, because the nanosleep + // code below only supports intervals < 1 second. + MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds)); + return; + } + + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = aMicroseconds * 1000UL; + + int rv = ::nanosleep(&ts, &ts); + + while (rv != 0 && errno == EINTR) { + // Keep waiting in case of interrupt. + // nanosleep puts the remaining time back into ts. + rv = ::nanosleep(&ts, &ts); + } + + MOZ_ASSERT(!rv, "nanosleep call failed"); +} + +void SamplerThread::Stop(PSLockRef aLock) { + // Restore old signal handler. This is global state so it's important that + // we do it now, while gPSMutex is locked. It's safe to do this now even + // though this SamplerThread is still alive, because the next time the main + // loop of Run() iterates it won't get past the mActivityGeneration check, + // and so won't send any signals. + mSampler.Disable(aLock); +} + +// END SamplerThread target specifics +//////////////////////////////////////////////////////////////////////// + +#if defined(GP_OS_linux) || defined(GP_OS_freebsd) + +// We use pthread_atfork() to temporarily disable signal delivery during any +// fork() call. 
// Without that, fork() can be repeatedly interrupted by signal
// delivery, requiring it to be repeatedly restarted, which can lead to *long*
// delays. See bug 837390.
//
// We provide no paf_child() function to run in the child after forking. This
// is fine because we always immediately exec() after fork(), and exec()
// clobbers all process state. (At one point we did have a paf_child()
// function, but it caused problems related to locking gPSMutex. See bug
// 1348374.)
//
// Unfortunately all this is only doable on non-Android because Bionic doesn't
// have pthread_atfork.

// In the parent, before the fork, record IsSamplingPaused, and then pause.
static void paf_prepare() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  // Nothing to pause when the profiler is not active.
  if (ActivePS::Exists(lock)) {
    ActivePS::SetWasSamplingPaused(lock, ActivePS::IsSamplingPaused(lock));
    ActivePS::SetIsSamplingPaused(lock, true);
  }
}

// In the parent, after the fork, return IsSamplingPaused to the pre-fork state.
static void paf_parent() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (ActivePS::Exists(lock)) {
    ActivePS::SetIsSamplingPaused(lock, ActivePS::WasSamplingPaused(lock));
    ActivePS::SetWasSamplingPaused(lock, false);
  }
}

// Registers paf_prepare/paf_parent as the pthread_atfork "prepare" and
// "parent" handlers; no "child" handler is installed (nullptr).
static void PlatformInit(PSLockRef aLock) {
  // Set up the fork handlers.
  pthread_atfork(paf_prepare, paf_parent, nullptr);
}

#else

// No fork handlers in this configuration.
static void PlatformInit(PSLockRef aLock) {}

#endif

#if defined(HAVE_NATIVE_UNWIND)
// Context used by synchronous samples. It's safe to have a single one because
// only one synchronous sample can be taken at a time (due to
// profiler_get_backtrace()'s PSAutoLock).
// ucontext_t sSyncUContext;

// Not implemented here: unconditionally crashes. The commented-out code below
// is the intended shape once getcontext is available.
void Registers::SyncPopulate() {
  // TODO port getcontext from breakpad, if profiler_get_backtrace is needed.
  MOZ_CRASH("profiler_get_backtrace() unsupported");
  // if (!getcontext(&sSyncUContext)) {
  //   PopulateRegsFromContext(*this, &sSyncUContext);
  // }
}
#endif

}  // namespace baseprofiler
}  // namespace mozilla

// diff --git a/mozglue/baseprofiler/core/platform-macos.cpp b/mozglue/baseprofiler/core/platform-macos.cpp
// new file mode 100644
// index 0000000000..fc847886ee
// --- /dev/null
// +++ b/mozglue/baseprofiler/core/platform-macos.cpp
// @@ -0,0 +1,233 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <unistd.h>
#include <sys/mman.h>
#include <mach/mach_init.h>
#include <mach-o/getsect.h>

#include <AvailabilityMacros.h>

#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <libkern/OSAtomic.h>
#include <mach/mach.h>
#include <mach/semaphore.h>
#include <mach/task.h>
#include <mach/thread_act.h>
#include <mach/vm_statistics.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <math.h>

// this port is based off of v8 svn revision 9837

namespace mozilla {
namespace baseprofiler {

// Returns the id of the calling process, as reported by getpid().
int profiler_current_process_id() { return getpid(); }

// Returns the id of the calling thread, taken from the SYS_thread_selfid
// syscall and narrowed to int.
int profiler_current_thread_id() {
  return static_cast<int>(static_cast<pid_t>(syscall(SYS_thread_selfid)));
}

// Returns the gettimeofday() wall-clock time expressed in microseconds.
static int64_t MicrosecondsSince1970() {
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return int64_t(tv.tv_sec) * 1000000 + int64_t(tv.tv_usec);
}

// Returns pthread_get_stackaddr_np() for the calling thread. aGuess is not
// used by this implementation.
void* GetStackTop(void* aGuess) {
  pthread_t thread = pthread_self();
  return pthread_get_stackaddr_np(thread);
}

// Per-thread platform state: the Mach port of the thread that constructed the
// object, released again in the destructor.
class PlatformData {
 public:
  // aThreadId is not used here; the port comes from mach_thread_self(), so
  // the object must be constructed on the thread it describes.
  explicit PlatformData(int aThreadId) : mProfiledThread(mach_thread_self()) {}

  ~PlatformData() {
    // Deallocate Mach port for thread.
    mach_port_deallocate(mach_task_self(), mProfiledThread);
  }

  thread_act_t ProfiledThread() { return mProfiledThread; }

 private:
  // Note: for mProfiledThread Mach primitives are used instead of pthread's
  // because the latter doesn't provide thread manipulation primitives required.
  // For details, consult "Mac OS X Internals" book, Section 7.3.
  thread_act_t mProfiledThread;
};

////////////////////////////////////////////////////////////////////////
// BEGIN Sampler target specifics

// Nothing to set up in this implementation.
Sampler::Sampler(PSLockRef aLock) {}

// Nothing to tear down in this implementation.
void Sampler::Disable(PSLockRef aLock) {}

// Suspends the thread described by aRegisteredThread, reads its PC/SP/FP with
// thread_get_state(), calls aProcessRegs(regs, aNow) while the thread is still
// suspended, then resumes it.
//
// Returns without sampling if thread_suspend() fails. If thread_get_state()
// fails, aProcessRegs is skipped but the thread is still resumed.
template <typename Func>
void Sampler::SuspendAndSampleAndResumeThread(
    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
    const TimeStamp& aNow, const Func& aProcessRegs) {
  thread_act_t samplee_thread =
      aRegisteredThread.GetPlatformData()->ProfiledThread();

  //----------------------------------------------------------------//
  // Suspend the samplee thread and get its context.

  // We're using thread_suspend on OS X because pthread_kill (which is what we
  // at one time used on Linux) has less consistent performance and causes
  // strange crashes, see bug 1166778 and bug 1166808.  thread_suspend
  // is also just a lot simpler to use.

  if (KERN_SUCCESS != thread_suspend(samplee_thread)) {
    return;
  }

  //----------------------------------------------------------------//
  // Sample the target thread.

  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
  //
  // The profiler's "critical section" begins here.  We must be very careful
  // what we do here, or risk deadlock.  See the corresponding comment in
  // platform-linux-android.cpp for details.

  // Pick the thread-state flavor for the build architecture. REGISTER_FIELD
  // maps a register name onto the matching struct member, whose spelling
  // depends on __DARWIN_UNIX03.
#if defined(__x86_64__)
  thread_state_flavor_t flavor = x86_THREAD_STATE64;
  x86_thread_state64_t state;
  mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
#  if __DARWIN_UNIX03
#    define REGISTER_FIELD(name) __r##name
#  else
#    define REGISTER_FIELD(name) r##name
#  endif  // __DARWIN_UNIX03
#elif defined(__aarch64__)
  thread_state_flavor_t flavor = ARM_THREAD_STATE64;
  arm_thread_state64_t state;
  mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT;
#  if __DARWIN_UNIX03
#    define REGISTER_FIELD(name) __##name
#  else
#    define REGISTER_FIELD(name) name
#  endif  // __DARWIN_UNIX03
#else
#  error "unknown architecture"
#endif

  if (thread_get_state(samplee_thread, flavor,
                       reinterpret_cast<natural_t*>(&state),
                       &count) == KERN_SUCCESS) {
    Registers regs;
#if defined(__x86_64__)
    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
#elif defined(__aarch64__)
    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(pc));
    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(fp));
#else
#  error "unknown architecture"
#endif
    regs.mLR = 0;

    aProcessRegs(regs, aNow);
  }

#undef REGISTER_FIELD

  //----------------------------------------------------------------//
  // Resume the target thread.

  thread_resume(samplee_thread);

  // The profiler's critical section ends here.
  //
  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
}

// END Sampler target specifics
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN SamplerThread target specifics

// pthread start routine. aArg is the SamplerThread handed to pthread_create();
// this runs its Run() method and returns nullptr when it finishes.
static void* ThreadEntry(void* aArg) {
  auto thread = static_cast<SamplerThread*>(aArg);
  thread->Run();
  return nullptr;
}

// Converts the interval to microseconds (rounded to nearest, never below 1)
// and starts the sampler thread with default attributes. Crashes if the
// thread cannot be created.
SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                             double aIntervalMilliseconds)
    : mSampler(aLock),
      mActivityGeneration(aActivityGeneration),
      mIntervalMicroseconds(
          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
      mThread{nullptr} {
  pthread_attr_t* attr_ptr = nullptr;
  if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) {
    MOZ_CRASH("pthread_create failed");
  }
}

// Blocks until the sampler thread has exited.
SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }

void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
  usleep(aMicroseconds);
  // FIXME: the OSX 10.12 page for usleep says "The usleep() function is
  // obsolescent.  Use nanosleep(2) instead."  This implementation could be
  // merged with the linux-android version.  Also, this doesn't handle the
  // case where the usleep call is interrupted by a signal.
}

// Forwards to the sampler's Disable(); there is nothing else to undo here.
void SamplerThread::Stop(PSLockRef aLock) { mSampler.Disable(aLock); }

// END SamplerThread target specifics
////////////////////////////////////////////////////////////////////////

// No platform-wide initialization in this implementation.
static void PlatformInit(PSLockRef aLock) {}

#if defined(HAVE_NATIVE_UNWIND)
// Fills in this Registers object for a synchronous sample: SP and FP are
// derived from the current frame pointer as described in the asm comments,
// PC is this function's return address, and LR is zeroed.
void Registers::SyncPopulate() {
#  if defined(__x86_64__)
  asm(
      // Compute caller's %rsp by adding to %rbp:
      // 8 bytes for previous %rbp, 8 bytes for return address
      "leaq 0x10(%%rbp), %0\n\t"
      // Dereference %rbp to get previous %rbp
      "movq (%%rbp), %1\n\t"
      : "=r"(mSP), "=r"(mFP));
#  elif defined(__aarch64__)
  asm(
      // Compute caller's sp by adding to fp:
      // 8 bytes for previous fp, 8 bytes for return address
      "add %0, x29, #0x10\n\t"
      // Dereference fp to get previous fp
      "ldr %1, [x29]\n\t"
      : "=r"(mSP), "=r"(mFP));
#  else
#    error "unknown architecture"
#  endif
  mPC = reinterpret_cast<Address>(
      __builtin_extract_return_addr(__builtin_return_address(0)));
  mLR = 0;
}
#endif

}  // namespace baseprofiler
}  // namespace mozilla

// diff --git a/mozglue/baseprofiler/core/platform-win32.cpp b/mozglue/baseprofiler/core/platform-win32.cpp
// new file mode 100644
// index 0000000000..22b8a8462b
// --- /dev/null
// +++ b/mozglue/baseprofiler/core/platform-win32.cpp
// @@ -0,0 +1,351 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in
//       the documentation and/or other materials provided with the
//       distribution.
+// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#include <windows.h> +#include <mmsystem.h> +#include <process.h> + +#include "nsWindowsDllInterceptor.h" +#include "mozilla/StackWalk_windows.h" +#include "mozilla/WindowsVersion.h" + +namespace mozilla { +namespace baseprofiler { + +int profiler_current_process_id() { return _getpid(); } + +int profiler_current_thread_id() { + DWORD threadId = GetCurrentThreadId(); + MOZ_ASSERT(threadId <= INT32_MAX, "native thread ID is > INT32_MAX"); + return int(threadId); +} + +static int64_t MicrosecondsSince1970() { + int64_t prt; + FILETIME ft; + SYSTEMTIME st; + + GetSystemTime(&st); + SystemTimeToFileTime(&st, &ft); + static_assert(sizeof(ft) == sizeof(prt), "Expect FILETIME to be 64 bits"); + memcpy(&prt, &ft, sizeof(prt)); + const int64_t epochBias = 116444736000000000LL; + prt = (prt - epochBias) / 10; + + return prt; +} + +void* GetStackTop(void* aGuess) { + PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb()); + return reinterpret_cast<void*>(pTib->StackBase); +} + +static void 
PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) { +#if defined(GP_ARCH_amd64) + aRegs.mPC = reinterpret_cast<Address>(aContext->Rip); + aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp); + aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp); +#elif defined(GP_ARCH_x86) + aRegs.mPC = reinterpret_cast<Address>(aContext->Eip); + aRegs.mSP = reinterpret_cast<Address>(aContext->Esp); + aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp); +#elif defined(GP_ARCH_arm64) + aRegs.mPC = reinterpret_cast<Address>(aContext->Pc); + aRegs.mSP = reinterpret_cast<Address>(aContext->Sp); + aRegs.mFP = reinterpret_cast<Address>(aContext->Fp); +#else +# error "bad arch" +#endif + aRegs.mLR = 0; +} + +// Gets a real (i.e. not pseudo) handle for the current thread, with the +// permissions needed for profiling. +// @return a real HANDLE for the current thread. +static HANDLE GetRealCurrentThreadHandleForProfiling() { + HANDLE realCurrentThreadHandle; + if (!::DuplicateHandle( + ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(), + &realCurrentThreadHandle, + THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, + FALSE, 0)) { + return nullptr; + } + + return realCurrentThreadHandle; +} + +class PlatformData { + public: + // Get a handle to the calling thread. This is the thread that we are + // going to profile. We need a real handle because we are going to use it in + // the sampler thread. 
+ explicit PlatformData(int aThreadId) + : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) { + MOZ_ASSERT(aThreadId == ::GetCurrentThreadId()); + } + + ~PlatformData() { + if (mProfiledThread != nullptr) { + CloseHandle(mProfiledThread); + mProfiledThread = nullptr; + } + } + + HANDLE ProfiledThread() { return mProfiledThread; } + + private: + HANDLE mProfiledThread; +}; + +#if defined(USE_MOZ_STACK_WALK) +HANDLE +GetThreadHandle(PlatformData* aData) { return aData->ProfiledThread(); } +#endif + +static const HANDLE kNoThread = INVALID_HANDLE_VALUE; + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +Sampler::Sampler(PSLockRef aLock) {} + +void Sampler::Disable(PSLockRef aLock) {} + +template <typename Func> +void Sampler::SuspendAndSampleAndResumeThread( + PSLockRef aLock, const RegisteredThread& aRegisteredThread, + const TimeStamp& aNow, const Func& aProcessRegs) { + HANDLE profiled_thread = + aRegisteredThread.GetPlatformData()->ProfiledThread(); + if (profiled_thread == nullptr) { + return; + } + + // Context used for sampling the register state of the profiled thread. + CONTEXT context; + memset(&context, 0, sizeof(context)); + + //----------------------------------------------------------------// + // Suspend the samplee thread and get its context. + + static const DWORD kSuspendFailed = static_cast<DWORD>(-1); + if (SuspendThread(profiled_thread) == kSuspendFailed) { + return; + } + + // SuspendThread is asynchronous, so the thread may still be running. + // Call GetThreadContext first to ensure the thread is really suspended. + // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743. + + // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in + // RtlVirtualUnwind (see bug 1120126) so we set all the flags. 
+#if defined(GP_ARCH_amd64) + context.ContextFlags = CONTEXT_FULL; +#else + context.ContextFlags = CONTEXT_CONTROL; +#endif + if (!GetThreadContext(profiled_thread, &context)) { + ResumeThread(profiled_thread); + return; + } + + //----------------------------------------------------------------// + // Sample the target thread. + + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + // + // The profiler's "critical section" begins here. We must be very careful + // what we do here, or risk deadlock. See the corresponding comment in + // platform-linux-android.cpp for details. + + Registers regs; + PopulateRegsFromContext(regs, &context); + aProcessRegs(regs, aNow); + + //----------------------------------------------------------------// + // Resume the target thread. + + ResumeThread(profiled_thread); + + // The profiler's critical section ends here. + // + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +} + +// END Sampler target specifics +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread target specifics + +static unsigned int __stdcall ThreadEntry(void* aArg) { + auto thread = static_cast<SamplerThread*>(aArg); + thread->Run(); + return 0; +} + +SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds) + : mSampler(aLock), + mActivityGeneration(aActivityGeneration), + mIntervalMicroseconds( + std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) { + // By default we'll not adjust the timer resolution which tends to be + // around 16ms. However, if the requested interval is sufficiently low + // we'll try to adjust the resolution to match. + if (mIntervalMicroseconds < 10 * 1000) { + ::timeBeginPeriod(mIntervalMicroseconds / 1000); + } + + // Create a new thread. 
It is important to use _beginthreadex() instead of + // the Win32 function CreateThread(), because the CreateThread() does not + // initialize thread-specific structures in the C runtime library. + mThread = reinterpret_cast<HANDLE>(_beginthreadex(nullptr, + /* stack_size */ 0, + ThreadEntry, this, + /* initflag */ 0, nullptr)); + if (mThread == 0) { + MOZ_CRASH("_beginthreadex failed"); + } +} + +SamplerThread::~SamplerThread() { + WaitForSingleObject(mThread, INFINITE); + + // Close our own handle for the thread. + if (mThread != kNoThread) { + CloseHandle(mThread); + } +} + +void SamplerThread::SleepMicro(uint32_t aMicroseconds) { + // For now, keep the old behaviour of minimum Sleep(1), even for + // smaller-than-usual sleeps after an overshoot, unless the user has + // explicitly opted into a sub-millisecond profiler interval. + if (mIntervalMicroseconds >= 1000) { + ::Sleep(std::max(1u, aMicroseconds / 1000)); + } else { + TimeStamp start = TimeStamp::NowUnfuzzed(); + TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds); + + // First, sleep for as many whole milliseconds as possible. + if (aMicroseconds >= 1000) { + ::Sleep(aMicroseconds / 1000); + } + + // Then, spin until enough time has passed. + while (TimeStamp::NowUnfuzzed() < end) { + YieldProcessor(); + } + } +} + +void SamplerThread::Stop(PSLockRef aLock) { + // Disable any timer resolution changes we've made. Do it now while + // gPSMutex is locked, i.e. before any other SamplerThread can be created + // and call ::timeBeginPeriod(). + // + // It's safe to do this now even though this SamplerThread is still alive, + // because the next time the main loop of Run() iterates it won't get past + // the mActivityGeneration check, and so it won't make any more ::Sleep() + // calls. 
+ if (mIntervalMicroseconds < 10 * 1000) { + ::timeEndPeriod(mIntervalMicroseconds / 1000); + } + + mSampler.Disable(aLock); +} + +// END SamplerThread target specifics +//////////////////////////////////////////////////////////////////////// + +static void PlatformInit(PSLockRef aLock) {} + +#if defined(HAVE_NATIVE_UNWIND) +void Registers::SyncPopulate() { + CONTEXT context; + RtlCaptureContext(&context); + PopulateRegsFromContext(*this, &context); +} +#endif + +#if defined(GP_PLAT_amd64_windows) +static WindowsDllInterceptor NtDllIntercept; + +typedef NTSTATUS(NTAPI* LdrUnloadDll_func)(HMODULE module); +static WindowsDllInterceptor::FuncHookType<LdrUnloadDll_func> stub_LdrUnloadDll; + +static NTSTATUS NTAPI patched_LdrUnloadDll(HMODULE module) { + // Prevent the stack walker from suspending this thread when LdrUnloadDll + // holds the RtlLookupFunctionEntry lock. + AutoSuppressStackWalking suppress; + return stub_LdrUnloadDll(module); +} + +// These pointers are disguised as PVOID to avoid pulling in obscure headers +typedef PVOID(WINAPI* LdrResolveDelayLoadedAPI_func)( + PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook, + PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags); +static WindowsDllInterceptor::FuncHookType<LdrResolveDelayLoadedAPI_func> + stub_LdrResolveDelayLoadedAPI; + +static PVOID WINAPI patched_LdrResolveDelayLoadedAPI( + PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook, + PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags) { + // Prevent the stack walker from suspending this thread when + // LdrResolveDelayLoadAPI holds the RtlLookupFunctionEntry lock. + AutoSuppressStackWalking suppress; + return stub_LdrResolveDelayLoadedAPI(ParentModuleBase, DelayloadDescriptor, + FailureDllHook, FailureSystemHook, + ThunkAddress, Flags); +} + +MFBT_API void InitializeWin64ProfilerHooks() { + // This function could be called by both profilers, but we only want to run + // it once. 
+ static bool ran = false; + if (ran) { + return; + } + ran = true; + + NtDllIntercept.Init("ntdll.dll"); + stub_LdrUnloadDll.Set(NtDllIntercept, "LdrUnloadDll", &patched_LdrUnloadDll); + if (IsWin8OrLater()) { // LdrResolveDelayLoadedAPI was introduced in Win8 + stub_LdrResolveDelayLoadedAPI.Set(NtDllIntercept, + "LdrResolveDelayLoadedAPI", + &patched_LdrResolveDelayLoadedAPI); + } +} +#endif // defined(GP_PLAT_amd64_windows) + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/platform.cpp b/mozglue/baseprofiler/core/platform.cpp new file mode 100644 index 0000000000..14c48ce649 --- /dev/null +++ b/mozglue/baseprofiler/core/platform.cpp @@ -0,0 +1,3712 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// There are three kinds of samples done by the profiler. +// +// - A "periodic" sample is the most complex kind. It is done in response to a +// timer while the profiler is active. It involves writing a stack trace plus +// a variety of other values (memory measurements, responsiveness +// measurements, etc.) into the main ProfileBuffer. The sampling is done from +// off-thread, and so SuspendAndSampleAndResumeThread() is used to get the +// register values. +// +// - A "synchronous" sample is a simpler kind. It is done in response to an API +// call (profiler_get_backtrace()). It involves writing a stack trace and +// little else into a temporary ProfileBuffer, and wrapping that up in a +// ProfilerBacktrace that can be subsequently used in a marker. The sampling +// is done on-thread, and so Registers::SyncPopulate() is used to get the +// register values. +// +// - A "backtrace" sample is the simplest kind. 
It is done in response to an +// API call (profiler_suspend_and_sample_thread()). It involves getting a +// stack trace via a ProfilerStackCollector; it does not write to a +// ProfileBuffer. The sampling is done from off-thread, and so uses +// SuspendAndSampleAndResumeThread() to get the register values. + +#include "platform.h" + +#include <algorithm> +#include <errno.h> +#include <fstream> +#include <ostream> +#include <set> +#include <sstream> + +// #include "memory_hooks.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Atomics.h" +#include "mozilla/AutoProfilerLabel.h" +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/DoubleConversion.h" +#include "mozilla/Printf.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h" +#include "mozilla/ProfileChunkedBuffer.h" +#include "mozilla/Services.h" +#include "mozilla/Span.h" +#include "mozilla/StackWalk.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/Tuple.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "prdtoa.h" +#include "prtime.h" + +#include "BaseProfiler.h" +#include "BaseProfilingCategory.h" +#include "PageInformation.h" +#include "ProfiledThreadData.h" +#include "ProfilerBacktrace.h" +#include "ProfileBuffer.h" +#include "RegisteredThread.h" +#include "BaseProfilerSharedLibraries.h" +#include "ThreadInfo.h" +#include "VTuneProfiler.h" + +// Win32 builds always have frame pointers, so FramePointerStackWalk() always +// works. +#if defined(GP_PLAT_x86_windows) +# define HAVE_NATIVE_UNWIND +# define USE_FRAME_POINTER_STACK_WALK +#endif + +// Win64 builds always omit frame pointers, so we use the slower +// MozStackWalk(), which works in that case. 
#if defined(GP_PLAT_amd64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
// on Mac.
#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// No stack-walking in baseprofiler on linux, android, bsd.
// APIs now make it easier to capture backtraces from the Base Profiler, which
// is currently not supported on these platforms, and would lead to a MOZ_CRASH
// in Registers::SyncPopulate(). `#if 0` added in bug 1658232, follow-up bugs
// should be referenced in meta bug 1557568.
#if 0
// Android builds use the ARM Exception Handling ABI to unwind.
#  if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
#    define HAVE_NATIVE_UNWIND
#    define USE_EHABI_STACKWALK
#    include "EHABIStackWalk.h"
#  endif

// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||       \
      defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||   \
      defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||    \
      defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
      defined(GP_PLAT_arm64_freebsd)
#    define HAVE_NATIVE_UNWIND
#    define USE_LUL_STACKWALK
#    include "lul/LulMain.h"
#    include "lul/platform-linux-lul.h"

// On linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.).
//
// FramePointerStackWalk can produce incomplete stacks when the current entry is
// in a shared library without framepointers, however LUL can take a long time
// to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
#    if defined(MOZ_PROFILING)
#      define USE_FRAME_POINTER_STACK_WALK
#    endif
#  endif
#endif

// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
#  define HAVE_FASTINIT_NATIVE_UNWIND
#endif

// Without Valgrind support, VALGRIND_MAKE_MEM_DEFINED expands to a no-op.
#ifdef MOZ_VALGRIND
#  include <valgrind/memcheck.h>
#else
#  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
#endif

#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
#  include <ucontext.h>
#endif

namespace mozilla {
namespace baseprofiler {

using detail::RacyFeatures;

// Returns true when aLevelToTest is at or below the logging level selected
// through the environment. The level is read once, on the first call:
// MOZ_BASE_PROFILER_VERBOSE_LOGGING gives 5, MOZ_BASE_PROFILER_DEBUG_LOGGING
// gives 4, MOZ_BASE_PROFILER_LOGGING gives 3, and 0 is used when none is set.
bool LogTest(int aLevelToTest) {
  static const int maxLevel = getenv("MOZ_BASE_PROFILER_VERBOSE_LOGGING") ? 5
                              : getenv("MOZ_BASE_PROFILER_DEBUG_LOGGING") ? 4
                              : getenv("MOZ_BASE_PROFILER_LOGGING")       ? 3
                                                                          : 0;
  return aLevelToTest <= maxLevel;
}

// printf-style output helper. When ANDROID is defined the message goes to the
// Android log at INFO priority under the "Gecko" tag; otherwise it is written
// to stderr.
void PrintToConsole(const char* aFmt, ...) {
  va_list args;
  va_start(args, aFmt);
#if defined(ANDROID)
  __android_log_vprint(ANDROID_LOG_INFO, "Gecko", aFmt, args);
#else
  vfprintf(stderr, aFmt, args);
#endif
  va_end(args);
}

// Statically initialized to 0, then set once from profiler_init(), which should
// be called from the main thread before any other use of the profiler.
int scProfilerMainThreadId;

// Compile-time check of the feature list: expands CHECK_FEATURE once per
// entry of BASE_PROFILER_FOR_EACH_FEATURE and returns false as soon as an
// entry's number differs from its position in the list.
constexpr static bool ValidateFeatures() {
  int expectedFeatureNumber = 0;

  // Feature numbers should start at 0 and increase by 1 each.
#define CHECK_FEATURE(n_, str_, Name_, desc_) \
  if ((n_) != expectedFeatureNumber) {        \
    return false;                             \
  }                                           \
  ++expectedFeatureNumber;

  BASE_PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)

#undef CHECK_FEATURE

  return true;
}

static_assert(ValidateFeatures(), "Feature list is invalid");

// Return all features that are available on this platform.
static uint32_t AvailableFeatures() {
  uint32_t features = 0;

#define ADD_FEATURE(n_, str_, Name_, desc_) \
  ProfilerFeature::Set##Name_(features);

  // Add all the possible features.
  BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)

#undef ADD_FEATURE

  // Now remove features not supported on this platform/configuration.
  ProfilerFeature::ClearJava(features);
  ProfilerFeature::ClearJS(features);
  ProfilerFeature::ClearScreenshots(features);
#if !defined(HAVE_NATIVE_UNWIND)
  ProfilerFeature::ClearStackWalk(features);
#endif
  ProfilerFeature::ClearTaskTracer(features);
  ProfilerFeature::ClearJSTracer(features);

  return features;
}

// Default features common to all contexts (even if not available).
static uint32_t DefaultFeatures() {
  return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
         ProfilerFeature::StackWalk | ProfilerFeature::Threads;
}

// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
// available).
static uint32_t StartupExtraDefaultFeatures() {
  // Enable CPUUtilization by default for startup profiles as it is useful to
  // see when startup alternates between CPU intensive tasks and being blocked.
  // Enable mainthreadio by default for startup profiles as startup is heavy on
  // I/O operations, and main thread I/O is really important to see there.
  return ProfilerFeature::CPUUtilization | ProfilerFeature::MainThreadIO;
}

class MOZ_RAII PSAutoTryLock;

// The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS.
// Use `PSAutoLock lock;` to take the lock until the end of the enclosing block.
+// External profilers may use this same lock for their own data, but as the lock +// is non-recursive, *only* `f(PSLockRef, ...)` functions below should be +// called, to avoid double-locking. +class MOZ_RAII PSAutoLock { + public: + PSAutoLock() { gPSMutex.Lock(); } + + ~PSAutoLock() { gPSMutex.Unlock(); } + + PSAutoLock(const PSAutoLock&) = delete; + void operator=(const PSAutoLock&) = delete; + + [[nodiscard]] static bool IsLockedOnCurrentThread() { + return gPSMutex.IsLockedOnCurrentThread(); + } + + private: + // Allow PSAutoTryLock to access gPSMutex, and to call the following + // `PSAutoLock(int)` constructor through `Maybe<const PSAutoLock>::emplace()`. + friend class PSAutoTryLock; + friend class Maybe<const PSAutoLock>; + + // Special constructor for an already-locked gPSMutex. The `int` parameter is + // necessary to distinguish it from the main constructor. + explicit PSAutoLock(int) { gPSMutex.AssertCurrentThreadOwns(); } + + static detail::BaseProfilerMutex gPSMutex; +}; + +// RAII class that attempts to lock the profiler mutex. Example usage: +// PSAutoTryLock tryLock; +// if (tryLock.IsLocked()) { locked_foo(tryLock.LockRef()); } +class MOZ_RAII PSAutoTryLock { + public: + PSAutoTryLock() { + if (PSAutoLock::gPSMutex.TryLock()) { + mMaybePSAutoLock.emplace(0); + } + } + + // Return true if the mutex was aquired and locked. + [[nodiscard]] bool IsLocked() const { return mMaybePSAutoLock.isSome(); } + + // Assuming the mutex is locked, return a reference to a `PSAutoLock` for that + // mutex, which can be passed as proof-of-lock. + [[nodiscard]] const PSAutoLock& LockRef() const { + MOZ_ASSERT(IsLocked()); + return mMaybePSAutoLock.ref(); + } + + private: + // `mMaybePSAutoLock` is `Nothing` if locking failed, otherwise it contains a + // `const PSAutoLock` holding the locked mutex, and whose reference may be + // passed to functions expecting a proof-of-lock. 
+ Maybe<const PSAutoLock> mMaybePSAutoLock; +}; + +detail::BaseProfilerMutex PSAutoLock::gPSMutex; + +// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's +// fields. +typedef const PSAutoLock& PSLockRef; + +#define PS_GET(type_, name_) \ + static type_ name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_LOCKLESS(type_, name_) \ + static type_ name_() { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_AND_SET(type_, name_) \ + PS_GET(type_, name_) \ + static void Set##name_(PSLockRef, type_ a##name_) { \ + MOZ_ASSERT(sInstance); \ + sInstance->m##name_ = a##name_; \ + } + +// All functions in this file can run on multiple threads unless they have an +// NS_IsMainThread() assertion. + +// This class contains the profiler's core global state, i.e. that which is +// valid even when the profiler is not active. Most profile operations can't do +// anything useful when this class is not instantiated, so we release-assert +// its non-nullness in all such operations. +// +// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a +// PSAutoLock reference as an argument as proof that the gPSMutex is currently +// locked. This makes it clear when gPSMutex is locked and helps avoid +// accidental unlocked accesses to global state. There are ways to circumvent +// this mechanism, but please don't do so without *very* good reason and a +// detailed explanation. +// +// The exceptions to this rule: +// +// - mProcessStartTime, because it's immutable; +// +// - each thread's RacyRegisteredThread object is accessible without locking via +// TLSRegisteredThread::RacyRegisteredThread(). +class CorePS { + private: + CorePS() + : mProcessStartTime(TimeStamp::ProcessCreation()), + // This needs its own mutex, because it is used concurrently from + // functions guarded by gPSMutex as well as others without safety (e.g., + // profiler_add_marker). 
It is *not* used inside the critical section of + // the sampler, because mutexes cannot be used there. + mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex) +#ifdef USE_LUL_STACKWALK + , + mLul(nullptr) +#endif + { + } + + ~CorePS() {} + + public: + static void Create(PSLockRef aLock) { + MOZ_ASSERT(!sInstance); + sInstance = new CorePS(); + } + + static void Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + delete sInstance; + sInstance = nullptr; + } + + // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex + // being locked. This is because CorePS is instantiated so early on the main + // thread that we don't have to worry about it being racy. + static bool Exists() { return !!sInstance; } + + static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf, + size_t& aProfSize, size_t& aLulSize) { + MOZ_ASSERT(sInstance); + + aProfSize += aMallocSizeOf(sInstance); + + for (auto& registeredThread : sInstance->mRegisteredThreads) { + aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf); + } + + for (auto& registeredPage : sInstance->mRegisteredPages) { + aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf); + } + + // Measurement of the following things may be added later if DMD finds it + // is worthwhile: + // - CorePS::mRegisteredThreads itself (its elements' children are + // measured above) + // - CorePS::mRegisteredPages itself (its elements' children are + // measured above) + // - CorePS::mInterposeObserver + +#if defined(USE_LUL_STACKWALK) + if (sInstance->mLul) { + aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf); + } +#endif + } + + // No PSLockRef is needed for this field because it's immutable. + PS_GET_LOCKLESS(const TimeStamp&, ProcessStartTime) + + // No PSLockRef is needed for this field because it's thread-safe. 
+ PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer) + + PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads) + + static void AppendRegisteredThread( + PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) { + MOZ_ASSERT(sInstance); + MOZ_RELEASE_ASSERT( + sInstance->mRegisteredThreads.append(std::move(aRegisteredThread))); + } + + static void RemoveRegisteredThread(PSLockRef, + RegisteredThread* aRegisteredThread) { + MOZ_ASSERT(sInstance); + // Remove aRegisteredThread from mRegisteredThreads. + for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) { + if (rt.get() == aRegisteredThread) { + sInstance->mRegisteredThreads.erase(&rt); + return; + } + } + } + + PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages) + + static void AppendRegisteredPage(PSLockRef, + RefPtr<PageInformation>&& aRegisteredPage) { + MOZ_ASSERT(sInstance); + struct RegisteredPageComparator { + PageInformation* aA; + bool operator()(PageInformation* aB) const { return aA->Equals(aB); } + }; + + auto foundPageIter = std::find_if( + sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(), + RegisteredPageComparator{aRegisteredPage.get()}); + + if (foundPageIter != sInstance->mRegisteredPages.end()) { + if ((*foundPageIter)->Url() == "about:blank") { + // When a BrowsingContext is loaded, the first url loaded in it will be + // about:blank, and if the principal matches, the first document loaded + // in it will share an inner window. That's why we should delete the + // intermittent about:blank if they share the inner window. + sInstance->mRegisteredPages.erase(foundPageIter); + } else { + // Do not register the same page again. + return; + } + } + MOZ_RELEASE_ASSERT( + sInstance->mRegisteredPages.append(std::move(aRegisteredPage))); + } + + static void RemoveRegisteredPage(PSLockRef, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + // Remove RegisteredPage from mRegisteredPages by given inner window ID. 
+ sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) { + return rd->InnerWindowID() == aRegisteredInnerWindowID; + }); + } + + static void ClearRegisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mRegisteredPages.clear(); + } + + PS_GET(const Vector<BaseProfilerCount*>&, Counters) + + static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) { + MOZ_ASSERT(sInstance); + // we don't own the counter; they may be stored in static objects + MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter)); + } + + static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) { + // we may be called to remove a counter after the profiler is stopped or + // late in shutdown. + if (sInstance) { + auto* counter = std::find(sInstance->mCounters.begin(), + sInstance->mCounters.end(), aCounter); + MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end()); + sInstance->mCounters.erase(counter); + } + } + +#ifdef USE_LUL_STACKWALK + static lul::LUL* Lul(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mLul.get(); + } + static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) { + MOZ_ASSERT(sInstance); + sInstance->mLul = std::move(aLul); + } +#endif + + PS_GET_AND_SET(const std::string&, ProcessName) + PS_GET_AND_SET(const std::string&, ETLDplus1) + + private: + // The singleton instance + static CorePS* sInstance; + + // The time that the process started. + const TimeStamp mProcessStartTime; + + // The thread-safe blocks-oriented buffer into which all profiling data is + // recorded. + // ActivePS controls the lifetime of the underlying contents buffer: When + // ActivePS does not exist, mCoreBuffer is empty and rejects all reads&writes; + // see ActivePS for further details. 
+ // Note: This needs to live here outside of ActivePS, because some producers + // are indirectly controlled (e.g., by atomic flags) and therefore may still + // attempt to write some data shortly after ActivePS has shutdown and deleted + // the underlying buffer in memory. + ProfileChunkedBuffer mCoreBuffer; + + // Info on all the registered threads. + // ThreadIds in mRegisteredThreads are unique. + Vector<UniquePtr<RegisteredThread>> mRegisteredThreads; + + // Info on all the registered pages. + // InnerWindowIDs in mRegisteredPages are unique. + Vector<RefPtr<PageInformation>> mRegisteredPages; + + // Non-owning pointers to all active counters + Vector<BaseProfilerCount*> mCounters; + +#ifdef USE_LUL_STACKWALK + // LUL's state. Null prior to the first activation, non-null thereafter. + UniquePtr<lul::LUL> mLul; +#endif + + // Process name, provided by child process initialization code. + std::string mProcessName; + // Private name, provided by child process initialization code (eTLD+1 in + // fission) + std::string mETLDplus1; +}; + +CorePS* CorePS::sInstance = nullptr; + +ProfileChunkedBuffer& profiler_get_core_buffer() { + MOZ_ASSERT(CorePS::Exists()); + return CorePS::CoreBuffer(); +} + +class SamplerThread; + +static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, + double aInterval); + +struct LiveProfiledThreadData { + RegisteredThread* mRegisteredThread; + UniquePtr<ProfiledThreadData> mProfiledThreadData; +}; + +// The buffer size is provided as a number of "entries", this is their size in +// bytes. +constexpr static uint32_t scBytesPerEntry = 8; + +// This class contains the profiler's global state that is valid only when the +// profiler is active. When not instantiated, the profiler is inactive. +// +// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as +// CorePS. 
+// +class ActivePS { + private: + // We need to decide how many chunks of what size we want to fit in the given + // total maximum capacity for this process, in the (likely) context of + // multiple processes doing the same choice and having an inter-process + // mechanism to control the overall memory limit. + + // Minimum chunk size allowed, enough for at least one stack. + constexpr static uint32_t scMinimumChunkSize = + 2 * ProfileBufferChunkManager::scExpectedMaximumStackSize; + + // Ideally we want at least 2 unreleased chunks to work with (1 current and 1 + // next), and 2 released chunks (so that one can be recycled when old, leaving + // one with some data). + constexpr static uint32_t scMinimumNumberOfChunks = 4; + + // And we want to limit chunks to a maximum size, which is a compromise + // between: + // - A big size, which helps with reducing the rate of allocations and IPCs. + // - A small size, which helps with equalizing the duration of recorded data + // (as the inter-process controller will discard the oldest chunks in all + // Firefox processes). + constexpr static uint32_t scMaximumChunkSize = 1024 * 1024; + + public: + // We should be able to store at least the minimum number of the smallest- + // possible chunks. + constexpr static uint32_t scMinimumBufferSize = + scMinimumNumberOfChunks * scMinimumChunkSize; + constexpr static uint32_t scMinimumBufferEntries = + scMinimumBufferSize / scBytesPerEntry; + + // Limit to 2GiB. 
+ constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u; + constexpr static uint32_t scMaximumBufferEntries = + scMaximumBufferSize / scBytesPerEntry; + + constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) { + if (aEntries <= scMinimumBufferEntries) { + return scMinimumBufferEntries; + } + if (aEntries >= scMaximumBufferEntries) { + return scMaximumBufferEntries; + } + return aEntries; + } + + private: + constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) { + return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) * + scBytesPerEntry / scMinimumNumberOfChunks, + size_t(scMaximumChunkSize))); + } + + static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) { + // Filter out any features unavailable in this platform/configuration. + aFeatures &= AvailableFeatures(); + + // Always enable ProfilerFeature::Threads if we have a filter, because + // users sometimes ask to filter by a list of threads but forget to + // explicitly specify ProfilerFeature::Threads. + if (aFilterCount > 0) { + aFeatures |= ProfilerFeature::Threads; + } + + // Some features imply others. 
+ if (aFeatures & ProfilerFeature::FileIOAll) { + aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO; + } else if (aFeatures & ProfilerFeature::FileIO) { + aFeatures |= ProfilerFeature::MainThreadIO; + } + + return aFeatures; + } + + ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount, + const Maybe<double>& aDuration) + : mGeneration(sNextGeneration++), + mCapacity(aCapacity), + mDuration(aDuration), + mInterval(aInterval), + mFeatures(AdjustFeatures(aFeatures, aFilterCount)), + mProfileBufferChunkManager( + size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry, + ChunkSizeForEntries(aCapacity.Value())), + mProfileBuffer([this]() -> ProfileChunkedBuffer& { + CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager); + return CorePS::CoreBuffer(); + }()), + // The new sampler thread doesn't start sampling immediately because the + // main loop within Run() is blocked until this function's caller + // unlocks gPSMutex. + mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval)), + mIsPaused(false), + mIsSamplingPaused(false) +#if defined(GP_OS_linux) || defined(GP_OS_freebsd) + , + mWasSamplingPaused(false) +#endif + { + // Deep copy aFilters. 
+ MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount)); + for (uint32_t i = 0; i < aFilterCount; ++i) { + mFilters[i] = aFilters[i]; + } + } + + ~ActivePS() { CorePS::CoreBuffer().ResetChunkManager(); } + + bool ThreadSelected(const char* aThreadName) { + if (mFilters.empty()) { + return true; + } + + std::string name = aThreadName; + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + + for (uint32_t i = 0; i < mFilters.length(); ++i) { + std::string filter = mFilters[i]; + + if (filter == "*") { + return true; + } + + std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower); + + // Crude, non UTF-8 compatible, case insensitive substring search + if (name.find(filter) != std::string::npos) { + return true; + } + + // If the filter starts with pid:, check for a pid match + if (filter.find("pid:") == 0) { + std::string mypid = std::to_string(profiler_current_process_id()); + if (filter.compare(4, std::string::npos, mypid) == 0) { + return true; + } + } + } + + return false; + } + + public: + static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, const Maybe<double>& aDuration) { + MOZ_ASSERT(!sInstance); + sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aDuration); + } + + [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + auto samplerThread = sInstance->mSamplerThread; + delete sInstance; + sInstance = nullptr; + + return samplerThread; + } + + static bool Exists(PSLockRef) { return !!sInstance; } + + static bool Equals(PSLockRef, PowerOfTwo32 aCapacity, + const Maybe<double>& aDuration, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount) { + MOZ_ASSERT(sInstance); + if (sInstance->mCapacity != aCapacity || + sInstance->mDuration != aDuration || + sInstance->mInterval != aInterval || + sInstance->mFeatures != aFeatures || + 
sInstance->mFilters.length() != aFilterCount) { + return false; + } + + for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) { + if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) { + return false; + } + } + return true; + } + + static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(sInstance); + + size_t n = aMallocSizeOf(sInstance); + + n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mLiveProfiledThreads (both the array itself, and the contents) + // - mDeadProfiledThreads (both the array itself, and the contents) + // + + return n; + } + + static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) { + MOZ_ASSERT(sInstance); + return ((aInfo->IsMainThread() || FeatureThreads(aLock)) && + sInstance->ThreadSelected(aInfo->Name())); + } + + PS_GET(uint32_t, Generation) + + PS_GET(PowerOfTwo32, Capacity) + + PS_GET(Maybe<double>, Duration) + + PS_GET(double, Interval) + + PS_GET(uint32_t, Features) + +#define PS_GET_FEATURE(n_, str_, Name_, desc_) \ + static bool Feature##Name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return ProfilerFeature::Has##Name_(sInstance->mFeatures); \ + } + + BASE_PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE) + +#undef PS_GET_FEATURE + + PS_GET(const Vector<std::string>&, Filters) + + static void FulfillChunkRequests(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mProfileBufferChunkManager.FulfillChunkRequests(); + } + + static ProfileBuffer& Buffer(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mProfileBuffer; + } + + static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mLiveProfiledThreads; + } + + // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs + // for all threads that should be included in a profile, both for threads + // that are still registered, and for 
threads that have been unregistered but + // still have data in the buffer. + // For threads that have already been unregistered, the RegisteredThread + // pointer will be null. + // The returned array is sorted by thread register time. + // Do not hold on to the return value across thread registration or profiler + // restarts. + static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> + ProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array; + MOZ_RELEASE_ASSERT( + array.initCapacity(sInstance->mLiveProfiledThreads.length() + + sInstance->mDeadProfiledThreads.length())); + for (auto& t : sInstance->mLiveProfiledThreads) { + MOZ_RELEASE_ASSERT(array.append( + std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get()))); + } + for (auto& t : sInstance->mDeadProfiledThreads) { + MOZ_RELEASE_ASSERT( + array.append(std::make_pair((RegisteredThread*)nullptr, t.get()))); + } + + std::sort(array.begin(), array.end(), + [](const std::pair<RegisteredThread*, ProfiledThreadData*>& a, + const std::pair<RegisteredThread*, ProfiledThreadData*>& b) { + return a.second->Info()->RegisterTime() < + b.second->Info()->RegisterTime(); + }); + return array; + } + + static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + Vector<RefPtr<PageInformation>> array; + for (auto& d : CorePS::RegisteredPages(aLock)) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + for (auto& d : sInstance->mDeadProfiledPages) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + // We don't need to sort the pages like threads since we won't show them + // as a list. + return array; + } + + // Do a linear search through mLiveProfiledThreads to find the + // ProfiledThreadData object for a RegisteredThread. 
+ static ProfiledThreadData* GetProfiledThreadData( + PSLockRef, RegisteredThread* aRegisteredThread) { + MOZ_ASSERT(sInstance); + for (const LiveProfiledThreadData& thread : + sInstance->mLiveProfiledThreads) { + if (thread.mRegisteredThread == aRegisteredThread) { + return thread.mProfiledThreadData.get(); + } + } + return nullptr; + } + + static ProfiledThreadData* AddLiveProfiledThread( + PSLockRef, RegisteredThread* aRegisteredThread, + UniquePtr<ProfiledThreadData>&& aProfiledThreadData) { + MOZ_ASSERT(sInstance); + MOZ_RELEASE_ASSERT( + sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{ + aRegisteredThread, std::move(aProfiledThreadData)})); + + // Return a weak pointer to the ProfiledThreadData object. + return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get(); + } + + static void UnregisterThread(PSLockRef aLockRef, + RegisteredThread* aRegisteredThread) { + MOZ_ASSERT(sInstance); + + DiscardExpiredDeadProfiledThreads(aLockRef); + + // Find the right entry in the mLiveProfiledThreads array and remove the + // element, moving the ProfiledThreadData object for the thread into the + // mDeadProfiledThreads array. + // The thread's RegisteredThread object gets destroyed here. + for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) { + LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i]; + if (thread.mRegisteredThread == aRegisteredThread) { + thread.mProfiledThreadData->NotifyUnregistered( + sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append( + std::move(thread.mProfiledThreadData))); + sInstance->mLiveProfiledThreads.erase( + &sInstance->mLiveProfiledThreads[i]); + return; + } + } + } + + PS_GET_AND_SET(bool, IsPaused) + + // True if sampling is paused (through generic `SetIsPaused()` or specific + // `SetIsSamplingPaused()`). 
+ static bool IsSamplingPaused(PSLockRef lock) { + MOZ_ASSERT(sInstance); + return IsPaused(lock) || sInstance->mIsSamplingPaused; + } + + static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) { + MOZ_ASSERT(sInstance); + sInstance->mIsSamplingPaused = aIsSamplingPaused; + } + +#if defined(GP_OS_linux) || defined(GP_OS_freebsd) + PS_GET_AND_SET(bool, WasSamplingPaused) +#endif + + static void DiscardExpiredDeadProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead threads that were unregistered before bufferRangeStart. + sInstance->mDeadProfiledThreads.eraseIf( + [bufferRangeStart]( + const UniquePtr<ProfiledThreadData>& aProfiledThreadData) { + Maybe<uint64_t> bufferPosition = + aProfiledThreadData->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this thread"); + return *bufferPosition < bufferRangeStart; + }); + } + + static void UnregisterPage(PSLockRef aLock, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + auto& registeredPages = CorePS::RegisteredPages(aLock); + for (size_t i = 0; i < registeredPages.length(); i++) { + RefPtr<PageInformation>& page = registeredPages[i]; + if (page->InnerWindowID() == aRegisteredInnerWindowID) { + page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT( + sInstance->mDeadProfiledPages.append(std::move(page))); + registeredPages.erase(&registeredPages[i--]); + } + } + } + + static void DiscardExpiredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead pages that were unregistered before + // bufferRangeStart. 
+ sInstance->mDeadProfiledPages.eraseIf( + [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) { + Maybe<uint64_t> bufferPosition = + aProfiledPage->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this page"); + return *bufferPosition < bufferRangeStart; + }); + } + + static void ClearUnregisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mDeadProfiledPages.clear(); + } + + static void ClearExpiredExitProfiles(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard exit profiles that were gathered before our buffer RangeStart. + sInstance->mExitProfiles.eraseIf( + [bufferRangeStart](const ExitProfile& aExitProfile) { + return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart; + }); + } + + static void AddExitProfile(PSLockRef aLock, const std::string& aExitProfile) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append( + ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()})); + } + + static Vector<std::string> MoveExitProfiles(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + Vector<std::string> profiles; + MOZ_RELEASE_ASSERT( + profiles.initCapacity(sInstance->mExitProfiles.length())); + for (auto& profile : sInstance->mExitProfiles) { + MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON))); + } + sInstance->mExitProfiles.clear(); + return profiles; + } + + private: + // The singleton instance. + static ActivePS* sInstance; + + // We need to track activity generations. If we didn't we could have the + // following scenario. + // + // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks + // gPSMutex, deletes the SamplerThread (which does a join). 
+ // + // - profiler_start() runs on a different thread, locks gPSMutex, + // re-instantiates ActivePS, unlocks gPSMutex -- all before the join + // completes. + // + // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated, + // and continues as if the start/stop pair didn't occur. Also + // profiler_stop() is stuck, unable to finish. + // + // By checking ActivePS *and* the generation, we can avoid this scenario. + // sNextGeneration is used to track the next generation number; it is static + // because it must persist across different ActivePS instantiations. + const uint32_t mGeneration; + static uint32_t sNextGeneration; + + // The maximum number of 8-byte entries in mProfileBuffer. + const PowerOfTwo32 mCapacity; + + // The maximum duration of entries in mProfileBuffer, in seconds. + const Maybe<double> mDuration; + + // The interval between samples, measured in milliseconds. + const double mInterval; + + // The profile features that are enabled. + const uint32_t mFeatures; + + // Substrings of names of threads we want to profile. + Vector<std::string> mFilters; + + // The chunk manager used by `mProfileBuffer` below. + ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager; + + // The buffer into which all samples are recorded. + ProfileBuffer mProfileBuffer; + + // ProfiledThreadData objects for any threads that were profiled at any point + // during this run of the profiler: + // - mLiveProfiledThreads contains all threads that are still registered, and + // - mDeadProfiledThreads contains all threads that have already been + // unregistered but for which there is still data in the profile buffer. + Vector<LiveProfiledThreadData> mLiveProfiledThreads; + Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads; + + // Info on all the dead pages. + // Registered pages are being moved to this array after unregistration. + // We are keeping them in case we need them in the profile data. 
+ // We are removing them when we ensure that we won't need them anymore. + Vector<RefPtr<PageInformation>> mDeadProfiledPages; + + // The current sampler thread. This class is not responsible for destroying + // the SamplerThread object; the Destroy() method returns it so the caller + // can destroy it. + SamplerThread* const mSamplerThread; + + // Is the profiler fully paused? + bool mIsPaused; + + // Is the profiler periodic sampling paused? + bool mIsSamplingPaused; + +#if defined(GP_OS_linux) || defined(GP_OS_freebsd) + // Used to record whether the sampler was paused just before forking. False + // at all times except just before/after forking. + bool mWasSamplingPaused; +#endif + + struct ExitProfile { + std::string mJSON; + uint64_t mBufferPositionAtGatherTime; + }; + Vector<ExitProfile> mExitProfiles; +}; + +ActivePS* ActivePS::sInstance = nullptr; +uint32_t ActivePS::sNextGeneration = 0; + +#undef PS_GET +#undef PS_GET_LOCKLESS +#undef PS_GET_AND_SET + +Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0); + +/* static */ +void RacyFeatures::SetActive(uint32_t aFeatures) { + sActiveAndFeatures = Active | aFeatures; +} + +/* static */ +void RacyFeatures::SetInactive() { sActiveAndFeatures = 0; } + +/* static */ +bool RacyFeatures::IsActive() { return uint32_t(sActiveAndFeatures) & Active; } + +/* static */ +void RacyFeatures::SetPaused() { sActiveAndFeatures |= Paused; } + +/* static */ +void RacyFeatures::SetUnpaused() { sActiveAndFeatures &= ~Paused; } + +/* static */ +void RacyFeatures::SetSamplingPaused() { sActiveAndFeatures |= SamplingPaused; } + +/* static */ +void RacyFeatures::SetSamplingUnpaused() { + sActiveAndFeatures &= ~SamplingPaused; +} + +/* static */ +bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && (af & aFeature); +} + +/* static */ +bool RacyFeatures::IsActiveAndUnpaused() { + uint32_t af = sActiveAndFeatures; // copy it 
first + return (af & Active) && !(af & Paused); +} + +/* static */ +bool RacyFeatures::IsActiveAndSamplingUnpaused() { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && !(af & (Paused | SamplingPaused)); +} + +// Each live thread has a RegisteredThread, and we store a reference to it in +// TLS. This class encapsulates that TLS. +class TLSRegisteredThread { + public: + static bool Init(PSLockRef) { + bool ok1 = sRegisteredThread.init(); + bool ok2 = AutoProfilerLabel::sProfilingStack.init(); + return ok1 && ok2; + } + + // Get the entire RegisteredThread. Accesses are guarded by gPSMutex. + static class RegisteredThread* RegisteredThread(PSLockRef) { + return sRegisteredThread.get(); + } + + // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex. + static class RacyRegisteredThread* RacyRegisteredThread() { + class RegisteredThread* registeredThread = sRegisteredThread.get(); + return registeredThread ? &registeredThread->RacyRegisteredThread() + : nullptr; + } + + // Get only the ProfilingStack. Accesses are not guarded by gPSMutex. + // RacyRegisteredThread() can also be used to get the ProfilingStack, but that + // is marginally slower because it requires an extra pointer indirection. + static ProfilingStack* Stack() { + return AutoProfilerLabel::sProfilingStack.get(); + } + + static void SetRegisteredThread(PSLockRef, + class RegisteredThread* aRegisteredThread) { + sRegisteredThread.set(aRegisteredThread); + AutoProfilerLabel::sProfilingStack.set( + aRegisteredThread + ? &aRegisteredThread->RacyRegisteredThread().ProfilingStack() + : nullptr); + } + + private: + // This is a non-owning reference to the RegisteredThread; + // CorePS::mRegisteredThreads is the owning reference. On thread + // deregistration, this reference is cleared and the RegisteredThread is + // destroyed. 
+ static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread; +}; + +MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread; + +/* static */ +ProfilingStack* AutoProfilerLabel::GetProfilingStack() { + return sProfilingStack.get(); +} + +// Although you can access a thread's ProfilingStack via +// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer +// directly to the ProfilingStack. Here's why. +// +// - We need to be able to push to and pop from the ProfilingStack in +// AutoProfilerLabel. +// +// - The class functions are hot and must be defined in BaseProfiler.h so they +// can be inlined. +// +// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in +// BaseProfiler.h. +// +// This second pointer isn't ideal, but does provide a way to satisfy those +// constraints. TLSRegisteredThread is responsible for updating it. +MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack; + +// The name of the main thread. +static const char* const kMainThreadName = "GeckoMain"; + +//////////////////////////////////////////////////////////////////////// +// BEGIN sampling/unwinding code + +// The registers used for stack unwinding and a few other sampling purposes. +// The ctor does nothing; users are responsible for filling in the fields. +class Registers { + public: + Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {} + +#if defined(HAVE_NATIVE_UNWIND) + // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample. + void SyncPopulate(); +#endif + + void Clear() { memset(this, 0, sizeof(*this)); } + + // These fields are filled in by + // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace + // samples, and by SyncPopulate() for synchronous samples. + Address mPC; // Instruction pointer. + Address mSP; // Stack pointer. + Address mFP; // Frame pointer. + Address mLR; // ARM link register. 
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // This contains all the registers, which means it duplicates the four fields + // above. This is ok. + ucontext_t* mContext; // The context from the signal handler. +#endif +}; + +// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time +// looping on corrupted stacks. +static const size_t MAX_NATIVE_FRAMES = 1024; + +struct NativeStack { + void* mPCs[MAX_NATIVE_FRAMES]; + void* mSPs[MAX_NATIVE_FRAMES]; + size_t mCount; // Number of frames filled. + + NativeStack() : mPCs(), mSPs(), mCount(0) {} +}; + +// Merges the profiling stack and native stack, outputting the details to +// aCollector. +static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, const NativeStack& aNativeStack, + ProfilerStackCollector& aCollector) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + const ProfilingStack& profilingStack = + aRegisteredThread.RacyRegisteredThread().ProfilingStack(); + const ProfilingStackFrame* profilingStackFrames = profilingStack.frames; + uint32_t profilingStackFrameCount = profilingStack.stackSize(); + + Maybe<uint64_t> samplePosInBuffer; + if (!aIsSynchronous) { + // aCollector.SamplePositionInBuffer() will return Nothing() when + // profiler_suspend_and_sample_thread is called from the background hang + // reporter. + samplePosInBuffer = aCollector.SamplePositionInBuffer(); + } + // While the profiling stack array is ordered oldest-to-youngest, the JS and + // native arrays are ordered youngest-to-oldest. We must add frames to aInfo + // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS + // and native arrays backwards. Note: this means the terminating condition + // jsIndex and nativeIndex is being < 0. 
+ uint32_t profilingStackIndex = 0; + int32_t nativeIndex = aNativeStack.mCount - 1; + + uint8_t* lastLabelFrameStackAddr = nullptr; + + // Iterate as long as there is at least one frame remaining. + while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) { + // There are 1 to 3 frames available. Find and add the oldest. + uint8_t* profilingStackAddr = nullptr; + uint8_t* nativeStackAddr = nullptr; + + if (profilingStackIndex != profilingStackFrameCount) { + const ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + if (profilingStackFrame.isLabelFrame() || + profilingStackFrame.isSpMarkerFrame()) { + lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress(); + } + + // Skip any JS_OSR frames. Such frames are used when the JS interpreter + // enters a jit frame on a loop edge (via on-stack-replacement, or OSR). + // To avoid both the profiling stack frame and jit frame being recorded + // (and showing up twice), the interpreter marks the interpreter + // profiling stack frame as JS_OSR to ensure that it doesn't get counted. + if (profilingStackFrame.isOSRFrame()) { + profilingStackIndex++; + continue; + } + + MOZ_ASSERT(lastLabelFrameStackAddr); + profilingStackAddr = lastLabelFrameStackAddr; + } + + if (nativeIndex >= 0) { + nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex]; + } + + // If there's a native stack frame which has the same SP as a profiling + // stack frame, pretend we didn't see the native stack frame. Ditto for a + // native stack frame which has the same SP as a JS stack frame. In effect + // this means profiling stack frames or JS frames trump conflicting native + // frames. + if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) { + nativeStackAddr = nullptr; + nativeIndex--; + MOZ_ASSERT(profilingStackAddr); + } + + // Sanity checks. 
+ MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr); + + // Check to see if profiling stack frame is top-most. + if (profilingStackAddr > nativeStackAddr) { + MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount); + const ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + // Sp marker frames are just annotations and should not be recorded in + // the profile. + if (!profilingStackFrame.isSpMarkerFrame()) { + aCollector.CollectProfilingStackFrame(profilingStackFrame); + } + profilingStackIndex++; + continue; + } + + // If we reach here, there must be a native stack frame and it must be the + // greatest frame. + if (nativeStackAddr) { + MOZ_ASSERT(nativeIndex >= 0); + void* addr = (void*)aNativeStack.mPCs[nativeIndex]; + aCollector.CollectNativeLeafAddr(addr); + } + if (nativeIndex >= 0) { + nativeIndex--; + } + } +} + +#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK) +static HANDLE GetThreadHandle(PlatformData* aData); +#endif + +#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) +static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, + void* aClosure) { + NativeStack* nativeStack = static_cast<NativeStack*>(aClosure); + MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES); + nativeStack->mSPs[nativeStack->mCount] = aSP; + nativeStack->mPCs[nativeStack->mCount] = aPC; + nativeStack->mCount++; +} +#endif + +#if defined(USE_FRAME_POINTER_STACK_WALK) +static void DoFramePointerBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Start with the current function. 
We use 0 as the frame number here because + // the FramePointerStackWalk() call below will use 1..N. This is a bit weird + // but it doesn't matter because StackWalkCallback() doesn't use the frame + // number argument. + StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack); + + uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount); + + const void* stackEnd = aRegisteredThread.StackTop(); + if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) { + FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, + &aNativeStack, reinterpret_cast<void**>(aRegs.mFP), + const_cast<void*>(stackEnd)); + } +} +#endif + +#if defined(USE_MOZ_STACK_WALK) +static void DoMozStackWalkBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Start with the current function. We use 0 as the frame number here because + // the MozStackWalkThread() call below will use 1..N. This is a bit weird but + // it doesn't matter because StackWalkCallback() doesn't use the frame number + // argument. + StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack); + + uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount); + + HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData()); + MOZ_ASSERT(thread); + MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, maxFrames, + &aNativeStack, thread, /* context */ nullptr); +} +#endif + +#ifdef USE_EHABI_STACKWALK +static void DoEHABIBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". 
+ // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + aNativeStack.mCount = + EHABIStackWalk(aRegs.mContext->uc_mcontext, + const_cast<void*>(aRegisteredThread.StackTop()), + aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES); +} +#endif + +#ifdef USE_LUL_STACKWALK + +// See the comment at the callsite for why this function is necessary. +# if defined(MOZ_HAVE_ASAN_BLACKLIST) +MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc, + size_t aLen) { + // The obvious thing to do here is call memcpy(). However, although + // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the + // false positive still manifests! So we must implement memcpy() ourselves + // within this function. + char* dst = static_cast<char*>(aDst); + const char* src = static_cast<const char*>(aSrc); + + for (size_t i = 0; i < aLen; i++) { + dst[i] = src[i]; + } +} +# endif + +static void DoLULBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. 
+ + const mcontext_t* mc = &aRegs.mContext->uc_mcontext; + + lul::UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); + +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]); +# elif defined(GP_PLAT_amd64_freebsd) + startRegs.xip = lul::TaggedUWord(mc->mc_rip); + startRegs.xsp = lul::TaggedUWord(mc->mc_rsp); + startRegs.xbp = lul::TaggedUWord(mc->mc_rbp); +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + startRegs.r15 = lul::TaggedUWord(mc->arm_pc); + startRegs.r14 = lul::TaggedUWord(mc->arm_lr); + startRegs.r13 = lul::TaggedUWord(mc->arm_sp); + startRegs.r12 = lul::TaggedUWord(mc->arm_ip); + startRegs.r11 = lul::TaggedUWord(mc->arm_fp); + startRegs.r7 = lul::TaggedUWord(mc->arm_r7); +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.x29 = lul::TaggedUWord(mc->regs[29]); + startRegs.x30 = lul::TaggedUWord(mc->regs[30]); + startRegs.sp = lul::TaggedUWord(mc->sp); +# elif defined(GP_PLAT_arm64_freebsd) + startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr); + startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]); + startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr); + startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp); +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]); +# elif defined(GP_PLAT_mips64_linux) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.sp = lul::TaggedUWord(mc->gregs[29]); + startRegs.fp = lul::TaggedUWord(mc->gregs[30]); +# else +# error "Unknown plat" +# endif + + // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the + // stack's registered top 
point. Do some basic sanity checks too. This + // assumes that the TaggedUWord holding the stack pointer value is valid, but + // it should be, since it was constructed that way in the code just above. + + // We could construct |stackImg| so that LUL reads directly from the stack in + // question, rather than from a copy of it. That would reduce overhead and + // space use a bit. However, it gives a problem with dynamic analysis tools + // (ASan, TSan, Valgrind) which is that such tools will report invalid or + // racing memory accesses, and such accesses will be reported deep inside LUL. + // By taking a copy here, we can either sanitise the copy (for Valgrind) or + // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have + // to try and suppress errors inside LUL. + // + // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks + // observed in some minutes of testing, whilst keeping the size of this + // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in + // practice are small, 4KB or less, and so the copy costs are insignificant + // compared to other profiler overhead. + // + // |stackImg| is allocated on this (the sampling thread's) stack. That + // implies that the frame for this function is at least N_STACK_BYTES large. + // In general it would be considered unacceptable to have such a large frame + // on a stack, but it only exists for the unwinder thread, and so is not + // expected to be a problem. Allocating it on the heap is troublesome because + // this function runs whilst the sampled thread is suspended, so any heap + // allocation risks deadlock. Allocating it as a global variable is not + // thread safe, which would be a problem if we ever allow multiple sampler + // threads. Hence allocating it on the stack seems to be the least-worst + // option. 
+ + lul::StackImage stackImg; + + { +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ + defined(GP_PLAT_amd64_freebsd) + uintptr_t rEDZONE_SIZE = 128; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ + defined(GP_PLAT_arm64_freebsd) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_mips64_linux) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# else +# error "Unknown plat" +# endif + uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop()); + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = 0; + if (start < end) { + nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + stackImg.mLen = nToCopy; + stackImg.mStartAvma = start; + if (nToCopy > 0) { + // If this is a vanilla memcpy(), ASAN makes the following complaint: + // + // ERROR: AddressSanitizer: stack-buffer-underflow ... + // ... + // HINT: this may be a false positive if your program uses some custom + // stack unwind mechanism or swapcontext + // + // This code is very much a custom stack unwind mechanism! So we use an + // alternative memcpy() implementation that is ignored by ASAN. 
+# if defined(MOZ_HAVE_ASAN_BLACKLIST) + ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# else + memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# endif + (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy); + } + } + + size_t framePointerFramesAcquired = 0; + lul::LUL* lul = CorePS::Lul(aLock); + lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs), + reinterpret_cast<uintptr_t*>(aNativeStack.mSPs), + &aNativeStack.mCount, &framePointerFramesAcquired, + MAX_NATIVE_FRAMES, &startRegs, &stackImg); + + // Update stats in the LUL stats object. Unfortunately this requires + // three global memory operations. + lul->mStats.mContext += 1; + lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired; + lul->mStats.mFP += framePointerFramesAcquired; +} + +#endif + +#ifdef HAVE_NATIVE_UNWIND +static void DoNativeBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // This method determines which stackwalker is used for periodic and + // synchronous samples. (Backtrace samples are treated differently, see + // profiler_suspend_and_sample_thread() for details). The only part of the + // ordering that matters is that LUL must precede FRAME_POINTER, because on + // Linux they can both be present. +# if defined(USE_LUL_STACKWALK) + DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# elif defined(USE_EHABI_STACKWALK) + DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# elif defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# else +# error "Invalid configuration" +# endif +} +#endif + +// Writes some components shared by periodic and synchronous profiles to +// ActivePS's ProfileBuffer. 
(This should only be called from DoSyncSample() +// and DoPeriodicSample().) +// +// The grammar for entry sequences is in a comment above +// ProfileBuffer::StreamSamplesToJSON. +static inline void DoSharedSample(PSLockRef aLock, bool aIsSynchronous, + RegisteredThread& aRegisteredThread, + const Registers& aRegs, uint64_t aSamplePos, + ProfileBuffer& aBuffer) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_ASSERT(!aBuffer.IsThreadSafe(), + "Mutexes cannot be used inside this critical section"); + + MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock)); + + ProfileBufferCollector collector(aBuffer, aSamplePos); + NativeStack nativeStack; +#if defined(HAVE_NATIVE_UNWIND) + if (ActivePS::FeatureStackWalk(aLock)) { + DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack); + + MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread, + aRegs, nativeStack, collector); + } else +#endif + { + MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread, + aRegs, nativeStack, collector); + + // We can't walk the whole native stack, but we can record the top frame. + if (ActivePS::FeatureLeaf(aLock)) { + aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC)); + } + } +} + +// Writes the components of a synchronous sample to the given ProfileBuffer. +static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread, + const TimeStamp& aNow, const Registers& aRegs, + ProfileBuffer& aBuffer) { + // WARNING: this function runs within the profiler's "critical section". + + uint64_t samplePos = + aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId()); + + TimeDuration delta = aNow - CorePS::ProcessStartTime(); + aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + + DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs, + samplePos, aBuffer); +} + +// Writes the components of a periodic sample to ActivePS's ProfileBuffer. 
+// The ThreadId entry is already written in the main ProfileBuffer, its location +// is `aSamplePos`, we can write the rest to `aBuffer` (which may be different). +static void DoPeriodicSample(PSLockRef aLock, + RegisteredThread& aRegisteredThread, + ProfiledThreadData& aProfiledThreadData, + const Registers& aRegs, uint64_t aSamplePos, + ProfileBuffer& aBuffer) { + // WARNING: this function runs within the profiler's "critical section". + + DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs, + aSamplePos, aBuffer); +} + +// END sampling/unwinding code +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN saving/streaming code + +const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL; + +static int64_t SafeJSInteger(uint64_t aValue) { + return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1; +} + +static void AddSharedLibraryInfoToStream(JSONWriter& aWriter, + const SharedLibrary& aLib) { + aWriter.StartObjectElement(); + aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart())); + aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd())); + aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset())); + aWriter.StringProperty("name", aLib.GetModuleName()); + aWriter.StringProperty("path", aLib.GetModulePath()); + aWriter.StringProperty("debugName", aLib.GetDebugName()); + aWriter.StringProperty("debugPath", aLib.GetDebugPath()); + aWriter.StringProperty("breakpadId", aLib.GetBreakpadId()); + aWriter.StringProperty("arch", aLib.GetArch()); + aWriter.EndObject(); +} + +void AppendSharedLibraries(JSONWriter& aWriter) { + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + info.SortByAddress(); + for (size_t i = 0; i < info.GetSize(); i++) { + AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i)); + } +} + +static void StreamCategories(SpliceableJSONWriter& aWriter) { + // Same order as 
ProfilingCategory. Format: + // [ + // { + // name: "Idle", + // color: "transparent", + // subcategories: ["Other"], + // }, + // { + // name: "Other", + // color: "grey", + // subcategories: [ + // "JSM loading", + // "Subprocess launching", + // "DLL loading" + // ] + // }, + // ... + // ] + +#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \ + aWriter.Start(); \ + aWriter.StringProperty("name", labelAsString); \ + aWriter.StringProperty("color", color); \ + aWriter.StartArrayProperty("subcategories"); +#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \ + aWriter.StringElement(labelAsString); +#define CATEGORY_JSON_END_CATEGORY \ + aWriter.EndArray(); \ + aWriter.EndObject(); + + MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY, + CATEGORY_JSON_SUBCATEGORY, + CATEGORY_JSON_END_CATEGORY) + +#undef CATEGORY_JSON_BEGIN_CATEGORY +#undef CATEGORY_JSON_SUBCATEGORY +#undef CATEGORY_JSON_END_CATEGORY +} + +static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) { + // Get an array view with all registered marker-type-specific functions. + Span<const base_profiler_markers_detail::Streaming::MarkerTypeFunctions> + markerTypeFunctionsArray = + base_profiler_markers_detail::Streaming::MarkerTypeFunctionsArray(); + // List of streamed marker names, this is used to spot duplicates. + std::set<std::string> names; + // Stream the display schema for each different one. (Duplications may come + // from the same code potentially living in different libraries.) + for (const auto& markerTypeFunctions : markerTypeFunctionsArray) { + auto name = markerTypeFunctions.mMarkerTypeNameFunction(); + // std::set.insert(T&&) returns a pair, its `second` is true if the element + // was actually inserted (i.e., it was not there yet.) 
+ const bool didInsert = + names.insert(std::string(name.data(), name.size())).second; + if (didInsert) { + markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name); + } + } +} + +static int64_t MicrosecondsSince1970(); + +static void StreamMetaJSCustomObject(PSLockRef aLock, + SpliceableJSONWriter& aWriter, + bool aIsShuttingDown) { + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + aWriter.IntProperty("version", 19); + + // The "startTime" field holds the number of milliseconds since midnight + // January 1, 1970 GMT. This grotty code computes (Now - (Now - + // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form. + TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime(); + aWriter.DoubleProperty( + "startTime", MicrosecondsSince1970() / 1000.0 - delta.ToMilliseconds()); + + // Write the shutdownTime field. Unlike startTime, shutdownTime is not an + // absolute time stamp: It's relative to startTime. This is consistent with + // all other (non-"startTime") times anywhere in the profile JSON. + if (aIsShuttingDown) { + aWriter.DoubleProperty("shutdownTime", profiler_time()); + } else { + aWriter.NullProperty("shutdownTime"); + } + + aWriter.StartArrayProperty("categories"); + StreamCategories(aWriter); + aWriter.EndArray(); + + aWriter.StartArrayProperty("markerSchema"); + StreamMarkerSchema(aWriter); + aWriter.EndArray(); + + if (!profiler_is_main_thread()) { + // Leave the rest of the properties out if we're not on the main thread. + // At the moment, the only case in which this function is called on a + // background thread is if we're in a content process and are going to + // send this profile to the parent process. In that case, the parent + // process profile's "meta" object already has the rest of the properties, + // and the parent process profile is dumped on that process's main thread. 
+ return; + } + + aWriter.DoubleProperty("interval", ActivePS::Interval(aLock)); + aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock)); + +#ifdef DEBUG + aWriter.IntProperty("debug", 1); +#else + aWriter.IntProperty("debug", 0); +#endif + + aWriter.IntProperty("gcpoison", 0); + + aWriter.IntProperty("asyncstack", 0); + + aWriter.IntProperty("processType", 0); +} + +static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + ActivePS::DiscardExpiredPages(aLock); + for (const auto& page : ActivePS::ProfiledPages(aLock)) { + page->StreamJSON(aWriter); + } +} + +static void locked_profiler_stream_json_for_this_process( + PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime, + bool aIsShuttingDown, bool aOnlyThreads = false) { + LOG("locked_profiler_stream_json_for_this_process"); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + AUTO_PROFILER_STATS(base_locked_profiler_stream_json_for_this_process); + + const double collectionStartMs = profiler_time(); + + ProfileBuffer& buffer = ActivePS::Buffer(aLock); + + // If there is a set "Window length", discard older data. 
+ Maybe<double> durationS = ActivePS::Duration(aLock); + if (durationS.isSome()) { + const double durationStartMs = collectionStartMs - *durationS * 1000; + buffer.DiscardSamplesBeforeTime(durationStartMs); + } + + if (!aOnlyThreads) { + // Put shared library info + aWriter.StartArrayProperty("libs"); + AppendSharedLibraries(aWriter); + aWriter.EndArray(); + + // Put meta data + aWriter.StartObjectProperty("meta"); + { StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown); } + aWriter.EndObject(); + + // Put page data + aWriter.StartArrayProperty("pages"); + { StreamPages(aLock, aWriter); } + aWriter.EndArray(); + + buffer.StreamProfilerOverheadToJSON(aWriter, CorePS::ProcessStartTime(), + aSinceTime); + buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(), + aSinceTime); + + // Lists the samples for each thread profile + aWriter.StartArrayProperty("threads"); + } + + // if aOnlyThreads is true, the only output will be the threads array items. + { + ActivePS::DiscardExpiredDeadProfiledThreads(aLock); + Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> threads = + ActivePS::ProfiledThreads(aLock); + for (auto& thread : threads) { + ProfiledThreadData* profiledThreadData = thread.second; + profiledThreadData->StreamJSON( + buffer, aWriter, CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock), + CorePS::ProcessStartTime(), aSinceTime); + } + } + + if (!aOnlyThreads) { + aWriter.EndArray(); + + aWriter.StartArrayProperty("pausedRanges"); + { buffer.StreamPausedRangesToJSON(aWriter, aSinceTime); } + aWriter.EndArray(); + } + + const double collectionEndMs = profiler_time(); + + // Record timestamps for the collection into the buffer, so that consumers + // know why we didn't collect any samples for its duration. + // We put these entries into the buffer after we've collected the profile, + // so they'll be visible for the *next* profile collection (if they haven't + // been overwritten due to buffer wraparound by then). 
+ buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs)); + buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs)); +} + +bool profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter, + double aSinceTime, + bool aIsShuttingDown, + bool aOnlyThreads) { + LOG("profiler_stream_json_for_this_process"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime, + aIsShuttingDown, aOnlyThreads); + return true; +} + +// END saving/streaming code +//////////////////////////////////////////////////////////////////////// + +static char FeatureCategory(uint32_t aFeature) { + if (aFeature & DefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'D'; + } + return 'd'; + } + + if (aFeature & StartupExtraDefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'S'; + } + return 's'; + } + + if (aFeature & AvailableFeatures()) { + return '-'; + } + return 'x'; +} + +static void PrintUsageThenExit(int aExitCode) { + PrintToConsole( + "\n" + "Profiler environment variable usage:\n" + "\n" + " MOZ_BASE_PROFILER_HELP\n" + " If set to any value, prints this message.\n" + " (Only BaseProfiler features are known here; Use MOZ_PROFILER_HELP\n" + " for Gecko Profiler help, with more features).\n" + "\n" + " MOZ_BASE_PROFILER_{,DEBUG_,VERBOSE}LOGGING\n" + " Enables BaseProfiler logging to stdout. 
The levels of logging\n" + " available are MOZ_BASE_PROFILER_LOGGING' (least verbose),\n" + " '..._DEBUG_LOGGING', '..._VERBOSE_LOGGING' (most verbose)\n" + "\n" + " MOZ_PROFILER_STARTUP\n" + " If set to any value other than '' or '0'/'N'/'n', starts the\n" + " profiler immediately on start-up.\n" + " Useful if you want profile code that runs very early.\n" + "\n" + " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the number of entries\n" + " per process in the profiler's circular buffer when the profiler is\n" + " first started.\n" + " If unset, the platform default is used:\n" + " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n" + " (%u bytes per entry -> %u or %u total bytes per process)\n" + "\n" + " MOZ_PROFILER_STARTUP_DURATION=<1..>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time\n" + " of entries in the the profiler's circular buffer when the profiler\n" + " is first started, in seconds.\n" + " If unset, the life time of the entries will only be restricted by\n" + " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n" + " additional time duration restriction will be applied.\n" + "\n" + " MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n" + " measured in milliseconds, when the profiler is first started.\n" + " If unset, the platform default is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n" + " features, as the integer value of the features bitfield.\n" + " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES=<Features>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n" + " features, as a comma-separated list of strings.\n" + " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n" + " If unset, the platform default is used.\n" + "\n" + " Features: 
(x=unavailable, D/d=default/unavailable,\n" + " S/s=MOZ_PROFILER_STARTUP extra " + "default/unavailable)\n", + unsigned(ActivePS::scMinimumBufferEntries), + unsigned(ActivePS::scMaximumBufferEntries), + unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value()), + unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value()), + unsigned(scBytesPerEntry), + unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry), + unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * + scBytesPerEntry)); + +#define PRINT_FEATURE(n_, str_, Name_, desc_) \ + PrintToConsole(" %c %7u: \"%s\" (%s)\n", \ + FeatureCategory(ProfilerFeature::Name_), \ + ProfilerFeature::Name_, str_, desc_); + + BASE_PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE) + +#undef PRINT_FEATURE + + PrintToConsole( + " - \"default\" (All above D+S defaults)\n" + "\n" + " MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as " + "a\n" + " comma-separated list of strings. A given thread will be sampled if\n" + " any of the filters is a case-insensitive substring of the thread\n" + " name. 
If unset, a default is used.\n" + "\n" + " MOZ_PROFILER_SHUTDOWN\n" + " If set, the profiler saves a profile to the named file on shutdown.\n" + "\n" + " MOZ_PROFILER_SYMBOLICATE\n" + " If set, the profiler will pre-symbolicate profiles.\n" + " *Note* This will add a significant pause when gathering data, and\n" + " is intended mainly for local development.\n" + "\n" + " MOZ_PROFILER_LUL_TEST\n" + " If set to any value, runs LUL unit tests at startup.\n" + "\n" + " This platform %s native unwinding.\n" + "\n", +#if defined(HAVE_NATIVE_UNWIND) + "supports" +#else + "does not support" +#endif + ); + + exit(aExitCode); +} + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler + +#if defined(GP_OS_linux) || defined(GP_OS_android) +struct SigHandlerCoordinator; +#endif + +// Sampler performs setup and teardown of the state required to sample with the +// profiler. Sampler may exist when ActivePS is not present. +// +// SuspendAndSampleAndResumeThread must only be called from a single thread, +// and must not sample the thread it is being called from. A separate Sampler +// instance must be used for each thread which wants to capture samples. + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// With the exception of SamplerThread, all Sampler objects must be Disable-d +// before releasing the lock which was used to create them. This avoids races +// on linux with the SIGPROF signal handler. + +class Sampler { + public: + // Sets up the profiler such that it can begin sampling. + explicit Sampler(PSLockRef aLock); + + // Disable the sampler, restoring it to its previous state. This must be + // called once, and only once, before the Sampler is destroyed. + void Disable(PSLockRef aLock); + + // This method suspends and resumes the samplee thread. It calls the passed-in + // function-like object aProcessRegs (passing it a populated |const + // Registers&| arg) while the samplee thread is suspended. 
+ // + // Func must be a function-like object of type `void()`. + template <typename Func> + void SuspendAndSampleAndResumeThread( + PSLockRef aLock, const RegisteredThread& aRegisteredThread, + const TimeStamp& aNow, const Func& aProcessRegs); + + private: +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // Used to restore the SIGPROF handler when ours is removed. + struct sigaction mOldSigprofHandler; + + // This process' ID. Needed as an argument for tgkill in + // SuspendAndSampleAndResumeThread. + int mMyPid; + + // The sampler thread's ID. Used to assert that it is not sampling itself, + // which would lead to deadlock. + int mSamplerTid; + + public: + // This is the one-and-only variable used to communicate between the sampler + // thread and the samplee thread's signal handler. It's static because the + // samplee thread's signal handler is static. + static struct SigHandlerCoordinator* sSigHandlerCoordinator; +#endif +}; + +// END Sampler +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread + +// The sampler thread controls sampling and runs whenever the profiler is +// active. It periodically runs through all registered threads, finds those +// that should be sampled, then pauses and samples them. + +class SamplerThread { + public: + // Creates a sampler thread, but doesn't start it. + SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds); + ~SamplerThread(); + + // This runs on (is!) the sampler thread. + void Run(); + + // This runs on the main thread. + void Stop(PSLockRef aLock); + + private: + // This suspends the calling thread for the given number of microseconds. + // Best effort timing. + void SleepMicro(uint32_t aMicroseconds); + + // The sampler used to suspend and sample threads. 
+ Sampler mSampler; + + // The activity generation, for detecting when the sampler thread must stop. + const uint32_t mActivityGeneration; + + // The interval between samples, measured in microseconds. + const int mIntervalMicroseconds; + + // The OS-specific handle for the sampler thread. +#if defined(GP_OS_windows) + HANDLE mThread; +#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \ + defined(GP_OS_android) || defined(GP_OS_freebsd) + pthread_t mThread; +#endif + + SamplerThread(const SamplerThread&) = delete; + void operator=(const SamplerThread&) = delete; +}; + +// This function is required because we need to create a SamplerThread within +// ActivePS's constructor, but SamplerThread is defined after ActivePS. It +// could probably be removed by moving some code around. +static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, + double aInterval) { + return new SamplerThread(aLock, aGeneration, aInterval); +} + +// This function is the sampler thread. This implementation is used for all +// targets. +void SamplerThread::Run() { + // TODO: If possible, name this thread later on, after NSPR becomes available. + // PR_SetCurrentThreadName("SamplerThread"); + + // Features won't change during this SamplerThread's lifetime, so we can read + // them once and store them locally. + const uint32_t features = []() -> uint32_t { + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + // If there is no active profiler, it doesn't matter what we return, + // because this thread will exit before any feature is used. + return 0; + } + return ActivePS::Features(lock); + }(); + + // Not *no*-stack-sampling means we do want stack sampling. + const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features); + + // Use local BlocksRingBuffer&ProfileBuffer to capture the stack. 
+ // (This is to avoid touching the CorePS::CoreBuffer lock while + // a thread is suspended, because that thread could be working with + // the CorePS::CoreBuffer as well.) + ProfileBufferChunkManagerSingle localChunkManager( + ProfileBufferChunkManager::scExpectedMaximumStackSize); + ProfileChunkedBuffer localBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager); + ProfileBuffer localProfileBuffer(localBuffer); + + // Will be kept between collections, to know what each collection does. + auto previousState = localBuffer.GetState(); + + // This will be positive if we are running behind schedule (sampling less + // frequently than desired) and negative if we are ahead of schedule. + TimeDuration lastSleepOvershoot = 0; + TimeStamp sampleStart = TimeStamp::NowUnfuzzed(); + + while (true) { + // This scope is for |lock|. It ends before we sleep below. + { + PSAutoLock lock; + TimeStamp lockAcquired = TimeStamp::NowUnfuzzed(); + + if (!ActivePS::Exists(lock)) { + return; + } + + // At this point profiler_stop() might have been called, and + // profiler_start() might have been called on another thread. If this + // happens the generation won't match. 
+ if (ActivePS::Generation(lock) != mActivityGeneration) { + return; + } + + ActivePS::ClearExpiredExitProfiles(lock); + + TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed(); + + if (!ActivePS::IsSamplingPaused(lock)) { + TimeDuration delta = sampleStart - CorePS::ProcessStartTime(); + ProfileBuffer& buffer = ActivePS::Buffer(lock); + + // handle per-process generic counters + const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock); + for (auto& counter : counters) { + // create Buffer entries for each counter + buffer.AddEntry(ProfileBufferEntry::CounterId(counter)); + buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + // XXX support keyed maps of counts + // In the future, we'll support keyed counters - for example, counters + // with a key which is a thread ID. For "simple" counters we'll just + // use a key of 0. + int64_t count; + uint64_t number; + counter->Sample(count, number); + buffer.AddEntry(ProfileBufferEntry::CounterKey(0)); + buffer.AddEntry(ProfileBufferEntry::Count(count)); + if (number) { + buffer.AddEntry(ProfileBufferEntry::Number(number)); + } + } + TimeStamp countersSampled = TimeStamp::NowUnfuzzed(); + + if (stackSampling) { + const Vector<LiveProfiledThreadData>& liveThreads = + ActivePS::LiveProfiledThreads(lock); + + for (auto& thread : liveThreads) { + RegisteredThread* registeredThread = thread.mRegisteredThread; + ProfiledThreadData* profiledThreadData = + thread.mProfiledThreadData.get(); + RefPtr<ThreadInfo> info = registeredThread->Info(); + + // If the thread is asleep and has been sampled before in the same + // sleep episode, find and copy the previous sample, as that's + // cheaper than taking a new sample. 
+ if (registeredThread->RacyRegisteredThread() + .CanDuplicateLastSampleDueToSleep()) { + bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample( + info->ThreadId(), CorePS::ProcessStartTime(), + profiledThreadData->LastSample()); + if (dup_ok) { + continue; + } + } + + AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample); + + TimeStamp now = TimeStamp::NowUnfuzzed(); + + // Add the thread ID now, so we know its position in the main + // buffer, which is used by some JS data. (DoPeriodicSample only + // knows about the temporary local buffer.) + uint64_t samplePos = + buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId()); + profiledThreadData->LastSample() = Some(samplePos); + + // Also add the time, so it's always there after the thread ID, as + // expected by the parser. (Other stack data is optional.) + TimeDuration delta = now - CorePS::ProcessStartTime(); + buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + + mSampler.SuspendAndSampleAndResumeThread( + lock, *registeredThread, now, + [&](const Registers& aRegs, const TimeStamp& aNow) { + DoPeriodicSample(lock, *registeredThread, *profiledThreadData, + aRegs, samplePos, localProfileBuffer); + }); + + // If data is complete, copy it into the global buffer. + auto state = localBuffer.GetState(); + if (state.mClearedBlockCount != previousState.mClearedBlockCount) { + LOG("Stack sample too big for local storage, needed %u bytes", + unsigned(state.mRangeEnd - previousState.mRangeEnd)); + } else if (state.mRangeEnd - previousState.mRangeEnd >= + *CorePS::CoreBuffer().BufferLength()) { + LOG("Stack sample too big for profiler storage, needed %u bytes", + unsigned(state.mRangeEnd - previousState.mRangeEnd)); + } else { + CorePS::CoreBuffer().AppendContents(localBuffer); + } + + // Clean up for the next run. + localBuffer.Clear(); + previousState = localBuffer.GetState(); + } + } + +#if defined(USE_LUL_STACKWALK) + // The LUL unwind object accumulates frame statistics. 
Periodically we + // should poke it to give it a chance to print those statistics. This + // involves doing I/O (fprintf, __android_log_print, etc.) and so + // can't safely be done from the critical section inside + // SuspendAndSampleAndResumeThread, which is why it is done here. + CorePS::Lul(lock)->MaybeShowStats(); +#endif + TimeStamp threadsSampled = TimeStamp::NowUnfuzzed(); + + { + AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests); + ActivePS::FulfillChunkRequests(lock); + } + + buffer.CollectOverheadStats(delta, lockAcquired - sampleStart, + expiredMarkersCleaned - lockAcquired, + countersSampled - expiredMarkersCleaned, + threadsSampled - countersSampled); + } + } + // gPSMutex is not held after this point. + + // Calculate how long a sleep to request. After the sleep, measure how + // long we actually slept and take the difference into account when + // calculating the sleep interval for the next iteration. This is an + // attempt to keep "to schedule" in the presence of inaccuracy of the + // actual sleep intervals. + TimeStamp targetSleepEndTime = + sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds); + TimeStamp beforeSleep = TimeStamp::NowUnfuzzed(); + TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep; + double sleepTime = std::max( + 0.0, (targetSleepDuration - lastSleepOvershoot).ToMicroseconds()); + SleepMicro(static_cast<uint32_t>(sleepTime)); + sampleStart = TimeStamp::NowUnfuzzed(); + lastSleepOvershoot = + sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime)); + } +} + +// Temporary closing namespaces from enclosing platform.cpp. +} // namespace baseprofiler +} // namespace mozilla + +// We #include these files directly because it means those files can use +// declarations from this file trivially. These provide target-specific +// implementations of all SamplerThread methods except Run(). 
+#if defined(GP_OS_windows) +# include "platform-win32.cpp" +#elif defined(GP_OS_darwin) +# include "platform-macos.cpp" +#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include "platform-linux-android.cpp" +#else +# error "bad platform" +#endif + +namespace mozilla { +namespace baseprofiler { + +UniquePlatformData AllocPlatformData(int aThreadId) { + return UniquePlatformData(new PlatformData(aThreadId)); +} + +void PlatformDataDestructor::operator()(PlatformData* aData) { delete aData; } + +// END SamplerThread +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN externally visible functions + +static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) { + if (strcmp(aFeature, "default") == 0) { + return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures()) + : DefaultFeatures()) & + AvailableFeatures(); + } + +#define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \ + if (strcmp(aFeature, str_) == 0) { \ + return ProfilerFeature::Name_; \ + } + + BASE_PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT) + +#undef PARSE_FEATURE_BIT + + PrintToConsole("\nUnrecognized feature \"%s\".\n\n", aFeature); + // Since we may have an old feature we don't implement anymore, don't exit + PrintUsageThenExit(0); + return 0; +} + +uint32_t ParseFeaturesFromStringArray(const char** aFeatures, + uint32_t aFeatureCount, + bool aIsStartup /* = false */) { + uint32_t features = 0; + for (size_t i = 0; i < aFeatureCount; i++) { + features |= ParseFeature(aFeatures[i], aIsStartup); + } + return features; +} + +// Find the RegisteredThread for the current thread. This should only be called +// in places where TLSRegisteredThread can't be used. 
+static RegisteredThread* FindCurrentThreadRegisteredThread(PSLockRef aLock) { + int id = profiler_current_thread_id(); + const Vector<UniquePtr<RegisteredThread>>& registeredThreads = + CorePS::RegisteredThreads(aLock); + for (auto& registeredThread : registeredThreads) { + if (registeredThread->Info()->ThreadId() == id) { + return registeredThread.get(); + } + } + + return nullptr; +} + +static ProfilingStack* locked_register_thread(PSLockRef aLock, + const char* aName, + void* aStackTop) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + MOZ_ASSERT(!FindCurrentThreadRegisteredThread(aLock)); + + VTUNE_REGISTER_THREAD(aName); + + if (!TLSRegisteredThread::Init(aLock)) { + return nullptr; + } + + RefPtr<ThreadInfo> info = new ThreadInfo(aName, profiler_current_thread_id(), + profiler_is_main_thread()); + UniquePtr<RegisteredThread> registeredThread = + MakeUnique<RegisteredThread>(info, aStackTop); + + TLSRegisteredThread::SetRegisteredThread(aLock, registeredThread.get()); + + if (ActivePS::Exists(aLock) && ActivePS::ShouldProfileThread(aLock, info)) { + registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true); + ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(), + MakeUnique<ProfiledThreadData>(info)); + } + + ProfilingStack* profilingStack = + ®isteredThread->RacyRegisteredThread().ProfilingStack(); + + CorePS::AppendRegisteredThread(aLock, std::move(registeredThread)); + + return profilingStack; +} + +static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity, + double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + const Maybe<double>& aDuration); + +static Vector<const char*> SplitAtCommas(const char* aString, + UniquePtr<char[]>& aStorage) { + size_t len = strlen(aString); + aStorage = MakeUnique<char[]>(len + 1); + PodCopy(aStorage.get(), aString, len + 1); + + // Iterate over all characters in aStorage and split at commas, by + // overwriting commas with the null char. 
+ Vector<const char*> array; + size_t currentElementStart = 0; + for (size_t i = 0; i <= len; i++) { + if (aStorage[i] == ',') { + aStorage[i] = '\0'; + } + if (aStorage[i] == '\0') { + MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart])); + currentElementStart = i + 1; + } + } + return array; +} + +void profiler_init(void* aStackTop) { + LOG("profiler_init"); + + scProfilerMainThreadId = profiler_current_thread_id(); + + VTUNE_INIT(); + + MOZ_RELEASE_ASSERT(!CorePS::Exists()); + + if (getenv("MOZ_BASE_PROFILER_HELP")) { + PrintUsageThenExit(0); // terminates execution + } + + SharedLibraryInfo::Initialize(); + + uint32_t features = DefaultFeatures() & AvailableFeatures(); + + UniquePtr<char[]> filterStorage; + + Vector<const char*> filters; + MOZ_RELEASE_ASSERT(filters.append(kMainThreadName)); + + PowerOfTwo32 capacity = BASE_PROFILER_DEFAULT_ENTRIES; + Maybe<double> duration = Nothing(); + double interval = BASE_PROFILER_DEFAULT_INTERVAL; + + { + PSAutoLock lock; + + // We've passed the possible failure point. Instantiate CorePS, which + // indicates that the profiler has initialized successfully. + CorePS::Create(lock); + + Unused << locked_register_thread(lock, kMainThreadName, aStackTop); + + // Platform-specific initialization. + PlatformInit(lock); + + // (Linux-only) We could create CorePS::mLul and read unwind info into it + // at this point. That would match the lifetime implied by destruction of + // it in profiler_shutdown() just below. However, that gives a big delay on + // startup, even if no profiling is actually to be done. So, instead, it is + // created on demand at the first call to PlatformStart(). 
+ + const char* startupEnv = getenv("MOZ_PROFILER_STARTUP"); + if (!startupEnv || startupEnv[0] == '\0' || + ((startupEnv[0] == '0' || startupEnv[0] == 'N' || + startupEnv[0] == 'n') && + startupEnv[1] == '\0')) { + return; + } + + // Hidden option to stop Base Profiler, mostly due to Talos intermittents, + // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3 + // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325. + if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) { + return; + } + + LOG("- MOZ_PROFILER_STARTUP is set"); + + // Startup default capacity may be different. + capacity = BASE_PROFILER_DEFAULT_STARTUP_ENTRIES; + + const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES"); + if (startupCapacity && startupCapacity[0] != '\0') { + errno = 0; + long capacityLong = strtol(startupCapacity, nullptr, 10); + // `long` could be 32 or 64 bits, so we force a 64-bit comparison with + // the maximum 32-bit signed number (as more than that is clamped down to + // 2^31 anyway). + if (errno == 0 && capacityLong > 0 && + static_cast<uint64_t>(capacityLong) <= + static_cast<uint64_t>(INT32_MAX)) { + capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries( + static_cast<uint32_t>(capacityLong))); + LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value())); + } else { + PrintToConsole("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s", + startupCapacity); + PrintUsageThenExit(1); + } + } + + const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION"); + if (startupDuration && startupDuration[0] != '\0') { + // The duration is a floating point number. Use StringToDouble rather than + // strtod, so that "." is used as the decimal separator regardless of OS + // locale. 
+ auto durationVal = StringToDouble(std::string(startupDuration)); + if (durationVal && *durationVal >= 0.0) { + if (*durationVal > 0.0) { + duration = Some(*durationVal); + } + LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", *durationVal); + } else { + PrintToConsole("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s", + startupDuration); + PrintUsageThenExit(1); + } + } + + const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL"); + if (startupInterval && startupInterval[0] != '\0') { + // The interval is a floating point number. Use StringToDouble rather than + // strtod, so that "." is used as the decimal separator regardless of OS + // locale. + auto intervalValue = StringToDouble(MakeStringSpan(startupInterval)); + if (intervalValue && *intervalValue > 0.0 && *intervalValue <= 1000.0) { + interval = *intervalValue; + LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval); + } else { + PrintToConsole("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s", + startupInterval); + PrintUsageThenExit(1); + } + } + + features |= StartupExtraDefaultFeatures() & AvailableFeatures(); + + const char* startupFeaturesBitfield = + getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD"); + if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') { + errno = 0; + features = strtol(startupFeaturesBitfield, nullptr, 10); + if (errno == 0 && features != 0) { + LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features); + } else { + PrintToConsole( + "- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s", + startupFeaturesBitfield); + PrintUsageThenExit(1); + } + } else { + const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES"); + if (startupFeatures && startupFeatures[0] != '\0') { + // Interpret startupFeatures as a list of feature strings, separated by + // commas. 
+ UniquePtr<char[]> featureStringStorage; + Vector<const char*> featureStringArray = + SplitAtCommas(startupFeatures, featureStringStorage); + features = ParseFeaturesFromStringArray(featureStringArray.begin(), + featureStringArray.length(), + /* aIsStartup */ true); + LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features); + } + } + + const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS"); + if (startupFilters && startupFilters[0] != '\0') { + filters = SplitAtCommas(startupFilters, filterStorage); + LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters); + } + + locked_profiler_start(lock, capacity, interval, features, filters.begin(), + filters.length(), duration); + } + + // TODO: Install memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // // start counting memory allocations (outside of lock because this may + // call + // // profiler_add_sampled_counter which would attempt to take the lock.) + // mozilla::profiler::install_memory_counter(true); + // #endif +} + +static void locked_profiler_save_profile_to_file(PSLockRef aLock, + const char* aFilename, + bool aIsShuttingDown); + +static SamplerThread* locked_profiler_stop(PSLockRef aLock); + +void profiler_shutdown() { + LOG("profiler_shutdown"); + + VTUNE_SHUTDOWN(); + + MOZ_RELEASE_ASSERT(profiler_is_main_thread()); + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // If the profiler is active we must get a handle to the SamplerThread before + // ActivePS is destroyed, in order to delete it. + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Save the profile on shutdown if requested. 
+ if (ActivePS::Exists(lock)) { + const char* filename = getenv("MOZ_PROFILER_SHUTDOWN"); + if (filename) { + locked_profiler_save_profile_to_file(lock, filename, + /* aIsShuttingDown */ true); + } + + samplerThread = locked_profiler_stop(lock); + } + + CorePS::Destroy(lock); + + // We just destroyed CorePS and the ThreadInfos it contains, so we can + // clear this thread's TLSRegisteredThread. + TLSRegisteredThread::SetRegisteredThread(lock, nullptr); + } + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. + if (samplerThread) { + delete samplerThread; + } +} + +static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter, + double aSinceTime, bool aIsShuttingDown, + bool aOnlyThreads = false) { + LOG("WriteProfileToJSONWriter"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (!aOnlyThreads) { + aWriter.Start(); + { + if (!profiler_stream_json_for_this_process( + aWriter, aSinceTime, aIsShuttingDown, aOnlyThreads)) { + return false; + } + + // Don't include profiles from other processes because this is a + // synchronous function. + aWriter.StartArrayProperty("processes"); + aWriter.EndArray(); + } + aWriter.End(); + } else { + aWriter.StartBareList(); + if (!profiler_stream_json_for_this_process(aWriter, aSinceTime, + aIsShuttingDown, aOnlyThreads)) { + return false; + } + aWriter.EndBareList(); + } + return true; +} + +void profiler_set_process_name(const std::string& aProcessName, + const std::string* aETLDplus1) { + LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.c_str(), + aETLDplus1 ? 
aETLDplus1->c_str() : "<none>"); + PSAutoLock lock; + CorePS::SetProcessName(lock, aProcessName); + if (aETLDplus1) { + CorePS::SetETLDplus1(lock, *aETLDplus1); + } +} + +UniquePtr<char[]> profiler_get_profile(double aSinceTime, bool aIsShuttingDown, + bool aOnlyThreads) { + LOG("profiler_get_profile"); + + SpliceableChunkedJSONWriter b; + if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown, aOnlyThreads)) { + return nullptr; + } + return b.ChunkedWriteFunc().CopyData(); +} + +void profiler_get_profile_json_into_lazily_allocated_buffer( + const std::function<char*(size_t)>& aAllocator, double aSinceTime, + bool aIsShuttingDown) { + LOG("profiler_get_profile_json_into_lazily_allocated_buffer"); + + SpliceableChunkedJSONWriter b; + if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown)) { + return; + } + + b.ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer(aAllocator); +} + +void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration, + double* aInterval, uint32_t* aFeatures, + Vector<const char*>* aFilters) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (!aCapacity || !aDuration || !aInterval || !aFeatures || !aFilters) { + return; + } + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + *aCapacity = 0; + *aDuration = Nothing(); + *aInterval = 0; + *aFeatures = 0; + aFilters->clear(); + return; + } + + *aCapacity = ActivePS::Capacity(lock).Value(); + *aDuration = ActivePS::Duration(lock); + *aInterval = ActivePS::Interval(lock); + *aFeatures = ActivePS::Features(lock); + + const Vector<std::string>& filters = ActivePS::Filters(lock); + MOZ_ALWAYS_TRUE(aFilters->resize(filters.length())); + for (uint32_t i = 0; i < filters.length(); ++i) { + (*aFilters)[i] = filters[i].c_str(); + } +} + +void GetProfilerEnvVarsForChildProcess( + std::function<void(const char* key, const char* value)>&& aSetEnv) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + aSetEnv("MOZ_PROFILER_STARTUP", ""); 
+ return; + } + + aSetEnv("MOZ_PROFILER_STARTUP", "1"); + auto capacityString = + Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value())); + aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get()); + + // Use AppendFloat instead of Smprintf with %f because the decimal + // separator used by %f is locale-dependent. But the string we produce needs + // to be parseable by strtod, which only accepts the period character as a + // decimal separator. AppendFloat always uses the period character. + std::string intervalString = std::to_string(ActivePS::Interval(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.c_str()); + + auto featuresString = Smprintf("%d", ActivePS::Features(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get()); + + std::string filtersString; + const Vector<std::string>& filters = ActivePS::Filters(lock); + for (uint32_t i = 0; i < filters.length(); ++i) { + filtersString += filters[i]; + if (i != filters.length() - 1) { + filtersString += ","; + } + } + aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str()); +} + +void profiler_received_exit_profile(const std::string& aExitProfile) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + return; + } + ActivePS::AddExitProfile(lock, aExitProfile); +} + +Vector<std::string> profiler_move_exit_profiles() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + Vector<std::string> profiles; + if (ActivePS::Exists(lock)) { + profiles = ActivePS::MoveExitProfiles(lock); + } + return profiles; +} + +static void locked_profiler_save_profile_to_file(PSLockRef aLock, + const char* aFilename, + bool aIsShuttingDown = false) { + LOG("locked_profiler_save_profile_to_file(%s)", aFilename); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + std::ofstream stream; + stream.open(aFilename); + if (stream.is_open()) { + SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream)); + 
w.Start(); + { + locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0, + aIsShuttingDown); + + w.StartArrayProperty("processes"); + Vector<std::string> exitProfiles = ActivePS::MoveExitProfiles(aLock); + for (auto& exitProfile : exitProfiles) { + if (!exitProfile.empty()) { + w.Splice(exitProfile); + } + } + w.EndArray(); + } + w.End(); + + stream.close(); + } +} + +void profiler_save_profile_to_file(const char* aFilename) { + LOG("profiler_save_profile_to_file(%s)", aFilename); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + locked_profiler_save_profile_to_file(lock, aFilename); +} + +uint32_t profiler_get_available_features() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + return AvailableFeatures(); +} + +Maybe<ProfilerBufferInfo> profiler_get_buffer_info() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return Nothing(); + } + + return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo()); +} + +// This basically duplicates AutoProfilerLabel's constructor. +static void* MozGlueBaseLabelEnter(const char* aLabel, + const char* aDynamicString, void* aSp) { + ProfilingStack* profilingStack = AutoProfilerLabel::sProfilingStack.get(); + if (profilingStack) { + profilingStack->pushLabelFrame(aLabel, aDynamicString, aSp, + ProfilingCategoryPair::OTHER); + } + return profilingStack; +} + +// This basically duplicates AutoProfilerLabel's destructor. +static void MozGlueBaseLabelExit(void* sProfilingStack) { + if (sProfilingStack) { + reinterpret_cast<ProfilingStack*>(sProfilingStack)->pop(); + } +} + +static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity, + double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + const Maybe<double>& aDuration) { + if (LOG_TEST) { + LOG("locked_profiler_start"); + LOG("- capacity = %d", int(aCapacity.Value())); + LOG("- duration = %.2f", aDuration ? 
*aDuration : -1); + LOG("- interval = %.2f", aInterval); + +#define LOG_FEATURE(n_, str_, Name_, desc_) \ + if (ProfilerFeature::Has##Name_(aFeatures)) { \ + LOG("- feature = %s", str_); \ + } + + BASE_PROFILER_FOR_EACH_FEATURE(LOG_FEATURE) + +#undef LOG_FEATURE + + for (uint32_t i = 0; i < aFilterCount; i++) { + LOG("- threads = %s", aFilters[i]); + } + } + + MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock)); + +#if defined(GP_PLAT_amd64_windows) + InitializeWin64ProfilerHooks(); +#endif + + // Fall back to the default values if the passed-in values are unreasonable. + // We want to be able to store at least one full stack. + // TODO: Review magic numbers. + PowerOfTwo32 capacity = + (aCapacity.Value() >= + ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry) + ? aCapacity + : BASE_PROFILER_DEFAULT_ENTRIES; + Maybe<double> duration = aDuration; + + if (aDuration && *aDuration <= 0) { + duration = Nothing(); + } + double interval = aInterval > 0 ? aInterval : BASE_PROFILER_DEFAULT_INTERVAL; + + ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters, aFilterCount, + duration); + + // Set up profiling for each registered thread, if appropriate. + const Vector<UniquePtr<RegisteredThread>>& registeredThreads = + CorePS::RegisteredThreads(aLock); + for (auto& registeredThread : registeredThreads) { + RefPtr<ThreadInfo> info = registeredThread->Info(); + + if (ActivePS::ShouldProfileThread(aLock, info)) { + registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true); + ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(), + MakeUnique<ProfiledThreadData>(info)); + registeredThread->RacyRegisteredThread().ReinitializeOnResume(); + } + } + + // Setup support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(MozGlueBaseLabelEnter, MozGlueBaseLabelExit); + + // At the very end, set up RacyFeatures. 
+ RacyFeatures::SetActive(ActivePS::Features(aLock)); +} + +void profiler_start(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, const Maybe<double>& aDuration) { + LOG("profiler_start"); + + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + // Reset the current state if the profiler is running. + if (ActivePS::Exists(lock)) { + samplerThread = locked_profiler_stop(lock); + } + + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aDuration); + } + + // TODO: Install memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // // start counting memory allocations (outside of lock because this may + // call + // // profiler_add_sampled_counter which would attempt to take the lock.) + // mozilla::profiler::install_memory_counter(true); + // #endif + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. + if (samplerThread) { + delete samplerThread; + } +} + +void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, + const Maybe<double>& aDuration) { + LOG("profiler_ensure_started"); + + // bool startedProfiler = false; (See TODO below) + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + if (ActivePS::Exists(lock)) { + // The profiler is active. + if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount)) { + // Stop and restart with different settings. 
+ samplerThread = locked_profiler_stop(lock); + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aDuration); + // startedProfiler = true; (See TODO below) + } + } else { + // The profiler is stopped. + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aDuration); + // startedProfiler = true; (See TODO below) + } + } + + // TODO: Install memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // // start counting memory allocations (outside of lock because this may + // // call profiler_add_sampled_counter which would attempt to take the + // // lock.) + // mozilla::profiler::install_memory_counter(true); + // #endif + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. + if (samplerThread) { + delete samplerThread; + } +} + +[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) { + LOG("locked_profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + // At the very start, clear RacyFeatures. + RacyFeatures::SetInactive(); + + // TODO: Uninstall memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // mozilla::profiler::install_memory_counter(false); + // #endif + + // Remove support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(nullptr, nullptr); + + // Stop sampling live threads. + const Vector<LiveProfiledThreadData>& liveProfiledThreads = + ActivePS::LiveProfiledThreads(aLock); + for (auto& thread : liveProfiledThreads) { + RegisteredThread* registeredThread = thread.mRegisteredThread; + registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false); + } + + // The Stop() call doesn't actually stop Run(); that happens in this + // function's caller when the sampler thread is destroyed. 
Stop() just gives + // the SamplerThread a chance to do some cleanup with gPSMutex locked. + SamplerThread* samplerThread = ActivePS::Destroy(aLock); + samplerThread->Stop(aLock); + + return samplerThread; +} + +void profiler_stop() { + LOG("profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + SamplerThread* samplerThread; + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + samplerThread = locked_profiler_stop(lock); + } + + // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we + // would be waiting here with gPSMutex locked for SamplerThread::Run() to + // return so the join operation within the destructor can complete, but Run() + // needs to lock gPSMutex to return. + // + // Because this call occurs with gPSMutex unlocked, it -- including the final + // iteration of Run()'s loop -- must be able detect deactivation and return + // in a way that's safe with respect to other gPSMutex-locking operations + // that may have occurred in the meantime. 
+ delete samplerThread; +} + +bool profiler_is_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsPaused(lock); +} + +void profiler_pause() { + LOG("profiler_pause"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + RacyFeatures::SetPaused(); + ActivePS::SetIsPaused(lock, true); + ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time())); + } +} + +void profiler_resume() { + LOG("profiler_resume"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::Resume(profiler_time())); + ActivePS::SetIsPaused(lock, false); + RacyFeatures::SetUnpaused(); + } +} + +bool profiler_is_sampling_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsSamplingPaused(lock); +} + +void profiler_pause_sampling() { + LOG("profiler_pause_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + RacyFeatures::SetSamplingPaused(); + ActivePS::SetIsSamplingPaused(lock, true); + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::PauseSampling(profiler_time())); + } +} + +void profiler_resume_sampling() { + LOG("profiler_resume_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::ResumeSampling(profiler_time())); + ActivePS::SetIsSamplingPaused(lock, false); + RacyFeatures::SetSamplingUnpaused(); + } +} + +bool profiler_feature_active(uint32_t aFeature) { + // This function runs both on and off the main thread. 
+ + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // This function is hot enough that we use RacyFeatures, not ActivePS. + return RacyFeatures::IsActiveWithFeature(aFeature); +} + +void profiler_add_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + CorePS::AppendCounter(lock, aCounter); +} + +void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + // Note: we don't enforce a final sample, though we could do so if the + // profiler was active + CorePS::RemoveCounter(lock, aCounter); +} + +ProfilingStack* profiler_register_thread(const char* aName, + void* aGuessStackTop) { + DEBUG_LOG("profiler_register_thread(%s)", aName); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (RegisteredThread* thread = FindCurrentThreadRegisteredThread(lock); + thread) { + LOG("profiler_register_thread(%s) - thread %d already registered as %s", + aName, profiler_current_thread_id(), thread->Info()->Name()); + // TODO: Use new name. This is currently not possible because the + // RegisteredThread's ThreadInfo cannot be changed. + // In the meantime, we record a marker that could be used in the frontend. + std::string text("Thread "); + text += std::to_string(profiler_current_thread_id()); + text += " \""; + text += thread->Info()->Name(); + text += "\" attempted to re-register as \""; + text += aName; + text += "\""; + BASE_PROFILER_MARKER_TEXT("profiler_register_thread again", OTHER_Profiling, + MarkerThreadId::MainThread(), text); + + return &thread->RacyRegisteredThread().ProfilingStack(); + } + + void* stackTop = GetStackTop(aGuessStackTop); + return locked_register_thread(lock, aName, stackTop); +} + +void profiler_unregister_thread() { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. 
+ return; + } + + PSAutoLock lock; + + RegisteredThread* registeredThread = FindCurrentThreadRegisteredThread(lock); + MOZ_RELEASE_ASSERT(registeredThread == + TLSRegisteredThread::RegisteredThread(lock)); + if (registeredThread) { + RefPtr<ThreadInfo> info = registeredThread->Info(); + + DEBUG_LOG("profiler_unregister_thread: %s", info->Name()); + + if (ActivePS::Exists(lock)) { + ActivePS::UnregisterThread(lock, registeredThread); + } + + // Clear the pointer to the RegisteredThread object that we're about to + // destroy. + TLSRegisteredThread::SetRegisteredThread(lock, nullptr); + + // Remove the thread from the list of registered threads. This deletes the + // registeredThread object. + CorePS::RemoveRegisteredThread(lock, registeredThread); + } else { + LOG("profiler_unregister_thread() - thread %d already unregistered", + profiler_current_thread_id()); + // We cannot record a marker on this thread because it was already + // unregistered. Send it to the main thread (unless this *is* already the + // main thread, which has been unregistered); this may be useful to catch + // mismatched register/unregister pairs in Firefox. + if (int tid = profiler_current_thread_id(); + tid != profiler_main_thread_id()) { + BASE_PROFILER_MARKER_TEXT("profiler_unregister_thread again", + OTHER_Profiling, MarkerThreadId::MainThread(), + std::to_string(profiler_current_thread_id())); + } + // There are two ways FindCurrentThreadRegisteredThread() might have failed. + // + // - TLSRegisteredThread::Init() failed in locked_register_thread(). + // + // - We've already called profiler_unregister_thread() for this thread. + // (Whether or not it should, this does happen in practice.) + // + // Either way, TLSRegisteredThread should be empty. 
+ MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock)); + } +} + +void profiler_register_page(uint64_t aBrowsingContextID, + uint64_t aInnerWindowID, const std::string& aUrl, + uint64_t aEmbedderInnerWindowID) { + DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")", + aBrowsingContextID, aInnerWindowID, aUrl.c_str(), + aEmbedderInnerWindowID); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + // When a Browsing context is first loaded, the first url loaded in it will be + // about:blank. Because of that, this call keeps the first non-about:blank + // registration of window and discards the previous one. + RefPtr<PageInformation> pageInfo = new PageInformation( + aBrowsingContextID, aInnerWindowID, aUrl, aEmbedderInnerWindowID); + CorePS::AppendRegisteredPage(lock, std::move(pageInfo)); + + // After appending the given page to CorePS, look for the expired + // pages and remove them if there are any. + if (ActivePS::Exists(lock)) { + ActivePS::DiscardExpiredPages(lock); + } +} + +void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. + return; + } + + PSAutoLock lock; + + // During unregistration, if the profiler is active, we have to keep the + // page information since there may be some markers associated with the given + // page. But if the profiler is not active, we have no reason to keep the + // page information here because there can't be any marker associated with it. + if (ActivePS::Exists(lock)) { + ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID); + } else { + CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID); + } +} + +void profiler_clear_all_pages() { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. 
+ return; + } + + { + PSAutoLock lock; + CorePS::ClearRegisteredPages(lock); + if (ActivePS::Exists(lock)) { + ActivePS::ClearUnregisteredPages(lock); + } + } +} + +void profiler_thread_sleep() { + // This function runs both on and off the main thread. + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + if (!racyRegisteredThread) { + return; + } + + racyRegisteredThread->SetSleeping(); +} + +void profiler_thread_wake() { + // This function runs both on and off the main thread. + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + if (!racyRegisteredThread) { + return; + } + + racyRegisteredThread->SetAwake(); +} + +bool detail::IsThreadBeingProfiled() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + const RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + return racyRegisteredThread && racyRegisteredThread->IsBeingProfiled(); +} + +bool profiler_thread_is_sleeping() { + MOZ_RELEASE_ASSERT(profiler_is_main_thread()); + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + if (!racyRegisteredThread) { + return false; + } + return racyRegisteredThread->IsSleeping(); +} + +double profiler_time() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime(); + return delta.ToMilliseconds(); +} + +bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + RegisteredThread* registeredThread = + TLSRegisteredThread::RegisteredThread(lock); + if (!registeredThread) { + MOZ_ASSERT(registeredThread); + return false; + } + + ProfileBuffer profileBuffer(aChunkedBuffer); + + Registers regs; +#if 
defined(HAVE_NATIVE_UNWIND) + regs.SyncPopulate(); +#else + regs.Clear(); +#endif + + DoSyncSample(lock, *registeredThread, TimeStamp::NowUnfuzzed(), regs, + profileBuffer); + + return true; +} + +UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // Quick is-active check before allocating a buffer. + if (!profiler_is_active()) { + return nullptr; + } + + auto buffer = MakeUnique<ProfileChunkedBuffer>( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + MakeUnique<ProfileBufferChunkManagerSingle>( + ProfileBufferChunkManager::scExpectedMaximumStackSize)); + + if (!profiler_capture_backtrace_into(*buffer)) { + return nullptr; + } + + return buffer; +} + +UniqueProfilerBacktrace profiler_get_backtrace() { + UniquePtr<ProfileChunkedBuffer> buffer = profiler_capture_backtrace(); + + if (!buffer) { + return nullptr; + } + + return UniqueProfilerBacktrace( + new ProfilerBacktrace("SyncProfile", std::move(buffer))); +} + +void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) { + delete aBacktrace; +} + +bool profiler_is_locked_on_current_thread() { + // This function is used to help users avoid calling `profiler_...` functions + // when the profiler may already have a lock in place, which would prevent a + // 2nd recursive lock (resulting in a crash or a never-ending wait). + // So we must return `true` for any of: + // - The main profiler mutex, used by most functions, and/or + // - The buffer mutex, used directly in some functions without locking the + // main mutex, e.g., marker-related functions. + return PSAutoLock::IsLockedOnCurrentThread() || + CorePS::CoreBuffer().IsThreadSafeAndLockedOnCurrentThread(); +} + +// This is a simplified version of profiler_add_marker that can be easily passed +// into the JS engine. 
+void profiler_add_js_marker(const char* aMarkerName, const char* aMarkerText) { + BASE_PROFILER_MARKER_TEXT( + ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, {}, + ProfilerString8View::WrapNullTerminatedString(aMarkerText)); +} + +// NOTE: aCollector's methods will be called while the target thread is paused. +// Doing things in those methods like allocating -- which may try to claim +// locks -- is a surefire way to deadlock. +void profiler_suspend_and_sample_thread(int aThreadId, uint32_t aFeatures, + ProfilerStackCollector& aCollector, + bool aSampleNative /* = true */) { + // Lock the profiler mutex + PSAutoLock lock; + + const Vector<UniquePtr<RegisteredThread>>& registeredThreads = + CorePS::RegisteredThreads(lock); + for (auto& thread : registeredThreads) { + RefPtr<ThreadInfo> info = thread->Info(); + RegisteredThread& registeredThread = *thread.get(); + + if (info->ThreadId() == aThreadId) { + if (info->IsMainThread()) { + aCollector.SetIsMainThread(); + } + + // Allocate the space for the native stack + NativeStack nativeStack; + + // Suspend, sample, and then resume the target thread. + Sampler sampler(lock); + TimeStamp now = TimeStamp::NowUnfuzzed(); + sampler.SuspendAndSampleAndResumeThread( + lock, registeredThread, now, + [&](const Registers& aRegs, const TimeStamp& aNow) { + // The target thread is now suspended. Collect a native + // backtrace, and call the callback. + bool isSynchronous = false; +#if defined(HAVE_FASTINIT_NATIVE_UNWIND) + if (aSampleNative) { + // We can only use FramePointerStackWalk or MozStackWalk from + // suspend_and_sample_thread as other stackwalking methods may not be + // initialized. 
+# if defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(lock, registeredThread, aRegs, + nativeStack); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(lock, registeredThread, aRegs, + nativeStack); +# else +# error "Invalid configuration" +# endif + + MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs, + nativeStack, aCollector); + } else +#endif + { + MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs, + nativeStack, aCollector); + + if (ProfilerFeature::HasLeaf(aFeatures)) { + aCollector.CollectNativeLeafAddr((void*)aRegs.mPC); + } + } + }); + + // NOTE: Make sure to disable the sampler before it is destroyed, in case + // the profiler is running at the same time. + sampler.Disable(lock); + break; + } + } +} + +// END externally visible functions +//////////////////////////////////////////////////////////////////////// + +} // namespace baseprofiler +} // namespace mozilla diff --git a/mozglue/baseprofiler/core/platform.h b/mozglue/baseprofiler/core/platform.h new file mode 100644 index 0000000000..1913a0def6 --- /dev/null +++ b/mozglue/baseprofiler/core/platform.h @@ -0,0 +1,132 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#ifndef TOOLS_PLATFORM_H_ +#define TOOLS_PLATFORM_H_ + +#include "PlatformMacros.h" + +#include "BaseProfiler.h" + +#include "mozilla/Logging.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" + +#include <functional> +#include <stdint.h> +#include <string> + +namespace mozilla { +namespace baseprofiler { +bool LogTest(int aLevelToTest); +void PrintToConsole(const char* aFmt, ...) MOZ_FORMAT_PRINTF(1, 2); +} // namespace baseprofiler +} // namespace mozilla + +// These are for MOZ_BASE_PROFILER_LOGGING and above. It's the default logging +// level for the profiler, and should be used sparingly. +#define LOG_TEST ::mozilla::baseprofiler::LogTest(3) +#define LOG(arg, ...) \ + do { \ + if (LOG_TEST) { \ + ::mozilla::baseprofiler::PrintToConsole( \ + "[I %d/%d] " arg "\n", profiler_current_process_id(), \ + profiler_current_thread_id(), ##__VA_ARGS__); \ + } \ + } while (0) + +// These are for MOZ_BASE_PROFILER_DEBUG_LOGGING. It should be used for logging +// that is somewhat more verbose than LOG. +#define DEBUG_LOG_TEST ::mozilla::baseprofiler::LogTest(4) +#define DEBUG_LOG(arg, ...) 
\ + do { \ + if (DEBUG_LOG_TEST) { \ + ::mozilla::baseprofiler::PrintToConsole( \ + "[D %d/%d] " arg "\n", profiler_current_process_id(), \ + profiler_current_thread_id(), ##__VA_ARGS__); \ + } \ + } while (0) + +// These are for MOZ_BASE_PROFILER_VERBOSE_LOGGING. It should be used for +// logging that is somewhat more verbose than DEBUG_LOG. +#define VERBOSE_LOG_TEST ::mozilla::baseprofiler::LogTest(5) +#define VERBOSE_LOG(arg, ...) \ + do { \ + if (VERBOSE_LOG_TEST) { \ + ::mozilla::baseprofiler::PrintToConsole( \ + "[V %d/%d] " arg "\n", profiler_current_process_id(), \ + profiler_current_thread_id(), ##__VA_ARGS__); \ + } \ + } while (0) + +namespace mozilla { + +class JSONWriter; + +namespace baseprofiler { + +typedef uint8_t* Address; + +class PlatformData; + +// We can't new/delete the type safely without defining it +// (-Wdelete-incomplete). Use these to hide the details from clients. +struct PlatformDataDestructor { + void operator()(PlatformData*); +}; + +typedef UniquePtr<PlatformData, PlatformDataDestructor> UniquePlatformData; +UniquePlatformData AllocPlatformData(int aThreadId); + +// Convert the array of strings to a bitfield. +uint32_t ParseFeaturesFromStringArray(const char** aFeatures, + uint32_t aFeatureCount, + bool aIsStartup = false); + +void profiler_get_profile_json_into_lazily_allocated_buffer( + const std::function<char*(size_t)>& aAllocator, double aSinceTime, + bool aIsShuttingDown); + +// Flags to conveniently track various JS instrumentations. +enum class JSInstrumentationFlags { + StackSampling = 0x1, + TraceLogging = 0x2, + Allocations = 0x4, +}; + +// Record an exit profile from a child process. +void profiler_received_exit_profile(const std::string& aExitProfile); + +// Extract all received exit profiles that have not yet expired (i.e., they +// still intersect with this process' buffer range). 
+Vector<std::string> profiler_move_exit_profiles(); + +} // namespace baseprofiler +} // namespace mozilla + +#endif /* ndef TOOLS_PLATFORM_H_ */ diff --git a/mozglue/baseprofiler/core/shared-libraries-linux.cc b/mozglue/baseprofiler/core/shared-libraries-linux.cc new file mode 100644 index 0000000000..c38e72378a --- /dev/null +++ b/mozglue/baseprofiler/core/shared-libraries-linux.cc @@ -0,0 +1,835 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "BaseProfilerSharedLibraries.h" + +#define PATH_MAX_TOSTRING(x) #x +#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x) +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <fstream> +#include "platform.h" +#include "mozilla/Sprintf.h" +#include "mozilla/Unused.h" + +#include <algorithm> +#include <arpa/inet.h> +#include <dlfcn.h> +#include <elf.h> +#include <fcntl.h> +#if defined(GP_OS_linux) || defined(GP_OS_android) +# include <features.h> +#endif +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <vector> + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include <link.h> // dl_phdr_info, ElfW() +#else +# error "Unexpected configuration" +#endif + +#if defined(GP_OS_android) +extern "C" MOZ_EXPORT __attribute__((weak)) int dl_iterate_phdr( + int (*callback)(struct dl_phdr_info* info, size_t size, void* data), + void* data); +#endif + +#if defined(GP_OS_freebsd) && !defined(ElfW) +# define ElfW(type) Elf_##type +#endif + +// ---------------------------------------------------------------------------- +// Starting imports from toolkit/crashreporter/google-breakpad/, as needed by +// this file when moved to 
mozglue. + +// Imported from +// toolkit/crashreporter/google-breakpad/src/common/memory_range.h. +// A lightweight wrapper with a pointer and a length to encapsulate a contiguous +// range of memory. It provides helper methods for checked access of a subrange +// of the memory. Its implementation does not allocate memory or call into +// libc functions, and is thus safer to use in a crashed environment. +class MemoryRange { + public: + MemoryRange() : data_(NULL), length_(0) {} + + MemoryRange(const void* data, size_t length) { Set(data, length); } + + // Returns true if this memory range contains no data. + bool IsEmpty() const { + // Set() guarantees that |length_| is zero if |data_| is NULL. + return length_ == 0; + } + + // Resets to an empty range. + void Reset() { + data_ = NULL; + length_ = 0; + } + + // Sets this memory range to point to |data| and its length to |length|. + void Set(const void* data, size_t length) { + data_ = reinterpret_cast<const uint8_t*>(data); + // Always set |length_| to zero if |data_| is NULL. + length_ = data ? length : 0; + } + + // Returns true if this range covers a subrange of |sub_length| bytes + // at |sub_offset| bytes of this memory range, or false otherwise. + bool Covers(size_t sub_offset, size_t sub_length) const { + // The following checks verify that: + // 1. sub_offset is within [ 0 .. length_ - 1 ] + // 2. sub_offset + sub_length is within + // [ sub_offset .. length_ ] + return sub_offset < length_ && sub_offset + sub_length >= sub_offset && + sub_offset + sub_length <= length_; + } + + // Returns a raw data pointer to a subrange of |sub_length| bytes at + // |sub_offset| bytes of this memory range, or NULL if the subrange + // is out of bounds. + const void* GetData(size_t sub_offset, size_t sub_length) const { + return Covers(sub_offset, sub_length) ? 
(data_ + sub_offset) : NULL; + } + + // Same as the two-argument version of GetData() but uses sizeof(DataType) + // as the subrange length and returns an |DataType| pointer for convenience. + template <typename DataType> + const DataType* GetData(size_t sub_offset) const { + return reinterpret_cast<const DataType*>( + GetData(sub_offset, sizeof(DataType))); + } + + // Returns a raw pointer to the |element_index|-th element of an array + // of elements of length |element_size| starting at |sub_offset| bytes + // of this memory range, or NULL if the element is out of bounds. + const void* GetArrayElement(size_t element_offset, size_t element_size, + unsigned element_index) const { + size_t sub_offset = element_offset + element_index * element_size; + return GetData(sub_offset, element_size); + } + + // Same as the three-argument version of GetArrayElement() but deduces + // the element size using sizeof(ElementType) and returns an |ElementType| + // pointer for convenience. + template <typename ElementType> + const ElementType* GetArrayElement(size_t element_offset, + unsigned element_index) const { + return reinterpret_cast<const ElementType*>( + GetArrayElement(element_offset, sizeof(ElementType), element_index)); + } + + // Returns a subrange of |sub_length| bytes at |sub_offset| bytes of + // this memory range, or an empty range if the subrange is out of bounds. + MemoryRange Subrange(size_t sub_offset, size_t sub_length) const { + return Covers(sub_offset, sub_length) + ? MemoryRange(data_ + sub_offset, sub_length) + : MemoryRange(); + } + + // Returns a pointer to the beginning of this memory range. + const uint8_t* data() const { return data_; } + + // Returns the length, in bytes, of this memory range. + size_t length() const { return length_; } + + private: + // Pointer to the beginning of this memory range. + const uint8_t* data_; + + // Length, in bytes, of this memory range. 
+ size_t length_; +}; + +// Imported from +// toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.h +// and inlined .cc. +// A utility class for mapping a file into memory for read-only access of the +// file content. Its implementation avoids calling into libc functions by +// directly making system calls for open, close, mmap, and munmap. +class MemoryMappedFile { + public: + MemoryMappedFile() {} + + // Constructor that calls Map() to map a file at |path| into memory. + // If Map() fails, the object behaves as if it is default constructed. + MemoryMappedFile(const char* path, size_t offset) { Map(path, offset); } + + MemoryMappedFile(const MemoryMappedFile&) = delete; + MemoryMappedFile& operator=(const MemoryMappedFile&) = delete; + + // Release any live mapping on destruction; Unmap() is a no-op when nothing + // is mapped, and copying is deleted so a mapping is never released twice. + ~MemoryMappedFile() { Unmap(); } + + // Maps a file at |path| into memory, which can then be accessed via + // content() as a MemoryRange object or via data(), and returns true on + // success. Mapping an empty file will succeed but with data() and size() + // returning NULL and 0, respectively. An existing mapping is unmapped + // before a new mapping is created. + bool Map(const char* path, size_t offset) { + Unmap(); + + int fd = open(path, O_RDONLY, 0); + if (fd == -1) { + return false; + } + +#if defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__mips__) && _MIPS_SIM == _ABI64) || \ + !(defined(GP_OS_linux) || defined(GP_OS_android)) + + struct stat st; + if (fstat(fd, &st) == -1 || st.st_size < 0) { +#else + struct stat64 st; + if (fstat64(fd, &st) == -1 || st.st_size < 0) { +#endif + close(fd); + return false; + } + + // Strangely file size can be negative, but we check above that it is not. + size_t file_len = static_cast<size_t>(st.st_size); + // If the file does not extend beyond the offset, simply use an empty + // MemoryRange and return true. Don't bother to call mmap() + // even though mmap() can handle an empty file on some platforms. 
+ if (offset >= file_len) { + close(fd); + return true; + } + + // Map only the bytes from |offset| to the end of the file, so the mapping + // length matches |content_| and Unmap() releases the entire mapping. + void* data = + mmap(NULL, file_len - offset, PROT_READ, MAP_PRIVATE, fd, offset); + close(fd); + if (data == MAP_FAILED) { + return false; + } + + content_.Set(data, file_len - offset); + return true; + } + + // Unmaps the memory for the mapped file. It's a no-op if no file is + // mapped. + void Unmap() { + if (content_.data()) { + munmap(const_cast<uint8_t*>(content_.data()), content_.length()); + content_.Set(NULL, 0); + } + } + + // Returns a MemoryRange object that covers the memory for the mapped + // file. The MemoryRange object is empty if no file is mapped. + const MemoryRange& content() const { return content_; } + + // Returns a pointer to the beginning of the memory for the mapped file, + // or NULL if no file is mapped or the mapped file is empty. + const void* data() const { return content_.data(); } + + // Returns the size in bytes of the mapped file, or zero if no file + // is mapped. + size_t size() const { return content_.length(); } + + private: + // Mapped file content as a MemoryRange object. + MemoryRange content_; +}; + +// Imported from +// toolkit/crashreporter/google-breakpad/src/common/linux/file_id.h and inlined +// .cc. +// GNU binutils' ld defaults to 'sha1', which is 160 bits == 20 bytes, +// so this is enough to fit that, which most binaries will use. +// This is just a sensible default for vectors so most callers can get away with +// stack allocation. +static const size_t kDefaultBuildIdSize = 20; + +// Used in a few places for backwards-compatibility. +typedef struct { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} MDGUID; /* GUID */ + +const size_t kMDGUIDSize = sizeof(MDGUID); + +class FileID { + public: + explicit FileID(const char* path) : path_(path) {} + ~FileID() {} + + // Load the identifier for the elf file path specified in the constructor into + // |identifier|. 
+ // + // The current implementation will look for a .note.gnu.build-id + // section and use that as the file id, otherwise it falls back to + // XORing the first 4096 bytes of the .text section to generate an identifier. + bool ElfFileIdentifier(std::vector<uint8_t>& identifier) { + MemoryMappedFile mapped_file(path_.c_str(), 0); + if (!mapped_file.data()) // Should probably check if size >= ElfW(Ehdr)? + return false; + + return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier); + } + + // Traits classes so consumers can write templatized code to deal + // with specific ELF bits. + struct ElfClass32 { + typedef Elf32_Addr Addr; + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Nhdr Nhdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Half Half; + typedef Elf32_Off Off; + typedef Elf32_Sym Sym; + typedef Elf32_Word Word; + + static const int kClass = ELFCLASS32; + static const uint16_t kMachine = EM_386; + static const size_t kAddrSize = sizeof(Elf32_Addr); + static constexpr const char* kMachineName = "x86"; + }; + + struct ElfClass64 { + typedef Elf64_Addr Addr; + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Nhdr Nhdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Half Half; + typedef Elf64_Off Off; + typedef Elf64_Sym Sym; + typedef Elf64_Word Word; + + static const int kClass = ELFCLASS64; + static const uint16_t kMachine = EM_X86_64; + static const size_t kAddrSize = sizeof(Elf64_Addr); + static constexpr const char* kMachineName = "x86_64"; + }; + + // Internal helper method, exposed for convenience for callers + // that already have more info. 
+ template <typename ElfClass> + static const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, const char* section_names, + const char* names_end, int nsection) { + if (!name || !sections || nsection == 0) { + return NULL; + } + + int name_len = strlen(name); + if (name_len == 0) return NULL; + + for (int i = 0; i < nsection; ++i) { + const char* section_name = section_names + sections[i].sh_name; + if (sections[i].sh_type == section_type && + names_end - section_name >= name_len + 1 && + strcmp(name, section_name) == 0) { + return sections + i; + } + } + return NULL; + } + + struct ElfSegment { + const void* start; + size_t size; + }; + + // Convert an offset from an Elf header into a pointer to the mapped + // address in the current process. Takes an extra template parameter + // to specify the return type to avoid having to dynamic_cast the + // result. + template <typename ElfClass, typename T> + static const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset) { + return reinterpret_cast<const T*>(reinterpret_cast<uintptr_t>(elf_header) + + offset); + } + +// ELF note name and desc are 32-bits word padded. 
+#define NOTE_PADDING(a) ((a + 3) & ~3) + + static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length, + std::vector<uint8_t>& identifier) { + static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr), + "Elf32_Nhdr and Elf64_Nhdr should be the same"); + typedef typename ElfClass32::Nhdr Nhdr; + + const void* section_end = reinterpret_cast<const char*>(section) + length; + const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section); + while (reinterpret_cast<const void*>(note_header) < section_end) { + if (note_header->n_type == NT_GNU_BUILD_ID) break; + note_header = reinterpret_cast<const Nhdr*>( + reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) + + NOTE_PADDING(note_header->n_namesz) + + NOTE_PADDING(note_header->n_descsz)); + } + if (reinterpret_cast<const void*>(note_header) >= section_end || + note_header->n_descsz == 0) { + return false; + } + + const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) + + sizeof(Nhdr) + + NOTE_PADDING(note_header->n_namesz); + identifier.insert(identifier.end(), build_id, + build_id + note_header->n_descsz); + + return true; + } + + template <typename ElfClass> + static bool FindElfClassSection(const char* elf_base, + const char* section_name, + typename ElfClass::Word section_type, + const void** section_start, + size_t* section_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Shdr Shdr; + + if (!elf_base || !section_start || !section_size) { + return false; + } + + if (strncmp(elf_base, ELFMAG, SELFMAG) != 0) { + return false; + } + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + if (elf_header->e_ident[EI_CLASS] != ElfClass::kClass) { + return false; + } + + const Shdr* sections = + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); + const char* names_end 
= names + section_names->sh_size; + + const Shdr* section = + FindElfSectionByName<ElfClass>(section_name, section_type, sections, + names, names_end, elf_header->e_shnum); + + if (section != NULL && section->sh_size > 0) { + *section_start = elf_base + section->sh_offset; + *section_size = section->sh_size; + } + + return true; + } + + template <typename ElfClass> + static bool FindElfClassSegment(const char* elf_base, + typename ElfClass::Word segment_type, + std::vector<ElfSegment>* segments) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Phdr Phdr; + + if (!elf_base || !segments) { + return false; + } + + if (strncmp(elf_base, ELFMAG, SELFMAG) != 0) { + return false; + } + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + if (elf_header->e_ident[EI_CLASS] != ElfClass::kClass) { + return false; + } + + const Phdr* phdrs = + GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff); + + for (int i = 0; i < elf_header->e_phnum; ++i) { + if (phdrs[i].p_type == segment_type) { + ElfSegment seg = {}; + seg.start = elf_base + phdrs[i].p_offset; + seg.size = phdrs[i].p_filesz; + segments->push_back(seg); + } + } + + return true; + } + + static bool IsValidElf(const void* elf_base) { + return strncmp(reinterpret_cast<const char*>(elf_base), ELFMAG, SELFMAG) == + 0; + } + + static int ElfClass(const void* elf_base) { + const ElfW(Ehdr)* elf_header = + reinterpret_cast<const ElfW(Ehdr)*>(elf_base); + + return elf_header->e_ident[EI_CLASS]; + } + + static bool FindElfSection(const void* elf_mapped_base, + const char* section_name, uint32_t section_type, + const void** section_start, size_t* section_size) { + if (!elf_mapped_base || !section_start || !section_size) { + return false; + } + + *section_start = NULL; + *section_size = 0; + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + const char* elf_base = static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + return 
FindElfClassSection<ElfClass32>(elf_base, section_name, + section_type, section_start, + section_size) && + *section_start != NULL; + } else if (cls == ELFCLASS64) { + return FindElfClassSection<ElfClass64>(elf_base, section_name, + section_type, section_start, + section_size) && + *section_start != NULL; + } + + return false; + } + + static bool FindElfSegments(const void* elf_mapped_base, + uint32_t segment_type, + std::vector<ElfSegment>* segments) { + if (!elf_mapped_base || !segments) { + return false; + } + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + const char* elf_base = static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + return FindElfClassSegment<ElfClass32>(elf_base, segment_type, segments); + } else if (cls == ELFCLASS64) { + return FindElfClassSegment<ElfClass64>(elf_base, segment_type, segments); + } + + return false; + } + + // Attempt to locate a .note.gnu.build-id section in an ELF binary + // and copy it into |identifier|. + static bool FindElfBuildIDNote(const void* elf_mapped_base, + std::vector<uint8_t>& identifier) { + // lld normally creates 2 PT_NOTEs, gold normally creates 1. + std::vector<ElfSegment> segs; + if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) { + for (ElfSegment& seg : segs) { + if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) { + return true; + } + } + } + + void* note_section; + size_t note_size; + if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, + (const void**)¬e_section, ¬e_size)) { + return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); + } + + return false; + } + + // Attempt to locate the .text section of an ELF binary and generate + // a simple hash by XORing the first page worth of bytes into |identifier|. 
+ static bool HashElfTextSection(const void* elf_mapped_base, + std::vector<uint8_t>& identifier) { + identifier.resize(kMDGUIDSize); + + void* text_section; + size_t text_size; + if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, + (const void**)&text_section, &text_size) || + text_size == 0) { + return false; + } + + // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this + // function backwards-compatible. + memset(&identifier[0], 0, kMDGUIDSize); + const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section); + const uint8_t* ptr_end = + ptr + std::min(text_size, static_cast<size_t>(4096)); + while (ptr < ptr_end) { + for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i]; + ptr += kMDGUIDSize; + } + return true; + } + + // Load the identifier for the elf file mapped into memory at |base| into + // |identifier|. Return false if the identifier could not be created for this + // file. + static bool ElfFileIdentifierFromMappedFile( + const void* base, std::vector<uint8_t>& identifier) { + // Look for a build id note first. + if (FindElfBuildIDNote(base, identifier)) return true; + + // Fall back on hashing the first page of the text section. + return HashElfTextSection(base, identifier); + } + + // These three functions are not ever called in an unsafe context, so it's OK + // to allocate memory and use libc. + static std::string bytes_to_hex_string(const uint8_t* bytes, size_t count) { + std::string result; + for (unsigned int idx = 0; idx < count; ++idx) { + char buf[3]; + SprintfLiteral(buf, "%02X", bytes[idx]); + result.append(buf); + } + return result; + } + + // Convert the |identifier| data to a string. The string will + // be formatted as a UUID in all uppercase without dashes. + // (e.g., 22F065BBFC9C49F780FE26A7CEBD7BCE). 
+ static std::string ConvertIdentifierToUUIDString( + const std::vector<uint8_t>& identifier) { + uint8_t identifier_swapped[kMDGUIDSize] = {0}; + + // Endian-ness swap to match dump processor expectation. + memcpy(identifier_swapped, &identifier[0], + std::min(kMDGUIDSize, identifier.size())); + uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); + *data1 = htonl(*data1); + uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); + *data2 = htons(*data2); + uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); + *data3 = htons(*data3); + + return bytes_to_hex_string(identifier_swapped, kMDGUIDSize); + } + + // Convert the entire |identifier| data to a hex string. + static std::string ConvertIdentifierToString( + const std::vector<uint8_t>& identifier) { + return bytes_to_hex_string(&identifier[0], identifier.size()); + } + + private: + // Storage for the path specified + std::string path_; +}; + +// End of imports from toolkit/crashreporter/google-breakpad/. +// ---------------------------------------------------------------------------- + +struct LoadedLibraryInfo { + LoadedLibraryInfo(const char* aName, unsigned long aBaseAddress, + unsigned long aFirstMappingStart, + unsigned long aLastMappingEnd) + : mName(aName), + mBaseAddress(aBaseAddress), + mFirstMappingStart(aFirstMappingStart), + mLastMappingEnd(aLastMappingEnd) {} + + std::string mName; + unsigned long mBaseAddress; + unsigned long mFirstMappingStart; + unsigned long mLastMappingEnd; +}; + +static std::string IDtoUUIDString(const std::vector<uint8_t>& aIdentifier) { + std::string uuid = FileID::ConvertIdentifierToUUIDString(aIdentifier); + // This is '0', not '\0', since it represents the breakpad id age. 
+ uuid += '0'; + return uuid; +} + +// Get the breakpad Id for the binary file pointed by bin_name +static std::string getId(const char* bin_name) { + std::vector<uint8_t> identifier; + identifier.reserve(kDefaultBuildIdSize); + + FileID file_id(bin_name); + if (file_id.ElfFileIdentifier(identifier)) { + return IDtoUUIDString(identifier); + } + + return {}; +} + +static SharedLibrary SharedLibraryAtPath(const char* path, + unsigned long libStart, + unsigned long libEnd, + unsigned long offset = 0) { + std::string pathStr = path; + + size_t pos = pathStr.rfind('\\'); + std::string nameStr = + (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr; + + return SharedLibrary(libStart, libEnd, offset, getId(path), nameStr, pathStr, + nameStr, pathStr, std::string{}, ""); +} + +static int dl_iterate_callback(struct dl_phdr_info* dl_info, size_t size, + void* data) { + auto libInfoList = reinterpret_cast<std::vector<LoadedLibraryInfo>*>(data); + + if (dl_info->dlpi_phnum <= 0) return 0; + + unsigned long baseAddress = dl_info->dlpi_addr; + unsigned long firstMappingStart = -1; + unsigned long lastMappingEnd = 0; + + for (size_t i = 0; i < dl_info->dlpi_phnum; i++) { + if (dl_info->dlpi_phdr[i].p_type != PT_LOAD) { + continue; + } + unsigned long start = dl_info->dlpi_addr + dl_info->dlpi_phdr[i].p_vaddr; + unsigned long end = start + dl_info->dlpi_phdr[i].p_memsz; + if (start < firstMappingStart) { + firstMappingStart = start; + } + if (end > lastMappingEnd) { + lastMappingEnd = end; + } + } + + libInfoList->push_back(LoadedLibraryInfo(dl_info->dlpi_name, baseAddress, + firstMappingStart, lastMappingEnd)); + + return 0; +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + SharedLibraryInfo info; + +#if defined(GP_OS_linux) + // We need to find the name of the executable (exeName, exeNameLen) and the + // address of its executable section (exeExeAddr) in the running image. 
+ char exeName[PATH_MAX]; + memset(exeName, 0, sizeof(exeName)); + + ssize_t exeNameLen = readlink("/proc/self/exe", exeName, sizeof(exeName) - 1); + if (exeNameLen == -1) { + // readlink failed for whatever reason. Note this, but keep going. + exeName[0] = '\0'; + exeNameLen = 0; + // LOG("SharedLibraryInfo::GetInfoForSelf(): readlink failed"); + } else { + // Assert no buffer overflow. + MOZ_RELEASE_ASSERT(exeNameLen >= 0 && + exeNameLen < static_cast<ssize_t>(sizeof(exeName))); + } + + unsigned long exeExeAddr = 0; +#endif + +#if defined(GP_OS_android) + // If dl_iterate_phdr doesn't exist, we give up immediately. + if (!dl_iterate_phdr) { + // On ARM Android, dl_iterate_phdr is provided by the custom linker. + // So if libxul was loaded by the system linker (e.g. as part of + // xpcshell when running tests), it won't be available and we should + // not call it. + return info; + } +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) + // Read info from /proc/self/maps. We ignore most of it. + pid_t pid = mozilla::baseprofiler::profiler_current_process_id(); + char path[PATH_MAX]; + SprintfLiteral(path, "/proc/%d/maps", pid); + std::ifstream maps(path); + std::string line; + while (std::getline(maps, line)) { + int ret; + unsigned long start; + unsigned long end; + char perm[6 + 1] = ""; + unsigned long offset; + char modulePath[PATH_MAX + 1] = ""; + ret = sscanf(line.c_str(), + "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n", + &start, &end, perm, &offset, modulePath); + if (!strchr(perm, 'x')) { + // Ignore non executable entries + continue; + } + if (ret != 5 && ret != 4) { + // LOG("SharedLibraryInfo::GetInfoForSelf(): " + // "reading /proc/self/maps failed"); + continue; + } + +# if defined(GP_OS_linux) + // Try to establish the main executable's load address. 
+ if (exeNameLen > 0 && strcmp(modulePath, exeName) == 0) { + exeExeAddr = start; + } +# elif defined(GP_OS_android) + // Use /proc/pid/maps to get the dalvik-jit section since it has no + // associated phdrs. + if (0 == strcmp(modulePath, "/dev/ashmem/dalvik-jit-code-cache")) { + info.AddSharedLibrary( + SharedLibraryAtPath(modulePath, start, end, offset)); + if (info.GetSize() > 10000) { + // LOG("SharedLibraryInfo::GetInfoForSelf(): " + // "implausibly large number of mappings acquired"); + break; + } + } +# endif + } +#endif + + std::vector<LoadedLibraryInfo> libInfoList; + + // We collect the bulk of the library info using dl_iterate_phdr. + dl_iterate_phdr(dl_iterate_callback, &libInfoList); + + for (const auto& libInfo : libInfoList) { + info.AddSharedLibrary( + SharedLibraryAtPath(libInfo.mName.c_str(), libInfo.mFirstMappingStart, + libInfo.mLastMappingEnd, + libInfo.mFirstMappingStart - libInfo.mBaseAddress)); + } + +#if defined(GP_OS_linux) + // Make another pass over the information we just harvested from + // dl_iterate_phdr. If we see a nameless object mapped at what we earlier + // established to be the main executable's load address, attach the + // executable's name to that entry. + for (size_t i = 0; i < info.GetSize(); i++) { + SharedLibrary& lib = info.GetMutableEntry(i); + if (lib.GetStart() <= exeExeAddr && exeExeAddr <= lib.GetEnd() && + lib.GetDebugPath().empty()) { + lib = SharedLibraryAtPath(exeName, lib.GetStart(), lib.GetEnd(), + lib.GetOffset()); + + // We only expect to see one such entry. 
+ break; + } + } +#endif + + return info; +} + +void SharedLibraryInfo::Initialize() { /* do nothing */ +} diff --git a/mozglue/baseprofiler/core/shared-libraries-macos.cc b/mozglue/baseprofiler/core/shared-libraries-macos.cc new file mode 100644 index 0000000000..13e66f9f26 --- /dev/null +++ b/mozglue/baseprofiler/core/shared-libraries-macos.cc @@ -0,0 +1,182 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "BaseProfilerSharedLibraries.h" + +#include "platform.h" + +#include "mozilla/Unused.h" +#include <AvailabilityMacros.h> + +#include <dlfcn.h> +#include <mach-o/arch.h> +#include <mach-o/dyld_images.h> +#include <mach-o/dyld.h> +#include <mach-o/loader.h> +#include <mach/mach_init.h> +#include <mach/mach_traps.h> +#include <mach/task_info.h> +#include <mach/task.h> +#include <sstream> +#include <stdlib.h> +#include <string.h> +#include <vector> + +// Architecture specific abstraction. 
+#if defined(GP_ARCH_x86) +typedef mach_header platform_mach_header; +typedef segment_command mach_segment_command_type; +# define MACHO_MAGIC_NUMBER MH_MAGIC +# define CMD_SEGMENT LC_SEGMENT +# define seg_size uint32_t +#else +typedef mach_header_64 platform_mach_header; +typedef segment_command_64 mach_segment_command_type; +# define MACHO_MAGIC_NUMBER MH_MAGIC_64 +# define CMD_SEGMENT LC_SEGMENT_64 +# define seg_size uint64_t +#endif + +struct NativeSharedLibrary { + const platform_mach_header* header; + std::string path; +}; +static std::vector<NativeSharedLibrary>* sSharedLibrariesList = nullptr; + +class MOZ_RAII SharedLibrariesLock { + public: + SharedLibrariesLock() { sSharedLibrariesMutex.Lock(); } + + ~SharedLibrariesLock() { sSharedLibrariesMutex.Unlock(); } + + SharedLibrariesLock(const SharedLibrariesLock&) = delete; + void operator=(const SharedLibrariesLock&) = delete; + + private: + static mozilla::baseprofiler::detail::BaseProfilerMutex sSharedLibrariesMutex; +}; + +mozilla::baseprofiler::detail::BaseProfilerMutex + SharedLibrariesLock::sSharedLibrariesMutex; + +static void SharedLibraryAddImage(const struct mach_header* mh, + intptr_t vmaddr_slide) { + // NOTE: Presumably for backwards-compatibility reasons, this function accepts + // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast + // it to the right type here. + auto header = reinterpret_cast<const platform_mach_header*>(mh); + + Dl_info info; + if (!dladdr(header, &info)) { + return; + } + + SharedLibrariesLock lock; + if (!sSharedLibrariesList) { + return; + } + + NativeSharedLibrary lib = {header, info.dli_fname}; + sSharedLibrariesList->push_back(lib); +} + +static void SharedLibraryRemoveImage(const struct mach_header* mh, + intptr_t vmaddr_slide) { + // NOTE: Presumably for backwards-compatibility reasons, this function accepts + // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast + // it to the right type here. 
+ auto header = reinterpret_cast<const platform_mach_header*>(mh); + + SharedLibrariesLock lock; + if (!sSharedLibrariesList) { + return; + } + + uint32_t count = sSharedLibrariesList->size(); + for (uint32_t i = 0; i < count; ++i) { + if ((*sSharedLibrariesList)[i].header == header) { + sSharedLibrariesList->erase(sSharedLibrariesList->begin() + i); + return; + } + } +} + +void SharedLibraryInfo::Initialize() { + // NOTE: We intentionally leak this memory here. We're allocating dynamically + // in order to avoid static initializers. + sSharedLibrariesList = new std::vector<NativeSharedLibrary>(); + + _dyld_register_func_for_add_image(SharedLibraryAddImage); + _dyld_register_func_for_remove_image(SharedLibraryRemoveImage); +} + +static void addSharedLibrary(const platform_mach_header* header, + const char* path, SharedLibraryInfo& info) { + const struct load_command* cmd = + reinterpret_cast<const struct load_command*>(header + 1); + + seg_size size = 0; + unsigned long long start = reinterpret_cast<unsigned long long>(header); + // Find the cmd segment in the macho image. It will contain the offset we care + // about. 
+ const uint8_t* uuid_bytes = nullptr; + for (unsigned int i = 0; + cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0); + ++i) { + if (cmd->cmd == CMD_SEGMENT) { + const mach_segment_command_type* seg = + reinterpret_cast<const mach_segment_command_type*>(cmd); + + if (!strcmp(seg->segname, "__TEXT")) { + size = seg->vmsize; + } + } else if (cmd->cmd == LC_UUID) { + const uuid_command* ucmd = reinterpret_cast<const uuid_command*>(cmd); + uuid_bytes = ucmd->uuid; + } + + cmd = reinterpret_cast<const struct load_command*>( + reinterpret_cast<const char*>(cmd) + cmd->cmdsize); + } + + std::string uuid; + if (uuid_bytes != nullptr) { + static constexpr char digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + for (int i = 0; i < 15; ++i) { + uint8_t byte = uuid_bytes[i]; + uuid += digits[byte >> 4]; + uuid += digits[byte & 0xFu]; + } + // breakpad id age. + uuid += '0'; + } + + std::string pathStr = path; + + size_t pos = pathStr.rfind('\\'); + std::string nameStr = + (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr; + + const NXArchInfo* archInfo = + NXGetArchInfoFromCpuType(header->cputype, header->cpusubtype); + + info.AddSharedLibrary(SharedLibrary(start, start + size, 0, uuid, nameStr, + pathStr, nameStr, pathStr, std::string{}, + archInfo ? archInfo->name : "")); +} + +// Translate the statically stored sSharedLibrariesList information into a +// SharedLibraryInfo object. 
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + SharedLibrariesLock lock; + SharedLibraryInfo sharedLibraryInfo; + + for (auto& info : *sSharedLibrariesList) { + addSharedLibrary(info.header, info.path.c_str(), sharedLibraryInfo); + } + + return sharedLibraryInfo; +} diff --git a/mozglue/baseprofiler/core/shared-libraries-win32.cc b/mozglue/baseprofiler/core/shared-libraries-win32.cc new file mode 100644 index 0000000000..5bf7408193 --- /dev/null +++ b/mozglue/baseprofiler/core/shared-libraries-win32.cc @@ -0,0 +1,277 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <windows.h> +#include <dbghelp.h> +#include <sstream> +#include <psapi.h> + +#include "BaseProfilerSharedLibraries.h" + +#include "mozilla/glue/WindowsUnicode.h" +#include "mozilla/Unused.h" +#include "mozilla/WindowsVersion.h" + +#include <cctype> +#include <string> + +#define CV_SIGNATURE 0x53445352 // 'SDSR' + +struct CodeViewRecord70 { + uint32_t signature; + GUID pdbSignature; + uint32_t pdbAge; + // A UTF-8 string, according to + // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/locator.cpp#L785 + char pdbFileName[1]; +}; + +static constexpr char digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + +static void AppendHex(const unsigned char* aBegin, const unsigned char* aEnd, + std::string& aOut) { + for (const unsigned char* p = aBegin; p < aEnd; ++p) { + unsigned char c = *p; + aOut += digits[c >> 4]; + aOut += digits[c & 0xFu]; + } +} + +static constexpr bool WITH_PADDING = true; +static constexpr bool WITHOUT_PADDING = false; +template <typename T> +static void AppendHex(T aValue, std::string& aOut, bool aWithPadding) { + for (int i = 
sizeof(T) * 2 - 1; i >= 0; --i) { + unsigned nibble = (aValue >> (i * 4)) & 0xFu; + // If no-padding requested, skip starting zeroes -- unless we're on the very + // last nibble (so we don't output a blank). + if (!aWithPadding && i != 0) { + if (nibble == 0) { + // Requested no padding, skip zeroes. + continue; + } + // Requested no padding, got first non-zero, pretend we now want padding + // so we don't skip zeroes anymore. + aWithPadding = true; + } + aOut += digits[nibble]; + } +} + +static bool GetPdbInfo(uintptr_t aStart, std::string& aSignature, + uint32_t& aAge, char** aPdbName) { + if (!aStart) { + return false; + } + + PIMAGE_DOS_HEADER dosHeader = reinterpret_cast<PIMAGE_DOS_HEADER>(aStart); + if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) { + return false; + } + + PIMAGE_NT_HEADERS ntHeaders = + reinterpret_cast<PIMAGE_NT_HEADERS>(aStart + dosHeader->e_lfanew); + if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) { + return false; + } + + uint32_t relativeVirtualAddress = + ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG] + .VirtualAddress; + if (!relativeVirtualAddress) { + return false; + } + + PIMAGE_DEBUG_DIRECTORY debugDirectory = + reinterpret_cast<PIMAGE_DEBUG_DIRECTORY>(aStart + relativeVirtualAddress); + if (!debugDirectory || debugDirectory->Type != IMAGE_DEBUG_TYPE_CODEVIEW) { + return false; + } + + CodeViewRecord70* debugInfo = reinterpret_cast<CodeViewRecord70*>( + aStart + debugDirectory->AddressOfRawData); + if (!debugInfo || debugInfo->signature != CV_SIGNATURE) { + return false; + } + + aAge = debugInfo->pdbAge; + GUID& pdbSignature = debugInfo->pdbSignature; + AppendHex(pdbSignature.Data1, aSignature, WITH_PADDING); + AppendHex(pdbSignature.Data2, aSignature, WITH_PADDING); + AppendHex(pdbSignature.Data3, aSignature, WITH_PADDING); + AppendHex(reinterpret_cast<const unsigned char*>(&pdbSignature.Data4), + reinterpret_cast<const unsigned char*>(&pdbSignature.Data4) + + sizeof(pdbSignature.Data4), + aSignature); + + // 
The PDB file name could be different from module filename, so report both + // e.g. The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb + *aPdbName = debugInfo->pdbFileName; + + return true; +} + +static std::string GetVersion(wchar_t* dllPath) { + DWORD infoSize = GetFileVersionInfoSizeW(dllPath, nullptr); + if (infoSize == 0) { + return {}; + } + + mozilla::UniquePtr<unsigned char[]> infoData = + mozilla::MakeUnique<unsigned char[]>(infoSize); + if (!GetFileVersionInfoW(dllPath, 0, infoSize, infoData.get())) { + return {}; + } + + VS_FIXEDFILEINFO* vInfo; + UINT vInfoLen; + if (!VerQueryValueW(infoData.get(), L"\\", (LPVOID*)&vInfo, &vInfoLen)) { + return {}; + } + if (!vInfo) { + return {}; + } + + return std::to_string(vInfo->dwFileVersionMS >> 16) + '.' + + std::to_string(vInfo->dwFileVersionMS & 0xFFFF) + '.' + + std::to_string(vInfo->dwFileVersionLS >> 16) + '.' + + std::to_string(vInfo->dwFileVersionLS & 0xFFFF); +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + SharedLibraryInfo sharedLibraryInfo; + + HANDLE hProcess = GetCurrentProcess(); + mozilla::UniquePtr<HMODULE[]> hMods; + size_t modulesNum = 0; + if (hProcess != NULL) { + DWORD modulesSize; + if (!EnumProcessModules(hProcess, nullptr, 0, &modulesSize)) { + return sharedLibraryInfo; + } + modulesNum = modulesSize / sizeof(HMODULE); + hMods = mozilla::MakeUnique<HMODULE[]>(modulesNum); + if (!EnumProcessModules(hProcess, hMods.get(), modulesNum * sizeof(HMODULE), + &modulesSize)) { + return sharedLibraryInfo; + } + // The list may have shrunk between calls + if (modulesSize / sizeof(HMODULE) < modulesNum) { + modulesNum = modulesSize / sizeof(HMODULE); + } + } + + for (unsigned int i = 0; i < modulesNum; i++) { + wchar_t modulePath[MAX_PATH + 1]; + if (!GetModuleFileNameExW(hProcess, hMods[i], modulePath, + std::size(modulePath))) { + continue; + } + mozilla::UniquePtr<char[]> utf8ModulePath( + mozilla::glue::WideToUTF8(modulePath)); + if (!utf8ModulePath) { + continue; + } + + 
MODULEINFO module = {0}; + if (!GetModuleInformation(hProcess, hMods[i], &module, + sizeof(MODULEINFO))) { + continue; + } + + std::string modulePathStr(utf8ModulePath.get()); + size_t pos = modulePathStr.find_last_of("\\/"); + std::string moduleNameStr = (pos != std::string::npos) + ? modulePathStr.substr(pos + 1) + : modulePathStr; + + // Hackaround for Bug 1607574. Nvidia's shim driver nvd3d9wrap[x].dll + // detours LoadLibraryExW when it's loaded and the detour function causes + // AV when the code tries to access data pointing to an address within + // unloaded nvinit[x].dll. + // The crashing code is executed when a given parameter is "detoured.dll" + // and OS version is older than 6.2. We hit that crash at the following + // call to LoadLibraryEx even if we specify LOAD_LIBRARY_AS_DATAFILE. + // We work around it by skipping LoadLibraryEx, and add a library info with + // a dummy breakpad id instead. +#if !defined(_M_ARM64) +# if defined(_M_AMD64) + LPCWSTR kNvidiaShimDriver = L"nvd3d9wrapx.dll"; + LPCWSTR kNvidiaInitDriver = L"nvinitx.dll"; +# elif defined(_M_IX86) + LPCWSTR kNvidiaShimDriver = L"nvd3d9wrap.dll"; + LPCWSTR kNvidiaInitDriver = L"nvinit.dll"; +# endif + constexpr std::string_view detoured_dll = "detoured.dll"; + if (std::equal(moduleNameStr.cbegin(), moduleNameStr.cend(), + detoured_dll.cbegin(), detoured_dll.cend(), + [](char aModuleChar, char aDetouredChar) { + return std::tolower(aModuleChar) == aDetouredChar; + }) && + !mozilla::IsWin8OrLater() && ::GetModuleHandleW(kNvidiaShimDriver) && + !::GetModuleHandleW(kNvidiaInitDriver)) { + const std::string pdbNameStr = "detoured.pdb"; + SharedLibrary shlib((uintptr_t)module.lpBaseOfDll, + (uintptr_t)module.lpBaseOfDll + module.SizeOfImage, + 0, // DLLs are always mapped at offset 0 on Windows + "000000000000000000000000000000000", moduleNameStr, + modulePathStr, pdbNameStr, pdbNameStr, "", ""); + sharedLibraryInfo.AddSharedLibrary(shlib); + continue; + } +#endif // !defined(_M_ARM64) + + 
std::string breakpadId; + // Load the module again to make sure that its handle will remain + // valid as we attempt to read the PDB information from it. We load the + // DLL as a datafile so that if the module actually gets unloaded between + // the call to EnumProcessModules and the following LoadLibraryEx, we + // don't end up running the now newly loaded module's DllMain function. If + // the module is already loaded, LoadLibraryEx just increments its + // refcount. + // + // Note that because of the race condition above, merely loading the DLL + // again is not safe enough, therefore we also need to make sure that we + // can read the memory mapped at the base address before we can safely + // proceed to actually access those pages. + HMODULE handleLock = + LoadLibraryExW(modulePath, NULL, LOAD_LIBRARY_AS_DATAFILE); + MEMORY_BASIC_INFORMATION vmemInfo = {0}; + std::string pdbSig; + uint32_t pdbAge; + std::string pdbPathStr; + std::string pdbNameStr; + char* pdbName = nullptr; + if (handleLock && + sizeof(vmemInfo) == + VirtualQuery(module.lpBaseOfDll, &vmemInfo, sizeof(vmemInfo)) && + vmemInfo.State == MEM_COMMIT && + GetPdbInfo((uintptr_t)module.lpBaseOfDll, pdbSig, pdbAge, &pdbName)) { + MOZ_ASSERT(breakpadId.empty()); + breakpadId += pdbSig; + AppendHex(pdbAge, breakpadId, WITHOUT_PADDING); + + pdbPathStr = pdbName; + size_t pos = pdbPathStr.find_last_of("\\/"); + pdbNameStr = + (pos != std::string::npos) ? 
pdbPathStr.substr(pos + 1) : pdbPathStr; + } + + SharedLibrary shlib((uintptr_t)module.lpBaseOfDll, + (uintptr_t)module.lpBaseOfDll + module.SizeOfImage, + 0, // DLLs are always mapped at offset 0 on Windows + breakpadId, moduleNameStr, modulePathStr, pdbNameStr, + pdbPathStr, GetVersion(modulePath), ""); + sharedLibraryInfo.AddSharedLibrary(shlib); + + FreeLibrary(handleLock); // ok to free null handles + } + + return sharedLibraryInfo; +} + +void SharedLibraryInfo::Initialize() { /* do nothing */ +} diff --git a/mozglue/baseprofiler/core/vtune/ittnotify.h b/mozglue/baseprofiler/core/vtune/ittnotify.h new file mode 100644 index 0000000000..04adf9eb5e --- /dev/null +++ b/mozglue/baseprofiler/core/vtune/ittnotify.h @@ -0,0 +1,4127 @@ +// clang-format off + +/* <copyright> + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + Contact Information: + http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/ + + BSD LICENSE + + Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +</copyright> */ +#ifndef _ITTNOTIFY_H_ +# define _ITTNOTIFY_H_ + +/** +@file +@brief Public User API functions and types +@mainpage + +The ITT API is used to annotate a user's program with additional information +that can be used by correctness and performance tools. The user inserts +calls in their program. Those calls generate information that is collected +at runtime, and used by Intel(R) Threading Tools. + +@section API Concepts +The following general concepts are used throughout the API. 
+ +@subsection Unicode Support +Many API functions take character string arguments. On Windows, there +are two versions of each such function. The function name is suffixed +by W if Unicode support is enabled, and by A otherwise. Any API function +that takes a character string argument adheres to this convention. + +@subsection Conditional Compilation +Many users prefer having an option to modify ITT API code when linking it +inside their runtimes. ITT API header file provides a mechanism to replace +ITT API function names inside your code with empty strings. To do this, +define the macros INTEL_NO_ITTNOTIFY_API during compilation and remove the +static library from the linker script. + +@subsection Domains +[see domains] +Domains provide a way to separate notification for different modules or +libraries in a program. Domains are specified by dotted character strings, +e.g. TBB.Internal.Control. + +A mechanism (to be specified) is provided to enable and disable +domains. By default, all domains are enabled. +@subsection Named Entities and Instances +Named entities (frames, regions, tasks, and markers) communicate +information about the program to the analysis tools. A named entity often +refers to a section of program code, or to some set of logical concepts +that the programmer wants to group together. + +Named entities relate to the programmer's static view of the program. When +the program actually executes, many instances of a given named entity +may be created. + +The API annotations denote instances of named entities. The actual +named entities are displayed using the analysis tools. In other words, +the named entities come into existence when instances are created. + +Instances of named entities may have instance identifiers (IDs). Some +API calls use instance identifiers to create relationships between +different instances of named entities. Other API calls associate data +with instances of named entities. 
+ +Some named entities must always have instance IDs. In particular, regions +and frames always have IDs. Task and markers need IDs only if the ID is +needed in another API call (such as adding a relation or metadata). + +The lifetime of instance IDs is distinct from the lifetime of +instances. This allows various relationships to be specified separate +from the actual execution of instances. This flexibility comes at the +expense of extra API calls. + +The same ID may not be reused for different instances, unless a previous +[ref] __itt_id_destroy call for that ID has been issued. +*/ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS_FREEBSD +# define ITT_OS_FREEBSD 4 +#endif /* ITT_OS_FREEBSD */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# elif defined( __FreeBSD__ ) +# define ITT_OS ITT_OS_FREEBSD +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM_FREEBSD +# define ITT_PLATFORM_FREEBSD 4 +#endif /* ITT_PLATFORM_FREEBSD */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# elif ITT_OS==ITT_OS_FREEBSD +# define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include 
<stddef.h> +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <tchar.h> +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdint.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef ITTAPI_CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define ITTAPI_CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define ITTAPI_CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define ITTAPI_CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* ITTAPI_CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL + +/* TODO: Temporary for compatibility! */ +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. 
+ */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# include "vtune/legacy/ittnotify.h" +#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ + +/** @cond exclude_from_documentation */ +/* Helper macro for joining tokens */ +#define ITT_JOIN_AUX(p,n) p##n +#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) + +#ifdef ITT_MAJOR +#undef ITT_MAJOR +#endif +#ifdef ITT_MINOR +#undef ITT_MINOR +#endif +#define ITT_MAJOR 3 +#define ITT_MINOR 0 + +/* Standard versioning of a token with major and minor version numbers */ +#define ITT_VERSIONIZE(x) \ + ITT_JOIN(x, \ + ITT_JOIN(_, \ + ITT_JOIN(ITT_MAJOR, \ + ITT_JOIN(_, ITT_MINOR)))) + +#ifndef INTEL_ITTNOTIFY_PREFIX +# define INTEL_ITTNOTIFY_PREFIX __itt_ +#endif /* INTEL_ITTNOTIFY_PREFIX */ +#ifndef INTEL_ITTNOTIFY_POSTFIX +# define INTEL_ITTNOTIFY_POSTFIX _ptr_ +#endif /* INTEL_ITTNOTIFY_POSTFIX */ + +#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) + +#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) +#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) + +#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? 
(void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) + +#ifdef ITT_STUB +#undef ITT_STUB +#endif +#ifdef ITT_STUBV +#undef ITT_STUBV +#endif +#define ITT_STUBV(api,type,name,args) \ + typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ + extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); +#define ITT_STUB ITT_STUBV +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup public Public API + * @{ + * @} + */ + +/** + * @defgroup control Collection Control + * @ingroup public + * General behavior: application continues to run, but no profiling information is being collected + * + * Pausing occurs not only for the current thread but for all process as well as spawned processes + * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: + * - Does not analyze or report errors that involve memory access. + * - Other errors are reported as usual. Pausing data collection in + * Intel(R) Parallel Inspector and Intel(R) Inspector XE + * only pauses tracing and analyzing memory access. + * It does not pause tracing or analyzing threading APIs. + * . + * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Does continue to record when new threads are started. + * . + * - Other effects: + * - Possible reduction of runtime overhead. + * . 
+ * @{ + */ +/** @brief Pause collection */ +void ITTAPI __itt_pause(void); +/** @brief Resume collection */ +void ITTAPI __itt_resume(void); +/** @brief Detach collection */ +void ITTAPI __itt_detach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, resume, (void)) +ITT_STUBV(ITTAPI, void, detach, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#define __itt_detach ITTNOTIFY_VOID(detach) +#define __itt_detach_ptr ITTNOTIFY_NAME(detach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_pause() +#define __itt_pause_ptr 0 +#define __itt_resume() +#define __itt_resume_ptr 0 +#define __itt_detach() +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_pause_ptr 0 +#define __itt_resume_ptr 0 +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} control group */ +/** @endcond */ + +/** + * @defgroup threads Threads + * @ingroup public + * Give names to threads + * @{ + */ +/** + * @brief Sets thread name of calling thread + * @param[in] name - name of thread + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_thread_set_nameA(const char *name); +void ITTAPI __itt_thread_set_nameW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_thread_set_name __itt_thread_set_nameW +# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr +#else /* UNICODE */ +# define __itt_thread_set_name __itt_thread_set_nameA +# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_thread_set_name(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ 
+#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name)) +ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA) +#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA) +#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW) +#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name) +#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA(name) +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW(name) +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name(name) +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @brief Mark current thread as ignored from this point on, for the duration of its existence. 
+ */ +void ITTAPI __itt_thread_ignore(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, thread_ignore, (void)) +#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore) +#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thread_ignore() +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} threads group */ + +/** + * @defgroup suppress Error suppression + * @ingroup public + * General behavior: application continues to run, but errors are suppressed + * + * @{ + */ + +/*****************************************************************//** + * @name group of functions used for error suppression in correctness tools + *********************************************************************/ +/** @{ */ +/** + * @hideinitializer + * @brief possible value for suppression mask + */ +#define __itt_suppress_all_errors 0x7fffffff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from threading analysis) + */ +#define __itt_suppress_threading_errors 0x000000ff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from memory analysis) + */ +#define __itt_suppress_memory_errors 0x0000ff00 + +/** + * @brief Start suppressing errors identified in mask on this thread + */ +void ITTAPI __itt_suppress_push(unsigned int mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) +#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) +#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_push(mask) +#define __itt_suppress_push_ptr 0 
+#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effects of the matching call to __itt_suppress_push + */ +void ITTAPI __itt_suppress_pop(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_pop, (void)) +#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop) +#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_pop() +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum __itt_suppress_mode { + __itt_unsuppress_range, + __itt_suppress_range +} __itt_suppress_mode_t; + +/** + * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask + */ +void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range) +#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_mark_range(mask) +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effect of a matching call to __itt_suppress_mark_range. 
If no matching + * call is found, nothing is changed. + */ +void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range) +#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The empty stub must accept the same four arguments as the real function, otherwise calls fail to preprocess when the API is compiled out. */ +#define __itt_suppress_clear_range(mode, mask, address, size) +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ +/** @} suppress group */ + +/** + * @defgroup sync Synchronization + * @ingroup public + * Indicate user-written synchronization code + * @{ + */ +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_barrier 1 + +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_mutex 2 + +/** +@brief Name a synchronization object +@param[in] addr Handle for the synchronization object. You should +use a real address to uniquely identify the synchronization object. +@param[in] objtype null-terminated object type string. If NULL is +passed, the name will be "User Synchronization". +@param[in] objname null-terminated object name string. If NULL, +no name will be assigned to the object. 
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex] + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute); +void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_create __itt_sync_createW +# define __itt_sync_create_ptr __itt_sync_createW_ptr +#else /* UNICODE */ +# define __itt_sync_create __itt_sync_createA +# define __itt_sync_create_ptr __itt_sync_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute)) +ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA) +#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA) +#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW) +#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create ITTNOTIFY_VOID(sync_create) +#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA(addr, objtype, objname, attribute) +#define 
__itt_sync_createA_ptr 0 +#define __itt_sync_createW(addr, objtype, objname, attribute) +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create(addr, objtype, objname, attribute) +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA_ptr 0 +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** +@brief Rename a synchronization object + +You can use the rename call to assign or reassign a name to a given +synchronization object. +@param[in] addr handle for the synchronization object. +@param[in] name null-terminated object name string. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_renameA(void *addr, const char *name); +void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_rename __itt_sync_renameW +# define __itt_sync_rename_ptr __itt_sync_renameW_ptr +#else /* UNICODE */ +# define __itt_sync_rename __itt_sync_renameA +# define __itt_sync_rename_ptr __itt_sync_renameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_rename(void *addr, const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name)) +ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN 
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA) +#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA) +#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW) +#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename) +#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA(addr, name) +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW(addr, name) +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename(addr, name) +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + @brief Destroy a synchronization object. + @param addr Handle for the synchronization object. 
+ */ +void ITTAPI __itt_sync_destroy(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr)) +#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy) +#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_destroy(addr) +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/*****************************************************************//** + * @name group of functions is used for performance measurement tools + *********************************************************************/ +/** @{ */ +/** + * @brief Enter spin loop on user-defined sync object + */ +void ITTAPI __itt_sync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr)) +#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare) +#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_prepare(addr) +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Quit spin loop without acquiring spin object + */ +void ITTAPI __itt_sync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr)) +#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel) +#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_cancel(addr) +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* 
INTEL_NO_MACRO_BODY */ +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Successful spin loop completion (sync object acquired) + */ +void ITTAPI __itt_sync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr)) +#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired) +#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_acquired(addr) +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Start sync object releasing code. Is called before the lock release call. + */ +void ITTAPI __itt_sync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr)) +#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing) +#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_releasing(addr) +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** @} sync group */ + +/**************************************************************//** + * @name group of functions is used for correctness checking tools + ******************************************************************/ +/** @{ */ +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. 
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_prepare(void* addr); + */ +void ITTAPI __itt_fsync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr)) +#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare) +#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_prepare(addr) +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. 
+ * @see void __itt_sync_cancel(void *addr); + */ +void ITTAPI __itt_fsync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr)) +#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel) +#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_cancel(addr) +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_acquired(void *addr); + */ +void ITTAPI __itt_fsync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr)) +#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired) +#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_acquired(addr) +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. 
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_releasing(void* addr); + */ +void ITTAPI __itt_fsync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr)) +#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing) +#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_releasing(addr) +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** + * @defgroup model Modeling by Intel(R) Parallel Advisor + * @ingroup public + * This is the subset of itt used for modeling by Intel(R) Parallel Advisor. + * This API is called ONLY using annotate.h, by "Annotation" macros + * the user places in their sources during the parallelism modeling steps. + * + * site_begin/end and task_begin/end take the address of handle variables, + * which are writeable by the API. Handles must be 0 initialized prior + * to the first call to begin, or may cause a run-time failure. + * The handles are initialized in a multi-thread safe way by the API if + * the handle is 0. The commonly expected idiom is one static handle to + * identify a site or task. If a site or task of the same name has already + * been started during this collection, the same handle MAY be returned, + * but is not required to be - it is unspecified if data merging is done + * based on name. These routines also take an instance variable. 
Like + * the lexical instance, these must be 0 initialized. Unlike the lexical + * instance, this is used to track a single dynamic instance. + * + * API used by the Intel(R) Parallel Advisor to describe potential concurrency + * and related activities. User-added source annotations expand to calls + * to these procedures to enable modeling of a hypothetical concurrent + * execution serially. + * @{ + */ +#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL) + +typedef void* __itt_model_site; /*!< @brief handle for lexical site */ +typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */ +typedef void* __itt_model_task; /*!< @brief handle for lexical task */ +typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum { + __itt_model_disable_observation, + __itt_model_disable_collection +} __itt_model_disable; + +#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */ + +/** + * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support. + * + * site_begin/end model a potential concurrency site. + * site instances may be recursively nested with themselves. + * site_end exits the most recently started but unended site for the current + * thread. The handle passed to end may be used to validate structure. + * Instances of a site encountered on different threads concurrently + * are considered completely distinct. If the site name for two different + * lexical sites match, it is unspecified whether they are treated as the + * same or different for data presentation. 
+ */ +void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_site_beginW(const wchar_t *name); +#endif +void ITTAPI __itt_model_site_beginA(const char *name); +void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen); +void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance); +void ITTAPI __itt_model_site_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen)) +ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance)) +ITT_STUBV(ITTAPI, void, model_site_end_2, (void)) +#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin) +#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW) +#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW) +#endif +#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA) +#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA) +#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL) +#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL) +#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end) +#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end) +#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2) +#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2) 
+#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_site_begin(site, instance, name) +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW(name) +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA(name) +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL(name, siteNameLen) +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end(site, instance) +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2() +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support + * + * task_begin/end model a potential task, which is contained within the most + * closely enclosing dynamic site. task_end exits the most recently started + * but unended task. The handle passed to end may be used to validate + * structure. It is unspecified if bad dynamic nesting is detected. If it + * is, it should be encoded in the resulting data collection. The collector + * should not fail due to construct nesting issues, nor attempt to directly + * indicate the problem. 
+ */ +void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_task_beginW(const wchar_t *name); +void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); +#endif +void ITTAPI __itt_model_task_beginA(const char *name); +void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_iteration_taskA(const char *name); +void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); +void ITTAPI __itt_model_task_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) +ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) +#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) +#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) +#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) +#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) +#define __itt_model_iteration_taskW_ptr 
ITTNOTIFY_NAME(model_iteration_taskW) +#endif +#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) +#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) +#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) +#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) +#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) +#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) +#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) +#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) +#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) +#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end) +#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) +#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* API compiled out: every task entry point expands to nothing and its _ptr to 0. */ +#define __itt_model_task_begin(task, instance, name) +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW(name) +#define __itt_model_task_beginW_ptr 0 +/* iteration_taskW is declared for Windows above, so it must be stubbed out here too; otherwise a disabled build still emits a real external call. */ +#define __itt_model_iteration_taskW(name) +#define __itt_model_iteration_taskW_ptr 0 +#endif +#define __itt_model_task_beginA(name) +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL(name, taskNameLen) +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA(name) +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL(name, taskNameLen) +#define __itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end(task, instance) +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2() +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW_ptr 0 +#define __itt_model_iteration_taskW_ptr 0 +#endif +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA_ptr 0 +#define 
__itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support + * + * lock_acquire/release model a potential lock for both lockset and + * performance modeling. Each unique address is modeled as a separate + * lock, with invalid addresses being valid lock IDs. Specifically: + * no storage is accessed by the API at the specified address - it is only + * used for lock identification. Lock acquires may be self-nested and are + * unlocked by a corresponding number of releases. + * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing, + * but may not have identical semantics.) + */ +void ITTAPI __itt_model_lock_acquire(void *lock); +void ITTAPI __itt_model_lock_acquire_2(void *lock); +void ITTAPI __itt_model_lock_release(void *lock); +void ITTAPI __itt_model_lock_release_2(void *lock); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) +#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) +#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) +#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) +#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) +#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) +#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) +#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) +#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_lock_acquire(lock) +#define 
__itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2(lock) +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release(lock) +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2(lock) +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support + * + * record_allocation/deallocation describe user-defined memory allocator + * behavior, which may be required for correctness modeling to understand + * when storage is not expected to be actually reused across threads. + */ +void ITTAPI __itt_model_record_allocation (void *addr, size_t size); +void ITTAPI __itt_model_record_deallocation(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size)) +ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr)) +#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation) +#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation) +#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation) +#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_record_allocation(addr, size) +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation(addr) +#define __itt_model_record_deallocation_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation_ptr 0 +#endif /* 
INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_INDUCTION_USES support + * + * Note particular storage is inductive through the end of the current site + */ +void ITTAPI __itt_model_induction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size)) +#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses) +#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_induction_uses(addr, size) +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_REDUCTION_USES support + * + * Note particular storage is used for reduction through the end + * of the current site + */ +void ITTAPI __itt_model_reduction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size)) +#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses) +#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_reduction_uses(addr, size) +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_OBSERVE_USES support + * + * Have correctness modeling record observations about uses of storage + * through the end of the current site + */ +void ITTAPI __itt_model_observe_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY 
+#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size)) +#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses) +#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_observe_uses(addr, size) +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_CLEAR_USES support + * + * Clear the special handling of a piece of storage related to induction, + * reduction or observe_uses + */ +void ITTAPI __itt_model_clear_uses(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) +#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses) +#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_clear_uses(addr) +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support + * + * disable_push/disable_pop push and pop disabling based on a parameter. + * Disabling observations stops processing of memory references during + * correctness modeling, and all annotations that occur in the disabled + * region. This allows description of code that is expected to be handled + * specially during conversion to parallelism or that is not recognized + * by tools (e.g. some kinds of synchronization operations.) + * This mechanism causes all annotations in the disabled region, other + * than disable_push and disable_pop, to be ignored. 
(For example, this + * might validly be used to disable an entire parallel site and the contained + * tasks and locking in it for data collection purposes.) + * The disable for collection is a more expensive operation, but reduces + * collector overhead significantly. This applies to BOTH correctness data + * collection and performance data collection. For example, a site + * containing a task might only enable data collection for the first 10 + * iterations. Both performance and correctness data should reflect this, + * and the program should run as close to full speed as possible when + * collection is disabled. + */ +void ITTAPI __itt_model_disable_push(__itt_model_disable x); +void ITTAPI __itt_model_disable_pop(void); +void ITTAPI __itt_model_aggregate_task(size_t x); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) +ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) +#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) +#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) +#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) +#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) +#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) +#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_disable_push(x) +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop() +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task(x) +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** 
@endcond */ +/** @} model group */ + +/** + * @defgroup heap Heap + * @ingroup public + * Heap group + * @{ + */ + +typedef void* __itt_heap_function; + +/** + * @brief Create an identification for heap function + * @return non-zero identifier or NULL + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain); +__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_heap_function_create __itt_heap_function_createW +# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr +#else +# define __itt_heap_function_create __itt_heap_function_createA +# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain)) +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA) +#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA) +#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW) +#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create 
ITTNOTIFY_DATA(heap_function_create) +#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation begin occurrence. + */ +void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized)) +#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin) +#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_begin(h, size, initialized) +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation end occurrence. 
+ */ +void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized)) +#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end) +#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_end(h, addr, size, initialized) +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free begin occurrence. + */ +void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin) +#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_begin(h, addr) +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free end occurrence. 
+ */ +void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end) +#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_end(h, addr) +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation begin occurrence. + */ +void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin) +#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_begin(h, addr, new_size, initialized) +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation end occurrence. 
+ */ +void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end) +#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized) +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access begin */ +void ITTAPI __itt_heap_internal_access_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void)) +#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin) +#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_begin() +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access end */ +void ITTAPI __itt_heap_internal_access_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void)) +#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end) +#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end) +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_end() +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth begin */ +void ITTAPI __itt_heap_record_memory_growth_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) +#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) +#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_begin() +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth end */ +void ITTAPI __itt_heap_record_memory_growth_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) +#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) +#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_end() +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Specify the type of heap detection/reporting to modify. + */ +/** + * @hideinitializer + * @brief Report on memory leaks. 
+ */ +#define __itt_heap_leaks 0x00000001 + +/** + * @hideinitializer + * @brief Report on memory growth. + */ +#define __itt_heap_growth 0x00000002 + + +/** @brief heap reset detection */ +void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) +#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) +#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The stub accepts the mask argument so that existing one-argument calls still preprocess when the API is compiled out. */ +#define __itt_heap_reset_detection(reset_mask) +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief report */ +void ITTAPI __itt_heap_record(unsigned int record_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) +#define __itt_heap_record ITTNOTIFY_VOID(heap_record) +#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* Same as above: the stub's arity must match the one-argument function it replaces. */ +#define __itt_heap_record(record_mask) +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} heap group */ +/** @endcond */ +/* ========================================================================== */ + +/** + * @defgroup domains Domains + * @ingroup public + * Domains group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_domain +{ + volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */ + const char* nameA; /*!< Copy of original name in ASCII. 
*/ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_domain* next; +} __itt_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup domains + * @brief Create a domain. + * Create domain using some domain name: the URI naming style is recommended. + * Because the set of domains is expected to be static over the application's + * execution time, there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of + * which thread created the domain. This call is thread-safe. + * @param[in] name name of domain + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_domain* ITTAPI __itt_domain_createA(const char *name); +__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_domain_create __itt_domain_createW +# define __itt_domain_create_ptr __itt_domain_createW_ptr +#else /* UNICODE */ +# define __itt_domain_create __itt_domain_createA +# define __itt_domain_create_ptr __itt_domain_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_domain* ITTAPI __itt_domain_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA) +#define 
__itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA) +#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW) +#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create ITTNOTIFY_DATA(domain_create) +#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA(name) (__itt_domain*)0 +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW(name) (__itt_domain*)0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create(name) (__itt_domain*)0 +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} domains group */ + +/** + * @defgroup ids IDs + * @ingroup public + * IDs group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_id +{ + unsigned long long d1, d2, d3; +} __itt_id; + +#pragma pack(pop) +/** @endcond */ + +/* All-zero ID value. Declared static so that each translation unit including this header gets its own copy; a non-static definition here would be a duplicate external definition when the header is compiled as C in more than one file. */ +static const __itt_id __itt_null = { 0, 0, 0 }; + +/** + * @ingroup ids + * @brief A convenience function is provided to create an ID without domain control. + * @details This is a convenience function to initialize an __itt_id structure. This function + * does not affect the collector runtime in any way. After you make the ID with this + * function, you still must create it with the __itt_id_create function before using the ID + * to identify a named entity. + * @param[in] addr The address of object; high QWORD of the ID value. 
+ * @param[in] extra The extra data to unique identify object; low QWORD of the ID value. + */ + /* Prototype carrying ITT_INLINE_ATTRIBUTE, immediately followed by the definition. */ +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) +{ + __itt_id id = __itt_null; /* start from the null ID value */ + id.d1 = (unsigned long long)((uintptr_t)addr); /* pointer converted through uintptr_t, then to unsigned long long */ + id.d2 = (unsigned long long)extra; /* caller-supplied extra value, stored unchanged */ + id.d3 = (unsigned long long)0; /* Reserved. Must be zero */ + return id; +} + +/** + * @ingroup ids + * @brief Create an instance of an identifier. + * This establishes the beginning of the lifetime of an instance of + * the given ID in the trace. Once this lifetime starts, the ID + * can be used to tag named entity instances in calls such as + * __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * Instance IDs are not domain specific! + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x) +#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create(domain,id) /* expands to nothing when INTEL_NO_ITTNOTIFY_API is defined */ +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup ids + * @brief Destroy an instance of identifier. + * This ends the lifetime of the current instance of the given ID value in the trace. + * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x) +#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_destroy(domain,id) /* expands to nothing when INTEL_NO_ITTNOTIFY_API is defined */ +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} ids group */ + +/** + * @defgroup handles String Handles + * @ingroup public + * String Handles group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_string_handle +{ + const char* strA; /*!< Copy of original string in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* strW; /*!< Copy of original string in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* strW; /*!< Untyped stand-in for strW when neither UNICODE nor _UNICODE is defined. */ +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_string_handle* next; /*!< Pointer to another handle record; presumably chains handles into a list - TODO confirm */ +} __itt_string_handle; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup handles + * @brief Create a string handle. + * Create and return handle value that can be associated with a string. + * Consecutive calls to __itt_string_handle_create with the same name + * return the same value. Because the set of string handles is expected to remain + * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created + * the string handle. This call is thread-safe. + * @param[in] name The input string + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name); +__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_string_handle_create __itt_string_handle_createW +# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr +#else /* UNICODE */ +# define __itt_string_handle_create __itt_string_handle_createA +# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA) +#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA) +#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW) +#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create) +#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA(name) (__itt_string_handle*)0 +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW(name) (__itt_string_handle*)0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create(name) (__itt_string_handle*)0 +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} handles group */ + +/** @cond exclude_from_documentation */ +typedef unsigned long long __itt_timestamp; +/** @endcond */ + +#define __itt_timestamp_none ((__itt_timestamp)-1LL) + +/** @cond exclude_from_gpa_documentation */ + +/** + * @ingroup timestamps + * @brief Return timestamp corresponding to the current moment. + * This returns the timestamp in the format that is the most relevant for the current + * host or platform (RDTSC, QPC, and others). You can use the "<" operator to + * compare __itt_timestamp values. 
+ */ +__itt_timestamp ITTAPI __itt_get_timestamp(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) +#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) +#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_get_timestamp() /* NOTE(review): expands to nothing here, so an expression that uses the call's result (e.g. an assignment from __itt_get_timestamp()) would not compile in this configuration - confirm intended */ +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} timestamps */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @defgroup regions Regions + * @ingroup public + * Regions group + * @{ + */ +/** + * @ingroup regions + * @brief Begin of region instance. + * Successive calls to __itt_region_begin with the same ID are ignored + * until a call to __itt_region_end with the same ID. + * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance. Must not be __itt_null + * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null + * @param[in] name The name of this region + */ +void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup regions + * @brief End of region instance. + * The first call to __itt_region_end with a given ID ends the + * region. Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance + */ +void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id)) +#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z) +#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin) +#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x) +#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_region_begin(d,x,y,z) +#define __itt_region_begin_ptr 0 +#define __itt_region_end(d,x) +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_region_begin_ptr 0 +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} regions group */ + +/** + * @defgroup frames Frames + * @ingroup public + * Frames are similar to regions, but are intended to be easier to use and to implement. + * In particular: + * - Frames always represent periods of elapsed time + * - By default, frames have no nesting relationships + * @{ + */ + +/** + * @ingroup frames + * @brief Begin a frame instance. + * Successive calls to __itt_frame_begin with the + * same ID are ignored until a call to __itt_frame_end with the same ID. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + */ +void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief End a frame instance. + * The first call to __itt_frame_end with a given ID + * ends the frame. 
Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_frame_begin call. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL for current + */ +void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief Submits a frame instance. + * Successive calls to __itt_frame_begin or __itt_frame_submit with the + * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit + * with the same ID. + * Passing special __itt_timestamp_none value as "end" argument means + * take the current timestamp as the end timestamp. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + * @param[in] begin Timestamp of the beginning of the frame + * @param[in] end Timestamp of the end of the frame + */ +void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, + __itt_timestamp begin, __itt_timestamp end); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) +#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) +#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) +#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) +#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) +#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) +#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_frame_begin_v3(domain,id) +#define __itt_frame_begin_v3_ptr 0 +#define 
__itt_frame_end_v3(domain,id) +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3(domain,id,begin,end) +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_frame_begin_v3_ptr 0 +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} frames group */ +/** @endcond */ + +/** + * @defgroup taskgroup Task Group + * @ingroup public + * Task Group + * @{ + */ +/** + * @ingroup task_groups + * @brief Denotes a task_group instance. + * Successive calls to __itt_task_group with the same ID are ignored. + * @param[in] domain The domain for this task_group instance + * @param[in] id The instance ID for this task_group instance. Must not be __itt_null. + * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null. + * @param[in] name The name of this task_group + */ +void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z) +#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_group(d,x,y,z) +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} taskgroup group */ + +/** + * @defgroup tasks Tasks + * @ingroup public + * A task instance represents a piece of work performed by a particular + * thread for a period of time. A call to __itt_task_begin creates a + * task instance. This becomes the current instance for that task on that + * thread. 
A following call to __itt_task_end on the same thread ends the + * instance. There may be multiple simultaneous instances of tasks with the + * same name on different threads. If an ID is specified, the task instance + * receives that ID. Nested tasks are allowed. + * + * Note: The task is defined by the bracketing of __itt_task_begin and + * __itt_task_end on the same thread. If some scheduling mechanism causes + * task switching (the thread executes a different user task) or task + * migration (the user task switches to a different thread) then this breaks + * the notion of current instance. Additional API calls are required to + * deal with that possibility. + * @{ + */ + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The identifier for this task instance (may be 0) + * @param[in] parentid The parent of this task (may be 0) + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup tasks + * @brief End the current task instance. + * @param[in] domain The domain for this task + */ +void ITTAPI __itt_task_end(const __itt_domain *domain); + +/** + * @ingroup tasks + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup tasks + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain)) +ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid)) +#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z) +#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin) +#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z) +#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn) +#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d) +#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end) +#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z) +#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped) +#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x) +#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin(domain,id,parentid,name) +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn(domain,id,parentid,fn) +#define __itt_task_begin_fn_ptr 0 
+#define __itt_task_end(domain) +#define __itt_task_end_ptr 0 +#define __itt_task_begin_overlapped(domain,taskid,parentid,name) +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_end_overlapped(domain,taskid) +#define __itt_task_end_overlapped_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn_ptr 0 +#define __itt_task_end_ptr 0 +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} tasks group */ + + +/** + * @defgroup markers Markers + * Markers represent a single discrete event in time. Markers have a scope, + * described by an enumerated type __itt_scope. Markers are created by + * the API call __itt_marker. A marker instance can be given an ID for use in + * adding metadata. + * @{ + */ + +/** + * @brief Describes the scope of an event object in the trace. + */ +typedef enum +{ + __itt_scope_unknown = 0, + __itt_scope_global, + __itt_scope_track_group, + __itt_scope_track, + __itt_scope_task, + __itt_scope_marker +} __itt_scope; + +/** @cond exclude_from_documentation */ /* Aliases mapping __itt_marker_scope_* names onto __itt_scope enumerators; none is defined for __itt_scope_marker. */ +#define __itt_marker_scope_unknown __itt_scope_unknown +#define __itt_marker_scope_global __itt_scope_global +#define __itt_marker_scope_process __itt_scope_track_group +#define __itt_marker_scope_thread __itt_scope_track +#define __itt_marker_scope_task __itt_scope_task +/** @endcond */ + +/** + * @ingroup markers + * @brief Create a marker instance + * @param[in] domain The domain for this marker + * @param[in] id The instance ID for this marker or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker, (const
__itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z) +#define __itt_marker_ptr ITTNOTIFY_NAME(marker) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker(domain,id,name,scope) +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} markers group */ + +/** + * @defgroup metadata Metadata + * The metadata API is used to attach extra information to named + * entities. Metadata can be attached to an identified named entity by ID, + * or to the current entity (which is always a task). + * + * Conceptually metadata has a type (what kind of metadata), a key (the + * name of the metadata), and a value (the actual data). The encoding of + * the value depends on the type of the metadata. + * + * The type of metadata is specified by an enumerated type __itt_metadata_type. + * @{ + */ + +/** + * @ingroup parameters + * @brief Describes the type of metadata + */ +typedef enum { + __itt_metadata_unknown = 0, + __itt_metadata_u64, /**< Unsigned 64-bit integer */ + __itt_metadata_s64, /**< Signed 64-bit integer */ + __itt_metadata_u32, /**< Unsigned 32-bit integer */ + __itt_metadata_s32, /**< Signed 32-bit integer */ + __itt_metadata_u16, /**< Unsigned 16-bit integer */ + __itt_metadata_s16, /**< Signed 16-bit integer */ + __itt_metadata_float, /**< Signed 32-bit floating-point */ + __itt_metadata_double /**< Signed 64-bit floating-point */ +} __itt_metadata_type; + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. + * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b) +#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add(d,x,y,z,a,b) +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add __itt_metadata_str_addW +# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add __itt_metadata_str_addA +# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) 
ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a) +#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA) +#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a) +#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a) +#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW(d,x,y,z,a) +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void,
metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA) +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} metadata group */ + +/** + * @defgroup relations Relations + * Instances of named entities can be explicitly associated with other + * instances using instance IDs and the relationship API calls. 
+ * + * @{ + */ + +/** + * @ingroup relations + * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation. + * Relations between instances can be added with an API call. The relation + * API uses instance IDs. Relations can be added before or after the actual + * instances are created and persist independently of the instances. This + * is the motivation for having different lifetimes for instance IDs and + * the actual instances. + */ +typedef enum +{ + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ +} __itt_relation; + +/** + * @ingroup relations + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail); + +/** + * @ingroup relations + * @brief Add a relation between two instance identifiers. 
+ * @param[in] domain The domain controlling this call + * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y) +#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current) +#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z) +#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current(d,x,y) +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add(d,x,y,z) +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} relations group */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_info +{ + unsigned long long clock_freq; /*!< Clock domain frequency */ + unsigned long long clock_base; /*!< Clock domain base timestamp */ +} __itt_clock_info; + +#pragma pack(pop) +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_domain +{ + __itt_clock_info 
info; /*!< Most recent clock domain info */ + __itt_get_clock_info_fn fn; /*!< Callback function pointer */ + void* fn_data; /*!< Input argument for the callback function */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_clock_domain* next; +} __itt_clock_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Create a clock domain. + * Certain applications require the capability to trace their application using + * a clock domain different than the CPU, for instance the instrumentation of events + * that occur on a GPU. + * Because the set of domains is expected to be static over the application's execution time, + * there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of which thread created + * the domain. This call is thread-safe. + * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps + * @param[in] fn_data Argument for a callback function; may be NULL + * @return Pointer to the clock domain; the stub macro used when INTEL_NO_ITTNOTIFY_API is defined evaluates to a null pointer + */ +__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data)) +#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create) +#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0 +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Recalculate clock domains frequencies and clock base timestamps. 
+ */ +void ITTAPI __itt_clock_domain_reset(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, clock_domain_reset, (void)) +#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset) +#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_reset() +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Create an instance of identifier. This establishes the beginning of the lifetime of + * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to + * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** + * @ingroup clockdomain + * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the + * given ID value in the trace. Any relationships that are established after this lifetime ends are + * invalid. This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z) +#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex) +#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z) +#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create_ex(domain,clock_domain,timestamp,id) +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id) +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, or __itt_null + * @param[in] parentid The parent of this task, or __itt_null + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup clockdomain + * @brief End the current task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ */ +void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp)) +#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b) +#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex) +#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b) +#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex) +#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y) +#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name) +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn) +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex(domain,clock_domain,timestamp) +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup counters Counters + * @ingroup public + * Counters are user-defined objects with a monotonically increasing + * value. Counter values are 64-bit unsigned integers. 
+ * Counters have names that can be displayed in + * the tools. + * @{ + */ + +/** + * @brief opaque structure for counter identification + */ +/** @cond exclude_from_documentation */ + +typedef struct ___itt_counter* __itt_counter; + +/** + * @brief Create an unsigned 64 bits integer counter with given name/domain + * + * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer + * + * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); +__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create __itt_counter_createW +# define __itt_counter_create_ptr __itt_counter_createW_ptr +#else /* UNICODE */ +# define __itt_counter_create __itt_counter_createA +# define __itt_counter_create_ptr __itt_counter_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) 
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) +#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) +#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create ITTNOTIFY_DATA(counter_create) +#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The stubs evaluate to a null counter so that a call remains usable as an expression (e.g. on the right-hand side of an assignment) when the API is compiled out. */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA(name, domain) (__itt_counter)0 +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW(name, domain) (__itt_counter)0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create(name, domain) (__itt_counter)0 +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Increment the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + * @param[in] id The counter to increment + */ +void ITTAPI __itt_counter_inc(__itt_counter id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) +#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) +#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc(id) +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Increment the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 
bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) +#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_delta(id, value) +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Decrement the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id)) +#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec) +#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec(id) +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Decrement the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta) +#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define 
__itt_counter_dec_delta(id, value) +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup counters + * @brief Increment a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls increment the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Increment a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. 
+ * @param[in] name The name of the counter + * @param[in] delta The amount by which to increment the counter + */ +void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) +#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) +#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) +#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_v3(domain,name) +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3(domain,name,delta) +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + +/** + * @ingroup counters + * @brief Decrement a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls decrement the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Decrement a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. 
+ * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + * @param[in] delta The amount by which to decrement the counter + */ +void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x) +#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3) +#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y) +#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec_v3(domain,name) +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3(domain,name,delta) +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} counters group */ + + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr)) +#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value) +#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value(id, value_ptr) +#define __itt_counter_set_value_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ptr 0 +#endif /* 
INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr)) +#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex) +#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create a typed counter with given name/domain + * + * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type); +__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create_typed __itt_counter_create_typedW +# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr +#else /* UNICODE */ +# define __itt_counter_create_typed __itt_counter_create_typedA +# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI 
__itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type)) +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA) +#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA) +#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW) +#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed) +#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The stubs evaluate to a null counter so that a call remains usable as an expression (e.g. on the right-hand side of an assignment) when the API is compiled out. */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA(name, domain, type) (__itt_counter)0 +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW(name, domain, type) (__itt_counter)0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed(name, domain, type) (__itt_counter)0 +#define __itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define 
__itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or + * __itt_counter_create_typed() + */ +void ITTAPI __itt_counter_destroy(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) +#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) +#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_destroy(id) +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} counters group */ + +/** + * @ingroup markers + * @brief Create a marker instance. + * @param[in] domain The domain for this marker + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The instance ID for this marker, or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b) +#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope) +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail); + +/** + * @ingroup clockdomain + * @brief Add a relation between two instance identifiers. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a) +#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex) +#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b) +#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* No-op stubs: the parameter lists mirror the function signatures declared above. */ +#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail) +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail) +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef enum ___itt_track_group_type +{ + __itt_track_group_type_normal = 0 +} __itt_track_group_type; +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track_group +{ + __itt_string_handle* name; /*!< Name of the track 
group */ + struct ___itt_track* track; /*!< List of child tracks */ + __itt_track_group_type tgtype; /*!< Type of the track group */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track_group* next; +} __itt_track_group; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Placeholder for custom track types. Currently, "normal" custom track + * is the only available track type. + */ +typedef enum ___itt_track_type +{ + __itt_track_type_normal = 0 +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + , __itt_track_type_queue +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ +} __itt_track_type; + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track +{ + __itt_string_handle* name; /*!< Name of the track */ + __itt_track_group* group; /*!< Parent group to a track */ + __itt_track_type ttype; /*!< Type of the track */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track* next; +} __itt_track; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Create logical track group. + */ +__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type)) +#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create) +#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_group_create(name,track_group_type) (__itt_track_group*)0 /* null stub; takes both arguments of the real API so call sites still compile */ +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create logical track.
+ */ +__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type)) +#define __itt_track_create ITTNOTIFY_DATA(track_create) +#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_create(track_group,name,track_type) (__itt_track*)0 +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the logical track. + */ +void ITTAPI __itt_set_track(__itt_track* track); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track)) +#define __itt_set_track ITTNOTIFY_VOID(set_track) +#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_set_track(track) +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/* ========================================================================== */ +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup events Events + * @ingroup public + * Events group + * @{ + */ +/** @brief user event type */ +typedef int __itt_event; + +/** + * @brief Create an event notification + * @note name or namelen being null/name and namelen not matching, user event feature not enabled + * @return non-zero event identifier upon success and __itt_err otherwise + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_event LIBITTAPI __itt_event_createA(const char *name, int 
namelen); +__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_event_create __itt_event_createW +# define __itt_event_create_ptr __itt_event_createW_ptr +#else +# define __itt_event_create __itt_event_createA +# define __itt_event_create_ptr __itt_event_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA ITTNOTIFY_DATA(event_createA) +#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) +#define __itt_event_createW ITTNOTIFY_DATA(event_createW) +#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create ITTNOTIFY_DATA(event_create) +#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA(name, namelen) (__itt_event)0 +#define __itt_event_createA_ptr 0 +#define __itt_event_createW(name, namelen) (__itt_event)0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create(name, namelen) (__itt_event)0 +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* 
INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA_ptr 0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event occurrence. + * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_start(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) +#define __itt_event_start ITTNOTIFY_DATA(event_start) +#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_start(event) (int)0 +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event end occurrence. + * @note It is optional if events do not have durations. 
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_end(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) +#define __itt_event_end ITTNOTIFY_DATA(event_end) +#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_end(event) (int)0 +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} events group */ + + +/** + * @defgroup arrays Arrays Visualizer + * @ingroup public + * Visualize arrays + * @{ + */ + +/** + * @enum __itt_av_data_type + * @brief Defines types of arrays data (for C/C++ intrinsic types) + */ +typedef enum +{ + __itt_e_first = 0, + __itt_e_char = 0, /* 1-byte integer */ + __itt_e_uchar, /* 1-byte unsigned integer */ + __itt_e_int16, /* 2-byte integer */ + __itt_e_uint16, /* 2-byte unsigned integer */ + __itt_e_int32, /* 4-byte integer */ + __itt_e_uint32, /* 4-byte unsigned integer */ + __itt_e_int64, /* 8-byte integer */ + __itt_e_uint64, /* 8-byte unsigned integer */ + __itt_e_float, /* 4-byte floating */ + __itt_e_double, /* 8-byte floating */ + __itt_e_last = __itt_e_double +} __itt_av_data_type; + +/** + * @brief Save an array data to a file. + * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only). + * @param[in] data - pointer to the array data + * @param[in] rank - the rank of the array + * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. 
+ * The size of dimensions must be equal to the rank + * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) + * @param[in] filePath - the file path; the output format is defined by the file extension + * @param[in] columnOrder - defines how the array is stored in the linear memory. + * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C). + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_av_save __itt_av_saveW +# define __itt_av_save_ptr __itt_av_saveW_ptr +#else /* UNICODE */ +# define __itt_av_save __itt_av_saveA +# define __itt_av_save_ptr __itt_av_saveA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) +#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) +#define __itt_av_saveW 
ITTNOTIFY_DATA(av_saveW) +#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save ITTNOTIFY_DATA(av_save) +#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0 /* stub mirrors the six-argument API and yields an int like the real call */ +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0 /* stub mirrors the six-argument API and yields an int like the real call */ +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0 /* stub mirrors the six-argument API and yields an int like the real call */ +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +void ITTAPI __itt_enable_attach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, enable_attach, (void)) +#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) +#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_enable_attach() +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** @} arrays group */ + +/** @endcond */ + +/** + * @brief Module load info + * This API is used to report necessary information in case of module relocation + * @param[in] start_addr - relocated module start address + * @param[in] end_addr - relocated module end address + * @param[in] path - file system path to the module + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr,
const char *path); +void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_module_load __itt_module_loadW +# define __itt_module_load_ptr __itt_module_loadW_ptr +#else /* UNICODE */ +# define __itt_module_load __itt_module_loadA +# define __itt_module_load_ptr __itt_module_loadA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path)) +ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA) +#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA) +#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW) +#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load ITTNOTIFY_VOID(module_load) +#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA(start_addr, end_addr, path) +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW(start_addr, end_addr, path) +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load(start_addr, end_addr, path) +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* 
INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_H_ */ + +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + +#ifndef _ITTNOTIFY_PRIVATE_ +#define _ITTNOTIFY_PRIVATE_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @ingroup clockdomain + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. + * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid)) +#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b) +#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex) +#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z) +#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name) +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid) +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup makrs_internal Marks + * @ingroup internal + * Marks group + * @warning Internal API: + * - It is not shipped to outside of Intel + * - It is delivered to internal Intel teams using e-mail or SVN access only + * @{ + */ +/** @brief user mark type */ +typedef int __itt_mark_type; + +/** + * @brief Creates a user mark type with the specified name using char or Unicode string. 
+ * @param[in] name - name of mark to create + * @return Returns a handle to the mark type + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_mark_type ITTAPI __itt_mark_createA(const char *name); +__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_create __itt_mark_createW +# define __itt_mark_create_ptr __itt_mark_createW_ptr +#else /* UNICODE */ +# define __itt_mark_create __itt_mark_createA +# define __itt_mark_create_ptr __itt_mark_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_mark_type ITTAPI __itt_mark_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA) +#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA) +#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW) +#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create ITTNOTIFY_DATA(mark_create) +#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA(name) (__itt_mark_type)0 +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW(name) (__itt_mark_type)0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create(name) (__itt_mark_type)0 +#define __itt_mark_create_ptr 0 +#endif /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string. + * + * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign. + * - The call is "synchronous" - function returns after mark is actually added to results. + * - This function is useful, for example, to mark different phases of application + * (beginning of the next mark automatically meand end of current region). + * - Can be used together with "continuous" marks (see below) at the same collection session + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @param[in] parameter - string parameter of mark + * @return Returns zero value in case of success, non-zero value otherwise. 
+ */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark __itt_markW +# define __itt_mark_ptr __itt_markW_ptr +#else /* UNICODE */ +# define __itt_mark __itt_markA +# define __itt_mark_ptr __itt_markA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA ITTNOTIFY_DATA(markA) +#define __itt_markA_ptr ITTNOTIFY_NAME(markA) +#define __itt_markW ITTNOTIFY_DATA(markW) +#define __itt_markW_ptr ITTNOTIFY_NAME(markW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark ITTNOTIFY_DATA(mark) +#define __itt_mark_ptr ITTNOTIFY_NAME(mark) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA(mt, parameter) (int)0 +#define __itt_markA_ptr 0 +#define __itt_markW(mt, parameter) (int)0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark(mt, parameter) (int)0 +#define __itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA_ptr 0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define 
__itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create a "discrete" user event type (mark) for process + * rather than for one thread + * @see int __itt_mark(__itt_mark_type mt, const char* parameter); + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_global __itt_mark_globalW +# define __itt_mark_global_ptr __itt_mark_globalW_ptr +#else /* UNICODE */ +# define __itt_mark_global __itt_mark_globalA +# define __itt_mark_global_ptr __itt_mark_globalA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA) +#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA) +#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW) +#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global ITTNOTIFY_DATA(mark_global) +#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define
__itt_mark_globalA(mt, parameter) (int)0 +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW(mt, parameter) (int)0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global(mt, parameter) (int)0 +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates an "end" point for "continuous" mark with specified name. + * + * - Returns zero value in case of success, non-zero value otherwise. + * Also returns non-zero value when preceding "begin" point for the + * mark with the same name failed to be created or not created. + * - The mark of "continuous" type is placed to collection results in + * case of success. It appears in overtime view(s) as a special tick + * sign (different from "discrete" mark) together with line from + * corresponding "begin" mark to "end" mark. + * @note Continuous marks can overlap and be nested inside each other. + * Discrete mark can be nested inside marked region + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @return Returns zero value in case of success, non-zero value otherwise. 
+ */ +int ITTAPI __itt_mark_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt)) +#define __itt_mark_off ITTNOTIFY_DATA(mark_off) +#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_off(mt) (int)0 +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create an "end" point for mark of process + * @see int __itt_mark_off(__itt_mark_type mt); + */ +int ITTAPI __itt_mark_global_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt)) +#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off) +#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_global_off(mt) (int)0 +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} marks group */ + +/** + * @defgroup counters_internal Counters + * @ingroup internal + * Counters group + * @{ + */ + + +/** + * @defgroup stitch Stack Stitching + * @ingroup internal + * Stack Stitching group + * @{ + */ +/** + * @brief opaque structure for counter identification + */ +typedef struct ___itt_caller *__itt_caller; + +/** + * @brief Create the stitch point e.g. a point in call stack where other stacks should be stitched to. + * The function returns a unique identifier which is used to match the cut points with corresponding stitch points. 
+ */ +__itt_caller ITTAPI __itt_stack_caller_create(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) +#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create) +#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_create() (__itt_caller)0 +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create() + */ +void ITTAPI __itt_stack_caller_destroy(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id)) +#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy) +#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_destroy(id) +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Sets the cut point. Stack from each event which occurs after this call will be cut + * at the same stack level the function was called and stitched to the corresponding stitch point.
+ */ +void ITTAPI __itt_stack_callee_enter(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id)) +#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter) +#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_enter(id) +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter(). + */ +void ITTAPI __itt_stack_callee_leave(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id)) +#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave) +#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_leave(id) +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} stitch group */ + +/* ***************************************************************************************************************************** */ + +#include <stdarg.h> + +/** @cond exclude_from_documentation */ +typedef enum __itt_error_code +{ + __itt_error_success = 0, /*!< no error */ + __itt_error_no_module = 1, /*!< module can't be loaded */ + /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ + __itt_error_no_symbol = 2, /*!< symbol not found */ + /* %1$s -- library name, %2$s -- symbol name. 
*/ + __itt_error_unknown_group = 3, /*!< unknown group specified */ + /* %1$s -- env var name, %2$s -- group name. */ + __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */ + /* %1$s -- env var name, %2$d -- system error. */ + __itt_error_env_too_long = 5, /*!< variable value too long */ + /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */ + __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */ + /* %1$s -- function name, %2$d -- errno. */ +} __itt_error_code; + +typedef void (__itt_error_handler_t)(__itt_error_code code, va_list); +__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*); + +const char* ITTAPI __itt_api_version(void); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) +void __itt_error_handler(__itt_error_code code, va_list args); +extern const int ITTNOTIFY_NAME(err); +#define __itt_err ITTNOTIFY_NAME(err) +ITT_STUB(ITTAPI, const char*, api_version, (void)) +#define __itt_api_version ITTNOTIFY_DATA(api_version) +#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_api_version() (const char*)0 +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_PRIVATE_ */ + +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ + +// clang-format on diff --git a/mozglue/baseprofiler/lul/AutoObjectMapper.cpp b/mozglue/baseprofiler/lul/AutoObjectMapper.cpp new file mode 100644 index 0000000000..0037c943aa --- /dev/null +++ b/mozglue/baseprofiler/lul/AutoObjectMapper.cpp @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: 
set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <sys/mman.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "BaseProfiler.h" +#include "PlatformMacros.h" +#include "AutoObjectMapper.h" + +// A helper function for creating failure error messages in +// AutoObjectMapper*::Map. +static void failedToMessage(void (*aLog)(const char*), const char* aHowFailed, + std::string aFileName) { + char buf[300]; + SprintfLiteral(buf, "AutoObjectMapper::Map: Failed to %s \'%s\'", aHowFailed, + aFileName.c_str()); + buf[sizeof(buf) - 1] = 0; + aLog(buf); +} + +AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void (*aLog)(const char*)) + : mImage(nullptr), mSize(0), mLog(aLog), mIsMapped(false) {} + +AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() { + if (!mIsMapped) { + // There's nothing to do. + MOZ_ASSERT(!mImage); + MOZ_ASSERT(mSize == 0); + return; + } + MOZ_ASSERT(mSize > 0); + // The following assertion doesn't necessarily have to be true, + // but we assume (reasonably enough) that no mmap facility would + // be crazy enough to map anything at page zero. + MOZ_ASSERT(mImage); + munmap(mImage, mSize); +} + +bool AutoObjectMapperPOSIX::Map(/*OUT*/ void** start, /*OUT*/ size_t* length, + std::string fileName) { + MOZ_ASSERT(!mIsMapped); + + int fd = open(fileName.c_str(), O_RDONLY); + if (fd == -1) { + failedToMessage(mLog, "open", fileName); + return false; + } + + struct stat st; + int err = fstat(fd, &st); + size_t sz = (err == 0) ? 
st.st_size : 0; + if (err != 0 || sz == 0) { + failedToMessage(mLog, "fstat", fileName); + close(fd); + return false; + } + + void* image = mmap(nullptr, sz, PROT_READ, MAP_SHARED, fd, 0); + if (image == MAP_FAILED) { + failedToMessage(mLog, "mmap", fileName); + close(fd); + return false; + } + + close(fd); + mIsMapped = true; + mImage = *start = image; + mSize = *length = sz; + return true; +} diff --git a/mozglue/baseprofiler/lul/AutoObjectMapper.h b/mozglue/baseprofiler/lul/AutoObjectMapper.h new file mode 100644 index 0000000000..f63aa43e0e --- /dev/null +++ b/mozglue/baseprofiler/lul/AutoObjectMapper.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AutoObjectMapper_h +#define AutoObjectMapper_h + +#include <string> + +#include "mozilla/Attributes.h" +#include "PlatformMacros.h" + +// A (nearly-) RAII class that maps an object in and then unmaps it on +// destruction. This base class version uses the "normal" POSIX +// functions: open, fstat, close, mmap, munmap. + +class MOZ_STACK_CLASS AutoObjectMapperPOSIX { + public: + // The constructor does not attempt to map the file, because that + // might fail. Instead, once the object has been constructed, + // call Map() to attempt the mapping. There is no corresponding + // Unmap() since the unmapping is done in the destructor. Failure + // messages are sent to |aLog|. + explicit AutoObjectMapperPOSIX(void (*aLog)(const char*)); + + // Unmap the file on destruction of this object. + ~AutoObjectMapperPOSIX(); + + // Map |fileName| into the address space and return the mapping + // extents. If the file is zero sized this will fail. The file is + // mapped read-only and private. 
Returns true iff the mapping + // succeeded, in which case *start and *length hold its extent. + // Once a call to Map succeeds, all subsequent calls to it will + // fail. + bool Map(/*OUT*/ void** start, /*OUT*/ size_t* length, std::string fileName); + + protected: + // If we are currently holding a mapped object, these record the + // mapped address range. + void* mImage; + size_t mSize; + + // A logging sink, for complaining about mapping failures. + void (*mLog)(const char*); + + private: + // Are we currently holding a mapped object? This is private to + // the base class. Derived classes need to have their own way to + // track whether they are holding a mapped object. + bool mIsMapped; + + // Disable copying and assignment. + AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&); + AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&); + // Disable heap allocation of this class. + void* operator new(size_t); + void* operator new[](size_t); + void operator delete(void*); + void operator delete[](void*); +}; + +#endif // AutoObjectMapper_h diff --git a/mozglue/baseprofiler/lul/LulCommon.cpp b/mozglue/baseprofiler/lul/LulCommon.cpp new file mode 100644 index 0000000000..f014892a57 --- /dev/null +++ b/mozglue/baseprofiler/lul/LulCommon.cpp @@ -0,0 +1,102 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2011, 2013 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/module.cc +// src/common/unique_string.cc + +// There's no internal-only interface for LulCommon. Hence include +// the external interface directly. 
+#include "LulCommonExt.h" + +#include <stdlib.h> +#include <string.h> + +#include <string> +#include <map> + +#include "BaseProfiler.h" + +namespace lul { + +using std::string; + +//////////////////////////////////////////////////////////////// +// Module +// +Module::Module(const string& name, const string& os, const string& architecture, + const string& id) + : name_(name), os_(os), architecture_(architecture), id_(id) {} + +Module::~Module() {} + +//////////////////////////////////////////////////////////////// +// UniqueString +// +class UniqueString { + public: + explicit UniqueString(string str) { str_ = strdup(str.c_str()); } + ~UniqueString() { free(reinterpret_cast<void*>(const_cast<char*>(str_))); } + const char* str_; +}; + +const char* FromUniqueString(const UniqueString* ustr) { return ustr->str_; } + +bool IsEmptyUniqueString(const UniqueString* ustr) { + return (ustr->str_)[0] == '\0'; +} + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// +UniqueStringUniverse::~UniqueStringUniverse() { + for (std::map<string, UniqueString*>::iterator it = map_.begin(); + it != map_.end(); it++) { + delete it->second; + } +} + +const UniqueString* UniqueStringUniverse::ToUniqueString(string str) { + std::map<string, UniqueString*>::iterator it = map_.find(str); + if (it == map_.end()) { + UniqueString* ustr = new UniqueString(str); + map_[str] = ustr; + return ustr; + } else { + return it->second; + } +} + +} // namespace lul diff --git a/mozglue/baseprofiler/lul/LulCommonExt.h b/mozglue/baseprofiler/lul/LulCommonExt.h new file mode 100644 index 0000000000..b20a7321ff --- /dev/null +++ b/mozglue/baseprofiler/lul/LulCommonExt.h @@ -0,0 +1,509 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2010, 2012, 2013 Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// module.h: Define google_breakpad::Module. A Module holds debugging +// information, and can write that information out as a Breakpad +// symbol file. + +// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999. +// Copyright (c) 2001, 2002 Peter Dimov +// +// Permission to copy, use, modify, sell and distribute this software +// is granted provided this copyright notice appears in all copies. 
+// This software is provided "as is" without express or implied +// warranty, and with no claim as to its suitability for any purpose. +// +// See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation. +// + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/unique_string.h +// src/common/scoped_ptr.h +// src/common/module.h + +// External interface for the "Common" component of LUL. + +#ifndef LulCommonExt_h +#define LulCommonExt_h + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> + +#include <string> +#include <map> +#include <vector> +#include <cstddef> // for std::ptrdiff_t + +#include "mozilla/Assertions.h" + +namespace lul { + +using std::map; +using std::string; + +//////////////////////////////////////////////////////////////// +// UniqueString +// + +// Abstract type +class UniqueString; + +// Get the contained C string (debugging only) +const char* FromUniqueString(const UniqueString*); + +// Is the given string empty (that is, "") ? +bool IsEmptyUniqueString(const UniqueString*); + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// + +// All UniqueStrings live in some specific UniqueStringUniverse. +class UniqueStringUniverse { + public: + UniqueStringUniverse() {} + ~UniqueStringUniverse(); + // Convert a |string| to a UniqueString, that lives in this universe. 
+ const UniqueString* ToUniqueString(string str); + + private: + map<string, UniqueString*> map_; +}; + +//////////////////////////////////////////////////////////////// +// GUID +// + +typedef struct { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} MDGUID; // GUID + +typedef MDGUID GUID; + +//////////////////////////////////////////////////////////////// +// scoped_ptr +// + +// scoped_ptr mimics a built-in pointer except that it guarantees deletion +// of the object pointed to, either on destruction of the scoped_ptr or via +// an explicit reset(). scoped_ptr is a simple solution for simple needs; +// use shared_ptr or std::auto_ptr if your needs are more complex. + +// *** NOTE *** +// If your scoped_ptr is a class member of class FOO pointing to a +// forward declared type BAR (as shown below), then you MUST use a non-inlined +// version of the destructor. The destructor of a scoped_ptr (called from +// FOO's destructor) must have a complete definition of BAR in order to +// destroy it. Example: +// +// -- foo.h -- +// class BAR; +// +// class FOO { +// public: +// FOO(); +// ~FOO(); // Required for sources that instantiate class FOO to compile! +// +// private: +// scoped_ptr<BAR> bar_; +// }; +// +// -- foo.cc -- +// #include "foo.h" +// FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition. + +// scoped_ptr_malloc added by Google +// When one of these goes out of scope, instead of doing a delete or +// delete[], it calls free(). scoped_ptr_malloc<char> is likely to see +// much more use than any other specializations. + +// release() added by Google +// Use this to conditionally transfer ownership of a heap-allocated object +// to the caller, usually on method success. 
// Exclusive owner of one object allocated with `new`; the object is deleted
// when the owner is destroyed or reset().  Not copyable.
template <typename T>
class scoped_ptr {
 private:
  T* ptr;

  // Declared but never defined: copying is not supported.
  scoped_ptr(scoped_ptr const&);
  scoped_ptr& operator=(scoped_ptr const&);

 public:
  typedef T element_type;

  explicit scoped_ptr(T* p = nullptr) : ptr(p) {}

  ~scoped_ptr() { delete ptr; }

  // Take ownership of |p|, deleting the previously held object.  Resetting
  // to the pointer already held is a no-op.
  void reset(T* p = nullptr) {
    if (ptr == p) {
      return;
    }
    delete ptr;
    ptr = p;
  }

  T& operator*() const {
    MOZ_ASSERT(ptr != nullptr);
    return *ptr;
  }

  T* operator->() const {
    MOZ_ASSERT(ptr != nullptr);
    return ptr;
  }

  bool operator==(T* p) const { return p == ptr; }

  bool operator!=(T* p) const { return !(p == ptr); }

  T* get() const { return ptr; }

  // Exchange the held pointers; nothing is deleted.
  void swap(scoped_ptr& b) {
    T* mine = ptr;
    ptr = b.ptr;
    b.ptr = mine;
  }

  // Give up ownership: return the held pointer and hold null afterwards.
  T* release() {
    T* released = ptr;
    ptr = nullptr;
    return released;
  }

 private:
  // no reason to use these: each scoped_ptr should have its own object
  template <typename U>
  bool operator==(scoped_ptr<U> const& p) const;
  template <typename U>
  bool operator!=(scoped_ptr<U> const& p) const;
};

template <typename T>
inline void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) {
  a.swap(b);
}

// Raw pointer on the left-hand side of the comparison.
template <typename T>
inline bool operator==(T* p, const scoped_ptr<T>& b) {
  return b.get() == p;
}

template <typename T>
inline bool operator!=(T* p, const scoped_ptr<T>& b) {
  return b.get() != p;
}

// scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to
// is guaranteed, either on destruction of the scoped_array or via an explicit
// reset(). Use shared_array or std::vector if your needs are more complex.
+ +template <typename T> +class scoped_array { + private: + T* ptr; + + scoped_array(scoped_array const&); + scoped_array& operator=(scoped_array const&); + + public: + typedef T element_type; + + explicit scoped_array(T* p = 0) : ptr(p) {} + + ~scoped_array() { delete[] ptr; } + + void reset(T* p = 0) { + if (ptr != p) { + delete[] ptr; + ptr = p; + } + } + + T& operator[](std::ptrdiff_t i) const { + MOZ_ASSERT(ptr != 0); + MOZ_ASSERT(i >= 0); + return ptr[i]; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_array& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_array should have its own object + template <typename U> + bool operator==(scoped_array<U> const& p) const; + template <typename U> + bool operator!=(scoped_array<U> const& p) const; +}; + +template <class T> +inline void swap(scoped_array<T>& a, scoped_array<T>& b) { + a.swap(b); +} + +template <typename T> +inline bool operator==(T* p, const scoped_array<T>& b) { + return p == b.get(); +} + +template <typename T> +inline bool operator!=(T* p, const scoped_array<T>& b) { + return p != b.get(); +} + +// This class wraps the c library function free() in a class that can be +// passed as a template argument to scoped_ptr_malloc below. +class ScopedPtrMallocFree { + public: + inline void operator()(void* x) const { free(x); } +}; + +// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a +// second template argument, the functor used to free the object. 
+ +template <typename T, typename FreeProc = ScopedPtrMallocFree> +class scoped_ptr_malloc { + private: + T* ptr; + + scoped_ptr_malloc(scoped_ptr_malloc const&); + scoped_ptr_malloc& operator=(scoped_ptr_malloc const&); + + public: + typedef T element_type; + + explicit scoped_ptr_malloc(T* p = 0) : ptr(p) {} + + ~scoped_ptr_malloc() { free_((void*)ptr); } + + void reset(T* p = 0) { + if (ptr != p) { + free_((void*)ptr); + ptr = p; + } + } + + T& operator*() const { + MOZ_ASSERT(ptr != 0); + return *ptr; + } + + T* operator->() const { + MOZ_ASSERT(ptr != 0); + return ptr; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_ptr_malloc& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_ptr_malloc should have its own object + template <typename U, typename GP> + bool operator==(scoped_ptr_malloc<U, GP> const& p) const; + template <typename U, typename GP> + bool operator!=(scoped_ptr_malloc<U, GP> const& p) const; + + static FreeProc const free_; +}; + +template <typename T, typename FP> +FP const scoped_ptr_malloc<T, FP>::free_ = FP(); + +template <typename T, typename FP> +inline void swap(scoped_ptr_malloc<T, FP>& a, scoped_ptr_malloc<T, FP>& b) { + a.swap(b); +} + +template <typename T, typename FP> +inline bool operator==(T* p, const scoped_ptr_malloc<T, FP>& b) { + return p == b.get(); +} + +template <typename T, typename FP> +inline bool operator!=(T* p, const scoped_ptr_malloc<T, FP>& b) { + return p != b.get(); +} + +//////////////////////////////////////////////////////////////// +// Module +// + +// A Module represents the contents of a module, and supports methods +// for adding information produced by parsing STABS or DWARF data +// --- possibly both from the same file --- and then writing out the +// unified contents as 
a Breakpad-format symbol file. +class Module { + public: + // The type of addresses and sizes in a symbol table. + typedef uint64_t Address; + + // Representation of an expression. This can either be a postfix + // expression, in which case it is stored as a string, or a simple + // expression of the form (identifier + imm) or *(identifier + imm). + // It can also be invalid (denoting "no value"). + enum ExprHow { kExprInvalid = 1, kExprPostfix, kExprSimple, kExprSimpleMem }; + + struct Expr { + // Construct a simple-form expression + Expr(const UniqueString* ident, long offset, bool deref) { + if (IsEmptyUniqueString(ident)) { + Expr(); + } else { + postfix_ = ""; + ident_ = ident; + offset_ = offset; + how_ = deref ? kExprSimpleMem : kExprSimple; + } + } + + // Construct an invalid expression + Expr() { + postfix_ = ""; + ident_ = nullptr; + offset_ = 0; + how_ = kExprInvalid; + } + + // Return the postfix expression string, either directly, + // if this is a postfix expression, or by synthesising it + // for a simple expression. + std::string getExprPostfix() const { + switch (how_) { + case kExprPostfix: + return postfix_; + case kExprSimple: + case kExprSimpleMem: { + char buf[40]; + sprintf(buf, " %ld %c%s", labs(offset_), offset_ < 0 ? '-' : '+', + how_ == kExprSimple ? "" : " ^"); + return std::string(FromUniqueString(ident_)) + std::string(buf); + } + case kExprInvalid: + default: + MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type"); + return "Expr::genExprPostfix: kExprInvalid"; + } + } + + // The identifier that gives the starting value for simple expressions. + const UniqueString* ident_; + // The offset to add for simple expressions. + long offset_; + // The Postfix expression string to evaluate for non-simple expressions. + std::string postfix_; + // The operation expressed by this expression. + ExprHow how_; + }; + + // A map from register names to expressions that recover + // their values. 
This can represent a complete set of rules to + // follow at some address, or a set of changes to be applied to an + // extant set of rules. + // NOTE! there are two completely different types called RuleMap. This + // is one of them. + typedef std::map<const UniqueString*, Expr> RuleMap; + + // A map from addresses to RuleMaps, representing changes that take + // effect at given addresses. + typedef std::map<Address, RuleMap> RuleChangeMap; + + // A range of 'STACK CFI' stack walking information. An instance of + // this structure corresponds to a 'STACK CFI INIT' record and the + // subsequent 'STACK CFI' records that fall within its range. + struct StackFrameEntry { + // The starting address and number of bytes of machine code this + // entry covers. + Address address, size; + + // The initial register recovery rules, in force at the starting + // address. + RuleMap initial_rules; + + // A map from addresses to rule changes. To find the rules in + // force at a given address, start with initial_rules, and then + // apply the changes given in this map for all addresses up to and + // including the address you're interested in. + RuleChangeMap rule_changes; + }; + + // Create a new module with the given name, operating system, + // architecture, and ID string. + Module(const std::string& name, const std::string& os, + const std::string& architecture, const std::string& id); + ~Module(); + + private: + // Module header entries. + std::string name_, os_, architecture_, id_; +}; + +} // namespace lul + +#endif // LulCommonExt_h diff --git a/mozglue/baseprofiler/lul/LulDwarf.cpp b/mozglue/baseprofiler/lul/LulDwarf.cpp new file mode 100644 index 0000000000..c83296fc62 --- /dev/null +++ b/mozglue/baseprofiler/lul/LulDwarf.cpp @@ -0,0 +1,2252 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2010 Google Inc. All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit, +// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details. 

// This file is derived from the following files in
// toolkit/crashreporter/google-breakpad:
// src/common/dwarf/bytereader.cc
// src/common/dwarf/dwarf2reader.cc
// src/common/dwarf_cfi_to_module.cc

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include <map>
#include <stack>
#include <string>

#include "mozilla/Assertions.h"
#include "mozilla/Sprintf.h"

#include "LulCommonExt.h"
#include "LulDwarfInt.h"

// Set this to 1 for verbose logging
#define DEBUG_DWARF 0

namespace lul {

using std::string;

// Construct a reader for data of the given endianness.  Both reader
// member-function pointers start out null and both sizes zero; they are
// filled in by SetOffsetSize() / SetAddressSize().  The have_*_base_
// members are value-initialised.
ByteReader::ByteReader(enum Endianness endian)
    : offset_reader_(NULL),
      address_reader_(NULL),
      endian_(endian),
      address_size_(0),
      offset_size_(0),
      have_section_base_(),
      have_text_base_(),
      have_data_base_(),
      have_function_base_() {}

ByteReader::~ByteReader() {}

// Record the offset size and select ReadFourBytes or ReadEightBytes as the
// offset reader.  |size| is asserted to be 4 or 8; any value other than 4
// selects the eight-byte reader.
void ByteReader::SetOffsetSize(uint8 size) {
  offset_size_ = size;
  MOZ_ASSERT(size == 4 || size == 8);
  if (size == 4) {
    this->offset_reader_ = &ByteReader::ReadFourBytes;
  } else {
    this->offset_reader_ = &ByteReader::ReadEightBytes;
  }
}

// Record the address size and select ReadFourBytes or ReadEightBytes as the
// address reader.  |size| is asserted to be 4 or 8; any value other than 4
// selects the eight-byte reader.
void ByteReader::SetAddressSize(uint8 size) {
  address_size_ = size;
  MOZ_ASSERT(size == 4 || size == 8);
  if (size == 4) {
    this->address_reader_ = &ByteReader::ReadFourBytes;
  } else {
    this->address_reader_ = &ByteReader::ReadEightBytes;
  }
}

// Read an initial-length field at |start| and return the length it encodes.
// As a side effect this sets the offset size (4 or 8), and stores in *len
// the number of bytes the field itself occupied (4 or 12).
uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) {
  const uint64 initial_length = ReadFourBytes(start);
  start += 4;

  // In DWARF2/3, if the initial length is all 1 bits, then the offset
  // size is 8 and we need to read the next 8 bytes for the real length.
  if (initial_length == 0xffffffff) {
    SetOffsetSize(8);
    *len = 12;
    return ReadOffset(start);
  } else {
    SetOffsetSize(4);
    *len = 4;
  }
  return initial_length;
}

// Return true if |encoding| is a recognised pointer encoding:
// DW_EH_PE_omit, DW_EH_PE_aligned, or a value whose bits under mask 0x7
// do not exceed DW_EH_PE_udata8 and whose bits under mask 0x70 do not
// exceed DW_EH_PE_funcrel.
bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
  if (encoding == DW_EH_PE_omit) return true;
  if (encoding == DW_EH_PE_aligned) return true;
  if ((encoding & 0x7) > DW_EH_PE_udata8) return false;
  if ((encoding & 0x70) > DW_EH_PE_funcrel) return false;
  return true;
}

// Return true if the base selected by the 0x70 bits of |encoding| is
// available to this reader: absolute pointers always are, the relative
// forms only when the matching have_*_base_ flag is set.  Anything else
// is unusable.
bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
  switch (encoding & 0x70) {
    case DW_EH_PE_absptr:
      return true;
    case DW_EH_PE_pcrel:
      return have_section_base_;
    case DW_EH_PE_textrel:
      return have_text_base_;
    case DW_EH_PE_datarel:
      return have_data_base_;
    case DW_EH_PE_funcrel:
      return have_function_base_;
    default:
      return false;
  }
}

// Decode the pointer stored at |buffer| according to |encoding| and return
// it; *len receives the number of bytes consumed.  The low nybble of the
// encoding selects how the raw value is read, the 0x70 bits select the base
// it is added to.  Calls abort() on a value format or base selector that is
// not handled below.
uint64 ByteReader::ReadEncodedPointer(const char* buffer,
                                      DwarfPointerEncoding encoding,
                                      size_t* len) const {
  // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
  // see it here.
  MOZ_ASSERT(encoding != DW_EH_PE_omit);

  // The Linux Standards Base 4.0 does not make this clear, but the
  // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
  // agree that aligned pointers are always absolute, machine-sized,
  // machine-signed pointers.
  if (encoding == DW_EH_PE_aligned) {
    MOZ_ASSERT(have_section_base_);

    // We don't need to align BUFFER in *our* address space. Rather, we
    // need to find the next position in our buffer that would be aligned
    // when the .eh_frame section the buffer contains is loaded into the
    // program's memory. So align assuming that buffer_base_ gets loaded at
    // address section_base_, where section_base_ itself may or may not be
    // aligned.

    // First, find the offset to START from the closest prior aligned
    // address.
    uint64 skew = section_base_ & (AddressSize() - 1);
    // Now find the offset from that aligned address to buffer.
    uint64 offset = skew + (buffer - buffer_base_);
    // Round up to the next boundary.
    uint64 aligned = (offset + AddressSize() - 1) & -AddressSize();
    // Convert back to a pointer.
    const char* aligned_buffer = buffer_base_ + (aligned - skew);
    // Finally, store the length and actually fetch the pointer.
    // (The length covers the skipped padding plus the pointer itself.)
    *len = aligned_buffer - buffer + AddressSize();
    return ReadAddress(aligned_buffer);
  }

  // Extract the value first, ignoring whether it's a pointer or an
  // offset relative to some base.
  uint64 offset;
  switch (encoding & 0x0f) {
    case DW_EH_PE_absptr:
      // DW_EH_PE_absptr is weird, as it is used as a meaningful value for
      // both the high and low nybble of encoding bytes. When it appears in
      // the high nybble, it means that the pointer is absolute, not an
      // offset from some base address. When it appears in the low nybble,
      // as here, it means that the pointer is stored as a normal
      // machine-sized and machine-signed address. A low nybble of
      // DW_EH_PE_absptr does not imply that the pointer is absolute; it is
      // correct for us to treat the value as an offset from a base address
      // if the upper nybble is not DW_EH_PE_absptr.
      offset = ReadAddress(buffer);
      *len = AddressSize();
      break;

    case DW_EH_PE_uleb128:
      offset = ReadUnsignedLEB128(buffer, len);
      break;

    case DW_EH_PE_udata2:
      offset = ReadTwoBytes(buffer);
      *len = 2;
      break;

    case DW_EH_PE_udata4:
      offset = ReadFourBytes(buffer);
      *len = 4;
      break;

    case DW_EH_PE_udata8:
      offset = ReadEightBytes(buffer);
      *len = 8;
      break;

    case DW_EH_PE_sleb128:
      offset = ReadSignedLEB128(buffer, len);
      break;

    case DW_EH_PE_sdata2:
      offset = ReadTwoBytes(buffer);
      // Sign-extend from 16 bits.
      offset = (offset ^ 0x8000) - 0x8000;
      *len = 2;
      break;

    case DW_EH_PE_sdata4:
      offset = ReadFourBytes(buffer);
      // Sign-extend from 32 bits.
      offset = (offset ^ 0x80000000ULL) - 0x80000000ULL;
      *len = 4;
      break;

    case DW_EH_PE_sdata8:
      // No need to sign-extend; this is the full width of our type.
      offset = ReadEightBytes(buffer);
      *len = 8;
      break;

    default:
      abort();
  }

  // Find the appropriate base address.
  uint64 base;
  switch (encoding & 0x70) {
    case DW_EH_PE_absptr:
      base = 0;
      break;

    case DW_EH_PE_pcrel:
      // Relative to the address the encoded pointer itself would have:
      // the section base plus this field's offset within the buffer.
      MOZ_ASSERT(have_section_base_);
      base = section_base_ + (buffer - buffer_base_);
      break;

    case DW_EH_PE_textrel:
      MOZ_ASSERT(have_text_base_);
      base = text_base_;
      break;

    case DW_EH_PE_datarel:
      MOZ_ASSERT(have_data_base_);
      base = data_base_;
      break;

    case DW_EH_PE_funcrel:
      MOZ_ASSERT(have_function_base_);
      base = function_base_;
      break;

    default:
      abort();
  }

  uint64 pointer = base + offset;

  // Remove inappropriate upper bits.
  if (AddressSize() == 4)
    pointer = pointer & 0xffffffff;
  else
    MOZ_ASSERT(AddressSize() == sizeof(uint64));

  return pointer;
}

// A DWARF rule for recovering the address or value of a register, or
// computing the canonical frame address. There is one subclass of this for
// each '*Rule' member function in CallFrameInfo::Handler.
//
// It's annoying that we have to handle Rules using pointers (because
// the concrete instances can have an arbitrary size). They're small,
// so it would be much nicer if we could just handle them by value
// instead of fretting about ownership and destruction.
//
// It seems like all these could simply be instances of std::tr1::bind,
// except that we need instances to be EqualityComparable, too.
//
// This could logically be nested within State, but then the qualified names
// get horrendous.
class CallFrameInfo::Rule {
 public:
  virtual ~Rule() {}

  // Tell HANDLER that, at ADDRESS in the program, REG can be
  // recovered using this rule. If REG is kCFARegister, then this rule
  // describes how to compute the canonical frame address. Return what the
  // HANDLER member function returned.
  virtual bool Handle(Handler* handler, uint64 address, int reg) const = 0;

  // Equality on rules. We use these to decide which rules we need
  // to report after a DW_CFA_restore_state instruction.
  virtual bool operator==(const Rule& rhs) const = 0;

  // Inequality is defined purely as the negation of operator==.
  bool operator!=(const Rule& rhs) const { return !(*this == rhs); }

  // Return a pointer to a copy of this rule.
  virtual Rule* Copy() const = 0;

  // If this is a base+offset rule, change its base register to REG.
  // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
  virtual void SetBaseRegister(unsigned reg) {}

  // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
  // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
  virtual void SetOffset(long long offset) {}

  // A RTTI workaround, to make it possible to implement equality
  // comparisons on classes derived from this one.
  enum CFIRTag {
    CFIR_UNDEFINED_RULE,
    CFIR_SAME_VALUE_RULE,
    CFIR_OFFSET_RULE,
    CFIR_VAL_OFFSET_RULE,
    CFIR_REGISTER_RULE,
    CFIR_EXPRESSION_RULE,
    CFIR_VAL_EXPRESSION_RULE
  };

  // Produce the tag that identifies the child class of this object.
  virtual CFIRTag getTag() const = 0;
};

// Rule: the value the register had in the caller cannot be recovered.
class CallFrameInfo::UndefinedRule : public CallFrameInfo::Rule {
 public:
  UndefinedRule() {}
  ~UndefinedRule() {}
  CFIRTag getTag() const override { return CFIR_UNDEFINED_RULE; }
  // Forward to the handler's UndefinedRule callback and return its result.
  bool Handle(Handler* handler, uint64 address, int reg) const override {
    return handler->UndefinedRule(address, reg);
  }
  // This rule carries no data, so any other rule with the same tag is equal.
  bool operator==(const Rule& rhs) const override {
    if (rhs.getTag() != CFIR_UNDEFINED_RULE) return false;
    return true;
  }
  // The copy is allocated with new and returned as a raw pointer.
  Rule* Copy() const override { return new UndefinedRule(*this); }
};

// Rule: the register's value is the same as that it had in the caller.
+class CallFrameInfo::SameValueRule : public CallFrameInfo::Rule { + public: + SameValueRule() {} + ~SameValueRule() {} + CFIRTag getTag() const override { return CFIR_SAME_VALUE_RULE; } + bool Handle(Handler* handler, uint64 address, int reg) const override { + return handler->SameValueRule(address, reg); + } + bool operator==(const Rule& rhs) const override { + if (rhs.getTag() != CFIR_SAME_VALUE_RULE) return false; + return true; + } + Rule* Copy() const override { return new SameValueRule(*this); } +}; + +// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER +// may be CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::OffsetRule : public CallFrameInfo::Rule { + public: + OffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) {} + ~OffsetRule() {} + CFIRTag getTag() const override { return CFIR_OFFSET_RULE; } + bool Handle(Handler* handler, uint64 address, int reg) const override { + return handler->OffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule& rhs) const override { + if (rhs.getTag() != CFIR_OFFSET_RULE) return false; + const OffsetRule* our_rhs = static_cast<const OffsetRule*>(&rhs); + return (base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule* Copy() const override { return new OffsetRule(*this); } + // We don't actually need SetBaseRegister or SetOffset here, since they + // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it + // doesn't make sense to use OffsetRule for computing the CFA: it + // computes the address at which a register is saved, not a value. + private: + int base_register_; + long offset_; +}; + +// Rule: the value the register had in the caller is the value of +// BASE_REGISTER plus offset. BASE_REGISTER may be +// CallFrameInfo::Handler::kCFARegister. 
+class CallFrameInfo::ValOffsetRule : public CallFrameInfo::Rule { + public: + ValOffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) {} + ~ValOffsetRule() {} + CFIRTag getTag() const override { return CFIR_VAL_OFFSET_RULE; } + bool Handle(Handler* handler, uint64 address, int reg) const override { + return handler->ValOffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule& rhs) const override { + if (rhs.getTag() != CFIR_VAL_OFFSET_RULE) return false; + const ValOffsetRule* our_rhs = static_cast<const ValOffsetRule*>(&rhs); + return (base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule* Copy() const override { return new ValOffsetRule(*this); } + void SetBaseRegister(unsigned reg) override { base_register_ = reg; } + void SetOffset(long long offset) override { offset_ = offset; } + + private: + int base_register_; + long offset_; +}; + +// Rule: the register has been saved in another register REGISTER_NUMBER_. +class CallFrameInfo::RegisterRule : public CallFrameInfo::Rule { + public: + explicit RegisterRule(int register_number) + : register_number_(register_number) {} + ~RegisterRule() {} + CFIRTag getTag() const override { return CFIR_REGISTER_RULE; } + bool Handle(Handler* handler, uint64 address, int reg) const override { + return handler->RegisterRule(address, reg, register_number_); + } + bool operator==(const Rule& rhs) const override { + if (rhs.getTag() != CFIR_REGISTER_RULE) return false; + const RegisterRule* our_rhs = static_cast<const RegisterRule*>(&rhs); + return (register_number_ == our_rhs->register_number_); + } + Rule* Copy() const override { return new RegisterRule(*this); } + + private: + int register_number_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. 
+class CallFrameInfo::ExpressionRule : public CallFrameInfo::Rule { + public: + explicit ExpressionRule(const string& expression) : expression_(expression) {} + ~ExpressionRule() {} + CFIRTag getTag() const override { return CFIR_EXPRESSION_RULE; } + bool Handle(Handler* handler, uint64 address, int reg) const override { + return handler->ExpressionRule(address, reg, expression_); + } + bool operator==(const Rule& rhs) const override { + if (rhs.getTag() != CFIR_EXPRESSION_RULE) return false; + const ExpressionRule* our_rhs = static_cast<const ExpressionRule*>(&rhs); + return (expression_ == our_rhs->expression_); + } + Rule* Copy() const override { return new ExpressionRule(*this); } + + private: + string expression_; +}; + +// Rule: EXPRESSION evaluates to the previous value of the register. +class CallFrameInfo::ValExpressionRule : public CallFrameInfo::Rule { + public: + explicit ValExpressionRule(const string& expression) + : expression_(expression) {} + ~ValExpressionRule() {} + CFIRTag getTag() const override { return CFIR_VAL_EXPRESSION_RULE; } + bool Handle(Handler* handler, uint64 address, int reg) const override { + return handler->ValExpressionRule(address, reg, expression_); + } + bool operator==(const Rule& rhs) const override { + if (rhs.getTag() != CFIR_VAL_EXPRESSION_RULE) return false; + const ValExpressionRule* our_rhs = + static_cast<const ValExpressionRule*>(&rhs); + return (expression_ == our_rhs->expression_); + } + Rule* Copy() const override { return new ValExpressionRule(*this); } + + private: + string expression_; +}; + +// A map from register numbers to rules. +class CallFrameInfo::RuleMap { + public: + RuleMap() : cfa_rule_(NULL) {} + RuleMap(const RuleMap& rhs) : cfa_rule_(NULL) { *this = rhs; } + ~RuleMap() { Clear(); } + + RuleMap& operator=(const RuleMap& rhs); + + // Set the rule for computing the CFA to RULE. Take ownership of RULE. 
+ void SetCFARule(Rule* rule) { + delete cfa_rule_; + cfa_rule_ = rule; + } + + // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains + // ownership of the rule. We use this for DW_CFA_def_cfa_offset and + // DW_CFA_def_cfa_register, and for detecting references to the CFA before + // a rule for it has been established. + Rule* CFARule() const { return cfa_rule_; } + + // Return the rule for REG, or NULL if there is none. The caller takes + // ownership of the result. + Rule* RegisterRule(int reg) const; + + // Set the rule for computing REG to RULE. Take ownership of RULE. + void SetRegisterRule(int reg, Rule* rule); + + // Make all the appropriate calls to HANDLER as if we were changing from + // this RuleMap to NEW_RULES at ADDRESS. We use this to implement + // DW_CFA_restore_state, where lots of rules can change simultaneously. + // Return true if all handlers returned true; otherwise, return false. + bool HandleTransitionTo(Handler* handler, uint64 address, + const RuleMap& new_rules) const; + + private: + // A map from register numbers to Rules. + typedef std::map<int, Rule*> RuleByNumber; + + // Remove all register rules and clear cfa_rule_. + void Clear(); + + // The rule for computing the canonical frame address. This RuleMap owns + // this rule. + Rule* cfa_rule_; + + // A map from register numbers to postfix expressions to recover + // their values. This RuleMap owns the Rules the map refers to. + RuleByNumber registers_; +}; + +CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) { + Clear(); + // Since each map owns the rules it refers to, assignment must copy them. 
+ if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy(); + for (RuleByNumber::const_iterator it = rhs.registers_.begin(); + it != rhs.registers_.end(); it++) + registers_[it->first] = it->second->Copy(); + return *this; +} + +CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const { + MOZ_ASSERT(reg != Handler::kCFARegister); + RuleByNumber::const_iterator it = registers_.find(reg); + if (it != registers_.end()) + return it->second->Copy(); + else + return NULL; +} + +void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) { + MOZ_ASSERT(reg != Handler::kCFARegister); + MOZ_ASSERT(rule); + Rule** slot = &registers_[reg]; + delete *slot; + *slot = rule; +} + +bool CallFrameInfo::RuleMap::HandleTransitionTo( + Handler* handler, uint64 address, const RuleMap& new_rules) const { + // Transition from cfa_rule_ to new_rules.cfa_rule_. + if (cfa_rule_ && new_rules.cfa_rule_) { + if (*cfa_rule_ != *new_rules.cfa_rule_ && + !new_rules.cfa_rule_->Handle(handler, address, Handler::kCFARegister)) + return false; + } else if (cfa_rule_) { + // this RuleMap has a CFA rule but new_rules doesn't. + // CallFrameInfo::Handler has no way to handle this --- and shouldn't; + // it's garbage input. The instruction interpreter should have + // detected this and warned, so take no action here. + } else if (new_rules.cfa_rule_) { + // This shouldn't be possible: NEW_RULES is some prior state, and + // there's no way to remove entries. + MOZ_ASSERT(0); + } else { + // Both CFA rules are empty. No action needed. + } + + // Traverse the two maps in order by register number, and report + // whatever differences we find. + RuleByNumber::const_iterator old_it = registers_.begin(); + RuleByNumber::const_iterator new_it = new_rules.registers_.begin(); + while (old_it != registers_.end() && new_it != new_rules.registers_.end()) { + if (old_it->first < new_it->first) { + // This RuleMap has an entry for old_it->first, but NEW_RULES + // doesn't.
+ // + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + if (!handler->SameValueRule(address, old_it->first)) return false; + old_it++; + } else if (old_it->first > new_it->first) { + // NEW_RULES has entry for new_it->first, but this RuleMap + // doesn't. This shouldn't be possible: NEW_RULES is some prior + // state, and there's no way to remove entries. + MOZ_ASSERT(0); + } else { + // Both maps have an entry for this register. Report the new + // rule if it is different. + if (*old_it->second != *new_it->second && + !new_it->second->Handle(handler, address, new_it->first)) + return false; + new_it++; + old_it++; + } + } + // Finish off entries from this RuleMap with no counterparts in new_rules. + while (old_it != registers_.end()) { + if (!handler->SameValueRule(address, old_it->first)) return false; + old_it++; + } + // Since we only make transitions from a rule set to some previously + // saved rule set, and we can only add rules to the map, NEW_RULES + // must have fewer rules than *this. + MOZ_ASSERT(new_it == new_rules.registers_.end()); + + return true; +} + +// Remove all register rules and clear cfa_rule_. +void CallFrameInfo::RuleMap::Clear() { + delete cfa_rule_; + cfa_rule_ = NULL; + for (RuleByNumber::iterator it = registers_.begin(); it != registers_.end(); + it++) + delete it->second; + registers_.clear(); +} + +// The state of the call frame information interpreter as it processes +// instructions from a CIE and FDE. +class CallFrameInfo::State { + public: + // Create a call frame information interpreter state with the given + // reporter, reader, handler, and initial call frame info address. 
+ State(ByteReader* reader, Handler* handler, Reporter* reporter, + uint64 address) + : reader_(reader), + handler_(handler), + reporter_(reporter), + address_(address), + entry_(NULL), + cursor_(NULL), + saved_rules_(NULL) {} + + ~State() { + if (saved_rules_) delete saved_rules_; + } + + // Interpret instructions from CIE, save the resulting rule set for + // DW_CFA_restore instructions, and return true. On error, report + // the problem to reporter_ and return false. + bool InterpretCIE(const CIE& cie); + + // Interpret instructions from FDE, and return true. On error, + // report the problem to reporter_ and return false. + bool InterpretFDE(const FDE& fde); + + private: + // The operands of a CFI instruction, for ParseOperands. + struct Operands { + unsigned register_number; // A register number. + uint64 offset; // An offset or address. + long signed_offset; // A signed offset. + string expression; // A DWARF expression. + }; + + // Parse CFI instruction operands from STATE's instruction stream as + // described by FORMAT. On success, populate OPERANDS with the + // results, and return true. On failure, report the problem and + // return false. + // + // Each character of FORMAT should be one of the following: + // + // 'r' unsigned LEB128 register number (OPERANDS->register_number) + // 'o' unsigned LEB128 offset (OPERANDS->offset) + // 's' signed LEB128 offset (OPERANDS->signed_offset) + // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) 
+ // '1' a one-byte offset (OPERANDS->offset) + // '2' a two-byte offset (OPERANDS->offset) + // '4' a four-byte offset (OPERANDS->offset) + // '8' an eight-byte offset (OPERANDS->offset) + // 'e' a DW_FORM_block holding a (OPERANDS->expression) + // DWARF expression + bool ParseOperands(const char* format, Operands* operands); + + // Interpret one CFI instruction from STATE's instruction stream, update + // STATE, report any rule changes to handler_, and return true. On + // failure, report the problem and return false. + bool DoInstruction(); + + // The following Do* member functions are subroutines of DoInstruction, + // factoring out the actual work of operations that have several + // different encodings. + + // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and + // return true. On failure, report and return false. (Used for + // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) + bool DoDefCFA(unsigned base_register, long offset); + + // Change the offset of the CFA rule to OFFSET, and return true. On + // failure, report and return false. (Subroutine for + // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) + bool DoDefCFAOffset(long offset); + + // Specify that REG can be recovered using RULE, and return true. On + // failure, report and return false. + bool DoRule(unsigned reg, Rule* rule); + + // Specify that REG can be found at OFFSET from the CFA, and return true. + // On failure, report and return false. (Subroutine for DW_CFA_offset, + // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) + bool DoOffset(unsigned reg, long offset); + + // Specify that the caller's value for REG is the CFA plus OFFSET, + // and return true. On failure, report and return false. (Subroutine + // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) + bool DoValOffset(unsigned reg, long offset); + + // Restore REG to the rule established in the CIE, and return true. On + // failure, report and return false. 
(Subroutine for DW_CFA_restore and + // DW_CFA_restore_extended.) + bool DoRestore(unsigned reg); + + // Return the section offset of the instruction at cursor. For use + // in error messages. + uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } + + // Report that entry_ is incomplete, and return false. For brevity. + bool ReportIncomplete() { + reporter_->Incomplete(entry_->offset, entry_->kind); + return false; + } + + // For reading multi-byte values with the appropriate endianness. + ByteReader* reader_; + + // The handler to which we should report the data we find. + Handler* handler_; + + // For reporting problems in the info we're parsing. + Reporter* reporter_; + + // The code address to which the next instruction in the stream applies. + uint64 address_; + + // The entry whose instructions we are currently processing. This is + // first a CIE, and then an FDE. + const Entry* entry_; + + // The next instruction to process. + const char* cursor_; + + // The current set of rules. + RuleMap rules_; + + // The set of rules established by the CIE, used by DW_CFA_restore + // and DW_CFA_restore_extended. We set this after interpreting the + // CIE's instructions. + RuleMap cie_rules_; + + // A stack of saved states, for DW_CFA_remember_state and + // DW_CFA_restore_state. + std::stack<RuleMap>* saved_rules_; +}; + +bool CallFrameInfo::State::InterpretCIE(const CIE& cie) { + entry_ = &cie; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) return false; + // Note the rules established by the CIE, for use by DW_CFA_restore + // and DW_CFA_restore_extended. 
+ cie_rules_ = rules_; + return true; +} + +bool CallFrameInfo::State::InterpretFDE(const FDE& fde) { + entry_ = &fde; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) return false; + return true; +} + +bool CallFrameInfo::State::ParseOperands(const char* format, + Operands* operands) { + size_t len; + const char* operand; + + for (operand = format; *operand; operand++) { + size_t bytes_left = entry_->end - cursor_; + switch (*operand) { + case 'r': + operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'o': + operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 's': + operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'a': + operands->offset = reader_->ReadEncodedPointer( + cursor_, entry_->cie->pointer_encoding, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case '1': + if (1 > bytes_left) return ReportIncomplete(); + operands->offset = static_cast<unsigned char>(*cursor_++); + break; + + case '2': + if (2 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadTwoBytes(cursor_); + cursor_ += 2; + break; + + case '4': + if (4 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadFourBytes(cursor_); + cursor_ += 4; + break; + + case '8': + if (8 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadEightBytes(cursor_); + cursor_ += 8; + break; + + case 'e': { + size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left || expression_length > bytes_left - len) + return ReportIncomplete(); + cursor_ += len; + operands->expression = string(cursor_, expression_length); + cursor_ += 
expression_length; + break; + } + + default: + MOZ_ASSERT(0); + } + } + + return true; +} + +bool CallFrameInfo::State::DoInstruction() { + CIE* cie = entry_->cie; + Operands ops; + + // Our entry's kind should have been set by now. + MOZ_ASSERT(entry_->kind != kUnknown); + + // We shouldn't have been invoked unless there were more + // instructions to parse. + MOZ_ASSERT(cursor_ < entry_->end); + + unsigned opcode = *cursor_++; + if ((opcode & 0xc0) != 0) { + switch (opcode & 0xc0) { + // Advance the address. + case DW_CFA_advance_loc: { + size_t code_offset = opcode & 0x3f; + address_ += code_offset * cie->code_alignment_factor; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset: + if (!ParseOperands("o", &ops) || + !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) + return false; + break; + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore: + if (!DoRestore(opcode & 0x3f)) return false; + break; + + // The 'if' above should have excluded this possibility. + default: + MOZ_ASSERT(0); + } + + // Return here, so the big switch below won't be indented. + return true; + } + + switch (opcode) { + // Set the address. + case DW_CFA_set_loc: + if (!ParseOperands("a", &ops)) return false; + address_ = ops.offset; + break; + + // Advance the address. + case DW_CFA_advance_loc1: + if (!ParseOperands("1", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc2: + if (!ParseOperands("2", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc4: + if (!ParseOperands("4", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. 
+ case DW_CFA_MIPS_advance_loc8: + if (!ParseOperands("8", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa: + if (!ParseOperands("ro", &ops) || + !DoDefCFA(ops.register_number, ops.offset)) + return false; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa_sf: + if (!ParseOperands("rs", &ops) || + !DoDefCFA(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Change the base register used to compute the CFA. + case DW_CFA_def_cfa_register: { + Rule* cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + if (!ParseOperands("r", &ops)) return false; + cfa_rule->SetBaseRegister(ops.register_number); + if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister)) + return false; + break; + } + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset: + if (!ParseOperands("o", &ops) || !DoDefCFAOffset(ops.offset)) + return false; + break; + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset_sf: + if (!ParseOperands("s", &ops) || + !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Specify an expression whose value is the CFA. + case DW_CFA_def_cfa_expression: { + if (!ParseOperands("e", &ops)) return false; + Rule* rule = new ValExpressionRule(ops.expression); + rules_.SetCFARule(rule); + if (!rule->Handle(handler_, address_, Handler::kCFARegister)) + return false; + break; + } + + // The register's value cannot be recovered. + case DW_CFA_undefined: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new UndefinedRule())) + return false; + break; + } + + // The register's value is unchanged from its value in the caller. 
+ case DW_CFA_same_value: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new SameValueRule())) + return false; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_offset_extended_sf: + if (!ParseOperands("rs", &ops) || + !DoOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_GNU_negative_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + -ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset: + if (!ParseOperands("ro", &ops) || + !DoValOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset_sf: + if (!ParseOperands("rs", &ops) || + !DoValOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register has been saved in another register. + case DW_CFA_register: { + if (!ParseOperands("ro", &ops) || + !DoRule(ops.register_number, new RegisterRule(ops.offset))) + return false; + break; + } + + // An expression yields the address at which the register is saved. + case DW_CFA_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ExpressionRule(ops.expression))) + return false; + break; + } + + // An expression yields the caller's value for the register. 
+ case DW_CFA_val_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ValExpressionRule(ops.expression))) + return false; + break; + } + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore_extended: + if (!ParseOperands("r", &ops) || !DoRestore(ops.register_number)) + return false; + break; + + // Save the current set of rules on a stack. + case DW_CFA_remember_state: + if (!saved_rules_) { + saved_rules_ = new std::stack<RuleMap>(); + } + saved_rules_->push(rules_); + break; + + // Pop the current set of rules off the stack. + case DW_CFA_restore_state: { + if (!saved_rules_ || saved_rules_->empty()) { + reporter_->EmptyStateStack(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + const RuleMap& new_rules = saved_rules_->top(); + if (rules_.CFARule() && !new_rules.CFARule()) { + reporter_->ClearingCFARule(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + rules_.HandleTransitionTo(handler_, address_, new_rules); + rules_ = new_rules; + saved_rules_->pop(); + break; + } + + // No operation. (Padding instruction.) + case DW_CFA_nop: + break; + + // A SPARC register window save: Registers 8 through 15 (%o0-%o7) + // are saved in registers 24 through 31 (%i0-%i7), and registers + // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets + // (0-15 * the register size). The register numbers must be + // hard-coded. A GNU extension, and not a pretty one. + case DW_CFA_GNU_window_save: { + // Save %o0-%o7 in %i0-%i7. + for (int i = 8; i < 16; i++) + if (!DoRule(i, new RegisterRule(i + 16))) return false; + // Save %l0-%l7 and %i0-%i7 at the CFA. + for (int i = 16; i < 32; i++) + // Assume that the byte reader's address size is the same as + // the architecture's register size. !@#%*^ hilarious. + if (!DoRule(i, new OffsetRule(Handler::kCFARegister, + (i - 16) * reader_->AddressSize()))) + return false; + break; + } + + // I'm not sure what this is. 
GDB doesn't use it for unwinding. + case DW_CFA_GNU_args_size: + if (!ParseOperands("o", &ops)) return false; + break; + + // An opcode we don't recognize. + default: { + reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } + + return true; +} + +bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { + Rule* rule = new ValOffsetRule(base_register, offset); + rules_.SetCFARule(rule); + return rule->Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoDefCFAOffset(long offset) { + Rule* cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + cfa_rule->SetOffset(offset); + return cfa_rule->Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) { + rules_.SetRegisterRule(reg, rule); + return rule->Handle(handler_, address_, reg); +} + +bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, new OffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, new ValOffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoRestore(unsigned reg) { + // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. 
+ if (entry_->kind == kCIE) { + reporter_->RestoreInCIE(entry_->offset, CursorOffset()); + return false; + } + Rule* rule = cie_rules_.RegisterRule(reg); + if (!rule) { + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + rule = new SameValueRule(); + } + return DoRule(reg, rule); +} + +bool CallFrameInfo::ReadEntryPrologue(const char* cursor, Entry* entry) { + const char* buffer_end = buffer_ + buffer_length_; + + // Initialize enough of ENTRY for use in error reporting. + entry->offset = cursor - buffer_; + entry->start = cursor; + entry->kind = kUnknown; + entry->end = NULL; + + // Read the initial length. This sets reader_'s offset size. + size_t length_size; + uint64 length = reader_->ReadInitialLength(cursor, &length_size); + if (length_size > size_t(buffer_end - cursor)) return ReportIncomplete(entry); + cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. + if (length > size_t(buffer_end - cursor)) return ReportIncomplete(entry); + + // The length is the number of bytes after the initial length field; + // we have that position handy at this point, so compute the end + // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, + // and the length didn't fit in a size_t, we would have rejected it + // above.) + entry->end = cursor + length; + + // Parse the next field: either the offset of a CIE or a CIE id. 
+ size_t offset_size = reader_->OffsetSize(); + if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); + entry->id = reader_->ReadOffset(cursor); + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. + + // Now we can decide what kind of entry this is. + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + MOZ_ASSERT(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } + } + + // Now advance cursor past the id. + cursor += offset_size; + + // The fields specific to this kind of entry start here. + entry->fields = cursor; + + entry->cie = NULL; + + return true; +} + +bool CallFrameInfo::ReadCIEFields(CIE* cie) { + const char* cursor = cie->fields; + size_t len; + + MOZ_ASSERT(cie->kind == kCIE); + + // Prepare for early exit. + cie->version = 0; + cie->augmentation.clear(); + cie->code_alignment_factor = 0; + cie->data_alignment_factor = 0; + cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; + cie->instructions = 0; + + // Parse the version number. 
+ if (cie->end - cursor < 1) return ReportIncomplete(cie); + cie->version = reader_->ReadOneByte(cursor); + cursor++; + + // If we don't recognize the version, we can't parse any more fields of the + // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a + // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well; + // the difference between those versions seems to be the same as for + // .debug_frame. + if (cie->version < 1 || cie->version > 4) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + + const char* augmentation_start = cursor; + const void* augmentation_end = + memchr(augmentation_start, '\0', cie->end - augmentation_start); + if (!augmentation_end) return ReportIncomplete(cie); + cursor = static_cast<const char*>(augmentation_end); + cie->augmentation = string(augmentation_start, cursor - augmentation_start); + // Skip the terminating '\0'. + cursor++; + + // Is this CFI augmented? + if (!cie->augmentation.empty()) { + // Is it an augmentation we recognize? + if (cie->augmentation[0] == DW_Z_augmentation_start) { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. Augmentations can have arbitrary + // effects on the form of rest of the content, so we have to give up. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + + if (cie->version >= 4) { + // Check that the address_size and segment_size fields are plausible. + if (cie->end - cursor < 2) { + return ReportIncomplete(cie); + } + uint8_t address_size = reader_->ReadOneByte(cursor); + cursor++; + if (address_size != sizeof(void*)) { + // This is not per-se invalid CFI. But we can reasonably expect to + // be running on a target of the same word size as the CFI is for, + // so we reject this case. 
+ reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid address_size"); + return false; + } + uint8_t segment_size = reader_->ReadOneByte(cursor); + cursor++; + if (segment_size != 0) { + // This is also not per-se invalid CFI, but we don't currently handle + // the case of non-zero |segment_size|. + reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid segment_size"); + return false; + } + // We only continue parsing if |segment_size| is zero. If this routine + // is ever changed to allow non-zero |segment_size|, then + // ReadFDEFields() below will have to be changed to match, per comments + // there. + } + + // Parse the code alignment factor. + cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the data alignment factor. + cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the return address register. This is a ubyte in version 1, and + // a ULEB128 in version 3. + if (cie->version == 1) { + if (cursor >= cie->end) return ReportIncomplete(cie); + cie->return_address_register = uint8(*cursor++); + } else { + cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + } + + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. 
+ if (cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const char* data = cursor; + cursor += data_size; + const char* data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case DW_Z_has_LSDA: + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. + break; + + case DW_Z_has_personality_routine: + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. 
+ if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = reader_->ReadEncodedPointer( + data, cie->personality_encoding, &len); + if (len > size_t(data_end - data)) return ReportIncomplete(cie); + data += len; + break; + + case DW_Z_has_FDE_address_encoding: + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case DW_Z_is_signal_trampoline: + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + + // The CIE's instructions start here. + cie->instructions = cursor; + + return true; +} + +bool CallFrameInfo::ReadFDEFields(FDE* fde) { + const char* cursor = fde->fields; + size_t size; + + // At this point, for Dwarf 4 and above, we are assuming that the + // associated CIE has its |segment_size| field equal to zero. This is + // checked for in ReadCIEFields() above. 
If ReadCIEFields() is ever + // changed to allow non-zero |segment_size| CIEs then we will have to read + // the segment_selector value at this point. + + fde->address = + reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, &size); + if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. + if (fde->cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. 
+ if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } + + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } + + // The FDE's instructions start after those. + fde->instructions = cursor; + + return true; +} + +bool CallFrameInfo::Start() { + const char* buffer_end = buffer_ + buffer_length_; + const char* cursor; + bool all_ok = true; + const char* entry_end; + bool ok; + + // Traverse all the entries in buffer_, skipping CIEs and offering + // FDEs to the handler. + for (cursor = buffer_; cursor < buffer_end; + cursor = entry_end, all_ok = all_ok && ok) { + FDE fde; + + // Make it easy to skip this entry with 'continue': assume that + // things are not okay until we've checked all the data, and + // prepare the address of the next entry. + ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. + entry_end = fde.end; + + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + + // In this loop, we skip CIEs. We only parse them fully when we + // parse an FDE that refers to them. This limits our memory + // consumption (beyond the buffer itself) to that needed to + // process the largest single entry. 
+ if (fde.kind != kFDE) { + ok = true; + continue; + } + + // Validate the CIE pointer. + if (fde.id > buffer_length_) { + reporter_->CIEPointerOutOfRange(fde.offset, fde.id); + continue; + } + + CIE cie; + + // Parse this FDE's CIE header. + if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) continue; + // This had better be an actual CIE. + if (cie.kind != kCIE) { + reporter_->BadCIEId(fde.offset, fde.id); + continue; + } + if (!ReadCIEFields(&cie)) continue; + + // We now have the values that govern both the CIE and the FDE. + cie.cie = &cie; + fde.cie = &cie; + + // Parse the FDE's header. + if (!ReadFDEFields(&fde)) continue; + + // Call Entry to ask the consumer if they're interested. + if (!handler_->Entry(fde.offset, fde.address, fde.size, cie.version, + cie.augmentation, cie.return_address_register)) { + // The handler isn't interested in this entry. That's not an error. + ok = true; + continue; + } + + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_->PersonalityRoutine( + cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_->LanguageSpecificDataArea( + fde.lsda_address, IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) continue; + } + } + + // Interpret the CIE's instructions, and then the FDE's instructions. + State state(reader_, handler_, reporter_, fde.address); + ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + + // Report the end of the entry. 
+ handler_->End(); + } + + return all_ok; +} + +const char* CallFrameInfo::KindName(EntryKind kind) { + if (kind == CallFrameInfo::kUnknown) + return "entry"; + else if (kind == CallFrameInfo::kCIE) + return "common information entry"; + else if (kind == CallFrameInfo::kFDE) + return "frame description entry"; + else { + MOZ_ASSERT(kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; + } +} + +bool CallFrameInfo::ReportIncomplete(Entry* entry) { + reporter_->Incomplete(entry->offset, entry->kind); + return false; +} + +void CallFrameInfo::Reporter::Incomplete(uint64 offset, + CallFrameInfo::EntryKind kind) { + char buf[300]; + SprintfLiteral(buf, "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, + uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer is out of range: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer does not point to a CIE: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized version: %d\n", + filename_.c_str(), offset, 
section_.c_str(), version); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, + const string& aug) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized augmentation: '%s'\n", + filename_.c_str(), offset, section_.c_str(), aug.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::InvalidDwarf4Artefact(uint64 offset, + const char* what) { + char* what_safe = strndup(what, 100); + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies invalid Dwarf4 artefact: %s\n", + filename_.c_str(), offset, section_.c_str(), what_safe); + log_(buf); + free(what_safe); +} + +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies invalid pointer encoding: " + "0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies a pointer encoding for which" + " we have no base address: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " the DW_CFA_restore instruction at offset 0x%llx" + " cannot be used in a common information entry\n", + filename_.c_str(), offset, section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadInstruction(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s 
at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx is unrecognized\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::NoCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx assumes that a CFA rule " + "has been set, but none has been set\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " should pop a saved state from the stack, but the stack " + "is empty\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " would clear the CFA rule in effect\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +unsigned int DwarfCFIToModule::RegisterNames::I386() { + /* + 8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi", + 3 "$eip", "$eflags", "$unused1", + 8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + 2 "$unused2", "$unused3", + 8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + 8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + 3 "$fcw", "$fsw", "$mxcsr", + 8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", 
"$unused5", + 2 "$tr", "$ldtr" + */ + return 8 + 3 + 8 + 2 + 8 + 8 + 3 + 8 + 2; +} + +unsigned int DwarfCFIToModule::RegisterNames::X86_64() { + /* + 8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp", + 8 "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", + 1 "$rip", + 8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + 8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15", + 8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + 8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + 1 "$rflags", + 8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2", + 4 "$fs.base", "$gs.base", "$unused3", "$unused4", + 2 "$tr", "$ldtr", + 3 "$mxcsr", "$fcw", "$fsw" + */ + return 8 + 8 + 1 + 8 + 8 + 8 + 8 + 1 + 8 + 4 + 2 + 3; +} + +// Per ARM IHI 0040A, section 3.1 +unsigned int DwarfCFIToModule::RegisterNames::ARM() { + /* + 8 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + 8 "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc", + 8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + 8 "fps", "cpsr", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + 8 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + 8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + 8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + 8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7" + */ + return 13 * 8; +} + +// Per ARM IHI 0057A, section 3.1 +unsigned int DwarfCFIToModule::RegisterNames::ARM64() { + /* + 8 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + 8 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + 8 "x16" "x17", "x18", "x19", "x20", "x21", "x22", "x23", + 8 "x24", "x25", "x26", "x27", "x28", "x29", "x30","sp", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", 
"", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + 8 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + 8 "v16", "v17", "v18", "v19", "v20", "v21", "v22, "v23", + 8 "v24", "x25", "x26, "x27", "v28", "v29", "v30", "v31", + */ + return 12 * 8; +} + +unsigned int DwarfCFIToModule::RegisterNames::MIPS() { + /* + 8 "$zero", "$at", "$v0", "$v1", "$a0", "$a1", "$a2", "$a3", + 8 "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + 8 "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7", + 8 "$t8", "$t9", "$k0", "$k1", "$gp", "$sp", "$fp", "$ra", + 9 "$lo", "$hi", "$pc", "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", + 8 "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", + 7 "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", + 7 "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", + 6 "$f28", "$f29", "$f30", "$f31", "$fcsr", "$fir" + */ + return 8 + 8 + 8 + 8 + 9 + 8 + 7 + 7 + 6; +} + +// See prototype for comments. +int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, string expr, + bool debug, bool pushCfaAtStart, bool derefAtEnd) { + const char* cursor = expr.c_str(); + const char* end1 = cursor + expr.length(); + + char buf[100]; + if (debug) { + SprintfLiteral(buf, "LUL.DW << DwarfExpr, len is %d\n", + (int)(end1 - cursor)); + summ->Log(buf); + } + + // Add a marker for the start of this expression. In it, indicate + // whether or not the CFA should be pushed onto the stack prior to + // evaluation. + int32_t start_ix = + summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0)); + MOZ_ASSERT(start_ix >= 0); + + while (cursor < end1) { + uint8 opc = reader->ReadOneByte(cursor); + cursor++; + + const char* nm = nullptr; + PfxExprOp pxop = PX_End; + + switch (opc) { + case DW_OP_lit0 ... 
DW_OP_lit31: { + int32_t simm32 = (int32_t)(opc - DW_OP_lit0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_lit%d\n", (int)simm32); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32)); + break; + } + + case DW_OP_breg0 ... DW_OP_breg31: { + size_t len; + int64_t n = reader->ReadSignedLEB128(cursor, &len); + cursor += len; + DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_breg%d %lld\n", (int)reg, + (long long int)n); + summ->Log(buf); + } + // PfxInstr only allows a 32 bit signed offset. So we + // must fail if the immediate is out of range. + if (n < INT32_MIN || INT32_MAX < n) goto fail; + (void)summ->AddPfxInstr(PfxInstr(PX_DwReg, reg)); + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n)); + (void)summ->AddPfxInstr(PfxInstr(PX_Add)); + break; + } + + case DW_OP_const4s: { + uint64_t u64 = reader->ReadFourBytes(cursor); + cursor += 4; + // u64 is guaranteed by |ReadFourBytes| to be in the + // range 0 .. FFFFFFFF inclusive. 
But to be safe: + uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF); + int32_t s32 = (int32_t)u32; + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_const4s %d\n", (int)s32); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, s32)); + break; + } + + case DW_OP_deref: + nm = "deref"; + pxop = PX_Deref; + goto no_operands; + case DW_OP_and: + nm = "and"; + pxop = PX_And; + goto no_operands; + case DW_OP_plus: + nm = "plus"; + pxop = PX_Add; + goto no_operands; + case DW_OP_minus: + nm = "minus"; + pxop = PX_Sub; + goto no_operands; + case DW_OP_shl: + nm = "shl"; + pxop = PX_Shl; + goto no_operands; + case DW_OP_ge: + nm = "ge"; + pxop = PX_CmpGES; + goto no_operands; + no_operands: + MOZ_ASSERT(nm && pxop != PX_End); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_%s\n", nm); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(pxop)); + break; + + default: + if (debug) { + SprintfLiteral(buf, "LUL.DW unknown opc %d\n", (int)opc); + summ->Log(buf); + } + goto fail; + + } // switch (opc) + + } // while (cursor < end1) + + MOZ_ASSERT(cursor >= end1); + + if (cursor > end1) { + // We overran the Dwarf expression. Give up. + goto fail; + } + + // For DW_CFA_expression, what the expression denotes is the address + // of where the previous value is located. The caller of this routine + // may therefore request one last dereference before the end marker is + // inserted. + if (derefAtEnd) { + (void)summ->AddPfxInstr(PfxInstr(PX_Deref)); + } + + // Insert an end marker, and declare success. 
+ (void)summ->AddPfxInstr(PfxInstr(PX_End)); + if (debug) { + SprintfLiteral(buf, + "LUL.DW conversion of dwarf expression succeeded, " + "ix = %d\n", + (int)start_ix); + summ->Log(buf); + summ->Log("LUL.DW >>\n"); + } + return start_ix; + +fail: + if (debug) { + summ->Log("LUL.DW conversion of dwarf expression failed\n"); + summ->Log("LUL.DW >>\n"); + } + return -1; +} + +bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const string& augmentation, + unsigned return_address) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%lld\n", + address, length); + summ_->Log(buf); + } + + summ_->Entry(address, length); + + // If dwarf2reader::CallFrameInfo can handle this version and + // augmentation, then we should be okay with that, so there's no + // need to check them here. + + // Get ready to collect entries. + return_address_ = return_address; + + // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI + // may not establish any rule for .ra if the return address column + // is an ordinary register, and that register holds the return + // address on entry to the function. So establish an initial .ra + // rule citing the return address register. + if (return_address_ < num_dw_regs_) { + summ_->Rule(address, return_address_, NODEREF, return_address, 0); + } + + return true; +} + +const UniqueString* DwarfCFIToModule::RegisterName(int i) { + if (i < 0) { + MOZ_ASSERT(i == kCFARegister); + return usu_->ToUniqueString(".cfa"); + } + unsigned reg = i; + if (reg == return_address_) return usu_->ToUniqueString(".ra"); + + char buf[30]; + SprintfLiteral(buf, "dwarf_reg_%u", reg); + return usu_->ToUniqueString(buf); +} + +bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) { + reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg)); + // Treat this as a non-fatal error. 
+ return true; +} + +bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = Same\n", address, reg); + summ_->Log(buf); + } + // reg + 0 + summ_->Rule(address, reg, NODEREF, reg, 0); + return true; +} + +bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, int base_register, + long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = *(r%d + %ld)\n", address, + reg, base_register, offset); + summ_->Log(buf); + } + // *(base_register + offset) + summ_->Rule(address, reg, DEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, int base_register, + long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d + %ld\n", address, reg, + base_register, offset); + summ_->Log(buf); + } + // base_register + offset + summ_->Rule(address, reg, NODEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::RegisterRule(uint64 address, int reg, + int base_register) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d\n", address, reg, + base_register); + summ_->Log(buf); + } + // base_register + 0 + summ_->Rule(address, reg, NODEREF, base_register, 0); + return true; +} + +bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg, + const string& expression) { + bool debug = !!DEBUG_DWARF; + int32_t start_ix = + parseDwarfExpr(summ_, reader_, expression, debug, true /*pushCfaAtStart*/, + true /*derefAtEnd*/); + if (start_ix >= 0) { + summ_->Rule(address, reg, PFXEXPR, 0, start_ix); + } else { + // Parsing of the Dwarf expression failed. Treat this as a + // non-fatal error, hence return |true| even on this path. 
// FIXME: move this somewhere sensible
//
// Returns true if |n| has at most one bit set, i.e. it is zero or an exact
// power of two. (Zero is accepted deliberately: that matches the historical
// behaviour of this helper.)
static bool is_power_of_2(uint64_t n) {
  // Clearing the lowest set bit of |n| leaves zero exactly when no more
  // than one bit was set. For n == 0, n - 1 wraps to all-ones and the
  // result is still zero.
  return (n & (n - 1)) == 0;
}
int)n_complaints); + log_(buf); +} + +} // namespace lul diff --git a/mozglue/baseprofiler/lul/LulDwarfExt.h b/mozglue/baseprofiler/lul/LulDwarfExt.h new file mode 100644 index 0000000000..dcd2500e5a --- /dev/null +++ b/mozglue/baseprofiler/lul/LulDwarfExt.h @@ -0,0 +1,1289 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright 2006, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// This file is derived from the following files in
// toolkit/crashreporter/google-breakpad:
//   src/common/dwarf/types.h
//   src/common/dwarf/dwarf2enums.h
//   src/common/dwarf/bytereader.h
//   src/common/dwarf_cfi_to_module.h
//   src/common/dwarf/dwarf2reader.h

#ifndef LulDwarfExt_h
#define LulDwarfExt_h

#include "LulDwarfSummariser.h"

#include "mozilla/Assertions.h"

#include <stdint.h>
#include <string>

// Short integer type names used throughout the code below.
typedef signed char int8;
typedef short int16;
typedef int int32;
typedef long long int64;

typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long long uint64;

// Pointer-sized integer types, built from the compiler-provided
// __PTRDIFF_TYPE__ macro; compilation fails if it is not defined.
#ifdef __PTRDIFF_TYPE__
typedef __PTRDIFF_TYPE__ intptr;
typedef unsigned __PTRDIFF_TYPE__ uintptr;
#else
#  error "Can't find pointer-sized integral types."
#endif

namespace lul {

class UniqueString;

// Exception handling frame description pointer formats, as described
// by the Linux Standard Base Core Specification 4.0, section 11.5,
// DWARF Extensions.
enum DwarfPointerEncoding {
  DW_EH_PE_absptr = 0x00,
  DW_EH_PE_omit = 0xff,
  DW_EH_PE_uleb128 = 0x01,
  DW_EH_PE_udata2 = 0x02,
  DW_EH_PE_udata4 = 0x03,
  DW_EH_PE_udata8 = 0x04,
  DW_EH_PE_sleb128 = 0x09,
  DW_EH_PE_sdata2 = 0x0A,
  DW_EH_PE_sdata4 = 0x0B,
  DW_EH_PE_sdata8 = 0x0C,
  DW_EH_PE_pcrel = 0x10,
  DW_EH_PE_textrel = 0x20,
  DW_EH_PE_datarel = 0x30,
  DW_EH_PE_funcrel = 0x40,
  DW_EH_PE_aligned = 0x50,

  // The GNU toolchain sources define this enum value as well,
  // simply to help classify the lower nybble values into signed and
  // unsigned groups.
  DW_EH_PE_signed = 0x08,

  // This is not documented in LSB 4.0, but it is used in both the
  // Linux and OS X toolchains. It can be added to any other
  // encoding (except DW_EH_PE_aligned), and indicates that the
  // encoded value represents the address at which the true address
  // is stored, not the true address itself.
  DW_EH_PE_indirect = 0x80
};

// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN
// because it conflicts with a macro
enum Endianness { ENDIANNESS_BIG, ENDIANNESS_LITTLE };

// A ByteReader knows how to read single- and multi-byte values of
// various endiannesses, sizes, and encodings, as used in DWARF
// debugging information and Linux C++ exception handling data.
class ByteReader {
 public:
  // Construct a ByteReader capable of reading one-, two-, four-, and
  // eight-byte values according to ENDIANNESS, absolute machine-sized
  // addresses, DWARF-style "initial length" values, signed and
  // unsigned LEB128 numbers, and Linux C++ exception handling data's
  // encoded pointers.
  explicit ByteReader(enum Endianness endianness);
  virtual ~ByteReader();

  // Read a single byte from BUFFER and return it as an unsigned 8 bit
  // number.
  uint8 ReadOneByte(const char* buffer) const;

  // Read two bytes from BUFFER and return them as an unsigned 16 bit
  // number, using this ByteReader's endianness.
  uint16 ReadTwoBytes(const char* buffer) const;

  // Read four bytes from BUFFER and return them as an unsigned 32 bit
  // number, using this ByteReader's endianness. This function returns
  // a uint64 so that it is compatible with ReadAddress and
  // ReadOffset. The number it returns will never be outside the range
  // of an unsigned 32 bit integer.
  uint64 ReadFourBytes(const char* buffer) const;

  // Read eight bytes from BUFFER and return them as an unsigned 64
  // bit number, using this ByteReader's endianness.
  uint64 ReadEightBytes(const char* buffer) const;

  // Read an unsigned LEB128 (Little Endian Base 128) number from
  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
  // the number of bytes read.
  //
  // The unsigned LEB128 representation of an integer N is a variable
  // number of bytes:
  //
  // - If N is between 0 and 0x7f, then its unsigned LEB128
  //   representation is a single byte whose value is N.
  //
  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
  //   0x80, followed by the unsigned LEB128 representation of N /
  //   128, rounded towards negative infinity.
  //
  // In other words, we break VALUE into groups of seven bits, put
  // them in little-endian order, and then write them as eight-bit
  // bytes with the high bit on all but the last.
  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;

  // Read a signed LEB128 number from BUFFER and return it as a
  // signed 64 bit integer. Set LEN to the number of bytes read.
  //
  // The signed LEB128 representation of an integer N is a variable
  // number of bytes:
  //
  // - If N is between -0x40 and 0x3f, then its signed LEB128
  //   representation is a single byte whose value is N in two's
  //   complement.
  //
  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
  //   0x80, followed by the signed LEB128 representation of N / 128,
  //   rounded towards negative infinity.
  //
  // In other words, we break VALUE into groups of seven bits, put
  // them in little-endian order, and then write them as eight-bit
  // bytes with the high bit on all but the last.
  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;

  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
  // must be either 4 or 8. (DWARF allows addresses to be any number of
  // bytes in length from 1 to 255, but we only support 32- and 64-bit
  // addresses at the moment.) You must call this before using the
  // ReadAddress member function.
  //
  // For data in a .debug_info section, or something that .debug_info
  // refers to like line number or macro data, the compilation unit
  // header's address_size field indicates the address size to use. Call
  // frame information doesn't indicate its address size (a shortcoming of
  // the spec); you must supply the appropriate size based on the
  // architecture of the target machine.
  void SetAddressSize(uint8 size);

  // Return the current address size, in bytes. This is either 4,
  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
  uint8 AddressSize() const { return address_size_; }

  // Read an address from BUFFER and return it as an unsigned 64 bit
  // integer, respecting this ByteReader's endianness and address size. You
  // must call SetAddressSize before calling this function.
  uint64 ReadAddress(const char* buffer) const;

  // DWARF actually defines two slightly different formats: 32-bit DWARF
  // and 64-bit DWARF. This is *not* related to the size of registers or
  // addresses on the target machine; it refers only to the size of section
  // offsets and data lengths appearing in the DWARF data. One only needs
  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
  // debugging data itself is very large.
  //
  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
  // compilation unit and call frame information entry begins with an
  // "initial length" field, which, in addition to giving the length of the
  // data, also indicates the size of section offsets and lengths appearing
  // in that data. The ReadInitialLength member function, below, reads an
  // initial length and sets the ByteReader's offset size as a side effect.
  // Thus, in the normal process of reading DWARF data, the appropriate
  // offset size is set automatically. So, you should only need to call
  // SetOffsetSize if you are using the same ByteReader to jump from the
  // midst of one block of DWARF data into another.

  // Read a DWARF "initial length" field from START, and return it as
  // an unsigned 64 bit integer, respecting this ByteReader's
  // endianness. Set *LEN to the length of the initial length in
  // bytes, either four or twelve. As a side effect, set this
  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
  // initial length) or 8 (if we see a 64-bit DWARF initial length).
  //
  // A DWARF initial length is either:
  //
  // - a byte count stored as an unsigned 32-bit value less than
  //   0xffffff00, indicating that the data whose length is being
  //   measured uses the 32-bit DWARF format, or
  //
  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
  //   indicating that the data whose length is being measured uses
  //   the 64-bit DWARF format.
  uint64 ReadInitialLength(const char* start, size_t* len);

  // Read an offset from BUFFER and return it as an unsigned 64 bit
  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
  // long. You must call ReadInitialLength or SetOffsetSize before calling
  // this function; see the comments above for details.
  uint64 ReadOffset(const char* buffer) const;

  // Return the current offset size, in bytes.
  // A return value of 4 indicates that we are reading 32-bit DWARF.
  // A return value of 8 indicates that we are reading 64-bit DWARF.
  uint8 OffsetSize() const { return offset_size_; }

  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
  // Usually, you should not call this function yourself; instead, let a
  // call to ReadInitialLength establish the data's offset size
  // automatically.
  void SetOffsetSize(uint8 size);

  // The Linux C++ ABI uses a variant of DWARF call frame information
  // for exception handling. This data is included in the program's
  // address space as the ".eh_frame" section, and interpreted at
  // runtime to walk the stack, find exception handlers, and run
  // cleanup code. The format is mostly the same as DWARF CFI, with
  // some adjustments made to provide the additional
  // exception-handling data, and to make the data easier to work with
  // in memory --- for example, to allow it to be placed in read-only
  // memory even when describing position-independent code.
  //
  // In particular, exception handling data can select a number of
  // different encodings for pointers that appear in the data, as
  // described by the DwarfPointerEncoding enum. There are actually
  // four axes(!) to the encoding:
  //
  // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
  //   the DWARF LEB128 encoding.
  //
  // - The pointer's signedness: pointers can be signed or unsigned.
  //
  // - The pointer's base address: the data stored in the exception
  //   handling data can be the actual address (that is, an absolute
  //   pointer), or relative to one of a number of different base
  //   addresses --- including that of the encoded pointer itself, for
  //   a form of "pc-relative" addressing.
  //
  // - The pointer may be indirect: it may be the address where the
  //   true pointer is stored. (This is used to refer to things via
  //   global offset table entries, program linkage table entries, or
  //   other tricks used in position-independent code.)
  //
  // There are also two options that fall outside that matrix
  // altogether: the pointer may be omitted, or it may have padding to
  // align it on an appropriate address boundary. (That last option
  // may seem like it should be just another axis, but it is not.)

  // Indicate that the exception handling data is loaded starting at
  // SECTION_BASE, and that the start of its buffer in our own memory
  // is BUFFER_BASE. This allows us to find the address that a given
  // byte in our buffer would have when loaded into the program the
  // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
  void SetCFIDataBase(uint64 section_base, const char* buffer_base);

  // Indicate that the base address of the program's ".text" section
  // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
  void SetTextBase(uint64 text_base);

  // Indicate that the base address for DW_EH_PE_datarel pointers is
  // DATA_BASE. The proper value depends on the ABI; it is usually the
  // address of the global offset table, held in a designated register in
  // position-independent code. You will need to look at the startup code
  // for the target system to be sure. I tried; my eyes bled.
  void SetDataBase(uint64 data_base);

  // Indicate that the base address for the FDE we are processing is
  // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
  // pointers. (This encoding does not seem to be used by the GNU
  // toolchain.)
  void SetFunctionBase(uint64 function_base);

  // Indicate that we are no longer processing any FDE, so any use of
  // a DW_EH_PE_funcrel encoding is an error.
  void ClearFunctionBase();

  // Return true if ENCODING is a valid pointer encoding.
  bool ValidEncoding(DwarfPointerEncoding encoding) const;

  // Return true if we have all the information we need to read a
  // pointer that uses ENCODING. This checks that the appropriate
  // SetFooBase function for ENCODING has been called.
  bool UsableEncoding(DwarfPointerEncoding encoding) const;

  // Read an encoded pointer from BUFFER using ENCODING; return the
  // absolute address it represents, and set *LEN to the pointer's
  // length in bytes, including any padding for aligned pointers.
  //
  // This function calls 'abort' if ENCODING is invalid or refers to a
  // base address this reader hasn't been given, so you should check
  // with ValidEncoding and UsableEncoding first if you would rather
  // die in a more helpful way.
  uint64 ReadEncodedPointer(const char* buffer, DwarfPointerEncoding encoding,
                            size_t* len) const;

 private:
  // Function pointer type for our address and offset readers.
  typedef uint64 (ByteReader::*AddressReader)(const char*) const;

  // Read an offset from BUFFER and return it as an unsigned 64 bit
  // integer. DWARF2/3 define offsets as either 4 or 8 bytes,
  // generally depending on the amount of DWARF2/3 info present.
  // This function pointer gets set by SetOffsetSize.
  AddressReader offset_reader_;

  // Read an address from BUFFER and return it as an unsigned 64 bit
  // integer. DWARF2/3 allow addresses to be any size from 0-255
  // bytes currently. Internally we support 4 and 8 byte addresses,
  // and will CHECK on anything else.
  // This function pointer gets set by SetAddressSize.
  AddressReader address_reader_;

  Endianness endian_;
  uint8 address_size_;
  uint8 offset_size_;

  // Base addresses for Linux C++ exception handling data's encoded pointers.
  bool have_section_base_, have_text_base_, have_data_base_;
  bool have_function_base_;
  uint64 section_base_;
  uint64 text_base_, data_base_, function_base_;
  const char* buffer_base_;
};

// Return the first byte of BUFFER, converted to an unsigned 8 bit value.
inline uint8 ByteReader::ReadOneByte(const char* buffer) const {
  return buffer[0];
}

// Assemble two bytes into a 16 bit value. The bytes are read through an
// unsigned char pointer so that no byte is sign-extended before shifting.
inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const {
  const unsigned char* buffer =
      reinterpret_cast<const unsigned char*>(signed_buffer);
  const uint16 buffer0 = buffer[0];
  const uint16 buffer1 = buffer[1];
  if (endian_ == ENDIANNESS_LITTLE) {
    return buffer0 | buffer1 << 8;
  } else {
    return buffer1 | buffer0 << 8;
  }
}

// Assemble four bytes into a 32 bit value (returned widened to uint64),
// in the byte order selected by endian_.
inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const {
  const unsigned char* buffer =
      reinterpret_cast<const unsigned char*>(signed_buffer);
  const uint32 buffer0 = buffer[0];
  const uint32 buffer1 = buffer[1];
  const uint32 buffer2 = buffer[2];
  const uint32 buffer3 = buffer[3];
  if (endian_ == ENDIANNESS_LITTLE) {
    return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24;
  } else {
    return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24;
  }
}

// Assemble eight bytes into a 64 bit value, in the byte order selected by
// endian_. Each byte is widened to uint64 first so the larger shifts are
// performed at 64 bit width.
inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const {
  const unsigned char* buffer =
      reinterpret_cast<const unsigned char*>(signed_buffer);
  const uint64 buffer0 = buffer[0];
  const uint64 buffer1 = buffer[1];
  const uint64 buffer2 = buffer[2];
  const uint64 buffer3 = buffer[3];
  const uint64 buffer4 = buffer[4];
  const uint64 buffer5 = buffer[5];
  const uint64 buffer6 = buffer[6];
  const uint64 buffer7 = buffer[7];
  if (endian_ == ENDIANNESS_LITTLE) {
    return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 |
           buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56;
  } else {
    return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 |
           buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56;
  }
}

// Read an unsigned LEB128 number.
Each byte contains 7 bits of +// information, plus one bit saying whether the number continues or +// not. + +inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, + size_t* len) const { + uint64 result = 0; + size_t num_read = 0; + unsigned int shift = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + + result |= (static_cast<uint64>(byte & 0x7f)) << shift; + + shift += 7; + + } while (byte & 0x80); + + *len = num_read; + + return result; +} + +// Read a signed LEB128 number. These are like regular LEB128 +// numbers, except the last byte may have a sign bit set. + +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, + size_t* len) const { + int64 result = 0; + unsigned int shift = 0; + size_t num_read = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast<uint64>(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof(result)) && (byte & 0x40)) + result |= -((static_cast<int64>(1)) << shift); + *len = num_read; + return result; +} + +inline uint64 ByteReader::ReadOffset(const char* buffer) const { + MOZ_ASSERT(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64 ByteReader::ReadAddress(const char* buffer) const { + MOZ_ASSERT(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +inline void ByteReader::SetCFIDataBase(uint64 section_base, + const char* buffer_base) { + section_base_ = section_base; + buffer_base_ = buffer_base; + have_section_base_ = true; +} + +inline void ByteReader::SetTextBase(uint64 text_base) { + text_base_ = text_base; + have_text_base_ = true; +} + +inline void ByteReader::SetDataBase(uint64 data_base) { + data_base_ = data_base; + have_data_base_ = true; +} + +inline void ByteReader::SetFunctionBase(uint64 function_base) { + function_base_ = function_base; + have_function_base_ = true; +} + +inline void ByteReader::ClearFunctionBase() { have_function_base_ = false; } + +// 
(derived from) +// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which +// accepts parsed DWARF call frame info and adds it to a Summariser object. + +// This class is a reader for DWARF's Call Frame Information. CFI +// describes how to unwind stack frames --- even for functions that do +// not follow fixed conventions for saving registers, whose frame size +// varies as they execute, etc. +// +// CFI describes, at each machine instruction, how to compute the +// stack frame's base address, how to find the return address, and +// where to find the saved values of the caller's registers (if the +// callee has stashed them somewhere to free up the registers for its +// own use). +// +// For example, suppose we have a function whose machine code looks +// like this (imagine an assembly language that looks like C, for a +// machine with 32-bit registers, and a stack that grows towards lower +// addresses): +// +// func: ; entry point; return address at sp +// func+0: sp = sp - 16 ; allocate space for stack frame +// func+1: sp[12] = r0 ; save r0 at sp+12 +// ... ; other code, not frame-related +// func+10: sp -= 4; *sp = x ; push some x on the stack +// ... ; other code, not frame-related +// func+20: r0 = sp[16] ; restore saved r0 +// func+21: sp += 20 ; pop whole stack frame +// func+22: pc = *sp; sp += 4 ; pop return address and jump to it +// +// DWARF CFI is (a very compressed representation of) a table with a +// row for each machine instruction address and a column for each +// register showing how to restore it, if possible. +// +// A special column named "CFA", for "Canonical Frame Address", tells how +// to compute the base address of the frame; registers' entries may +// refer to the CFA in describing where the registers are saved. +// +// Another special column, named "RA", represents the return address. +// +// For example, here is a complete (uncompressed) table describing the +// function above: +// +// insn cfa r0 r1 ... 
ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 cfa[0] +// func+2: sp+16 cfa[-4] cfa[0] +// func+11: sp+20 cfa[-4] cfa[0] +// func+21: sp+20 cfa[0] +// func+22: sp cfa[0] +// +// Some things to note here: +// +// - Each row describes the state of affairs *before* executing the +// instruction at the given address. Thus, the row for func+0 +// describes the state before we allocate the stack frame. In the +// next row, the formula for computing the CFA has changed, +// reflecting that allocation. +// +// - The other entries are written in terms of the CFA; this allows +// them to remain unchanged as the stack pointer gets bumped around. +// For example, the rule for recovering the return address (the "ra" +// column) remains unchanged throughout the function, even as the +// stack pointer takes on three different offsets from the return +// address. +// +// - Although we haven't shown it, most calling conventions designate +// "callee-saves" and "caller-saves" registers. The callee must +// preserve the values of callee-saves registers; if it uses them, +// it must save their original values somewhere, and restore them +// before it returns. In contrast, the callee is free to trash +// caller-saves registers; if the callee uses these, it will +// probably not bother to save them anywhere, and the CFI will +// probably mark their values as "unrecoverable". +// +// (However, since the caller cannot assume the callee was going to +// save them, caller-saves registers are probably dead in the caller +// anyway, so compilers usually don't generate CFA for caller-saves +// registers.) +// +// - Exactly where the CFA points is a matter of convention that +// depends on the architecture and ABI in use. In the example, the +// CFA is the value the stack pointer had upon entry to the +// function, pointing at the saved return address. 
But on the x86, +// the call frame information generated by GCC follows the +// convention that the CFA is the address *after* the saved return +// address. +// +// But by definition, the CFA remains constant throughout the +// lifetime of the frame. This makes it a useful value for other +// columns to refer to. It also gives debuggers a useful handle +// for identifying a frame. +// +// If you look at the table above, you'll notice that a given entry is +// often the same as the one immediately above it: most instructions +// change only one or two aspects of the stack frame, if they affect +// it at all. The DWARF format takes advantage of this fact, and +// reduces the size of the data by mentioning only the addresses and +// columns at which changes take place. So for the above, DWARF CFI +// data would only actually mention the following: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 +// func+2: cfa[-4] +// func+11: sp+20 +// func+21: r0 +// func+22: sp +// +// In fact, this is the way the parser reports CFI to the consumer: as +// a series of statements of the form, "At address X, column Y changed +// to Z," and related conventions for describing the initial state. +// +// Naturally, it would be impractical to have to scan the entire +// program's CFI, noting changes as we go, just to recover the +// unwinding rules in effect at one particular instruction. To avoid +// this, CFI data is grouped into "entries", each of which covers a +// specified range of addresses and begins with a complete statement +// of the rules for all recoverable registers at that starting +// address. Each entry typically covers a single function.
+// +// Thus, to compute the contents of a given row of the table --- that +// is, rules for recovering the CFA, RA, and registers at a given +// instruction --- the consumer should find the entry that covers that +// instruction's address, start with the initial state supplied at the +// beginning of the entry, and work forward until it has processed all +// the changes up to and including those for the present instruction. +// +// There are seven kinds of rules that can appear in an entry of the +// table: +// +// - "undefined": The given register is not preserved by the callee; +// its value cannot be recovered. +// +// - "same value": This register has the same value it did in the callee. +// +// - offset(N): The register is saved at offset N from the CFA. +// +// - val_offset(N): The value the register had in the caller is the +// CFA plus offset N. (This is usually only useful for describing +// the stack pointer.) +// +// - register(R): The register's value was saved in another register R. +// +// - expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the address at which the +// register was saved. +// +// - val_expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the value the register +// had in the caller. + +class CallFrameInfo { + public: + // The different kinds of entries one finds in CFI. Used internally, + // and for error reporting. + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; + + // The handler class to which the parser hands the parsed call frame + // information. Defined below. + class Handler; + + // A reporter class, which CallFrameInfo uses to report errors + // encountered while parsing call frame information. Defined below. + class Reporter; + + // Create a DWARF CFI parser. BUFFER points to the contents of the + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. 
+ // REPORTER is an error reporter the parser should use to report + // problems. READER is a ByteReader instance that has the endianness and + // address size set properly. Report the data we find to HANDLER. + // + // This class can also parse Linux C++ exception handling data, as found + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is + // placed in loadable segments so that it is present in the program's + // address space, and is interpreted by the C++ runtime to search the + // call stack for a handler interested in the exception being thrown, + // actually pop the frames, and find cleanup code to run. + // + // There are two differences between the call frame information described + // in the DWARF standard and the exception handling data Linux places in + // the .eh_frame section: + // + // - Exception handling data uses uses a different format for call frame + // information entry headers. The distinguished CIE id, the way FDEs + // refer to their CIEs, and the way the end of the series of entries is + // determined are all slightly different. + // + // If the constructor's EH_FRAME argument is true, then the + // CallFrameInfo parses the entry headers as Linux C++ exception + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses. 
See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method. + // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html + + CallFrameInfo(const char* buffer, size_t buffer_length, ByteReader* reader, + Handler* handler, Reporter* reporter, bool eh_frame = false) + : buffer_(buffer), + buffer_length_(buffer_length), + reader_(reader), + handler_(handler), + reporter_(reporter), + eh_frame_(eh_frame) {} + + ~CallFrameInfo() {} + + // Parse the entries in BUFFER, reporting what we find to HANDLER. + // Return true if we reach the end of the section successfully, or + // false if we encounter an error. + bool Start(); + + // Return the textual name of KIND. For error reporting. 
+ static const char* KindName(EntryKind kind); + + private: + struct CIE; + + // A CFI entry, either an FDE or a CIE. + struct Entry { + // The starting offset of the entry in the section, for error + // reporting. + size_t offset; + + // The start of this entry in the buffer. + const char* start; + + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + + // The end of this entry's common prologue (initial length and id), and + // the start of this entry's kind-specific fields. + const char* fields; + + // The start of this entry's instructions. + const char* instructions; + + // The address past the entry's last byte in the buffer. (Note that + // since offset points to the entry's initial length field, and the + // length field is the number of bytes after that field, this is not + // simply buffer_ + offset + length.) + const char* end; + + // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. + uint64 id; + + // The CIE that applies to this entry, if we've parsed it. If this is a + // CIE, then this field points to this structure. + CIE* cie; + }; + + // A common information entry (CIE). + struct CIE : public Entry { + uint8 version; // CFI data version number + std::string augmentation; // vendor format extension markers + uint64 code_alignment_factor; // scale for code address adjustments + int data_alignment_factor; // scale for stack pointer adjustments + unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. 
+ bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64 personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. + DwarfPointerEncoding pointer_encoding; + }; + + // A frame description entry (FDE). + struct FDE : public Entry { + uint64 address; // start address of described code + uint64 size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64 lsda_address; + }; + + // Internal use. + class Rule; + class UndefinedRule; + class SameValueRule; + class OffsetRule; + class ValOffsetRule; + class RegisterRule; + class ExpressionRule; + class ValExpressionRule; + class RuleMap; + class State; + + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. 
Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. + bool ReadEntryPrologue(const char* cursor, Entry* entry); + + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. + bool ReadCIEFields(CIE* cie); + + // Parse the fields of an FDE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of *FDE are + // initialized; use FDE->fields and FDE->end as the start and limit for + // parsing. Assume that FDE->cie is fully initialized. On success, + // populate the rest of *FDE, and return true; on failure, report the + // problem and return false. + bool ReadFDEFields(FDE* fde); + + // Report that ENTRY is incomplete, and return false. This is just a + // trivial wrapper for invoking reporter_->Incomplete; it provides a + // little brevity. + bool ReportIncomplete(Entry* entry); + + // Return true if ENCODING has the DW_EH_PE_indirect bit set. + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { + return encoding & DW_EH_PE_indirect; + } + + // The contents of the CFI section (.debug_frame or .eh_frame) we're + // parsing, and its length. + const char* buffer_; + size_t buffer_length_; + + // For reading multi-byte values with the appropriate endianness. + ByteReader* reader_; + + // The handler to which we should report the data we find. + Handler* handler_; + + // For reporting problems in the info we're parsing. + Reporter* reporter_; + + // True if we are processing .eh_frame-format data. + bool eh_frame_; +}; + +// The handler class for CallFrameInfo. The CFI parser calls the +// member functions of a handler object to report the data it finds. 
+class CallFrameInfo::Handler { + public: + // The pseudo-register number for the canonical frame address. + enum { kCFARegister = DW_REG_CFA }; + + Handler() {} + virtual ~Handler() {} + + // The parser has found CFI for the machine code at ADDRESS, + // extending for LENGTH bytes. OFFSET is the offset of the frame + // description entry in the section, for use in error messages. + // VERSION is the version number of the CFI format. AUGMENTATION is + // a string describing any producer-specific extensions present in + // the data. RETURN_ADDRESS is the number of the register that holds + // the address to which the function should return. + // + // Entry should return true to process this CFI, or false to skip to + // the next entry. + // + // The parser invokes Entry for each Frame Description Entry (FDE) + // it finds. The parser doesn't report Common Information Entries + // to the handler explicitly; instead, if the handler elects to + // process a given FDE, the parser reiterates the appropriate CIE's + // contents at the beginning of the FDE's rules. + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string& augmentation, + unsigned return_address) = 0; + + // When the Entry function returns true, the parser calls these + // handler functions repeatedly to describe the rules for recovering + // registers at each instruction in the given range of machine code. + // Immediately after a call to Entry, the handler should assume that + // the rule for each callee-saves register is "unchanged" --- that + // is, that the register still has the value it had in the caller. + // + // If a *Rule function returns true, we continue processing this entry's + // instructions. If a *Rule function returns false, we stop evaluating + // instructions, and skip to the next entry. Either way, we call End + // before going on to the next entry. 
+ // + // In all of these functions, if the REG parameter is kCFARegister, then + // the rule describes how to find the canonical frame address. + // kCFARegister may be passed as a BASE_REGISTER argument, meaning that + // the canonical frame address should be used as the base address for the + // computation. All other REG values will be positive. + + // At ADDRESS, register REG's value is not recoverable. + virtual bool UndefinedRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG's value is the same as that it had in + // the caller. + virtual bool SameValueRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG has been saved at offset OFFSET from + // BASE_REGISTER. + virtual bool OffsetRule(uint64 address, int reg, int base_register, + long offset) = 0; + + // At ADDRESS, the caller's value of register REG is the current + // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an + // address at which the register's value is saved.) + virtual bool ValOffsetRule(uint64 address, int reg, int base_register, + long offset) = 0; + + // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs + // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that + // BASE_REGISTER is the "home" for REG's saved value: if you want to + // assign to a variable whose home is REG in the calling frame, you + // should put the value in BASE_REGISTER. + virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the address at + // which REG was saved. + virtual bool ExpressionRule(uint64 address, int reg, + const std::string& expression) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the caller's + // value for REG. (This rule doesn't provide an address at which the + // register's value is saved.) 
+ virtual bool ValExpressionRule(uint64 address, int reg, + const std::string& expression) = 0; + + // Indicate that the rules for the address range reported by the + // last call to Entry are complete. End should return true if + // everything is okay, or false if an error has occurred and parsing + // should stop. + virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way. + // CFI entries used for exception handling have two additional data + // associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. 
The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64 address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } +}; + +// The CallFrameInfo class makes calls on an instance of this class to +// report errors or warn about problems in the data it is parsing. +// These messages are sent to the message sink |aLog| provided to the +// constructor. +class CallFrameInfo::Reporter { + public: + // Create an error reporter which attributes troubles to the section + // named SECTION in FILENAME. + // + // Normally SECTION would be .debug_frame, but the Mac puts CFI data + // in a Mach-O section named __debug_frame. If we support + // Linux-style exception handling data, we could be reading an + // .eh_frame section. + Reporter(void (*aLog)(const char*), const std::string& filename, + const std::string& section = ".debug_frame") + : log_(aLog), filename_(filename), section_(section) {} + virtual ~Reporter() {} + + // The CFI entry at OFFSET ends too early to be well-formed. 
KIND + // indicates what kind of entry it is; KIND can be kUnknown if we + // haven't parsed enough of the entry to tell yet. + virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); + + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64 offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the + // section is not that large. + virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry + // there is not a CIE. + virtual void BadCIEId(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to a CIE with version number VERSION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // a version number we don't recognize. + virtual void UnrecognizedVersion(uint64 offset, int version); + + // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // augmentations we don't recognize. + virtual void UnrecognizedAugmentation(uint64 offset, + const std::string& augmentation); + + // The FDE at OFFSET contains an invalid or otherwise unusable Dwarf4 + // specific field (currently, only "address_size" or "segment_size"). + // Parsing DWARF CFI with unexpected values here seems dubious at best, + // so we stop. WHAT gives a little more information about what is wrong. + virtual void InvalidDwarf4Artefact(uint64 offset, const char* what); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. 
+ virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); + + // The CIE at OFFSET contains a DW_CFA_restore instruction at + // INSN_OFFSET, which may not appear in a CIE. + virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); + + // The entry at OFFSET, of kind KIND, has an unrecognized + // instruction at INSN_OFFSET. + virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, establishes a rule that cites the CFA, but we have not + // established a CFA rule yet. + virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, is a DW_CFA_restore_state instruction, but the stack of + // saved states is empty. + virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry + // at OFFSET, of kind KIND, would restore a state that has no CFA + // rule, whereas the current state does have a CFA rule. This is + // bogus input, which the CallFrameInfo::Handler interface doesn't + // (and shouldn't) have any way to report. + virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + + protected: + // The name of the file whose CFI we're reading. + std::string filename_; + + // The name of the CFI section in that file. + std::string section_; +}; + +using lul::CallFrameInfo; +using lul::Summariser; + +// A class that accepts parsed call frame information from the DWARF +// CFI parser and hands the contents on to a lul::Summariser. 
+class DwarfCFIToModule : public CallFrameInfo::Handler { + public: + // DwarfCFIToModule uses an instance of this class to report errors + // detected while converting DWARF CFI to Breakpad STACK CFI records. + class Reporter { + public: + // Create a reporter that writes messages to the message sink + // |aLog|. FILE is the name of the file we're processing, and + // SECTION is the name of the section within that file that we're + // looking at (.debug_frame, .eh_frame, etc.). + Reporter(void (*aLog)(const char*), const std::string& file, + const std::string& section) + : log_(aLog), file_(file), section_(section) {} + virtual ~Reporter() {} + + // The DWARF CFI entry at OFFSET says that REG is undefined, but the + // Breakpad symbol file format cannot express this. + virtual void UndefinedNotSupported(size_t offset, const UniqueString* reg); + + // The DWARF CFI entry at OFFSET says that REG uses a DWARF + // expression to find its value, but parseDwarfExpr could not + // convert it to a sequence of PfxInstrs. + virtual void ExpressionCouldNotBeSummarised(size_t offset, + const UniqueString* reg); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + + protected: + std::string file_, section_; + }; + + // Per-architecture register information. Each function returns an + // unsigned int, not a table of names. NOTE(review): presumably this + // is the number of DWARF-defined registers for the architecture, as + // taken by the DwarfCFIToModule constructor's num_dw_regs parameter; + // confirm against the definitions. + class RegisterNames { + public: + // Intel's "x86" or IA-32. + static unsigned int I386(); + + // AMD x86_64, AMD64, Intel EM64T, or Intel 64 + static unsigned int X86_64(); + + // ARM. + static unsigned int ARM(); + + // AARCH64. + static unsigned int ARM64(); + + // MIPS. + static unsigned int MIPS(); + }; + + // Create a handler for the dwarf2reader::CallFrameInfo parser that + // records the stack unwinding information it receives in SUMM. 
+ // + // NUM_DW_REGS is the number of DWARF-defined register names for this + // architecture. USU is the universe in which any UniqueStrings that + // are needed get created, and READER is used for parsing DWARF + // expressions. + // + // Use REPORTER for reporting problems encountered in the conversion + // process. + // + // Every data member, including entry_offset_, is given a defined + // initial value here. + DwarfCFIToModule(const unsigned int num_dw_regs, Reporter* reporter, + ByteReader* reader, + /*MOD*/ UniqueStringUniverse* usu, + /*OUT*/ Summariser* summ) + : summ_(summ), + usu_(usu), + num_dw_regs_(num_dw_regs), + reporter_(reporter), + reader_(reader), + entry_offset_(0), + return_address_(-1) {} + virtual ~DwarfCFIToModule() {} + + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string& augmentation, + unsigned return_address) override; + virtual bool UndefinedRule(uint64 address, int reg) override; + virtual bool SameValueRule(uint64 address, int reg) override; + virtual bool OffsetRule(uint64 address, int reg, int base_register, + long offset) override; + virtual bool ValOffsetRule(uint64 address, int reg, int base_register, + long offset) override; + virtual bool RegisterRule(uint64 address, int reg, + int base_register) override; + virtual bool ExpressionRule(uint64 address, int reg, + const std::string& expression) override; + virtual bool ValExpressionRule(uint64 address, int reg, + const std::string& expression) override; + virtual bool End() override; + + private: + // Return the name to use for register I. + const UniqueString* RegisterName(int i); + + // The Summariser to which we should give entries + Summariser* summ_; + + // Universe for creating UniqueStrings in, should that be necessary. + UniqueStringUniverse* usu_; + + // The number of Dwarf-defined register names for this architecture. + const unsigned int num_dw_regs_; + + // The reporter to use to report problems. + Reporter* reporter_; + + // The ByteReader to use for parsing Dwarf expressions. 
+ ByteReader* reader_; + + // The section offset of the current frame description entry, for + // use in error messages. + size_t entry_offset_; + + // The return address column for that entry. + unsigned return_address_; +}; + +// Convert the Dwarf expression in |expr| into PfxInstrs stored in the +// SecMap referred to by |summ|, and return the index of the starting +// PfxInstr added, which must be >= 0. In case of failure return -1. +int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, + std::string expr, bool debug, bool pushCfaAtStart, + bool derefAtEnd); + +} // namespace lul + +#endif // LulDwarfExt_h diff --git a/mozglue/baseprofiler/lul/LulDwarfInt.h b/mozglue/baseprofiler/lul/LulDwarfInt.h new file mode 100644 index 0000000000..b72c6e08e3 --- /dev/null +++ b/mozglue/baseprofiler/lul/LulDwarfInt.h @@ -0,0 +1,193 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file is derived from the following file in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/dwarf2enums.h + +#ifndef LulDwarfInt_h +#define LulDwarfInt_h + +#include "LulCommonExt.h" +#include "LulDwarfExt.h" + +namespace lul { + +// These enums do not follow the google3 style only because they are +// known universally (specs, other implementations) by the names in +// exactly this capitalization. +// Tag names and codes. + +// Call Frame Info instructions. 
+enum DwarfCFI { + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80, + DW_CFA_restore = 0xc0, + DW_CFA_nop = 0x00, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + + // Opcodes in this range are reserved for user extensions. + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + // SGI/MIPS specific. + DW_CFA_MIPS_advance_loc8 = 0x1d, + + // GNU extensions. + DW_CFA_GNU_window_save = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + DW_CFA_GNU_negative_offset_extended = 0x2f +}; + +// Exception handling 'z' augmentation letters. +enum DwarfZAugmentationCodes { + // If the CFI augmentation string begins with 'z', then the CIE and FDE + // have an augmentation data area just before the instructions, whose + // contents are determined by the subsequent augmentation letters. + DW_Z_augmentation_start = 'z', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, and the FDE + // augmentation data includes a language-specific data area pointer, + // represented using that encoding. + DW_Z_has_LSDA = 'L', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, followed by a pointer + // to a personality routine, represented using that encoding. 
+ DW_Z_has_personality_routine = 'P', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding describing how the FDE's + // initial location, address range, and DW_CFA_set_loc operands are + // encoded. + DW_Z_has_FDE_address_encoding = 'R', + + // If this letter is present in a 'z' augmentation string, then code + // addresses covered by FDEs that cite this CIE are signal delivery + // trampolines. Return addresses of frames in trampolines should not be + // adjusted as described in section 6.4.4 of the DWARF 3 spec. + DW_Z_is_signal_trampoline = 'S' +}; + +// Expression opcodes +enum DwarfExpressionOpcodes { + DW_OP_addr = 0x03, + DW_OP_deref = 0x06, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_const8u = 0x0e, + DW_OP_const8s = 0x0f, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1a, + DW_OP_div = 0x1b, + DW_OP_minus = 0x1c, + DW_OP_mod = 0x1d, + DW_OP_mul = 0x1e, + DW_OP_neg = 0x1f, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2f, + DW_OP_bra = 0x28, + DW_OP_eq = 0x29, + DW_OP_ge = 0x2a, + DW_OP_gt = 0x2b, + DW_OP_le = 0x2c, + DW_OP_lt = 0x2d, + DW_OP_ne = 0x2e, + DW_OP_lit0 = 0x30, + DW_OP_lit31 = 0x4f, + DW_OP_reg0 = 0x50, + DW_OP_reg31 = 0x6f, + DW_OP_breg0 = 0x70, + DW_OP_breg31 = 0x8f, + DW_OP_regx = 0x90, + DW_OP_fbreg = 0x91, + DW_OP_bregx = 0x92, + DW_OP_piece = 0x93, + DW_OP_deref_size = 0x94, + DW_OP_xderef_size = 0x95, + DW_OP_nop = 0x96, + DW_OP_push_object_address = 0x97, + DW_OP_call2 = 0x98, + DW_OP_call4 = 0x99, + DW_OP_call_ref = 0x9a, + DW_OP_form_tls_address = 0x9b, + 
DW_OP_call_frame_cfa = 0x9c, + DW_OP_bit_piece = 0x9d, + DW_OP_lo_user = 0xe0, + DW_OP_hi_user = 0xff +}; + +} // namespace lul + +#endif // LulDwarfInt_h diff --git a/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp b/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp new file mode 100644 index 0000000000..ff0f212f6c --- /dev/null +++ b/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp @@ -0,0 +1,553 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LulDwarfSummariser.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "LulDwarfExt.h" + +// Set this to 1 for verbose logging +#define DEBUG_SUMMARISER 0 + +namespace lul { + +// Do |s64|'s lowest 32 bits sign extend back to |s64| itself? +static inline bool fitsIn32Bits(int64 s64) { + return s64 == ((s64 & 0xffffffff) ^ 0x80000000) - 0x80000000; +} + +// Check a LExpr prefix expression, starting at pfxInstrs[start] up to +// the next PX_End instruction, to ensure that: +// * It only mentions registers that are tracked on this target +// * The start point is sane +// If the expression is ok, return NULL. Else return a const char* +// pointing to a bit of text describing the problem. 
+static const char* checkPfxExpr(const vector<PfxInstr>* pfxInstrs, + int64_t start) { + size_t nInstrs = pfxInstrs->size(); + if (start < 0 || start >= (ssize_t)nInstrs) { + return "bogus start point"; + } + size_t i; + for (i = start; i < nInstrs; i++) { + PfxInstr pxi = (*pfxInstrs)[i]; + if (pxi.mOpcode == PX_End) break; + if (pxi.mOpcode == PX_DwReg && + !registerIsTracked((DW_REG_NUMBER)pxi.mOperand)) { + return "uses untracked reg"; + } + } + return nullptr; // success +} + +Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias, + void (*aLog)(const char*)) + : mSecMap(aSecMap), mTextBias(aTextBias), mLog(aLog) { + mCurrAddr = 0; + mMax1Addr = 0; // Gives an empty range. + + // Initialise the running RuleSet to "haven't got a clue" status. + new (&mCurrRules) RuleSet(); +} + +void Summariser::Entry(uintptr_t aAddress, uintptr_t aLength) { + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + SprintfLiteral(buf, "LUL Entry(%llx, %llu)\n", + (unsigned long long int)aAddress, + (unsigned long long int)aLength); + mLog(buf); + } + // This throws away any previous summary, that is, assumes + // that the previous summary, if any, has been properly finished + // by a call to End(). + mCurrAddr = aAddress; + mMax1Addr = aAddress + aLength; + new (&mCurrRules) RuleSet(); +} + +void Summariser::Rule(uintptr_t aAddress, int aNewReg, LExprHow how, + int16_t oldReg, int64_t offset) { + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + if (how == NODEREF || how == DEREF) { + bool deref = how == DEREF; + SprintfLiteral(buf, "LUL 0x%llx old-r%d = %sr%d + %lld%s\n", + (unsigned long long int)aAddress, aNewReg, + deref ? "*(" : "", (int)oldReg, (long long int)offset, + deref ? 
")" : ""); + } else if (how == PFXEXPR) { + SprintfLiteral(buf, "LUL 0x%llx old-r%d = pfx-expr-at %lld\n", + (unsigned long long int)aAddress, aNewReg, + (long long int)offset); + } else { + SprintfLiteral(buf, "LUL 0x%llx old-r%d = (invalid LExpr!)\n", + (unsigned long long int)aAddress, aNewReg); + } + mLog(buf); + } + + if (mCurrAddr < aAddress) { + // Flush the existing summary first. + mCurrRules.mAddr = mCurrAddr; + mCurrRules.mLen = aAddress - mCurrAddr; + mSecMap->AddRuleSet(&mCurrRules); + if (DEBUG_SUMMARISER) { + mLog("LUL "); + mCurrRules.Print(mLog); + mLog("\n"); + } + mCurrAddr = aAddress; + } + + // If for some reason summarisation fails, either or both of these + // become non-null and point at constant text describing the + // problem. Using two rather than just one avoids complications of + // having to concatenate two strings to produce a complete error message. + const char* reason1 = nullptr; + const char* reason2 = nullptr; + + // |offset| needs to be a 32 bit value that sign extends to 64 bits + // on a 64 bit target. We will need to incorporate |offset| into + // any LExpr made here. So we may as well check it right now. + if (!fitsIn32Bits(offset)) { + reason1 = "offset not in signed 32-bit range"; + goto cant_summarise; + } + + // FIXME: factor out common parts of the arch-dependent summarisers. + +#if defined(GP_ARCH_arm) + + // ----------------- arm ----------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we + // choose to represent are: r7/11/12/13 + offset. The offset + // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM, + // hence there is no need to check it for overflow. 
+ if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + switch (oldReg) { + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + break; + default: + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + case DW_REG_ARM_R14: + case DW_REG_ARM_R15: { + // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or + // R15 (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for R7/11/12/13/14/15: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for R7/11/12/13/14/15: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_ARM_R7: + mCurrRules.mR7expr = expr; + break; + case DW_REG_ARM_R11: + mCurrRules.mR11expr = expr; + break; + case DW_REG_ARM_R12: + mCurrRules.mR12expr = expr; + break; + case DW_REG_ARM_R13: + mCurrRules.mR13expr = expr; + break; + case DW_REG_ARM_R14: + mCurrRules.mR14expr = expr; + break; + case DW_REG_ARM_R15: + mCurrRules.mR15expr = expr; + break; + default: + MOZ_ASSERT(0); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here. This program point + // is reached so often that it causes a flood of "Can't + // summarise" messages. 
In any case, we don't really care about + // the fact that this summary would produce a new value for a + // register that we're not tracking. We do on the other hand + // care if the summary's expression *uses* a register that we're + // not tracking. But in that case one of the above failures + // should tell us which. + goto cant_summarise; + } + + // Mark callee-saved registers (r4 .. r11) as unchanged, if there is + // no other information about them. FIXME: do this just once, at + // the point where the ruleset is committed. + if (mCurrRules.mR7expr.mHow == UNKNOWN) { + mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0); + } + if (mCurrRules.mR11expr.mHow == UNKNOWN) { + mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0); + } + if (mCurrRules.mR12expr.mHow == UNKNOWN) { + mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0); + } + + // The old r13 (SP) value before the call is always the same as the + // CFA. + mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0); + + // If there's no information about R15 (the return address), say + // it's a copy of R14 (the link register). + if (mCurrRules.mR15expr.mHow == UNKNOWN) { + mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0); + } + +#elif defined(GP_ARCH_arm64) + + // ----------------- arm64 ----------------- // + + switch (aNewReg) { + case DW_REG_CFA: + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + switch (oldReg) { + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_SP: + break; + default: + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_X30: + case DW_REG_AARCH64_SP: { + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. 
+ if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for X29/X30/SP: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for X29/X30/SP: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_AARCH64_X29: + mCurrRules.mX29expr = expr; + break; + case DW_REG_AARCH64_X30: + mCurrRules.mX30expr = expr; + break; + case DW_REG_AARCH64_SP: + mCurrRules.mSPexpr = expr; + break; + default: + MOZ_ASSERT(0); + } + break; + } + default: + // Leave |reason1| and |reason2| unset here, for the reasons explained + // in the analogous point + goto cant_summarise; + } + + if (mCurrRules.mX29expr.mHow == UNKNOWN) { + mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0); + } + if (mCurrRules.mX30expr.mHow == UNKNOWN) { + mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0); + } + // On aarch64, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mSPexpr.mHow == UNKNOWN) { + mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0); + } +#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + + // ---------------- x64/x86 ---------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: { + // This is a rule that defines the CFA. The only forms we choose to + // represent are: = SP+offset, = FP+offset, or =prefix-expr. 
+ switch (how) { + case NODEREF: + if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) { + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + break; + case DEREF: + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for CFA: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + } + + case DW_REG_INTEL_XSP: + case DW_REG_INTEL_XBP: + case DW_REG_INTEL_XIP: { + // This is a new rule for XSP, XBP or XIP (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for XSP/XBP/XIP: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for XSP/XBP/XIP: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_INTEL_XBP: + mCurrRules.mXbpExpr = expr; + break; + case DW_REG_INTEL_XSP: + mCurrRules.mXspExpr = expr; + break; + case DW_REG_INTEL_XIP: + mCurrRules.mXipExpr = expr; + break; + default: + MOZ_CRASH("impossible value for aNewReg"); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here, for the reasons + // explained in the analogous point in the ARM case just above. + goto cant_summarise; + } + + // On Intel, it seems the old SP value before the call is always the + // same as the CFA. 
Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mXspExpr.mHow == UNKNOWN) { + mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0); + } + + // Also, gcc says "Undef" for BP when it is unchanged. + if (mCurrRules.mXbpExpr.mHow == UNKNOWN) { + mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0); + } + +#elif defined(GP_ARCH_mips64) + // ---------------- mips ---------------- // + // + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we can + // represent are: = SP+offset or = FP+offset. + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + if (oldReg != DW_REG_MIPS_SP && oldReg != DW_REG_MIPS_FP) { + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_MIPS_SP: + case DW_REG_MIPS_FP: + case DW_REG_MIPS_PC: { + // This is a new rule for SP, FP or PC (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for SP/FP/PC: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. 
+ const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for SP/FP/PC: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_MIPS_FP: + mCurrRules.mFPexpr = expr; + break; + case DW_REG_MIPS_SP: + mCurrRules.mSPexpr = expr; + break; + case DW_REG_MIPS_PC: + mCurrRules.mPCexpr = expr; + break; + default: + MOZ_CRASH("impossible value for aNewReg"); + } + break; + } + default: + // Leave |reason1| and |reason2| unset here, for the reasons + // explained in the analogous point in the ARM case just above. + goto cant_summarise; + } + + // On MIPS, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mSPexpr.mHow == UNKNOWN) { + mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0); + } + + // Also, gcc says "Undef" for FP when it is unchanged. + if (mCurrRules.mFPexpr.mHow == UNKNOWN) { + mCurrRules.mFPexpr = LExpr(NODEREF, DW_REG_MIPS_FP, 0); + } + +#else + +# error "Unsupported arch" +#endif + + return; + +cant_summarise: + if (reason1 || reason2) { + char buf[200]; + SprintfLiteral(buf, + "LUL can't summarise: " + "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n", + (unsigned long long int)(aAddress - mTextBias), + reason1 ? reason1 : "", reason2 ? 
reason2 : "", + NameOf_LExprHow(how), (unsigned int)oldReg, + (long long int)offset); + mLog(buf); + } +} + +uint32_t Summariser::AddPfxInstr(PfxInstr pfxi) { + return mSecMap->AddPfxInstr(pfxi); +} + +void Summariser::End() { + if (DEBUG_SUMMARISER) { + mLog("LUL End\n"); + } + if (mCurrAddr < mMax1Addr) { + mCurrRules.mAddr = mCurrAddr; + mCurrRules.mLen = mMax1Addr - mCurrAddr; + mSecMap->AddRuleSet(&mCurrRules); + if (DEBUG_SUMMARISER) { + mLog("LUL "); + mCurrRules.Print(mLog); + mLog("\n"); + } + } +} + +} // namespace lul diff --git a/mozglue/baseprofiler/lul/LulDwarfSummariser.h b/mozglue/baseprofiler/lul/LulDwarfSummariser.h new file mode 100644 index 0000000000..30f1ba23c1 --- /dev/null +++ b/mozglue/baseprofiler/lul/LulDwarfSummariser.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulDwarfSummariser_h +#define LulDwarfSummariser_h + +#include "LulMainInt.h" + +namespace lul { + +class Summariser { + public: + Summariser(SecMap* aSecMap, uintptr_t aTextBias, void (*aLog)(const char*)); + + virtual void Entry(uintptr_t aAddress, uintptr_t aLength); + virtual void End(); + + // Tell the summariser that the value for |aNewReg| at |aAddress| is + // recovered using the LExpr that can be constructed using the + // components |how|, |oldReg| and |offset|. The summariser will + // inspect the components and may reject them for various reasons, + // but the hope is that it will find them acceptable and record this + // rule permanently. + virtual void Rule(uintptr_t aAddress, int aNewReg, LExprHow how, + int16_t oldReg, int64_t offset); + + virtual uint32_t AddPfxInstr(PfxInstr pfxi); + + // Send output to the logging sink, for debugging. 
+ virtual void Log(const char* str) { mLog(str); } + + private: + // The SecMap in which we park the finished summaries (RuleSets) and + // also any PfxInstrs derived from Dwarf expressions. + SecMap* mSecMap; + + // Running state for the current summary (RuleSet) under construction. + RuleSet mCurrRules; + + // The start of the address range to which the RuleSet under + // construction applies. + uintptr_t mCurrAddr; + + // The highest address, plus one, for which the RuleSet under + // construction could possibly apply. If there are no further + // incoming events then mCurrRules will eventually be emitted + // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is + // nonempty. + uintptr_t mMax1Addr; + + // The bias value (to add to the SVMAs, to get AVMAs) to be used + // when adding entries into mSecMap. + uintptr_t mTextBias; + + // A logging sink, for debugging. + void (*mLog)(const char* aFmt); +}; + +} // namespace lul + +#endif // LulDwarfSummariser_h diff --git a/mozglue/baseprofiler/lul/LulElf.cpp b/mozglue/baseprofiler/lul/LulElf.cpp new file mode 100644 index 0000000000..b5d5e772be --- /dev/null +++ b/mozglue/baseprofiler/lul/LulElf.cpp @@ -0,0 +1,873 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// (derived from) +// dump_symbols.cc: implement google_breakpad::WriteSymbolFile: +// Find all the debugging info in a file and dump it as a Breakpad symbol file. +// +// dump_symbols.h: Read debugging information from an ELF file, and write +// it out as a Breakpad symbol file. 
+ +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.cc +// src/common/linux/elfutils.cc +// src/common/linux/file_id.cc + +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include <arpa/inet.h> + +#include <cstdlib> +#include <set> +#include <string> +#include <vector> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "PlatformMacros.h" +#include "LulCommonExt.h" +#include "LulDwarfExt.h" +#include "LulElfInt.h" +#include "LulMainInt.h" + +#if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) +// bionic and older glibsc don't define it +# define SHT_ARM_EXIDX (SHT_LOPROC + 1) +#endif + +// Old Linux header doesn't define EM_AARCH64 +#ifndef EM_AARCH64 +# define EM_AARCH64 183 +#endif + +// This namespace contains helper functions. +namespace { + +using lul::DwarfCFIToModule; +using lul::FindElfSectionByName; +using lul::GetOffset; +using lul::IsValidElf; +using lul::Module; +using lul::scoped_ptr; +using lul::Summariser; +using lul::UniqueStringUniverse; +using std::set; +using std::string; +using std::vector; + +// +// FDWrapper +// +// Wrapper class to make sure opened file is closed. +// +class FDWrapper { + public: + explicit FDWrapper(int fd) : fd_(fd) {} + ~FDWrapper() { + if (fd_ != -1) close(fd_); + } + int get() { return fd_; } + int release() { + int fd = fd_; + fd_ = -1; + return fd; + } + + private: + int fd_; +}; + +// +// MmapWrapper +// +// Wrapper class to make sure mapped regions are unmapped. 
+// +class MmapWrapper { + public: + MmapWrapper() : is_set_(false), base_(NULL), size_(0) {} + ~MmapWrapper() { + if (is_set_ && base_ != NULL) { + MOZ_ASSERT(size_ > 0); + munmap(base_, size_); + } + } + void set(void* mapped_address, size_t mapped_size) { + is_set_ = true; + base_ = mapped_address; + size_ = mapped_size; + } + void release() { + MOZ_ASSERT(is_set_); + is_set_ = false; + base_ = NULL; + size_ = 0; + } + + private: + bool is_set_; + void* base_; + size_t size_; +}; + +// Set NUM_DW_REGNAMES to be the number of Dwarf register names +// appropriate to the machine architecture given in HEADER. Return +// true on success, or false if HEADER's machine architecture is not +// supported. +template <typename ElfClass> +bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, + unsigned int* num_dw_regnames) { + switch (elf_header->e_machine) { + case EM_386: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); + return true; + case EM_ARM: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); + return true; + case EM_X86_64: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); + return true; + case EM_MIPS: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS(); + return true; + case EM_AARCH64: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64(); + return true; + default: + MOZ_ASSERT(0); + return false; + } +} + +template <typename ElfClass> +bool LoadDwarfCFI(const string& dwarf_filename, + const typename ElfClass::Ehdr* elf_header, + const char* section_name, + const typename ElfClass::Shdr* section, const bool eh_frame, + const typename ElfClass::Shdr* got_section, + const typename ElfClass::Shdr* text_section, + const bool big_endian, SecMap* smap, uintptr_t text_bias, + UniqueStringUniverse* usu, void (*log)(const char*)) { + // Find the appropriate set of register names for this file's + // architecture. 
+ unsigned int num_dw_regs = 0; + if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) { + fprintf(stderr, + "%s: unrecognized ELF machine architecture '%d';" + " cannot convert DWARF call frame information\n", + dwarf_filename.c_str(), elf_header->e_machine); + return false; + } + + const lul::Endianness endianness = + big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; + + // Find the call frame information and its size. + const char* cfi = GetOffset<ElfClass, char>(elf_header, section->sh_offset); + size_t cfi_size = section->sh_size; + + // Plug together the parser, handler, and their entourages. + + // Here's a summariser, which will receive the output of the + // parser, create summaries, and add them to |smap|. + Summariser summ(smap, text_bias, log); + + lul::ByteReader reader(endianness); + reader.SetAddressSize(ElfClass::kAddrSize); + + DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); + DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ); + + // Provide the base addresses for .eh_frame encoded pointers, if + // possible. 
+ reader.SetCFIDataBase(section->sh_addr, cfi); + if (got_section) reader.SetDataBase(got_section->sh_addr); + if (text_section) reader.SetTextBase(text_section->sh_addr); + + lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, + section_name); + lul::CallFrameInfo parser(cfi, cfi_size, &reader, &handler, &dwarf_reporter, + eh_frame); + parser.Start(); + + return true; +} + +bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, + void** elf_header) { + int obj_fd = open(obj_file.c_str(), O_RDONLY); + if (obj_fd < 0) { + fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + FDWrapper obj_fd_wrapper(obj_fd); + struct stat st; + if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { + fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + // Mapping it read-only is good enough. In any case, mapping it + // read-write confuses Valgrind's debuginfo acquire/discard + // heuristics, making it hard to profile the profiler. + void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0); + if (obj_base == MAP_FAILED) { + fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + map_wrapper->set(obj_base, st.st_size); + *elf_header = obj_base; + if (!IsValidElf(*elf_header)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); + return false; + } + return true; +} + +// Get the endianness of ELF_HEADER. If it's invalid, return false. 
+template <typename ElfClass> +bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, + bool* big_endian) { + if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { + *big_endian = false; + return true; + } + if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { + *big_endian = true; + return true; + } + + fprintf(stderr, "bad data encoding in ELF header: %d\n", + elf_header->e_ident[EI_DATA]); + return false; +} + +// +// LoadSymbolsInfo +// +// Holds the state between the two calls to LoadSymbols() in case it's necessary +// to follow the .gnu_debuglink section and load debug information from a +// different file. +// +template <typename ElfClass> +class LoadSymbolsInfo { + public: + typedef typename ElfClass::Addr Addr; + + explicit LoadSymbolsInfo(const vector<string>& dbg_dirs) + : debug_dirs_(dbg_dirs), has_loading_addr_(false) {} + + // Keeps track of which sections have been loaded so sections don't + // accidentally get loaded twice from two different files. + void LoadedSection(const string& section) { + if (loaded_sections_.count(section) == 0) { + loaded_sections_.insert(section); + } else { + fprintf(stderr, "Section %s has already been loaded.\n", section.c_str()); + } + } + + string debuglink_file() const { return debuglink_file_; } + + private: + const vector<string>& debug_dirs_; // Directories in which to + // search for the debug ELF file. + + string debuglink_file_; // Full path to the debug ELF file. + + bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. + + set<string> loaded_sections_; // Tracks the Loaded ELF sections + // between calls to LoadSymbols(). +}; + +// Find the preferred loading address of the binary. +template <typename ElfClass> +typename ElfClass::Addr GetLoadingAddress( + const typename ElfClass::Phdr* program_headers, int nheader) { + typedef typename ElfClass::Phdr Phdr; + + // For non-PIC executables (e_type == ET_EXEC), the load address is + // the start address of the first PT_LOAD segment. 
(ELF requires + // the segments to be sorted by load address.) For PIC executables + // and dynamic libraries (e_type == ET_DYN), this address will + // normally be zero. + for (int i = 0; i < nheader; ++i) { + const Phdr& header = program_headers[i]; + if (header.p_type == PT_LOAD) return header.p_vaddr; + } + return 0; +} + +template <typename ElfClass> +bool LoadSymbols(const string& obj_file, const bool big_endian, + const typename ElfClass::Ehdr* elf_header, + const bool read_gnu_debug_link, + LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma, + size_t rx_size, UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Phdr Phdr; + typedef typename ElfClass::Shdr Shdr; + + char buf[500]; + SprintfLiteral(buf, "LoadSymbols: BEGIN %s\n", obj_file.c_str()); + buf[sizeof(buf) - 1] = 0; + log(buf); + + // This is how the text bias is calculated. + // BEGIN CALCULATE BIAS + uintptr_t loading_addr = GetLoadingAddress<ElfClass>( + GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), + elf_header->e_phnum); + uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; + SprintfLiteral(buf, "LoadSymbols: rx_avma=%llx, text_bias=%llx", + (unsigned long long int)(uintptr_t)rx_avma, + (unsigned long long int)text_bias); + buf[sizeof(buf) - 1] = 0; + log(buf); + // END CALCULATE BIAS + + const Shdr* sections = + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); + const char* names_end = names + section_names->sh_size; + bool found_usable_info = false; + + // Dwarf Call Frame Information (CFI) is actually independent from + // the other DWARF debugging information, and can be used alone. 
+ const Shdr* dwarf_cfi_section = + FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, sections, + names, names_end, elf_header->e_shnum); + if (dwarf_cfi_section) { + // Ignore the return value of this function; even without call frame + // information, the other debugging information could be perfectly + // useful. + info->LoadedSection(".debug_frame"); + bool result = LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", + dwarf_cfi_section, false, 0, 0, + big_endian, smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) log("LoadSymbols: read CFI from .debug_frame"); + } + + // Linux C++ exception handling information can also provide + // unwinding data. + const Shdr* eh_frame_section = + FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, sections, names, + names_end, elf_header->e_shnum); + if (eh_frame_section) { + // Pointers in .eh_frame data may be relative to the base addresses of + // certain sections. Provide those sections if present. + const Shdr* got_section = FindElfSectionByName<ElfClass>( + ".got", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); + const Shdr* text_section = FindElfSectionByName<ElfClass>( + ".text", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); + info->LoadedSection(".eh_frame"); + // As above, ignore the return value of this function. + bool result = LoadDwarfCFI<ElfClass>( + obj_file, elf_header, ".eh_frame", eh_frame_section, true, got_section, + text_section, big_endian, smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) log("LoadSymbols: read CFI from .eh_frame"); + } + + SprintfLiteral(buf, "LoadSymbols: END %s\n", obj_file.c_str()); + buf[sizeof(buf) - 1] = 0; + log(buf); + + return found_usable_info; +} + +// Return the breakpad symbol file identifier for the architecture of +// ELF_HEADER. 
+template <typename ElfClass> +const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { + typedef typename ElfClass::Half Half; + Half arch = elf_header->e_machine; + switch (arch) { + case EM_386: + return "x86"; + case EM_ARM: + return "arm"; + case EM_AARCH64: + return "arm64"; + case EM_MIPS: + return "mips"; + case EM_PPC64: + return "ppc64"; + case EM_PPC: + return "ppc"; + case EM_S390: + return "s390"; + case EM_SPARC: + return "sparc"; + case EM_SPARCV9: + return "sparcv9"; + case EM_X86_64: + return "x86_64"; + default: + return NULL; + } +} + +// Format the Elf file identifier in IDENTIFIER as a UUID with the +// dashes removed. +string FormatIdentifier(unsigned char identifier[16]) { + char identifier_str[40]; + lul::FileID::ConvertIdentifierToString(identifier, identifier_str, + sizeof(identifier_str)); + string id_no_dash; + for (int i = 0; identifier_str[i] != '\0'; ++i) + if (identifier_str[i] != '-') id_no_dash += identifier_str[i]; + // Add an extra "0" by the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but be consistent. + id_no_dash += '0'; + return id_no_dash; +} + +// Return the non-directory portion of FILENAME: the portion after the +// last slash, or the whole filename if there are no slashes. +string BaseFileName(const string& filename) { + // Lots of copies! basename's behavior is less than ideal. 
+ char* c_filename = strdup(filename.c_str()); + string base = basename(c_filename); + free(c_filename); + return base; +} + +template <typename ElfClass> +bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, + const string& obj_filename, + const vector<string>& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Ehdr Ehdr; + + unsigned char identifier[16]; + if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { + fprintf(stderr, "%s: unable to generate file identifier\n", + obj_filename.c_str()); + return false; + } + + const char* architecture = ElfArchitecture<ElfClass>(elf_header); + if (!architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + obj_filename.c_str(), elf_header->e_machine); + return false; + } + + // Figure out what endianness this file is. + bool big_endian; + if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) return false; + + string name = BaseFileName(obj_filename); + string os = "Linux"; + string id = FormatIdentifier(identifier); + + LoadSymbolsInfo<ElfClass> info(debug_dirs); + if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, + !debug_dirs.empty(), &info, smap, rx_avma, rx_size, + usu, log)) { + const string debuglink_file = info.debuglink_file(); + if (debuglink_file.empty()) return false; + + // Load debuglink ELF file. + fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); + MmapWrapper debug_map_wrapper; + Ehdr* debug_elf_header = NULL; + if (!LoadELF(debuglink_file, &debug_map_wrapper, + reinterpret_cast<void**>(&debug_elf_header))) + return false; + // Sanity checks to make sure everything matches up. 
+ const char* debug_architecture = + ElfArchitecture<ElfClass>(debug_elf_header); + if (!debug_architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + debuglink_file.c_str(), debug_elf_header->e_machine); + return false; + } + if (strcmp(architecture, debug_architecture)) { + fprintf(stderr, + "%s with ELF machine architecture %s does not match " + "%s with ELF architecture %s\n", + debuglink_file.c_str(), debug_architecture, obj_filename.c_str(), + architecture); + return false; + } + + bool debug_big_endian; + if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) + return false; + if (debug_big_endian != big_endian) { + fprintf(stderr, "%s and %s does not match in endianness\n", + obj_filename.c_str(), debuglink_file.c_str()); + return false; + } + + if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian, + debug_elf_header, false, &info, smap, rx_avma, + rx_size, usu, log)) { + return false; + } + } + + return true; +} + +} // namespace + +namespace lul { + +bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, + const vector<string>& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + if (!IsValidElf(obj_file)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); + return false; + } + + int elfclass = ElfClass(obj_file); + if (elfclass == ELFCLASS32) { + return ReadSymbolDataElfClass<ElfClass32>( + reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs, + smap, rx_avma, rx_size, usu, log); + } + if (elfclass == ELFCLASS64) { + return ReadSymbolDataElfClass<ElfClass64>( + reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs, + smap, rx_avma, rx_size, usu, log); + } + + return false; +} + +bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, void (*log)(const char*)) 
{ + MmapWrapper map_wrapper; + void* elf_header = NULL; + if (!LoadELF(obj_file, &map_wrapper, &elf_header)) return false; + + return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), + obj_file, debug_dirs, smap, rx_avma, rx_size, + usu, log); +} + +namespace { + +template <typename ElfClass> +void FindElfClassSection(const char* elf_base, const char* section_name, + typename ElfClass::Word section_type, + const void** section_start, int* section_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Shdr Shdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Shdr* sections = + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); + const char* names_end = names + section_names->sh_size; + + const Shdr* section = + FindElfSectionByName<ElfClass>(section_name, section_type, sections, + names, names_end, elf_header->e_shnum); + + if (section != NULL && section->sh_size > 0) { + *section_start = elf_base + section->sh_offset; + *section_size = section->sh_size; + } +} + +template <typename ElfClass> +void FindElfClassSegment(const char* elf_base, + typename ElfClass::Word segment_type, + const void** segment_start, int* segment_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Phdr Phdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Phdr* phdrs = + GetOffset<ElfClass, 
Phdr>(elf_header, elf_header->e_phoff); + + for (int i = 0; i < elf_header->e_phnum; ++i) { + if (phdrs[i].p_type == segment_type) { + *segment_start = elf_base + phdrs[i].p_offset; + *segment_size = phdrs[i].p_filesz; + return; + } + } +} + +} // namespace + +bool IsValidElf(const void* elf_base) { + return strncmp(reinterpret_cast<const char*>(elf_base), ELFMAG, SELFMAG) == 0; +} + +int ElfClass(const void* elf_base) { + const ElfW(Ehdr)* elf_header = reinterpret_cast<const ElfW(Ehdr)*>(elf_base); + + return elf_header->e_ident[EI_CLASS]; +} + +bool FindElfSection(const void* elf_mapped_base, const char* section_name, + uint32_t section_type, const void** section_start, + int* section_size, int* elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + *section_start = NULL; + *section_size = 0; + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSection<ElfClass32>(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSection<ElfClass64>(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } + + return false; +} + +bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type, + const void** segment_start, int* segment_size, + int* elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + *segment_start = NULL; + *segment_size = 0; + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSegment<ElfClass32>(elf_base, segment_type, 
segment_start, + segment_size); + return *segment_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSegment<ElfClass64>(elf_base, segment_type, segment_start, + segment_size); + return *segment_start != NULL; + } + + return false; +} + +// (derived from) +// file_id.cc: Return a unique identifier for a file +// +// See file_id.h for documentation +// + +// ELF note name and desc are 32-bits word padded. +#define NOTE_PADDING(a) ((a + 3) & ~3) + +// These functions are also used inside the crashed process, so be safe +// and use the syscall/libc wrappers instead of direct syscalls or libc. + +template <typename ElfClass> +static bool ElfClassBuildIDNoteIdentifier(const void* section, int length, + uint8_t identifier[kMDGUIDSize]) { + typedef typename ElfClass::Nhdr Nhdr; + + const void* section_end = reinterpret_cast<const char*>(section) + length; + const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section); + while (reinterpret_cast<const void*>(note_header) < section_end) { + if (note_header->n_type == NT_GNU_BUILD_ID) break; + note_header = reinterpret_cast<const Nhdr*>( + reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) + + NOTE_PADDING(note_header->n_namesz) + + NOTE_PADDING(note_header->n_descsz)); + } + if (reinterpret_cast<const void*>(note_header) >= section_end || + note_header->n_descsz == 0) { + return false; + } + + const char* build_id = reinterpret_cast<const char*>(note_header) + + sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); + // Copy as many bits of the build ID as will fit + // into the GUID space. + memset(identifier, 0, kMDGUIDSize); + memcpy(identifier, build_id, + std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); + + return true; +} + +// Attempt to locate a .note.gnu.build-id section in an ELF binary +// and copy as many bytes of it as will fit into |identifier|. 
+static bool FindElfBuildIDNote(const void* elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* note_section; + int note_size, elfclass; + if ((!FindElfSegment(elf_mapped_base, PT_NOTE, (const void**)¬e_section, + ¬e_size, &elfclass) || + note_size == 0) && + (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, + (const void**)¬e_section, ¬e_size, &elfclass) || + note_size == 0)) { + return false; + } + + if (elfclass == ELFCLASS32) { + return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size, + identifier); + } else if (elfclass == ELFCLASS64) { + return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size, + identifier); + } + + return false; +} + +// Attempt to locate the .text section of an ELF binary and generate +// a simple hash by XORing the first page worth of bytes into |identifier|. +static bool HashElfTextSection(const void* elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* text_section; + int text_size; + if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, + (const void**)&text_section, &text_size, NULL) || + text_size == 0) { + return false; + } + + memset(identifier, 0, kMDGUIDSize); + const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section); + const uint8_t* ptr_end = ptr + std::min(text_size, 4096); + while (ptr < ptr_end) { + for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i]; + ptr += kMDGUIDSize; + } + return true; +} + +// static +bool FileID::ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]) { + // Look for a build id note first. + if (FindElfBuildIDNote(base, identifier)) return true; + + // Fall back on hashing the first page of the text section. 
+ return HashElfTextSection(base, identifier); +} + +// static +void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length) { + uint8_t identifier_swapped[kMDGUIDSize]; + + // Endian-ness swap to match dump processor expectation. + memcpy(identifier_swapped, identifier, kMDGUIDSize); + uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); + *data1 = htonl(*data1); + uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); + *data2 = htons(*data2); + uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); + *data3 = htons(*data3); + + int buffer_idx = 0; + for (unsigned int idx = 0; + (buffer_idx < buffer_length) && (idx < kMDGUIDSize); ++idx) { + int hi = (identifier_swapped[idx] >> 4) & 0x0F; + int lo = (identifier_swapped[idx]) & 0x0F; + + if (idx == 4 || idx == 6 || idx == 8 || idx == 10) + buffer[buffer_idx++] = '-'; + + buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; + buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; + } + + // NULL terminate + buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; +} + +} // namespace lul diff --git a/mozglue/baseprofiler/lul/LulElfExt.h b/mozglue/baseprofiler/lul/LulElfExt.h new file mode 100644 index 0000000000..73d9ff7f15 --- /dev/null +++ b/mozglue/baseprofiler/lul/LulElfExt.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.h + +#ifndef LulElfExt_h +#define LulElfExt_h + +// These two functions are the external interface to the +// ELF/Dwarf/EXIDX reader. + +#include "LulMainInt.h" + +using lul::SecMap; + +namespace lul { + +class UniqueStringUniverse; + +// Find all the unwind information in OBJ_FILE, an ELF executable +// or shared library, and add it to SMAP. 
+bool ReadSymbolData(const std::string& obj_file, + const std::vector<std::string>& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, UniqueStringUniverse* usu, + void (*log)(const char*)); + +// The same as ReadSymbolData, except that OBJ_FILE is assumed to +// point to a mapped-in image of OBJ_FILENAME. +bool ReadSymbolDataInternal(const uint8_t* obj_file, + const std::string& obj_filename, + const std::vector<std::string>& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)); + +} // namespace lul + +#endif // LulElfExt_h diff --git a/mozglue/baseprofiler/lul/LulElfInt.h b/mozglue/baseprofiler/lul/LulElfInt.h new file mode 100644 index 0000000000..31ffba8ff0 --- /dev/null +++ b/mozglue/baseprofiler/lul/LulElfInt.h @@ -0,0 +1,218 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/android/include/elf.h +// src/common/linux/elfutils.h +// src/common/linux/file_id.h +// src/common/linux/elfutils-inl.h + +#ifndef LulElfInt_h +#define LulElfInt_h + +// This header defines functions etc internal to the ELF reader. It +// should not be included outside of LulElf.cpp. + +#include <elf.h> +#include <stdlib.h> + +#include "mozilla/Assertions.h" + +#include "PlatformMacros.h" + +// (derived from) +// elfutils.h: Utilities for dealing with ELF files. +// +#include <link.h> + +#if defined(GP_OS_android) + +// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h +// The Android headers don't always define this constant. +# ifndef EM_X86_64 +# define EM_X86_64 62 +# endif + +# ifndef EM_PPC64 +# define EM_PPC64 21 +# endif + +# ifndef EM_S390 +# define EM_S390 22 +# endif + +# ifndef NT_GNU_BUILD_ID +# define NT_GNU_BUILD_ID 3 +# endif + +# ifndef ElfW +# define ElfW(type) _ElfW(Elf, ELFSIZE, type) +# define _ElfW(e, w, t) _ElfW_1(e, w, _##t) +# define _ElfW_1(e, w, t) e##w##t +# endif + +#endif + +#if defined(GP_OS_freebsd) + +# ifndef ElfW +# define ElfW(type) Elf_##type +# endif + +#endif + +namespace lul { + +// Traits classes so consumers can write templatized code to deal +// with specific ELF bits. 
// Type aliases and constants describing 32-bit (ELFCLASS32) images.
struct ElfClass32 {
  using Addr = Elf32_Addr;
  using Ehdr = Elf32_Ehdr;
  using Nhdr = Elf32_Nhdr;
  using Phdr = Elf32_Phdr;
  using Shdr = Elf32_Shdr;
  using Half = Elf32_Half;
  using Off = Elf32_Off;
  using Word = Elf32_Word;
  static const int kClass = ELFCLASS32;
  static const size_t kAddrSize = sizeof(Elf32_Addr);
};

// Type aliases and constants describing 64-bit (ELFCLASS64) images.
struct ElfClass64 {
  using Addr = Elf64_Addr;
  using Ehdr = Elf64_Ehdr;
  using Nhdr = Elf64_Nhdr;
  using Phdr = Elf64_Phdr;
  using Shdr = Elf64_Shdr;
  using Half = Elf64_Half;
  using Off = Elf64_Off;
  using Word = Elf64_Word;
  static const int kClass = ELFCLASS64;
  static const size_t kAddrSize = sizeof(Elf64_Addr);
};

bool IsValidElf(const void* elf_header);
int ElfClass(const void* elf_base);

// Look in the ELF binary data at |elf_mapped_base| for a section called
// |section_name| whose type is |section_type|. On success, returns true,
// points |*section_start| at the start of the section data and stores the
// size of that data in |*section_size|. If |elfclass| is not NULL,
// |*elfclass| is set to the ELF file class.
bool FindElfSection(const void* elf_mapped_base, const char* section_name,
                    uint32_t section_type, const void** section_start,
                    int* section_size, int* elfclass);

// Internal helper, exposed for the convenience of callers that already
// have more info.
template <typename ElfClass>
const typename ElfClass::Shdr* FindElfSectionByName(
    const char* name, typename ElfClass::Word section_type,
    const typename ElfClass::Shdr* sections, const char* section_names,
    const char* names_end, int nsection);

// Attempt to find the first segment of type |segment_type| in the ELF
// binary data at |elf_mapped_base|. On success, returns true and sets
// |*segment_start| to point to the start of the segment data, and
// |*segment_size| to the size of the segment's data. If |elfclass|
// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type, + const void** segment_start, int* segment_size, + int* elfclass); + +// Convert an offset from an Elf header into a pointer to the mapped +// address in the current process. Takes an extra template parameter +// to specify the return type to avoid having to dynamic_cast the +// result. +template <typename ElfClass, typename T> +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset); + +// (derived from) +// file_id.h: Return a unique identifier for a file +// + +static const size_t kMDGUIDSize = sizeof(MDGUID); + +class FileID { + public: + // Load the identifier for the elf file mapped into memory at |base| into + // |identifier|. Return false if the identifier could not be created for the + // file. + static bool ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]); + + // Convert the |identifier| data to a NULL terminated string. The string will + // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE). + // The |buffer| should be at least 37 bytes long to receive all of the data + // and termination. Shorter buffers will contain truncated data. 
+ static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length); +}; + +template <typename ElfClass, typename T> +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset) { + return reinterpret_cast<const T*>(reinterpret_cast<uintptr_t>(elf_header) + + offset); +} + +template <typename ElfClass> +const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, const char* section_names, + const char* names_end, int nsection) { + MOZ_ASSERT(name != NULL); + MOZ_ASSERT(sections != NULL); + MOZ_ASSERT(nsection > 0); + + int name_len = strlen(name); + if (name_len == 0) return NULL; + + for (int i = 0; i < nsection; ++i) { + const char* section_name = section_names + sections[i].sh_name; + if (sections[i].sh_type == section_type && + names_end - section_name >= name_len + 1 && + strcmp(name, section_name) == 0) { + return sections + i; + } + } + return NULL; +} + +} // namespace lul + +// And finally, the external interface, offered to LulMain.cpp +#include "LulElfExt.h" + +#endif // LulElfInt_h diff --git a/mozglue/baseprofiler/lul/LulMain.cpp b/mozglue/baseprofiler/lul/LulMain.cpp new file mode 100644 index 0000000000..f513920a0b --- /dev/null +++ b/mozglue/baseprofiler/lul/LulMain.cpp @@ -0,0 +1,1956 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "LulMain.h" + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> // write(), only for testing LUL + +#include <algorithm> // std::sort +#include <string> +#include <utility> + +#include "mozilla/Assertions.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MemoryChecking.h" +#include "mozilla/Sprintf.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" + +#include "BaseProfiler.h" +#include "LulCommonExt.h" +#include "LulElfExt.h" +#include "LulMainInt.h" + +using mozilla::baseprofiler::profiler_current_process_id; +using mozilla::baseprofiler::profiler_current_thread_id; + +// Set this to 1 for verbose logging +#define DEBUG_MAIN 0 + +namespace lul { + +using mozilla::CheckedInt; +using mozilla::DebugOnly; +using mozilla::MallocSizeOf; +using mozilla::Unused; +using std::pair; +using std::string; +using std::vector; + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT. +// Any such function -- and, hence, the transitive closure of those +// reachable from it -- must not do any dynamic memory allocation. +// Doing so risks deadlock. There is exactly one root function for +// the transitive closure: Lul::Unwind. 
+// +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +static const char* NameOf_DW_REG(int16_t aReg) { + switch (aReg) { + case DW_REG_CFA: + return "cfa"; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return "xbp"; + case DW_REG_INTEL_XSP: + return "xsp"; + case DW_REG_INTEL_XIP: + return "xip"; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return "r7"; + case DW_REG_ARM_R11: + return "r11"; + case DW_REG_ARM_R12: + return "r12"; + case DW_REG_ARM_R13: + return "r13"; + case DW_REG_ARM_R14: + return "r14"; + case DW_REG_ARM_R15: + return "r15"; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return "x29"; + case DW_REG_AARCH64_X30: + return "x30"; + case DW_REG_AARCH64_SP: + return "sp"; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return "sp"; + case DW_REG_MIPS_FP: + return "fp"; + case DW_REG_MIPS_PC: + return "pc"; +#else +# error "Unsupported arch" +#endif + default: + return "???"; + } +} + +string LExpr::ShowRule(const char* aNewReg) const { + char buf[64]; + string res = string(aNewReg) + "="; + switch (mHow) { + case UNKNOWN: + res += "Unknown"; + break; + case NODEREF: + SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case DEREF: + SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case PFXEXPR: + SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset); + res += buf; + break; + default: + res += "???"; + break; + } + return res; +} + +void RuleSet::Print(void (*aLog)(const char*)) const { + char buf[96]; + SprintfLiteral(buf, "[%llx .. 
%llx]: let ", (unsigned long long int)mAddr, + (unsigned long long int)(mAddr + mLen - 1)); + string res = string(buf); + res += mCfaExpr.ShowRule("cfa"); + res += " in"; + // For each reg we care about, print the recovery expression. +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + res += mXipExpr.ShowRule(" RA"); + res += mXspExpr.ShowRule(" SP"); + res += mXbpExpr.ShowRule(" BP"); +#elif defined(GP_ARCH_arm) + res += mR15expr.ShowRule(" R15"); + res += mR7expr.ShowRule(" R7"); + res += mR11expr.ShowRule(" R11"); + res += mR12expr.ShowRule(" R12"); + res += mR13expr.ShowRule(" R13"); + res += mR14expr.ShowRule(" R14"); +#elif defined(GP_ARCH_arm64) + res += mX29expr.ShowRule(" X29"); + res += mX30expr.ShowRule(" X30"); + res += mSPexpr.ShowRule(" SP"); +#elif defined(GP_ARCH_mips64) + res += mPCexpr.ShowRule(" PC"); + res += mSPexpr.ShowRule(" SP"); + res += mFPexpr.ShowRule(" FP"); +#else +# error "Unsupported arch" +#endif + aLog(res.c_str()); +} + +LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) { + switch (aRegno) { + case DW_REG_CFA: + return &mCfaExpr; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XIP: + return &mXipExpr; + case DW_REG_INTEL_XSP: + return &mXspExpr; + case DW_REG_INTEL_XBP: + return &mXbpExpr; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R15: + return &mR15expr; + case DW_REG_ARM_R14: + return &mR14expr; + case DW_REG_ARM_R13: + return &mR13expr; + case DW_REG_ARM_R12: + return &mR12expr; + case DW_REG_ARM_R11: + return &mR11expr; + case DW_REG_ARM_R7: + return &mR7expr; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return &mX29expr; + case DW_REG_AARCH64_X30: + return &mX30expr; + case DW_REG_AARCH64_SP: + return &mSPexpr; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return &mSPexpr; + case DW_REG_MIPS_FP: + return &mFPexpr; + case DW_REG_MIPS_PC: + return &mPCexpr; +#else +# error "Unknown arch" +#endif + default: + return nullptr; + } +} + +RuleSet::RuleSet() { + mAddr = 0; + 
mLen = 0; + // The only other fields are of type LExpr and those are initialised + // by LExpr::LExpr(). +} + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// See header file LulMainInt.h for comments about invariants. + +SecMap::SecMap(void (*aLog)(const char*)) + : mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {} + +SecMap::~SecMap() { mRuleSets.clear(); } + +// RUNS IN NO-MALLOC CONTEXT +RuleSet* SecMap::FindRuleSet(uintptr_t ia) { + // Binary search mRuleSets to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. Note that this works correctly even when + // mRuleSets.size() == 0. + + // Can't do this until the array has been sorted and preened. + MOZ_ASSERT(mUsable); + + long int lo = 0; + long int hi = (long int)mRuleSets.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + RuleSet* mid_ruleSet = &mRuleSets[mid]; + uintptr_t mid_minAddr = mid_ruleSet->mAddr; + uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_ruleSet; + } + // NOTREACHED +} + +// Add a RuleSet to the collection. The rule is copied in. Calling +// this makes the map non-searchable. +void SecMap::AddRuleSet(const RuleSet* rs) { + mUsable = false; + mRuleSets.push_back(*rs); +} + +// Add a PfxInstr to the vector of such instrs, and return the index +// in the vector. Calling this makes the map non-searchable. 
+uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) { + mUsable = false; + mPfxInstrs.push_back(pfxi); + return mPfxInstrs.size() - 1; +} + +static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) { + return rs1.mAddr < rs2.mAddr; +} + +// Prepare the map for searching. Completely remove any which don't +// fall inside the specified range [start, +len). +void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) { + if (mRuleSets.empty()) { + return; + } + + MOZ_ASSERT(aLen > 0); + if (aLen == 0) { + // This should never happen. + mRuleSets.clear(); + return; + } + + // Sort by start addresses. + std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE); + + // Detect any entry not completely contained within [start, +len). + // Set its length to zero, so that the next pass will remove it. + for (size_t i = 0; i < mRuleSets.size(); ++i) { + RuleSet* rs = &mRuleSets[i]; + if (rs->mLen > 0 && + (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) { + rs->mLen = 0; + } + } + + // Iteratively truncate any overlaps and remove any zero length + // entries that might result, or that may have been present + // initially. Unless the input is seriously screwy, this is + // expected to iterate only once. + while (true) { + size_t i; + size_t n = mRuleSets.size(); + size_t nZeroLen = 0; + + if (n == 0) { + break; + } + + for (i = 1; i < n; ++i) { + RuleSet* prev = &mRuleSets[i - 1]; + RuleSet* here = &mRuleSets[i]; + MOZ_ASSERT(prev->mAddr <= here->mAddr); + if (prev->mAddr + prev->mLen > here->mAddr) { + prev->mLen = here->mAddr - prev->mAddr; + } + if (prev->mLen == 0) nZeroLen++; + } + + if (mRuleSets[n - 1].mLen == 0) { + nZeroLen++; + } + + // At this point, the entries are in-order and non-overlapping. + // If none of them are zero-length, we are done. + if (nZeroLen == 0) { + break; + } + + // Slide back the entries to remove the zero length ones. + size_t j = 0; // The write-point. 
+ for (i = 0; i < n; ++i) { + if (mRuleSets[i].mLen == 0) { + continue; + } + if (j != i) mRuleSets[j] = mRuleSets[i]; + ++j; + } + MOZ_ASSERT(i == n); + MOZ_ASSERT(nZeroLen <= n); + MOZ_ASSERT(j == n - nZeroLen); + while (nZeroLen > 0) { + mRuleSets.pop_back(); + nZeroLen--; + } + + MOZ_ASSERT(mRuleSets.size() == j); + } + + size_t n = mRuleSets.size(); + +#ifdef DEBUG + // Do a final check on the rules: their address ranges must be + // ascending, non overlapping, non zero sized. + if (n > 0) { + MOZ_ASSERT(mRuleSets[0].mLen > 0); + for (size_t i = 1; i < n; ++i) { + RuleSet* prev = &mRuleSets[i - 1]; + RuleSet* here = &mRuleSets[i]; + MOZ_ASSERT(prev->mAddr < here->mAddr); + MOZ_ASSERT(here->mLen > 0); + MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr); + } + } +#endif + + // Set the summary min and max address values. + if (n == 0) { + // Use the values defined in comments in the class declaration. + mSummaryMinAddr = 1; + mSummaryMaxAddr = 0; + } else { + mSummaryMinAddr = mRuleSets[0].mAddr; + mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1; + } + char buf[150]; + SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n", + (int)n, (unsigned long long int)mSummaryMinAddr, + (unsigned long long int)mSummaryMaxAddr); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Is now usable for binary search. + mUsable = true; + +#if 0 + mLog("\nRulesets after preening\n"); + for (size_t i = 0; i < mRuleSets.size(); ++i) { + mRuleSets[i].Print(mLog); + mLog("\n"); + } + mLog("\n"); +#endif +} + +bool SecMap::IsEmpty() { return mRuleSets.empty(); } + +size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that these calls would be unsafe with some + // implementations of std::vector, but it seems to be working for now... 
+ n += aMallocSizeOf(mRuleSets.data()); + n += aMallocSizeOf(mPfxInstrs.data()); + + return n; +} + +//////////////////////////////////////////////////////////////// +// SegArray // +//////////////////////////////////////////////////////////////// + +// A SegArray holds a set of address ranges that together exactly +// cover an address range, with no overlaps or holes. Each range has +// an associated value, which in this case has been specialised to be +// a simple boolean. The representation is kept to minimal canonical +// form in which adjacent ranges with the same associated value are +// merged together. Each range is represented by a |struct Seg|. +// +// SegArrays are used to keep track of which parts of the address +// space are known to contain instructions. +class SegArray { + public: + void add(uintptr_t lo, uintptr_t hi, bool val) { + if (lo > hi) { + return; + } + split_at(lo); + if (hi < UINTPTR_MAX) { + split_at(hi + 1); + } + std::vector<Seg>::size_type iLo, iHi, i; + iLo = find(lo); + iHi = find(hi); + for (i = iLo; i <= iHi; ++i) { + mSegs[i].val = val; + } + preen(); + } + + // RUNS IN NO-MALLOC CONTEXT + bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min, + /*OUT*/ uintptr_t* rx_max, uintptr_t addr) { + std::vector<Seg>::size_type i = find(addr); + if (!mSegs[i].val) { + return false; + } + *rx_min = mSegs[i].lo; + *rx_max = mSegs[i].hi; + return true; + } + + SegArray() { + Seg s(0, UINTPTR_MAX, false); + mSegs.push_back(s); + } + + private: + struct Seg { + Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {} + uintptr_t lo; + uintptr_t hi; + bool val; + }; + + void preen() { + for (std::vector<Seg>::iterator iter = mSegs.begin(); + iter < mSegs.end() - 1; ++iter) { + if (iter[0].val != iter[1].val) { + continue; + } + iter[0].hi = iter[1].hi; + mSegs.erase(iter + 1); + // Back up one, so as not to miss an opportunity to merge + // with the entry after this one. 
+ --iter; + } + } + + // RUNS IN NO-MALLOC CONTEXT + std::vector<Seg>::size_type find(uintptr_t a) { + long int lo = 0; + long int hi = (long int)mSegs.size(); + while (true) { + // The unsearched space is lo .. hi inclusive. + if (lo > hi) { + // Not found. This can't happen. + return (std::vector<Seg>::size_type)(-1); + } + long int mid = lo + ((hi - lo) / 2); + uintptr_t mid_lo = mSegs[mid].lo; + uintptr_t mid_hi = mSegs[mid].hi; + if (a < mid_lo) { + hi = mid - 1; + continue; + } + if (a > mid_hi) { + lo = mid + 1; + continue; + } + return (std::vector<Seg>::size_type)mid; + } + } + + void split_at(uintptr_t a) { + std::vector<Seg>::size_type i = find(a); + if (mSegs[i].lo == a) { + return; + } + mSegs.insert(mSegs.begin() + i + 1, mSegs[i]); + mSegs[i].hi = a - 1; + mSegs[i + 1].lo = a; + } + + void show() { + printf("<< %d entries:\n", (int)mSegs.size()); + for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end(); + ++iter) { + printf(" %016llx %016llx %s\n", (unsigned long long int)(*iter).lo, + (unsigned long long int)(*iter).hi, + (*iter).val ? "true" : "false"); + } + printf(">>\n"); + } + + std::vector<Seg> mSegs; +}; + +//////////////////////////////////////////////////////////////// +// PriMap // +//////////////////////////////////////////////////////////////// + +class PriMap { + public: + explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {} + + // RUNS IN NO-MALLOC CONTEXT + pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) { + SecMap* sm = FindSecMap(ia); + return pair<const RuleSet*, const vector<PfxInstr>*>( + sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr); + } + + // Add a secondary map. No overlaps allowed w.r.t. existing + // secondary maps. + void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) { + // We can't add an empty SecMap to the PriMap. But that's OK + // since we'd never be able to find anything in it anyway. 
+ if (aSecMap->IsEmpty()) { + return; + } + + // Iterate through the SecMaps and find the right place for this + // one. At the same time, ensure that the in-order + // non-overlapping invariant is preserved (and, generally, holds). + // FIXME: this gives a cost that is O(N^2) in the total number of + // shared objects in the system. ToDo: better. + MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr); + + size_t num_secMaps = mSecMaps.size(); + uintptr_t i; + for (i = 0; i < num_secMaps; ++i) { + mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i]; + MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr); + if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) { + // |aSecMap| needs to be inserted immediately before mSecMaps[i]. + break; + } + } + MOZ_ASSERT(i <= num_secMaps); + if (i == num_secMaps) { + // It goes at the end. + mSecMaps.push_back(std::move(aSecMap)); + } else { + std::vector<mozilla::UniquePtr<SecMap>>::iterator iter = + mSecMaps.begin() + i; + mSecMaps.insert(iter, std::move(aSecMap)); + } + char buf[100]; + SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n", + (int)mSecMaps.size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + + // Remove and delete any SecMaps in the mapping, that intersect + // with the specified address range. + void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) { + MOZ_ASSERT(avma_min <= avma_max); + size_t num_secMaps = mSecMaps.size(); + if (num_secMaps > 0) { + intptr_t i; + // Iterate from end to start over the vector, so as to ensure + // that the special case where |avma_min| and |avma_max| denote + // the entire address space, can be completed in time proportional + // to the number of elements in the map. + for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) { + mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i]; + if (sm_i->mSummaryMaxAddr < avma_min || + avma_max < sm_i->mSummaryMinAddr) { + // There's no overlap. Move on. 
+ continue; + } + // We need to remove mSecMaps[i] and slide all those above it + // downwards to cover the hole. + mSecMaps.erase(mSecMaps.begin() + i); + } + } + } + + // Return the number of currently contained SecMaps. + size_t CountSecMaps() { return mSecMaps.size(); } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that this call would be unsafe with some + // implementations of std::vector, but it seems to be working for now... + n += aMallocSizeOf(mSecMaps.data()); + + for (size_t i = 0; i < mSecMaps.size(); i++) { + n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf); + } + + return n; + } + + private: + // RUNS IN NO-MALLOC CONTEXT + SecMap* FindSecMap(uintptr_t ia) { + // Binary search mSecMaps to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. + long int lo = 0; + long int hi = (long int)mSecMaps.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid]; + uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr; + uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_secMap.get(); + } + // NOTREACHED + } + + private: + // sorted array of per-object ranges, non overlapping, non empty + std::vector<mozilla::UniquePtr<SecMap>> mSecMaps; + + // a logging sink, for debugging. 
+ void (*mLog)(const char*); +}; + +//////////////////////////////////////////////////////////////// +// LUL // +//////////////////////////////////////////////////////////////// + +#define LUL_LOG(_str) \ + do { \ + char buf[200]; \ + SprintfLiteral(buf, "LUL: pid %d tid %d lul-obj %p: %s", \ + profiler_current_process_id(), \ + profiler_current_thread_id(), this, (_str)); \ + buf[sizeof(buf) - 1] = 0; \ + mLog(buf); \ + } while (0) + +LUL::LUL(void (*aLog)(const char*)) + : mLog(aLog), + mAdminMode(true), + mAdminThreadId(profiler_current_thread_id()), + mPriMap(new PriMap(aLog)), + mSegArray(new SegArray()), + mUSU(new UniqueStringUniverse()) { + LUL_LOG("LUL::LUL: Created object"); +} + +LUL::~LUL() { + LUL_LOG("LUL::~LUL: Destroyed object"); + delete mPriMap; + delete mSegArray; + mLog = nullptr; + delete mUSU; +} + +void LUL::MaybeShowStats() { + // This is racey in the sense that it can't guarantee that + // n_new == n_new_Context + n_new_CFI + n_new_Scanned + // if it should happen that mStats is updated by some other thread + // in between computation of n_new and n_new_{Context,CFI,FP}. + // But it's just stats printing, so we don't really care. 
+ uint32_t n_new = mStats - mStatsPrevious; + if (n_new >= 5000) { + uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext; + uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI; + uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP; + mStatsPrevious = mStats; + char buf[200]; + SprintfLiteral(buf, + "LUL frame stats: TOTAL %5u" + " CTX %4u CFI %4u FP %4u", + n_new, n_new_Context, n_new_CFI, n_new_FP); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } +} + +size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + n += mPriMap->SizeOfIncludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mSegArray + // - mUSU + + return n; +} + +void LUL::EnableUnwinding() { + LUL_LOG("LUL::EnableUnwinding"); + // Don't assert for Admin mode here. That is, tolerate a call here + // if we are already in Unwinding mode. + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mAdminMode = false; +} + +void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName, + const void* aMappedImage) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyMap %llx %llu %s\n", + (unsigned long long int)aRXavma, (unsigned long long int)aSize, + aFileName); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Here's a new mapping, for this object. + mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog); + + // Read CFI or EXIDX unwind data into |smap|. 
+ if (!aMappedImage) { + (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(), + smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } else { + (void)lul::ReadSymbolDataInternal( + (const uint8_t*)aMappedImage, string(aFileName), + std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } + + mLog("NotifyMap .. preparing entries\n"); + + smap->PrepareRuleSets(aRXavma, aSize); + + SprintfLiteral(buf, "NotifyMap got %lld entries\n", + (long long int)smap->Size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Add it to the primary map (the top level set of mapped objects). + mPriMap->AddSecMap(std::move(smap)); + + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n", + (unsigned long long int)aRXavma, + (unsigned long long int)aSize); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. 
+ mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[100]; + SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n", + (unsigned long long int)aRXavmaMin, + (unsigned long long int)aRXavmaMax); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + MOZ_ASSERT(aRXavmaMin <= aRXavmaMax); + + // Remove from the primary map, any secondary maps that intersect + // with the address range. Also delete the secondary maps. + mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax); + + // Tell the segment array that the address range no longer + // contains valid code. + mSegArray->add(aRXavmaMin, aRXavmaMax, false); + + SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n", + (int)mPriMap->CountSecMaps()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +} + +size_t LUL::CountMappings() { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + return mPriMap->CountSecMaps(); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) { + if (!aAddr.Valid()) { + return TaggedUWord(); + } + + // Lower limit check. |aAddr.Value()| is the lowest requested address + // and |aStackImg->mStartAvma| is the lowest address we actually have, + // so the comparison is straightforward. + if (aAddr.Value() < aStackImg->mStartAvma) { + return TaggedUWord(); + } + + // Upper limit check. We must compute the highest requested address + // and the highest address we actually have, but being careful to + // avoid overflow. In particular if |aAddr| is 0xFFF...FFF or the + // 3/7 values below that, then we will get overflow. See bug #1245477. 
+ typedef CheckedInt<uintptr_t> CheckedUWord; + CheckedUWord highest_requested_plus_one = + CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t)); + CheckedUWord highest_available_plus_one = + CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen); + if (!highest_requested_plus_one.isValid() // overflow? + || !highest_available_plus_one.isValid() // overflow? + || (highest_requested_plus_one.value() > + highest_available_plus_one.value())) { // in range? + return TaggedUWord(); + } + + return TaggedUWord( + *(uintptr_t*)(&aStackImg + ->mContents[aAddr.Value() - aStackImg->mStartAvma])); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs, + TaggedUWord aCFA) { + switch (aReg) { + case DW_REG_CFA: + return aCFA; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return aOldRegs->xbp; + case DW_REG_INTEL_XSP: + return aOldRegs->xsp; + case DW_REG_INTEL_XIP: + return aOldRegs->xip; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return aOldRegs->r7; + case DW_REG_ARM_R11: + return aOldRegs->r11; + case DW_REG_ARM_R12: + return aOldRegs->r12; + case DW_REG_ARM_R13: + return aOldRegs->r13; + case DW_REG_ARM_R14: + return aOldRegs->r14; + case DW_REG_ARM_R15: + return aOldRegs->r15; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return aOldRegs->x29; + case DW_REG_AARCH64_X30: + return aOldRegs->x30; + case DW_REG_AARCH64_SP: + return aOldRegs->sp; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return aOldRegs->sp; + case DW_REG_MIPS_FP: + return aOldRegs->fp; + case DW_REG_MIPS_PC: + return aOldRegs->pc; +#else +# error "Unsupported arch" +#endif + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +// See prototype for comment. 
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>& aPfxInstrs) { + // A small evaluation stack, and a stack pointer, which points to + // the highest numbered in-use element. + const int N_STACK = 10; + TaggedUWord stack[N_STACK]; + int stackPointer = -1; + for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord(); + +#define PUSH(_tuw) \ + do { \ + if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \ + stack[++stackPointer] = (_tuw); \ + } while (0) + +#define POP(_lval) \ + do { \ + if (stackPointer < 0) goto fail; /* underflow */ \ + _lval = stack[stackPointer--]; \ + } while (0) + + // Cursor in the instruction sequence. + size_t curr = start + 1; + + // Check the start point is sane. + size_t nInstrs = aPfxInstrs.size(); + if (start < 0 || (size_t)start >= nInstrs) goto fail; + + { + // The instruction sequence must start with PX_Start. If not, + // something is seriously wrong. + PfxInstr first = aPfxInstrs[start]; + if (first.mOpcode != PX_Start) goto fail; + + // Push the CFA on the stack to start with (or not), as required by + // the original DW_OP_*expression* CFI. + if (first.mOperand != 0) PUSH(aCFA); + } + + while (true) { + if (curr >= nInstrs) goto fail; // ran off the end of the sequence + + PfxInstr pfxi = aPfxInstrs[curr++]; + if (pfxi.mOpcode == PX_End) break; // we're done + + switch (pfxi.mOpcode) { + case PX_Start: + // This should appear only at the start of the sequence. + goto fail; + case PX_End: + // We just took care of that, so we shouldn't see it again. 
+ MOZ_ASSERT(0); + goto fail; + case PX_SImm32: + PUSH(TaggedUWord((intptr_t)pfxi.mOperand)); + break; + case PX_DwReg: { + DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand; + MOZ_ASSERT(reg != DW_REG_CFA); + PUSH(EvaluateReg(reg, aOldRegs, aCFA)); + break; + } + case PX_Deref: { + TaggedUWord addr; + POP(addr); + PUSH(DerefTUW(addr, aStackImg)); + break; + } + case PX_Add: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y + x); + break; + } + case PX_Sub: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y - x); + break; + } + case PX_And: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y & x); + break; + } + case PX_Or: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y | x); + break; + } + case PX_CmpGES: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y.CmpGEs(x)); + break; + } + case PX_Shl: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y << x); + break; + } + default: + MOZ_ASSERT(0); + goto fail; + } + } // while (true) + + // Evaluation finished. The top value on the stack is the result. 
+ if (stackPointer >= 0) { + return stack[stackPointer]; + } + // Else fall through + +fail: + return TaggedUWord(); + +#undef PUSH +#undef POP +} + +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA, + const StackImage* aStackImg, + const vector<PfxInstr>* aPfxInstrs) const { + switch (mHow) { + case UNKNOWN: + return TaggedUWord(); + case NODEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return tuw; + } + case DEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return DerefTUW(tuw, aStackImg); + } + case PFXEXPR: { + MOZ_ASSERT(aPfxInstrs); + if (!aPfxInstrs) { + return TaggedUWord(); + } + return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs); + } + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg, + const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) { + // Take a copy of regs, since we'll need to refer to the old values + // whilst computing the new ones. + UnwindRegs old_regs = *aRegs; + + // Mark all the current register values as invalid, so that the + // caller can see, on our return, which ones have been computed + // anew. If we don't even manage to compute a new PC value, then + // the caller will have to abandon the unwind. 
+ // FIXME: Create and use instead: aRegs->SetAllInvalid(); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = TaggedUWord(); + aRegs->xsp = TaggedUWord(); + aRegs->xip = TaggedUWord(); +#elif defined(GP_ARCH_arm) + aRegs->r7 = TaggedUWord(); + aRegs->r11 = TaggedUWord(); + aRegs->r12 = TaggedUWord(); + aRegs->r13 = TaggedUWord(); + aRegs->r14 = TaggedUWord(); + aRegs->r15 = TaggedUWord(); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = TaggedUWord(); + aRegs->x30 = TaggedUWord(); + aRegs->sp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#elif defined(GP_ARCH_mips64) + aRegs->sp = TaggedUWord(); + aRegs->fp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#else +# error "Unsupported arch" +#endif + + // This is generally useful. + const TaggedUWord inval = TaggedUWord(); + + // First, compute the CFA. + TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/, + aStackImg, aPfxInstrs); + + // If we didn't manage to compute the CFA, well .. that's ungood, + // but keep going anyway. It'll be OK provided none of the register + // value rules mention the CFA. In any case, compute the new values + // for each register that we're tracking. 
+ +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = + aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xsp = + aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xip = + aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm) + aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r11 = + aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r12 = + aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r13 = + aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r14 = + aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r15 = + aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = + aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->x30 = + aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_mips64) + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#else +# error "Unsupported arch" +#endif + + // We're done. Any regs for which we didn't manage to compute a + // new value will now be marked as invalid. 
+} + +// RUNS IN NO-MALLOC CONTEXT +void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs, + /*OUT*/ uintptr_t* aFrameSPs, + /*OUT*/ size_t* aFramesUsed, + /*OUT*/ size_t* aFramePointerFramesAcquired, + size_t aFramesAvail, UnwindRegs* aStartRegs, + StackImage* aStackImg) { + MOZ_RELEASE_ASSERT(!mAdminMode); + + ///////////////////////////////////////////////////////// + // BEGIN UNWIND + + *aFramesUsed = 0; + + UnwindRegs regs = *aStartRegs; + TaggedUWord last_valid_sp = TaggedUWord(); + + while (true) { + if (DEBUG_MAIN) { + char buf[300]; + mLog("\n"); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + SprintfLiteral( + buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n", + (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(), + (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(), + (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm) + SprintfLiteral( + buf, + "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx" + " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n", + (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(), + (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(), + (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(), + (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(), + (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(), + (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm64) + SprintfLiteral( + buf, + "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx" + " sp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(), + (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(), + (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_mips64) + SprintfLiteral( + buf, 
"LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(), + (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#else +# error "Unsupported arch" +#endif + } + +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + TaggedUWord ia = regs.xip; + TaggedUWord sp = regs.xsp; +#elif defined(GP_ARCH_arm) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14); + TaggedUWord sp = regs.r13; +#elif defined(GP_ARCH_arm64) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30); + TaggedUWord sp = regs.sp; +#elif defined(GP_ARCH_mips64) + TaggedUWord ia = regs.pc; + TaggedUWord sp = regs.sp; +#else +# error "Unsupported arch" +#endif + + if (*aFramesUsed >= aFramesAvail) { + break; + } + + // If we don't have a valid value for the PC, give up. + if (!ia.Valid()) { + break; + } + + // If this is the innermost frame, record the SP value, which + // presumably is valid. If this isn't the innermost frame, and we + // have a valid SP value, check that its SP value isn't less that + // the one we've seen so far, so as to catch potential SP value + // cycles. + if (*aFramesUsed == 0) { + last_valid_sp = sp; + } else { + MOZ_ASSERT(last_valid_sp.Valid()); + if (sp.Valid()) { + if (sp.Value() < last_valid_sp.Value()) { + // Hmm, SP going in the wrong direction. Let's stop. + break; + } + // Remember where we got to. + last_valid_sp = sp; + } + } + + // For the innermost frame, the IA value is what we need. For all + // other frames, it's actually the return address, so back up one + // byte so as to get it into the calling instruction. + aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1); + aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0; + (*aFramesUsed)++; + + // Find the RuleSet for the current IA, if any. 
This will also + // query the backing (secondary) maps if it isn't found in the + // thread-local cache. + + // If this isn't the innermost frame, back up into the calling insn. + if (*aFramesUsed > 1) { + ia = ia + TaggedUWord((uintptr_t)(-1)); + } + + pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs = + mPriMap->Lookup(ia.Value()); + const RuleSet* ruleset = ruleset_and_pfxinstrs.first; + const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second; + + if (DEBUG_MAIN) { + char buf[100]; + SprintfLiteral(buf, "ruleset for 0x%llx = %p\n", + (unsigned long long int)ia.Value(), ruleset); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + +#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + ///////////////////////////////////////////// + //// + // On 32 bit x86-linux, syscalls are often done via the VDSO + // function __kernel_vsyscall, which doesn't have a corresponding + // object that we can read debuginfo from. That effectively kills + // off all stack traces for threads blocked in syscalls. Hence + // special-case by looking at the code surrounding the program + // counter. 
+ // + // 0xf7757420 <__kernel_vsyscall+0>: push %ecx + // 0xf7757421 <__kernel_vsyscall+1>: push %edx + // 0xf7757422 <__kernel_vsyscall+2>: push %ebp + // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp + // 0xf7757425 <__kernel_vsyscall+5>: sysenter + // 0xf7757427 <__kernel_vsyscall+7>: nop + // 0xf7757428 <__kernel_vsyscall+8>: nop + // 0xf7757429 <__kernel_vsyscall+9>: nop + // 0xf775742a <__kernel_vsyscall+10>: nop + // 0xf775742b <__kernel_vsyscall+11>: nop + // 0xf775742c <__kernel_vsyscall+12>: nop + // 0xf775742d <__kernel_vsyscall+13>: nop + // 0xf775742e <__kernel_vsyscall+14>: int $0x80 + // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp + // 0xf7757431 <__kernel_vsyscall+17>: pop %edx + // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx + // 0xf7757433 <__kernel_vsyscall+19>: ret + // + // In cases where the sampled thread is blocked in a syscall, its + // program counter will point at "pop %ebp". Hence we look for + // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and + // the corresponding register-recovery actions are: + // new_ebp = *(old_esp + 0) + // new eip = *(old_esp + 12) + // new_esp = old_esp + 16 + // + // It may also be the case that the program counter points two + // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in + // the case where the syscall has been restarted but the thread + // hasn't been rescheduled. The code below doesn't handle that; + // it could easily be made to. 
+ // + if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) { + uintptr_t insns_min, insns_max; + uintptr_t eip = ia.Value(); + bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip); + if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) { + uint8_t* eipC = (uint8_t*)eip; + if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D && + eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) { + TaggedUWord sp_plus_0 = sp; + TaggedUWord sp_plus_12 = sp; + TaggedUWord sp_plus_16 = sp; + sp_plus_12 = sp_plus_12 + TaggedUWord(12); + sp_plus_16 = sp_plus_16 + TaggedUWord(16); + TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg); + TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg); + TaggedUWord new_esp = sp_plus_16; + if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) { + regs.xbp = new_ebp; + regs.xip = new_eip; + regs.xsp = new_esp; + continue; + } + } + } + } + //// + ///////////////////////////////////////////// +#endif // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + + // So, do we have a ruleset for this address? If so, use it now. + if (ruleset) { + if (DEBUG_MAIN) { + ruleset->Print(mLog); + mLog("\n"); + } + // Use the RuleSet to compute the registers for the previous + // frame. |regs| is modified in-place. + UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs); + continue; + } + +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ + defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ + defined(GP_PLAT_amd64_freebsd) + // There's no RuleSet for the specified address. On amd64/x86_linux, see if + // it's possible to recover the caller's frame by using the frame pointer. + + // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image), + // and assume the following layout: + // + // <--- new_SP + // +----------+ + // | new_IP | (return address) + // +----------+ + // | new_BP | <--- old_BP + // +----------+ + // | .... | + // | .... | + // | .... 
| + // +----------+ <---- old_SP (arbitrary, but must be <= old_BP) + + const size_t wordSzB = sizeof(uintptr_t); + TaggedUWord old_xsp = regs.xsp; + + // points at new_BP ? + TaggedUWord old_xbp = regs.xbp; + // points at new_IP ? + TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB); + // is the new_SP ? + TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB); + + if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() && + old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) { + // We don't need to do any range, alignment or validity checks for + // addresses passed to DerefTUW, since that performs them itself, and + // returns an invalid value on failure. Any such value will poison + // subsequent uses, and we do a final check for validity before putting + // the computed values into |regs|. + TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg); + if (new_xbp.Valid() && new_xbp.IsAligned() && + old_xbp.Value() < new_xbp.Value()) { + TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg); + TaggedUWord new_xsp = old_xbp_plus2; + if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) { + regs.xbp = new_xbp; + regs.xip = new_xip; + regs.xsp = new_xsp; + (*aFramePointerFramesAcquired)++; + continue; + } + } + } +#elif defined(GP_ARCH_arm64) + // Here is an example of generated code for prologue and epilogue.. + // + // stp x29, x30, [sp, #-16]! + // mov x29, sp + // ... + // ldp x29, x30, [sp], #16 + // ret + // + // Next is another example of generated code. + // + // stp x20, x19, [sp, #-32]! + // stp x29, x30, [sp, #16] + // add x29, sp, #0x10 + // ... + // ldp x29, x30, [sp, #16] + // ldp x20, x19, [sp], #32 + // ret + // + // Previous x29 and x30 register are stored in the address of x29 register. + // But since sp register value depends on local variables, we cannot compute + // previous sp register from current sp/fp/lr register and there is no + // regular rule for sp register in prologue. 
But since return address is lr + // register, if x29 is valid, we will get return address without sp + // register. + // + // So we assume the following layout that if no rule set. x29 is frame + // pointer, so we will be able to compute x29 and x30 . + // + // +----------+ <--- new_sp (cannot compute) + // | .... | + // +----------+ + // | new_lr | (return address) + // +----------+ + // | new_fp | <--- old_fp + // +----------+ + // | .... | + // | .... | + // +----------+ <---- old_sp (arbitrary, but unused) + + TaggedUWord old_fp = regs.x29; + if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() && + last_valid_sp.Value() <= old_fp.Value()) { + TaggedUWord new_fp = DerefTUW(old_fp, aStackImg); + if (new_fp.Valid() && new_fp.IsAligned() && + old_fp.Value() < new_fp.Value()) { + TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8); + TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg); + if (new_lr.Valid()) { + regs.x29 = new_fp; + regs.x30 = new_lr; + // When using frame pointer to walk stack, we cannot compute sp + // register since we cannot compute sp register from fp/lr/sp + // register, and there is no regular rule to compute previous sp + // register. So mark as invalid. + regs.sp = TaggedUWord(); + (*aFramePointerFramesAcquired)++; + continue; + } + } + } +#endif // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || + // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) + + // We failed to recover a frame either using CFI or FP chasing, and we + // have no other ways to recover the frame. So we have to give up. 
+ break; + + } // top level unwind loop + + // END UNWIND + ///////////////////////////////////////////////////////// +} + +//////////////////////////////////////////////////////////////// +// LUL Unit Testing // +//////////////////////////////////////////////////////////////// + +static const int LUL_UNIT_TEST_STACK_SIZE = 32768; + +#if defined(GP_ARCH_mips64) +static __attribute__((noinline)) unsigned long __getpc(void) { + unsigned long rtaddr; + __asm__ volatile("move %0, $31" : "=r"(rtaddr)); + return rtaddr; +} +#endif + +// This function is innermost in the test call sequence. It uses LUL +// to unwind, and compares the result with the sequence specified in +// the director string. These need to agree in order for the test to +// pass. In order not to screw up the results, this function needs +// to have a not-very big stack frame, since we're only presenting +// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and +// that chunk unavoidably includes the frame for this function. +// +// This function must not be inlined into its callers. Doing so will +// cause the expected-vs-actual backtrace consistency checking to +// fail. Prints summary results to |aLUL|'s logging sink and also +// returns a boolean indicating whether or not the test passed. +static __attribute__((noinline)) bool GetAndCheckStackTrace( + LUL* aLUL, const char* dstring) { + // Get hold of the current unwind-start registers. 
+ UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); +#if defined(GP_ARCH_amd64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "leaq 0(%%rip), %%r15" + "\n\t" + "movq %%r15, 0(%0)" + "\n\t" + "movq %%rsp, 8(%0)" + "\n\t" + "movq %%rbp, 16(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "r15"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 128; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 12); + __asm__ __volatile__( + ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/ + "\n\t" + "popl %%edi" + "\n\t" + "movl %%edi, 0(%0)" + "\n\t" + "movl %%esp, 4(%0)" + "\n\t" + "movl %%ebp, 8(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "edi"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + volatile uintptr_t block[6]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "mov r0, r15" + "\n\t" + "str r0, [%0, #0]" + "\n\t" + "str r14, [%0, #4]" + "\n\t" + "str r13, [%0, #8]" + "\n\t" + "str r12, [%0, #12]" + "\n\t" + "str r11, [%0, #16]" + "\n\t" + "str r7, [%0, #20]" + "\n" + : + : "r"(&block[0]) + : "memory", "r0"); + startRegs.r15 = TaggedUWord(block[0]); + startRegs.r14 = TaggedUWord(block[1]); + startRegs.r13 = TaggedUWord(block[2]); + startRegs.r12 = TaggedUWord(block[3]); + startRegs.r11 = TaggedUWord(block[4]); + startRegs.r7 = TaggedUWord(block[5]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_arm64) + volatile uintptr_t block[4]; + MOZ_ASSERT(sizeof(block) == 32); + __asm__ 
__volatile__( + "adr x0, . \n\t" + "str x0, [%0, #0] \n\t" + "str x29, [%0, #8] \n\t" + "str x30, [%0, #16] \n\t" + "mov x0, sp \n\t" + "str x0, [%0, #24] \n\t" + : + : "r"(&block[0]) + : "memory", "x0"); + startRegs.pc = TaggedUWord(block[0]); + startRegs.x29 = TaggedUWord(block[1]); + startRegs.x30 = TaggedUWord(block[2]); + startRegs.sp = TaggedUWord(block[3]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_mips64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "sd $29, 8(%0) \n" + "sd $30, 16(%0) \n" + : + : "r"(block) + : "memory"); + block[0] = __getpc(); + startRegs.pc = TaggedUWord(block[0]); + startRegs.sp = TaggedUWord(block[1]); + startRegs.fp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#else +# error "Unsupported platform" +#endif + + // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the + // stack. + uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE; + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) { + nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + StackImage* stackImg = new StackImage(); + stackImg->mLen = nToCopy; + stackImg->mStartAvma = start; + if (nToCopy > 0) { + MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy); + memcpy(&stackImg->mContents[0], (void*)start, nToCopy); + } + + // Unwind it. + const int MAX_TEST_FRAMES = 64; + uintptr_t framePCs[MAX_TEST_FRAMES]; + uintptr_t frameSPs[MAX_TEST_FRAMES]; + size_t framesAvail = mozilla::ArrayLength(framePCs); + size_t framesUsed = 0; + size_t framePointerFramesAcquired = 0; + aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed, + &framePointerFramesAcquired, framesAvail, &startRegs, stackImg); + + delete stackImg; + + // if (0) { + // // Show what we have. 
+ // fprintf(stderr, "Got %d frames:\n", (int)framesUsed); + // for (size_t i = 0; i < framesUsed; i++) { + // fprintf(stderr, " [%2d] SP %p PC %p\n", + // (int)i, (void*)frameSPs[i], (void*)framePCs[i]); + // } + // fprintf(stderr, "\n"); + //} + + // Check to see if there's a consistent binding between digits in + // the director string ('1' .. '8') and the PC values acquired by + // the unwind. If there isn't, the unwinding has failed somehow. + uintptr_t binding[8]; // binding for '1' .. binding for '8' + memset((void*)binding, 0, sizeof(binding)); + + // The general plan is to work backwards along the director string + // and forwards along the framePCs array. Doing so corresponds to + // working outwards from the innermost frame of the recursive test set. + const char* cursor = dstring; + + // Find the end. This leaves |cursor| two bytes past the first + // character we want to look at -- see comment below. + while (*cursor) cursor++; + + // Counts the number of consistent frames. + size_t nConsistent = 0; + + // Iterate back to the start of the director string. The starting + // points are a bit complex. We can't use framePCs[0] because that + // contains the PC in this frame (above). We can't use framePCs[1] + // because that will contain the PC at return point in the recursive + // test group (TestFn[1-8]) for their call "out" to this function, + // GetAndCheckStackTrace. Although LUL will compute a correct + // return address, that will not be the same return address as for a + // recursive call out of the the function to another function in the + // group. Hence we can only start consistency checking at + // framePCs[2]. + // + // To be consistent, then, we must ignore the last element in the + // director string as that corresponds to framePCs[1]. Hence the + // start points are: framePCs[2] and the director string 2 bytes + // before the terminating zero. 
+ // + // Also as a result of this, the number of consistent frames counted + // will always be one less than the length of the director string + // (not including its terminating zero). + size_t frameIx; + for (cursor = cursor - 2, frameIx = 2; + cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) { + char c = *cursor; + uintptr_t pc = framePCs[frameIx]; + // If this doesn't hold, the director string is ill-formed. + MOZ_ASSERT(c >= '1' && c <= '8'); + int n = ((int)c) - ((int)'1'); + if (binding[n] == 0) { + // There's no binding for |c| yet, so install |pc| and carry on. + binding[n] = pc; + nConsistent++; + continue; + } + // There's a pre-existing binding for |c|. Check it's consistent. + if (binding[n] != pc) { + // Not consistent. Give up now. + break; + } + // Consistent. Keep going. + nConsistent++; + } + + // So, did we succeed? + bool passed = nConsistent + 1 == strlen(dstring); + + // Show the results. + char buf[200]; + SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + SprintfLiteral(buf, "LULUnitTest: %d consistent, %d in dstring: %s\n", + (int)nConsistent, (int)strlen(dstring), + passed ? "PASS" : "FAIL"); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + + return passed; +} + +// Macro magic to create a set of 8 mutually recursive functions with +// varying frame sizes. These will recurse amongst themselves as +// specified by |strP|, the directory string, and call +// GetAndCheckStackTrace when the string becomes empty, passing it the +// original value of the string. This checks the result, printing +// results on |aLUL|'s logging sink, and also returns a boolean +// indicating whether or not the results are acceptable (correct). 
// Declares one test function called NAME.  All eight test functions share
// the signature (LUL* aLUL, const char* strPorig, const char* strP) -> bool.
#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

// Defines the test function NAME with a local array of FRAMESIZE bytes.
// |strPorig| is the complete director string and |strP| points at the
// character that selects the next function to call.  When |*strP| is the
// terminating zero, the function calls GetAndCheckStackTrace(aLUL, strPorig)
// and returns its result; otherwise it calls the selected function with
// |strP + 1| and returns that function's result.  Any character other than
// '1' .. '8' selects TestFn8.
#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
    /* 8 functions created by this macro offer some variation in frame */     \
    /* sizes.  This isn't as simple as it might seem, since a clever */       \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
    /* and removes it.  We try to defeat this by passing it to a function */  \
    /* in a different compilation unit, and hoping that clang does not */     \
    /* notice that the call is a no-op. */                                    \
    char space[FRAMESIZE];                                                    \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
                                                                              \
    if (*strP == '\0') {                                                      \
      /* We've come to the end of the director string. */                     \
      /* Take a stack snapshot. */                                            \
      return GetAndCheckStackTrace(aLUL, strPorig);                           \
    } else {                                                                  \
      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
      /* thing to do here is call onwards directly, from within the */        \
      /* arms of the case statement.  That gives a problem in that */         \
      /* there will be multiple return points inside each function when */    \
      /* unwinding, so it will be difficult to check for consistency */       \
      /* against the director string.  Instead, we make an indirect */        \
      /* call, so as to guarantee that there is only one call site */         \
      /* within each function.  This does assume that the compiler */         \
      /* won't transform it back to the simple direct-call form. */           \
      /* To discourage it from doing so, the call is bracketed with */        \
      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
      switch (*strP) {                                                        \
        case '1':                                                             \
          nextFn = TestFn1;                                                   \
          break;                                                              \
        case '2':                                                             \
          nextFn = TestFn2;                                                   \
          break;                                                              \
        case '3':                                                             \
          nextFn = TestFn3;                                                   \
          break;                                                              \
        case '4':                                                             \
          nextFn = TestFn4;                                                   \
          break;                                                              \
        case '5':                                                             \
          nextFn = TestFn5;                                                   \
          break;                                                              \
        case '6':                                                             \
          nextFn = TestFn6;                                                   \
          break;                                                              \
        case '7':                                                             \
          nextFn = TestFn7;                                                   \
          break;                                                              \
        case '8':                                                             \
          nextFn = TestFn8;                                                   \
          break;                                                              \
        default:                                                              \
          nextFn = TestFn8;                                                   \
          break;                                                              \
      }                                                                       \
      /* "use" |space| immediately after the recursive call, */               \
      /* so as to dissuade clang from deallocating the space while */         \
      /* the call is active, or otherwise messing with the stack frame. */    \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
      Unused << write(1, space, 0);                                           \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      return passed;                                                          \
    }                                                                         \
  }

// The test functions are mutually recursive, so it is necessary to
// declare them before defining them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)

// The second argument is the size in bytes of each function's local array;
// the sizes differ so that the frames differ.
GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)

// This starts the test sequence going.  Call here to generate a
// sequence of calls as directed by the string |dstring|.  The call
// sequence will, from its innermost frame, finish by calling
// GetAndCheckStackTrace() and passing it |dstring|.
// GetAndCheckStackTrace() will unwind the stack, check consistency
// of those results against |dstring|, and print a pass/fail message
// to aLUL's logging sink.  It also updates the counters in *aNTests
// and *aNTestsPassed.
__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
                                       const char* dstring) {
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  }

  // Really run the test.  The director string is passed twice: once as the
  // original string and once as the cursor into it.
  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3 * i;
  }
  // Empty asm statement that takes |sum| as an input operand, so that the
  // value counts as used.
  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
  }
}

// Runs the fixed set of director strings below through TestUnw.  Both
// counters are reset to zero first, so on return *aNTests is the number of
// tests run by this call and *aNTestsPassed the number that passed.
// Progress markers are written to |aLUL|'s logging sink.
void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
                     LUL* aLUL) {
  aLUL->mLog(":\n");
  aLUL->mLog("LULUnitTest: BEGIN\n");
  *aNTests = *aNTestsPassed = 0;
  TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
  TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
  TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
  TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
  TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
  TestUnw(aNTests, aNTestsPassed, aLUL,
          "123456781122334455667788777777777777777777777");
  aLUL->mLog("LULUnitTest: END\n");
  aLUL->mLog(":\n");
}

}  // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulMain.h b/mozglue/baseprofiler/lul/LulMain.h
new file mode 100644
index 0000000000..b0cb7f4f1e
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulMain.h
@@ -0,0 +1,377 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef LulMain_h
#define LulMain_h

#include "PlatformMacros.h"
#include "mozilla/Atomics.h"
#include "mozilla/MemoryReporting.h"

// LUL: A Lightweight Unwind Library.
// This file provides the end-user (external) interface for LUL.

// Some comments about naming in the implementation.  These are safe
// to ignore if you are merely using LUL, but are important if you
// hack on its internals.
//
// Debuginfo readers in general have tended to use the word "address"
// to mean several different things.  This sometimes makes them
// difficult to understand and maintain.
LUL tries hard to avoid +// using the word "address" and instead uses the following more +// precise terms: +// +// * SVMA ("Stated Virtual Memory Address"): this is an address of a +// symbol (etc) as it is stated in the symbol table, or other +// metadata, of an object. Such values are typically small and +// start from zero or thereabouts, unless the object has been +// prelinked. +// +// * AVMA ("Actual Virtual Memory Address"): this is the address of a +// symbol (etc) in a running process, that is, once the associated +// object has been mapped into a process. Such values are typically +// much larger than SVMAs, since objects can get mapped arbitrarily +// far along the address space. +// +// * "Bias": the difference between AVMA and SVMA for a given symbol +// (specifically, AVMA - SVMA). The bias is always an integral +// number of pages. Once we know the bias for a given object's +// text section (for example), we can compute the AVMAs of all of +// its text symbols by adding the bias to their SVMAs. +// +// * "Image address": typically, to read debuginfo from an object we +// will temporarily mmap in the file so as to read symbol tables +// etc. Addresses in this temporary mapping are called "Image +// addresses". Note that the temporary mapping is entirely +// unrelated to the mappings of the file that the dynamic linker +// must perform merely in order to get the program to run. Hence +// image addresses are unrelated to either SVMAs or AVMAs. + +namespace lul { + +// A machine word plus validity tag. +class TaggedUWord { + public: + // RUNS IN NO-MALLOC CONTEXT + // Construct a valid one. + explicit TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {} + + // RUNS IN NO-MALLOC CONTEXT + // Construct an invalid one. + TaggedUWord() : mValue(0), mValid(false) {} + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator+(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? 
TaggedUWord(Value() + rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator-(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() - rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator&(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() & rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator|(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() | rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord CmpGEs(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + intptr_t s1 = (intptr_t)Value(); + intptr_t s2 = (intptr_t)rhs.Value(); + return TaggedUWord(s1 >= s2 ? 1 : 0); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator<<(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + uintptr_t shift = rhs.Value(); + if (shift < 8 * sizeof(uintptr_t)) return TaggedUWord(Value() << shift); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + // Is equal? Note: non-validity on either side gives non-equality. + bool operator==(TaggedUWord other) const { + return (mValid && other.Valid()) ? (mValue == other.Value()) : false; + } + + // RUNS IN NO-MALLOC CONTEXT + // Is it word-aligned? + bool IsAligned() const { + return mValid && (mValue & (sizeof(uintptr_t) - 1)) == 0; + } + + // RUNS IN NO-MALLOC CONTEXT + uintptr_t Value() const { return mValue; } + + // RUNS IN NO-MALLOC CONTEXT + bool Valid() const { return mValid; } + + private: + uintptr_t mValue; + bool mValid; +}; + +// The registers, with validity tags, that will be unwound. 

// One member per register in the set for the target architecture; which
// set is present is selected by the GP_ARCH_* macros.
struct UnwindRegs {
#if defined(GP_ARCH_arm)
  TaggedUWord r7;
  TaggedUWord r11;
  TaggedUWord r12;
  TaggedUWord r13;
  TaggedUWord r14;
  TaggedUWord r15;
#elif defined(GP_ARCH_arm64)
  TaggedUWord x29;
  TaggedUWord x30;
  TaggedUWord sp;
  TaggedUWord pc;
#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  // The same member names serve both the 32-bit and the 64-bit target.
  TaggedUWord xbp;
  TaggedUWord xsp;
  TaggedUWord xip;
#elif defined(GP_ARCH_mips64)
  TaggedUWord sp;
  TaggedUWord fp;
  TaggedUWord pc;
#else
# error "Unknown plat"
#endif
};

// The maximum number of bytes in a stack snapshot.  This value can be increased
// if necessary, but testing showed that 160k is enough to obtain good
// backtraces on x86_64 Linux.  Most backtraces fit comfortably into 4-8k of
// stack space, but we do have some very deep stacks occasionally.  Please see
// the comments in DoNativeBacktrace as to why it's OK to have this value be so
// large.
static const size_t N_STACK_BYTES = 160 * 1024;

// The stack chunk image that will be unwound.
struct StackImage {
  // [start_avma, +len) specify the address range in the buffer.
  // Obviously we require 0 <= len <= N_STACK_BYTES.
  uintptr_t mStartAvma;  // address that mContents[0] was copied from
  size_t mLen;           // number of bytes of mContents in use
  uint8_t mContents[N_STACK_BYTES];  // the copied stack bytes
};

// Statistics collection for the unwinder.
// A set of three frame counters.  |T| is the counter type; the templated
// members allow converting between, and subtracting, instances that use
// different counter types.
template <typename T>
class LULStats {
 public:
  // All counters start at zero.
  LULStats() : mContext(0), mCFI(0), mFP(0) {}

  // Copies the three counters from a LULStats with a (possibly) different
  // counter type.
  template <typename S>
  explicit LULStats(const LULStats<S>& aOther)
      : mContext(aOther.mContext), mCFI(aOther.mCFI), mFP(aOther.mFP) {}

  // Assigns the three counters from a LULStats with a (possibly) different
  // counter type.
  template <typename S>
  LULStats<T>& operator=(const LULStats<S>& aOther) {
    mContext = aOther.mContext;
    mCFI = aOther.mCFI;
    mFP = aOther.mFP;
    return *this;
  }

  // Returns the sum of the three per-counter differences (this - aOther).
  // This only reads the counters, so it is const-qualified and can be used
  // on const objects.
  template <typename S>
  uint32_t operator-(const LULStats<S>& aOther) const {
    return (mContext - aOther.mContext) + (mCFI - aOther.mCFI) +
           (mFP - aOther.mFP);
  }

  T mContext;  // Number of context frames
  T mCFI;      // Number of CFI/EXIDX frames
  T mFP;       // Number of frame-pointer recovered frames
};

// The core unwinder library class.  Just one of these is needed, and
// it can be shared by multiple unwinder threads.
//
// The library operates in one of two modes.
//
// * Admin mode.  The library is in this state after creation.  In Admin
//   mode, no unwinding may be performed.  It is however allowable to
//   perform administrative tasks -- primarily, loading of unwind info
//   -- in this mode.  In particular, it is safe for the library to
//   perform dynamic memory allocation in this mode.  Safe in the
//   sense that there is no risk of deadlock against unwinding threads
//   that might -- because of where they have been sampled -- hold the
//   system's malloc lock.
//
// * Unwind mode.  In this mode, calls to ::Unwind may be made, but
//   nothing else.  ::Unwind guarantees not to make any dynamic memory
//   requests, so as to guarantee that the calling thread won't
//   deadlock in the case where it already holds the system's malloc lock.
//
// The library is created in Admin mode.  After debuginfo is loaded,
// the caller must switch it into Unwind mode by calling
// ::EnableUnwinding.  There is no way to switch it back to Admin mode
// after that.
To safely switch back to Admin mode would require the +// caller (or other external agent) to guarantee that there are no +// pending ::Unwind calls. + +class PriMap; +class SegArray; +class UniqueStringUniverse; + +class LUL { + public: + // Create; supply a logging sink. Sets the object in Admin mode. + explicit LUL(void (*aLog)(const char*)); + + // Destroy. Caller is responsible for ensuring that no other + // threads are in Unwind calls. All resources are freed and all + // registered unwinder threads are deregistered. Can be called + // either in Admin or Unwind mode. + ~LUL(); + + // Notify the library that unwinding is now allowed and so + // admin-mode calls are no longer allowed. The object is initially + // created in admin mode. The only possible transition is + // admin->unwinding, therefore. + void EnableUnwinding(); + + // Notify of a new r-x mapping, and load the associated unwind info. + // The filename is strdup'd and used for debug printing. If + // aMappedImage is NULL, this function will mmap/munmap the file + // itself, so as to be able to read the unwind info. If + // aMappedImage is non-NULL then it is assumed to point to a + // called-supplied and caller-managed mapped image of the file. + // May only be called in Admin mode. + void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName, + const void* aMappedImage); + + // In rare cases we know an executable area exists but don't know + // what the associated file is. This call notifies LUL of such + // areas. This is important for correct functioning of stack + // scanning and of the x86-{linux,android} special-case + // __kernel_syscall function handling. + // This must be called only after the code area in + // question really has been mapped. + // May only be called in Admin mode. + void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize); + + // Notify that a mapped area has been unmapped; discard any + // associated unwind info. Acquires mRWlock for writing. 
Note that + // to avoid segfaulting the stack-scan unwinder, which inspects code + // areas, this must be called before the code area in question is + // really unmapped. Note that, unlike NotifyAfterMap(), this + // function takes the start and end addresses of the range to be + // unmapped, rather than a start and a length parameter. This is so + // as to make it possible to notify an unmap for the entire address + // space using a single call. + // May only be called in Admin mode. + void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax); + + // Apply NotifyBeforeUnmap to the entire address space. This causes + // LUL to discard all unwind and executable-area information for the + // entire address space. + // May only be called in Admin mode. + void NotifyBeforeUnmapAll() { NotifyBeforeUnmap(0, UINTPTR_MAX); } + + // Returns the number of mappings currently registered. + // May only be called in Admin mode. + size_t CountMappings(); + + // Unwind |aStackImg| starting with the context in |aStartRegs|. + // Write the number of frames recovered in *aFramesUsed. Put + // the PC values in aFramePCs[0 .. *aFramesUsed-1] and + // the SP values in aFrameSPs[0 .. *aFramesUsed-1]. + // |aFramesAvail| is the size of the two output arrays and hence the + // largest possible value of *aFramesUsed. PC values are always + // valid, and the unwind will stop when the PC becomes invalid, but + // the SP values might be invalid, in which case the value zero will + // be written in the relevant frameSPs[] slot. + // + // This function assumes that the SP values increase as it unwinds + // away from the innermost frame -- that is, that the stack grows + // down. It monitors SP values as it unwinds to check they + // decrease, so as to avoid looping on corrupted stacks. + // + // May only be called in Unwind mode. Multiple threads may unwind + // at once. LUL user is responsible for ensuring that no thread makes + // any Admin calls whilst in Unwind mode. 
+ // MOZ_CRASHes if the calling thread is not registered for unwinding. + // + // The calling thread must previously have been registered via a call to + // RegisterSampledThread. + void Unwind(/*OUT*/ uintptr_t* aFramePCs, + /*OUT*/ uintptr_t* aFrameSPs, + /*OUT*/ size_t* aFramesUsed, + /*OUT*/ size_t* aFramePointerFramesAcquired, size_t aFramesAvail, + UnwindRegs* aStartRegs, StackImage* aStackImg); + + // The logging sink. Call to send debug strings to the caller- + // specified destination. Can only be called by the Admin thread. + void (*mLog)(const char*); + + // Statistics relating to unwinding. These have to be atomic since + // unwinding can occur on different threads simultaneously. + LULStats<mozilla::Atomic<uint32_t>> mStats; + + // Possibly show the statistics. This may not be called from any + // registered sampling thread, since it involves I/O. + void MaybeShowStats(); + + size_t SizeOfIncludingThis(mozilla::MallocSizeOf) const; + + private: + // The statistics counters at the point where they were last printed. + LULStats<uint32_t> mStatsPrevious; + + // Are we in admin mode? Initially |true| but changes to |false| + // once unwinding begins. + bool mAdminMode; + + // The thread ID associated with admin mode. This is the only thread + // that is allowed do perform non-Unwind calls on this object. Conversely, + // no registered Unwinding thread may be the admin thread. This is so + // as to clearly partition the one thread that may do dynamic memory + // allocation from the threads that are being sampled, since the latter + // absolutely may not do dynamic memory allocation. + int mAdminThreadId; + + // The top level mapping from code address ranges to postprocessed + // unwind info. Basically a sorted array of (addr, len, info) + // records. This field is updated by NotifyAfterMap and NotifyBeforeUnmap. + PriMap* mPriMap; + + // An auxiliary structure that records which address ranges are + // mapped r-x, for the benefit of the stack scanner. 
+ SegArray* mSegArray; + + // A UniqueStringUniverse that holds all the strdup'd strings created + // whilst reading unwind information. This is included so as to make + // it possible to free them in ~LUL. + UniqueStringUniverse* mUSU; +}; + +// Run unit tests on an initialised, loaded-up LUL instance, and print +// summary results on |aLUL|'s logging sink. Also return the number +// of tests run in *aNTests and the number that passed in +// *aNTestsPassed. +void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed, + LUL* aLUL); + +} // namespace lul + +#endif // LulMain_h diff --git a/mozglue/baseprofiler/lul/LulMainInt.h b/mozglue/baseprofiler/lul/LulMainInt.h new file mode 100644 index 0000000000..c2ee45d73d --- /dev/null +++ b/mozglue/baseprofiler/lul/LulMainInt.h @@ -0,0 +1,420 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulMainInt_h +#define LulMainInt_h + +#include "PlatformMacros.h" +#include "LulMain.h" // for TaggedUWord + +#include "mozilla/Assertions.h" + +#include <string> +#include <vector> + +// This file is provides internal interface inside LUL. If you are an +// end-user of LUL, do not include it in your code. The end-user +// interface is in LulMain.h. + +namespace lul { + +using std::vector; + +//////////////////////////////////////////////////////////////// +// DW_REG_ constants // +//////////////////////////////////////////////////////////////// + +// These are the Dwarf CFI register numbers, as (presumably) defined +// in the ELF ABI supplements for each architecture. + +enum DW_REG_NUMBER { + // No real register has this number. 
It's convenient to be able to + // treat the CFA (Canonical Frame Address) as "just another + // register", though. + DW_REG_CFA = -1, +#if defined(GP_ARCH_arm) + // ARM registers + DW_REG_ARM_R7 = 7, + DW_REG_ARM_R11 = 11, + DW_REG_ARM_R12 = 12, + DW_REG_ARM_R13 = 13, + DW_REG_ARM_R14 = 14, + DW_REG_ARM_R15 = 15, +#elif defined(GP_ARCH_arm64) + // aarch64 registers + DW_REG_AARCH64_X29 = 29, + DW_REG_AARCH64_X30 = 30, + DW_REG_AARCH64_SP = 31, +#elif defined(GP_ARCH_amd64) + // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are + // combined, a merged set of register constants is needed. + DW_REG_INTEL_XBP = 6, + DW_REG_INTEL_XSP = 7, + DW_REG_INTEL_XIP = 16, +#elif defined(GP_ARCH_x86) + DW_REG_INTEL_XBP = 5, + DW_REG_INTEL_XSP = 4, + DW_REG_INTEL_XIP = 8, +#elif defined(GP_ARCH_mips64) + DW_REG_MIPS_SP = 29, + DW_REG_MIPS_FP = 30, + DW_REG_MIPS_PC = 34, +#else +# error "Unknown arch" +#endif +}; + +//////////////////////////////////////////////////////////////// +// PfxExpr // +//////////////////////////////////////////////////////////////// + +enum PfxExprOp { + // meaning of mOperand effect on stack + PX_Start, // bool start-with-CFA? start, with CFA on stack, or not + PX_End, // none stop; result is at top of stack + PX_SImm32, // int32 push signed int32 + PX_DwReg, // DW_REG_NUMBER push value of the specified reg + PX_Deref, // none pop X ; push *X + PX_Add, // none pop X ; pop Y ; push Y + X + PX_Sub, // none pop X ; pop Y ; push Y - X + PX_And, // none pop X ; pop Y ; push Y & X + PX_Or, // none pop X ; pop Y ; push Y | X + PX_CmpGES, // none pop X ; pop Y ; push (Y >=s X) ? 
1 : 0 + PX_Shl // none pop X ; pop Y ; push Y << X +}; + +struct PfxInstr { + PfxInstr(PfxExprOp opcode, int32_t operand) + : mOpcode(opcode), mOperand(operand) {} + explicit PfxInstr(PfxExprOp opcode) : mOpcode(opcode), mOperand(0) {} + bool operator==(const PfxInstr& other) const { + return mOpcode == other.mOpcode && mOperand == other.mOperand; + } + PfxExprOp mOpcode; + int32_t mOperand; +}; + +static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly"); + +// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start]. +// In the case of any mishap (stack over/underflow, running off the end of +// the instruction vector, obviously malformed sequences), +// return an invalid TaggedUWord. +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>& aPfxInstrs); + +//////////////////////////////////////////////////////////////// +// LExpr // +//////////////////////////////////////////////////////////////// + +// An expression -- very primitive. Denotes either "register + +// offset", a dereferenced version of the same, or a reference to a +// prefix expression stored elsewhere. So as to allow convenient +// handling of Dwarf-derived unwind info, the register may also denote +// the CFA. A large number of these need to be stored, so we ensure +// it fits into 8 bytes. See comment below on RuleSet to see how +// expressions fit into the bigger picture. + +enum LExprHow { + UNKNOWN = 0, // This LExpr denotes no value. + NODEREF, // Value is (mReg + mOffset). + DEREF, // Value is *(mReg + mOffset). 
+ PFXEXPR // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset]) +}; + +inline static const char* NameOf_LExprHow(LExprHow how) { + switch (how) { + case UNKNOWN: + return "UNKNOWN"; + case NODEREF: + return "NODEREF"; + case DEREF: + return "DEREF"; + case PFXEXPR: + return "PFXEXPR"; + default: + return "LExpr-??"; + } +} + +struct LExpr { + // Denotes an expression with no value. + LExpr() : mHow(UNKNOWN), mReg(0), mOffset(0) {} + + // Denotes any expressible expression. + LExpr(LExprHow how, int16_t reg, int32_t offset) + : mHow(how), mReg(reg), mOffset(offset) { + switch (how) { + case UNKNOWN: + MOZ_ASSERT(reg == 0 && offset == 0); + break; + case NODEREF: + break; + case DEREF: + break; + case PFXEXPR: + MOZ_ASSERT(reg == 0 && offset >= 0); + break; + default: + MOZ_ASSERT(0, "LExpr::LExpr: invalid how"); + } + } + + // Change the offset for an expression that references memory. + LExpr add_delta(long delta) { + MOZ_ASSERT(mHow == NODEREF); + // If this is a non-debug build and the above assertion would have + // failed, at least return LExpr() so that the machinery that uses + // the resulting expression fails in a repeatable way. + return (mHow == NODEREF) ? LExpr(mHow, mReg, mOffset + delta) + : LExpr(); // Gone bad + } + + // Dereference an expression that denotes a memory address. + LExpr deref() { + MOZ_ASSERT(mHow == NODEREF); + // Same rationale as for add_delta(). + return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset) + : LExpr(); // Gone bad + } + + // Print a rule for recovery of |aNewReg| whose recovered value + // is this LExpr. + std::string ShowRule(const char* aNewReg) const; + + // Evaluate this expression, producing a TaggedUWord. |aOldRegs| + // holds register values that may be referred to by the expression. + // |aCFA| holds the CFA value, if any, that applies. |aStackImg| + // contains a chuck of stack that will be consulted if the expression + // references memory. 
|aPfxInstrs| holds the vector of PfxInstrs + // that will be consulted if this is a PFXEXPR. + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA, + const StackImage* aStackImg, + const vector<PfxInstr>* aPfxInstrs) const; + + // Representation of expressions. If |mReg| is DW_REG_CFA (-1) then + // it denotes the CFA. All other allowed values for |mReg| are + // nonnegative and are DW_REG_ values. + LExprHow mHow : 8; + int16_t mReg; // A DW_REG_ value + int32_t mOffset; // 32-bit signed offset should be more than enough. +}; + +static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly"); + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +// This is platform-dependent. For some address range, describes how +// to recover the CFA and then how to recover the registers for the +// previous frame. +// +// The set of LExprs contained in a given RuleSet describe a DAG which +// says how to compute the caller's registers ("new registers") from +// the callee's registers ("old registers"). The DAG can contain a +// single internal node, which is the value of the CFA for the callee. +// It would be possible to construct a DAG that omits the CFA, but +// including it makes the summarisers simpler, and the Dwarf CFI spec +// has the CFA as a central concept. +// +// For this to make sense, |mCfaExpr| can't have +// |mReg| == DW_REG_CFA since we have no previous value for the CFA. +// All of the other |Expr| fields can -- and usually do -- specify +// |mReg| == DW_REG_CFA. +// +// With that in place, the unwind algorithm proceeds as follows. +// +// (0) Initially: we have values for the old registers, and a memory +// image. +// +// (1) Compute the CFA by evaluating |mCfaExpr|. Add the computed +// value to the set of "old registers". 
+// +// (2) Compute values for the registers by evaluating all of the other +// |Expr| fields in the RuleSet. These can depend on both the old +// register values and the just-computed CFA. +// +// If we are unwinding without computing a CFA, perhaps because the +// RuleSets are derived from EXIDX instead of Dwarf, then +// |mCfaExpr.mHow| will be UNKNOWN, so the computed value will +// be invalid -- that is, TaggedUWord() -- and so any attempt to use +// that will result in the same value. But that's OK because the +// RuleSet would make no sense if it depended on the CFA but specified no +// way to compute it. +// +// A RuleSet is not allowed to cover zero address range. Having zero +// length would break binary searching in SecMaps and PriMaps. + +class RuleSet { + public: + RuleSet(); + void Print(void (*aLog)(const char*)) const; + + // Find the LExpr* for a given DW_REG_ value in this class. + LExpr* ExprForRegno(DW_REG_NUMBER aRegno); + + // The code address range this RuleSet applies to: |mLen| addresses + // starting at |mAddr|. |mLen| must be nonzero (see above). + uintptr_t mAddr; + uintptr_t mLen; + // How to compute the CFA. + LExpr mCfaExpr; + // How to compute caller register values. These may reference the + // value defined by |mCfaExpr|. +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + LExpr mXipExpr; // return address + LExpr mXspExpr; + LExpr mXbpExpr; +#elif defined(GP_ARCH_arm) + LExpr mR15expr; // return address + LExpr mR14expr; + LExpr mR13expr; + LExpr mR12expr; + LExpr mR11expr; + LExpr mR7expr; +#elif defined(GP_ARCH_arm64) + LExpr mX29expr; // frame pointer register + LExpr mX30expr; // link register + LExpr mSPexpr; +#elif defined(GP_ARCH_mips64) + LExpr mPCexpr; + LExpr mFPexpr; + LExpr mSPexpr; +#else +# error "Unknown arch" +#endif +}; + +// Returns |true| for Dwarf register numbers which are members +// of the set of registers that LUL unwinds on this target. 
+static inline bool registerIsTracked(DW_REG_NUMBER reg) { + switch (reg) { +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + case DW_REG_INTEL_XSP: + case DW_REG_INTEL_XIP: + return true; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + case DW_REG_ARM_R14: + case DW_REG_ARM_R15: + return true; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_X30: + case DW_REG_AARCH64_SP: + return true; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_FP: + case DW_REG_MIPS_SP: + case DW_REG_MIPS_PC: + return true; +#else +# error "Unknown arch" +#endif + default: + return false; + } +} + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// A SecMap may have zero address range, temporarily, whilst RuleSets +// are being added to it. But adding a zero-range SecMap to a PriMap +// will make it impossible to maintain the total order of the PriMap +// entries, and so that can't be allowed to happen. + +class SecMap { + public: + // These summarise the contained mRuleSets, in that they give + // exactly the lowest and highest addresses that any of the entries + // in this SecMap cover. Hence invariants: + // + // mRuleSets is nonempty + // <=> mSummaryMinAddr <= mSummaryMaxAddr + // && mSummaryMinAddr == mRuleSets[0].mAddr + // && mSummaryMaxAddr == mRuleSets[#rulesets-1].mAddr + // + mRuleSets[#rulesets-1].mLen - 1; + // + // This requires that no RuleSet has zero length. + // + // mRuleSets is empty + // <=> mSummaryMinAddr > mSummaryMaxAddr + // + // This doesn't constrain mSummaryMinAddr and mSummaryMaxAddr uniquely, + // so let's use mSummaryMinAddr == 1 and mSummaryMaxAddr == 0 to denote + // this case. 
+ + explicit SecMap(void (*aLog)(const char*)); + ~SecMap(); + + // Binary search mRuleSets to find one that brackets |ia|, or nullptr + // if none is found. It's not allowable to do this until PrepareRuleSets + // has been called first. + RuleSet* FindRuleSet(uintptr_t ia); + + // Add a RuleSet to the collection. The rule is copied in. Calling + // this makes the map non-searchable. + void AddRuleSet(const RuleSet* rs); + + // Add a PfxInstr to the vector of such instrs, and return the index + // in the vector. Calling this makes the map non-searchable. + uint32_t AddPfxInstr(PfxInstr pfxi); + + // Returns the entire vector of PfxInstrs. + const vector<PfxInstr>* GetPfxInstrs() { return &mPfxInstrs; } + + // Prepare the map for searching. Also, remove any rules for code + // address ranges which don't fall inside [start, start + len). |len| may + // not be zero. + void PrepareRuleSets(uintptr_t start, size_t len); + + bool IsEmpty(); + + size_t Size() { return mRuleSets.size(); } + + size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + // The min and max addresses of the addresses in the contained + // RuleSets. See comment above for invariants. + uintptr_t mSummaryMinAddr; + uintptr_t mSummaryMaxAddr; + + private: + // False whilst adding entries; true once it is safe to call FindRuleSet. + // Transition (false->true) is caused by calling PrepareRuleSets(). + bool mUsable; + + // A vector of RuleSets, sorted, nonoverlapping (post PrepareRuleSets()). + vector<RuleSet> mRuleSets; + + // A vector of PfxInstrs, which are referred to by the RuleSets. + // These are provided as a representation of Dwarf expressions + // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression), + // are relatively expensive to evaluate, and are therefore + // expected to be used only occasionally. + // + // The vector holds a bunch of separate PfxInstr programs, each one + // starting with a PX_Start and terminated by a PX_End, all + // concatenated together. 
When a RuleSet can't recover a value + // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is + // the index in this vector of start of the necessary PfxInstr program. + vector<PfxInstr> mPfxInstrs; + + // A logging sink, for debugging. + void (*mLog)(const char*); +}; + +} // namespace lul + +#endif // ndef LulMainInt_h diff --git a/mozglue/baseprofiler/lul/platform-linux-lul.cpp b/mozglue/baseprofiler/lul/platform-linux-lul.cpp new file mode 100644 index 0000000000..a9ee65858d --- /dev/null +++ b/mozglue/baseprofiler/lul/platform-linux-lul.cpp @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> + +#include "AutoObjectMapper.h" +#include "BaseProfiler.h" +#include "BaseProfilerSharedLibraries.h" +#include "platform.h" +#include "PlatformMacros.h" +#include "LulMain.h" + +// Contains miscellaneous helpers that are used to connect the Gecko Profiler +// and LUL. + +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. +void read_procmaps(lul::LUL* aLUL) { + MOZ_ASSERT(aLUL->CountMappings() == 0); + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + + for (size_t i = 0; i < info.GetSize(); i++) { + const SharedLibrary& lib = info.GetEntry(i); + + std::string nativePath = lib.GetDebugPath(); + + // We can use the standard POSIX-based mapper. + AutoObjectMapperPOSIX mapper(aLUL->mLog); + + // Ask |mapper| to map the object. Then hand its mapped address + // to NotifyAfterMap(). 
+ void* image = nullptr; + size_t size = 0; + bool ok = mapper.Map(&image, &size, nativePath); + if (ok && image && size > 0) { + aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd() - lib.GetStart(), + nativePath.c_str(), image); + } else if (!ok && lib.GetDebugName().empty()) { + // The object has no name and (as a consequence) the mapper failed to map + // it. This happens on Linux, where GetInfoForSelf() produces such a + // mapping for the VDSO. This is a problem on x86-{linux,android} because + // lack of knowledge about the mapped area inhibits LUL's special + // __kernel_syscall handling. Hence notify |aLUL| at least of the + // mapping, even though it can't read any unwind information for the area. + aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd() - lib.GetStart()); + } + + // |mapper| goes out of scope at this point and so its destructor + // unmaps the object. + } + +#else +# error "Unknown platform" +#endif +} + +// LUL needs a callback for its logging sink. +void logging_sink_for_LUL(const char* str) { + // These are only printed when Verbose logging is enabled (e.g. with + // MOZ_BASE_PROFILER_VERBOSE_LOGGING=1). This is because LUL's logging is much + // more verbose than the rest of the profiler's logging, which occurs at the + // Info (3) and Debug (4) levels. + // FIXME: This causes a build failure in memory/replace/dmd/test/SmokeDMD (!) + // and other places, because it doesn't link the implementation in + // platform.cpp. + // VERBOSE_LOG("[%d] %s", profiler_current_process_id(), str); +} diff --git a/mozglue/baseprofiler/lul/platform-linux-lul.h b/mozglue/baseprofiler/lul/platform-linux-lul.h new file mode 100644 index 0000000000..b54e80edcf --- /dev/null +++ b/mozglue/baseprofiler/lul/platform-linux-lul.h @@ -0,0 +1,21 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PLATFORM_LINUX_LUL_H +#define MOZ_PLATFORM_LINUX_LUL_H + +#include "platform.h" + +#include "BaseProfiler.h" + +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. +void read_procmaps(lul::LUL* aLUL); + +// LUL needs a callback for its logging sink. +void logging_sink_for_LUL(const char* str); + +#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */ diff --git a/mozglue/baseprofiler/moz.build b/mozglue/baseprofiler/moz.build new file mode 100644 index 0000000000..db2949863f --- /dev/null +++ b/mozglue/baseprofiler/moz.build @@ -0,0 +1,118 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This is pretty much a copy from tools/profiler, cut down to exclude anything +# that cannot work in mozglue (because they are totally dependent on libxul- +# specific code). +# All exported headers now prefixed with "Base" to avoid #include name clashes. 
+ +if CONFIG["MOZ_GECKO_PROFILER"]: + DEFINES["IMPL_MFBT"] = True + EXPORTS += [ + "public/BaseProfilerSharedLibraries.h", + "public/BaseProfilingCategory.h", + "public/BaseProfilingStack.h", + "public/ProfilingCategoryList.h", + ] + UNIFIED_SOURCES += [ + "core/PageInformation.cpp", + "core/platform.cpp", + "core/ProfileBuffer.cpp", + "core/ProfileBufferEntry.cpp", + "core/ProfiledThreadData.cpp", + "core/ProfileJSONWriter.cpp", + "core/ProfilerBacktrace.cpp", + "core/ProfilerMarkers.cpp", + "core/ProfilingCategory.cpp", + "core/ProfilingStack.cpp", + "core/RegisteredThread.cpp", + ] + + if CONFIG["OS_TARGET"] in ("Android", "Linux", "FreeBSD"): + if CONFIG["CPU_ARCH"] in ("arm", "aarch64", "x86", "x86_64", "mips64"): + UNIFIED_SOURCES += [ + "lul/AutoObjectMapper.cpp", + "lul/LulCommon.cpp", + "lul/LulDwarf.cpp", + "lul/LulDwarfSummariser.cpp", + "lul/LulElf.cpp", + "lul/LulMain.cpp", + "lul/platform-linux-lul.cpp", + ] + # These files cannot be built in unified mode because of name clashes with mozglue headers on Android. + SOURCES += [ + "core/shared-libraries-linux.cc", + ] + if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] != "FreeBSD": + SOURCES += [ + "core/EHABIStackWalk.cpp", + ] + elif CONFIG["OS_TARGET"] == "Darwin": + UNIFIED_SOURCES += [ + "core/shared-libraries-macos.cc", + ] + elif CONFIG["OS_TARGET"] == "WINNT": + SOURCES += [ + "core/shared-libraries-win32.cc", + ] + + LOCAL_INCLUDES += [ + "/mozglue/baseprofiler/core/", + "/mozglue/linker", + ] + + if CONFIG["OS_TARGET"] == "Android": + DEFINES["ANDROID_NDK_MAJOR_VERSION"] = CONFIG["ANDROID_NDK_MAJOR_VERSION"] + DEFINES["ANDROID_NDK_MINOR_VERSION"] = CONFIG["ANDROID_NDK_MINOR_VERSION"] + + FINAL_LIBRARY = "mozglue" + +# Many of the headers in this directory are usable in non-MOZ_GECKO_PROFILER +# builds. +# BaseProfiler.h and BaseProfilerCounts.h only contain no-op macros in that +# case. 
+EXPORTS += [ + "public/BaseProfiler.h", +] + +EXPORTS.mozilla += [ + "public/BaseProfileJSONWriter.h", + "public/BaseProfilerCounts.h", + "public/BaseProfilerDetail.h", + "public/BaseProfilerMarkers.h", + "public/BaseProfilerMarkersDetail.h", + "public/BaseProfilerMarkersPrerequisites.h", + "public/BaseProfilerMarkerTypes.h", + "public/BlocksRingBuffer.h", + "public/leb128iterator.h", + "public/ModuloBuffer.h", + "public/PowerOfTwo.h", + "public/ProfileBufferChunk.h", + "public/ProfileBufferChunkManager.h", + "public/ProfileBufferChunkManagerSingle.h", + "public/ProfileBufferChunkManagerWithLocalLimit.h", + "public/ProfileBufferControlledChunkManager.h", + "public/ProfileBufferEntryKinds.h", + "public/ProfileBufferEntrySerialization.h", + "public/ProfileBufferIndex.h", + "public/ProfileChunkedBuffer.h", +] + +if CONFIG["MOZ_VTUNE"]: + DEFINES["MOZ_VTUNE_INSTRUMENTATION"] = True + UNIFIED_SOURCES += [ + "core/VTuneProfiler.cpp", + ] + + +if CONFIG["CC_TYPE"] in ("clang", "gcc"): + CXXFLAGS += [ + "-Wno-error=shadow", + "-Wno-ignored-qualifiers", # due to use of breakpad headers + ] + +with Files("**"): + BUG_COMPONENT = ("Core", "Gecko Profiler") diff --git a/mozglue/baseprofiler/public/BaseProfileJSONWriter.h b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h new file mode 100644 index 0000000000..5dcf06f3f3 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h @@ -0,0 +1,388 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef BASEPROFILEJSONWRITER_H +#define BASEPROFILEJSONWRITER_H + +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" + +#include <functional> +#include <ostream> +#include <string_view> + +namespace mozilla { +namespace baseprofiler { + +class SpliceableJSONWriter; + +// On average, profile JSONs are large enough such that we want to avoid +// reallocating its buffer when expanding. Additionally, the contents of the +// profile are not accessed until the profile is entirely written. For these +// reasons we use a chunked writer that keeps an array of chunks, which is +// concatenated together after writing is finished. +class ChunkedJSONWriteFunc final : public JSONWriteFunc { + public: + friend class SpliceableJSONWriter; + + ChunkedJSONWriteFunc() : mChunkPtr{nullptr}, mChunkEnd{nullptr} { + AllocChunk(kChunkSize); + } + + bool IsEmpty() const { + MOZ_ASSERT_IF(!mChunkPtr, !mChunkEnd && mChunkList.length() == 0 && + mChunkLengths.length() == 0); + return !mChunkPtr; + } + + void Write(const Span<const char>& aStr) override { + MOZ_ASSERT(mChunkPtr >= mChunkList.back().get() && mChunkPtr <= mChunkEnd); + MOZ_ASSERT(mChunkEnd >= mChunkList.back().get() + mChunkLengths.back()); + MOZ_ASSERT(*mChunkPtr == '\0'); + + // Most strings to be written are small, but subprocess profiles (e.g., + // from the content process in e10s) may be huge. If the string is larger + // than a chunk, allocate its own chunk. 
+ char* newPtr; + if (aStr.size() >= kChunkSize) { + AllocChunk(aStr.size() + 1); + newPtr = mChunkPtr + aStr.size(); + } else { + newPtr = mChunkPtr + aStr.size(); + if (newPtr >= mChunkEnd) { + AllocChunk(kChunkSize); + newPtr = mChunkPtr + aStr.size(); + } + } + + memcpy(mChunkPtr, aStr.data(), aStr.size()); + *newPtr = '\0'; + mChunkPtr = newPtr; + mChunkLengths.back() += aStr.size(); + } + void CopyDataIntoLazilyAllocatedBuffer( + const std::function<char*(size_t)>& aAllocator) const { + // Request a buffer for the full content plus a null terminator. + MOZ_ASSERT(mChunkLengths.length() == mChunkList.length()); + size_t totalLen = 1; + for (size_t i = 0; i < mChunkLengths.length(); i++) { + MOZ_ASSERT(strlen(mChunkList[i].get()) == mChunkLengths[i]); + totalLen += mChunkLengths[i]; + } + char* ptr = aAllocator(totalLen); + + if (!ptr) { + // Failed to allocate memory. + return; + } + + for (size_t i = 0; i < mChunkList.length(); i++) { + size_t len = mChunkLengths[i]; + memcpy(ptr, mChunkList[i].get(), len); + ptr += len; + } + *ptr = '\0'; + } + UniquePtr<char[]> CopyData() const { + UniquePtr<char[]> c; + CopyDataIntoLazilyAllocatedBuffer([&](size_t allocationSize) { + c = MakeUnique<char[]>(allocationSize); + return c.get(); + }); + return c; + } + void Take(ChunkedJSONWriteFunc&& aOther) { + for (size_t i = 0; i < aOther.mChunkList.length(); i++) { + MOZ_ALWAYS_TRUE(mChunkLengths.append(aOther.mChunkLengths[i])); + MOZ_ALWAYS_TRUE(mChunkList.append(std::move(aOther.mChunkList[i]))); + } + mChunkPtr = mChunkList.back().get() + mChunkLengths.back(); + mChunkEnd = mChunkPtr; + aOther.mChunkPtr = nullptr; + aOther.mChunkEnd = nullptr; + aOther.mChunkList.clear(); + aOther.mChunkLengths.clear(); + } + + private: + void AllocChunk(size_t aChunkSize) { + MOZ_ASSERT(mChunkLengths.length() == mChunkList.length()); + UniquePtr<char[]> newChunk = MakeUnique<char[]>(aChunkSize); + mChunkPtr = newChunk.get(); + mChunkEnd = mChunkPtr + aChunkSize; + *mChunkPtr = '\0'; + 
MOZ_ALWAYS_TRUE(mChunkLengths.append(0)); + MOZ_ALWAYS_TRUE(mChunkList.append(std::move(newChunk))); + } + + static const size_t kChunkSize = 4096 * 512; + + // Pointer for writing inside the current chunk. + // + // The current chunk is always at the back of mChunkList, i.e., + // mChunkList.back() <= mChunkPtr <= mChunkEnd. + char* mChunkPtr; + + // Pointer to the end of the current chunk. + // + // The current chunk is always at the back of mChunkList, i.e., + // mChunkEnd >= mChunkList.back() + mChunkLengths.back(). + char* mChunkEnd; + + // List of chunks and their lengths. + // + // For all i, the length of the string in mChunkList[i] is + // mChunkLengths[i]. + Vector<UniquePtr<char[]>> mChunkList; + Vector<size_t> mChunkLengths; +}; + +struct OStreamJSONWriteFunc final : public JSONWriteFunc { + explicit OStreamJSONWriteFunc(std::ostream& aStream) : mStream(aStream) {} + + void Write(const Span<const char>& aStr) override { + std::string_view sv(aStr.data(), aStr.size()); + mStream << sv; + } + + std::ostream& mStream; +}; + +class UniqueJSONStrings; + +class SpliceableJSONWriter : public JSONWriter { + public: + explicit SpliceableJSONWriter(UniquePtr<JSONWriteFunc> aWriter) + : JSONWriter(std::move(aWriter)) {} + + void StartBareList(CollectionStyle aStyle = MultiLineStyle) { + StartCollection(scEmptyString, scEmptyString, aStyle); + } + + void EndBareList() { EndCollection(scEmptyString); } + + // This function must be used to correctly stream timestamps in profiles. + // Null timestamps don't output anything. 
+ void TimeProperty(const Span<const char>& aName, const TimeStamp& aTime) { + if (!aTime.IsNull()) { + DoubleProperty(aName, + (aTime - TimeStamp::ProcessCreation()).ToMilliseconds()); + } + } + + void NullElements(uint32_t aCount) { + for (uint32_t i = 0; i < aCount; i++) { + NullElement(); + } + } + + void Splice(const Span<const char>& aStr) { + Separator(); + WriteFunc()->Write(aStr); + mNeedComma[mDepth] = true; + } + + void Splice(const char* aStr, size_t aLen) { + Separator(); + WriteFunc()->Write(Span<const char>(aStr, aLen)); + mNeedComma[mDepth] = true; + } + + // Splice the given JSON directly in, without quoting. + void SplicedJSONProperty(const Span<const char>& aMaybePropertyName, + const Span<const char>& aJsonValue) { + Scalar(aMaybePropertyName, aJsonValue); + } + + void CopyAndSplice(const ChunkedJSONWriteFunc& aFunc) { + Separator(); + for (size_t i = 0; i < aFunc.mChunkList.length(); i++) { + WriteFunc()->Write( + Span<const char>(aFunc.mChunkList[i].get(), aFunc.mChunkLengths[i])); + } + mNeedComma[mDepth] = true; + } + + // Takes the chunks from aFunc and writes them. If move is not possible + // (e.g., using OStreamJSONWriteFunc), aFunc's chunks are copied and its + // storage cleared. + virtual void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) { + Separator(); + for (size_t i = 0; i < aFunc.mChunkList.length(); i++) { + WriteFunc()->Write( + Span<const char>(aFunc.mChunkList[i].get(), aFunc.mChunkLengths[i])); + } + aFunc.mChunkPtr = nullptr; + aFunc.mChunkEnd = nullptr; + aFunc.mChunkList.clear(); + aFunc.mChunkLengths.clear(); + mNeedComma[mDepth] = true; + } + + // Set the pointer to a UniqueJSONStrings. None may already be set. + void SetUniqueStrings(UniqueJSONStrings& aUniqueStrings) { + MOZ_RELEASE_ASSERT(!mUniqueStrings); + mUniqueStrings = &aUniqueStrings; + } + + // Clear the pointer to the UniqueJSONStrings. One must currently be set. 
+ void ResetUniqueStrings() { + MOZ_RELEASE_ASSERT(mUniqueStrings); + mUniqueStrings = nullptr; + } + + // Add `aStr` to the unique-strings list (if not already there), and write its + // index as a named object property. + inline void UniqueStringProperty(const Span<const char>& aName, + const Span<const char>& aStr); + + // Add `aStr` to the unique-strings list (if not already there), and write its + // index as an array element. + inline void UniqueStringElement(const Span<const char>& aStr); + + private: + UniqueJSONStrings* mUniqueStrings = nullptr; +}; + +class SpliceableChunkedJSONWriter final : public SpliceableJSONWriter { + public: + explicit SpliceableChunkedJSONWriter() + : SpliceableJSONWriter(MakeUnique<ChunkedJSONWriteFunc>()) {} + + // Access the ChunkedJSONWriteFunc as reference-to-const, usually to copy data + // out. + const ChunkedJSONWriteFunc& ChunkedWriteFunc() const { + MOZ_ASSERT(!mTaken); + // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the + // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc*. + return *static_cast<const ChunkedJSONWriteFunc*>(WriteFunc()); + } + + // Access the ChunkedJSONWriteFunc as rvalue-reference, usually to take its + // data out. This writer shouldn't be used anymore after this. + ChunkedJSONWriteFunc&& TakeChunkedWriteFunc() { +#ifdef DEBUG + MOZ_ASSERT(!mTaken); + mTaken = true; +#endif // + // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the + // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc*. + return std::move(*static_cast<ChunkedJSONWriteFunc*>(WriteFunc())); + } + + // Adopts the chunks from aFunc without copying. + void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) override { + MOZ_ASSERT(!mTaken); + Separator(); + // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the + // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc*. 
+ static_cast<ChunkedJSONWriteFunc*>(WriteFunc())->Take(std::move(aFunc)); + mNeedComma[mDepth] = true; + } + +#ifdef DEBUG + private: + bool mTaken = false; +#endif // +}; + +class JSONSchemaWriter { + JSONWriter& mWriter; + uint32_t mIndex; + + public: + explicit JSONSchemaWriter(JSONWriter& aWriter) : mWriter(aWriter), mIndex(0) { + aWriter.StartObjectProperty("schema", + SpliceableJSONWriter::SingleLineStyle); + } + + void WriteField(const Span<const char>& aName) { + mWriter.IntProperty(aName, mIndex++); + } + + template <size_t Np1> + void WriteField(const char (&aName)[Np1]) { + WriteField(Span<const char>(aName, Np1 - 1)); + } + + ~JSONSchemaWriter() { mWriter.EndObject(); } +}; + +// This class helps create an indexed list of unique strings, and inserts the +// index as a JSON value. The collected list of unique strings can later be +// inserted as a JSON array. +// This can be useful for elements/properties with many repeated strings. +// +// With only JSONWriter w, +// `w.WriteElement("a"); w.WriteElement("b"); w.WriteElement("a");` +// when done inside a JSON array, will generate: +// `["a", "b", "a"]` +// +// With UniqueJSONStrings u, +// `u.WriteElement(w, "a"); u.WriteElement(w, "b"); u.WriteElement(w, "a");` +// when done inside a JSON array, will generate: +// `[0, 1, 0]` +// and later, `u.SpliceStringTableElements(w)` (inside a JSON array), will +// output the corresponding indexed list of unique strings: +// `["a", "b"]` +class UniqueJSONStrings { + public: + // Start an empty list of unique strings. + MFBT_API explicit UniqueJSONStrings( + JSONWriter::CollectionStyle aStyle = JSONWriter::MultiLineStyle); + + // Start with a copy of the strings from another list. + MFBT_API explicit UniqueJSONStrings( + const UniqueJSONStrings& aOther, + JSONWriter::CollectionStyle aStyle = JSONWriter::MultiLineStyle); + + MFBT_API ~UniqueJSONStrings(); + + // Add `aStr` to the list (if not already there), and write its index as a + // named object property. 
+ void WriteProperty(JSONWriter& aWriter, const Span<const char>& aName, + const Span<const char>& aStr) { + aWriter.IntProperty(aName, GetOrAddIndex(aStr)); + } + + // Add `aStr` to the list (if not already there), and write its index as an + // array element. + void WriteElement(JSONWriter& aWriter, const Span<const char>& aStr) { + aWriter.IntElement(GetOrAddIndex(aStr)); + } + + // Splice all collected unique strings into an array. This should only be done + // once, and then this UniqueStrings shouldn't be used anymore. + MFBT_API void SpliceStringTableElements(SpliceableJSONWriter& aWriter); + + private: + // If `aStr` is already listed, return its index. + // Otherwise add it to the list and return the new index. + MFBT_API uint32_t GetOrAddIndex(const Span<const char>& aStr); + + SpliceableChunkedJSONWriter mStringTableWriter; + HashMap<HashNumber, uint32_t> mStringHashToIndexMap; +}; + +void SpliceableJSONWriter::UniqueStringProperty(const Span<const char>& aName, + const Span<const char>& aStr) { + MOZ_RELEASE_ASSERT(mUniqueStrings); + mUniqueStrings->WriteProperty(*this, aName, aStr); +} + +// Add `aStr` to the list (if not already there), and write its index as an +// array element. +void SpliceableJSONWriter::UniqueStringElement(const Span<const char>& aStr) { + MOZ_RELEASE_ASSERT(mUniqueStrings); + mUniqueStrings->WriteElement(*this, aStr); +} + +} // namespace baseprofiler +} // namespace mozilla + +#endif // BASEPROFILEJSONWRITER_H diff --git a/mozglue/baseprofiler/public/BaseProfiler.h b/mozglue/baseprofiler/public/BaseProfiler.h new file mode 100644 index 0000000000..4bf1705041 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfiler.h @@ -0,0 +1,964 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// The Gecko Profiler is an always-on profiler that takes fast and low overhead +// samples of the program execution using only userspace functionality for +// portability. The goal of this module is to provide performance data in a +// generic cross-platform way without requiring custom tools or kernel support. +// +// Samples are collected to form a timeline with optional timeline event +// (markers) used for filtering. The samples include both native stacks and +// platform-independent "label stack" frames. + +#ifndef BaseProfiler_h +#define BaseProfiler_h + +// This file is safe to include unconditionally, and only defines +// empty macros if MOZ_GECKO_PROFILER is not set. + +// These headers are also safe to include unconditionally, with empty macros if +// MOZ_GECKO_PROFILER is not set. +#include "mozilla/BaseProfilerCounts.h" + +// BaseProfilerMarkers.h is #included in the middle of this header! +// #include "mozilla/BaseProfilerMarkers.h" + +#ifndef MOZ_GECKO_PROFILER + +# include "mozilla/BaseProfilerMarkers.h" +# include "mozilla/UniquePtr.h" + +// This file can be #included unconditionally. However, everything within this +// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the +// following macros and functions, which encapsulate the most common operations +// and thus avoid the need for many #ifdefs. 
+ +# define AUTO_BASE_PROFILER_INIT + +# define BASE_PROFILER_REGISTER_THREAD(name) +# define BASE_PROFILER_UNREGISTER_THREAD() +# define AUTO_BASE_PROFILER_REGISTER_THREAD(name) + +# define AUTO_BASE_PROFILER_THREAD_SLEEP +# define AUTO_BASE_PROFILER_THREAD_WAKE + +# define AUTO_BASE_PROFILER_LABEL(label, categoryPair) +# define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair) +# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr) +# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str) +# define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx) +# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, \ + categoryPair, ctx, flags) + +# define AUTO_PROFILER_STATS(name) + +// Function stubs for when MOZ_GECKO_PROFILER is not defined. + +namespace mozilla { +// This won't be used, it's just there to allow the empty definition of +// `profiler_capture_backtrace`. +class ProfileChunkedBuffer {}; + +namespace baseprofiler { +// This won't be used, it's just there to allow the empty definition of +// `profiler_get_backtrace`. +struct ProfilerBacktrace {}; +using UniqueProfilerBacktrace = UniquePtr<ProfilerBacktrace>; + +// Get/Capture-backtrace functions can return nullptr or false, the result +// should be fed to another empty macro or stub anyway. 
+ +static inline UniqueProfilerBacktrace profiler_get_backtrace() { + return nullptr; +} + +static inline bool profiler_capture_backtrace_into( + ProfileChunkedBuffer& aChunkedBuffer) { + return false; +} + +static inline UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() { + return nullptr; +} +} // namespace baseprofiler +} // namespace mozilla + +#else // !MOZ_GECKO_PROFILER + +# include "BaseProfilingStack.h" + +# include "mozilla/Assertions.h" +# include "mozilla/Atomics.h" +# include "mozilla/Attributes.h" +# include "mozilla/Maybe.h" +# include "mozilla/PowerOfTwo.h" +# include "mozilla/Sprintf.h" +# include "mozilla/ThreadLocal.h" +# include "mozilla/TimeStamp.h" +# include "mozilla/UniquePtr.h" + +# include <functional> +# include <stdint.h> +# include <string> + +namespace mozilla { + +class MallocAllocPolicy; +class ProfileChunkedBuffer; +template <class T, size_t MinInlineCapacity, class AllocPolicy> +class Vector; + +namespace baseprofiler { + +class ProfilerBacktrace; +class SpliceableJSONWriter; + +// Macros used by the AUTO_PROFILER_* macros below. +# define BASE_PROFILER_RAII_PASTE(id, line) id##line +# define BASE_PROFILER_RAII_EXPAND(id, line) BASE_PROFILER_RAII_PASTE(id, line) +# define BASE_PROFILER_RAII BASE_PROFILER_RAII_EXPAND(raiiObject, __LINE__) + +//--------------------------------------------------------------------------- +// Profiler features +//--------------------------------------------------------------------------- + +// Higher-order macro containing all the feature info in one place. Define +// |MACRO| appropriately to extract the relevant parts. Note that the number +// values are used internally only and so can be changed without consequence. +// Any changes to this list should also be applied to the feature list in +// toolkit/components/extensions/schemas/geckoProfiler.json. 
+# define BASE_PROFILER_FOR_EACH_FEATURE(MACRO) \ + MACRO(0, "java", Java, "Profile Java code, Android only") \ + \ + MACRO(1, "js", JS, \ + "Get the JS engine to expose the JS stack to the profiler") \ + \ + /* The DevTools profiler doesn't want the native addresses. */ \ + MACRO(2, "leaf", Leaf, "Include the C++ leaf node if not stackwalking") \ + \ + MACRO(3, "mainthreadio", MainThreadIO, "Add main thread file I/O") \ + \ + MACRO(4, "fileio", FileIO, \ + "Add file I/O from all profiled threads, implies mainthreadio") \ + \ + MACRO(5, "fileioall", FileIOAll, \ + "Add file I/O from all threads, implies fileio") \ + \ + MACRO(6, "noiostacks", NoIOStacks, \ + "File I/O markers do not capture stacks, to reduce overhead") \ + \ + MACRO(7, "screenshots", Screenshots, \ + "Take a snapshot of the window on every composition") \ + \ + MACRO(8, "seqstyle", SequentialStyle, \ + "Disable parallel traversal in styling") \ + \ + MACRO(9, "stackwalk", StackWalk, \ + "Walk the C++ stack, not available on all platforms") \ + \ + MACRO(10, "tasktracer", TaskTracer, \ + "Start profiling with feature TaskTracer") \ + \ + MACRO(11, "threads", Threads, "Profile the registered secondary threads") \ + \ + MACRO(12, "jstracer", JSTracer, "Enable tracing of the JavaScript engine") \ + \ + MACRO(13, "jsallocations", JSAllocations, \ + "Have the JavaScript engine track allocations") \ + \ + MACRO(14, "nostacksampling", NoStackSampling, \ + "Disable all stack sampling: Cancels \"js\", \"leaf\", " \ + "\"stackwalk\" and labels") \ + \ + MACRO(15, "preferencereads", PreferenceReads, \ + "Track when preferences are read") \ + \ + MACRO(16, "nativeallocations", NativeAllocations, \ + "Collect the stacks from a smaller subset of all native " \ + "allocations, biasing towards collecting larger allocations") \ + \ + MACRO(17, "ipcmessages", IPCMessages, \ + "Have the IPC layer track cross-process messages") \ + \ + MACRO(18, "audiocallbacktracing", AudioCallbackTracing, \ + "Audio callback 
tracing") \ + \ + MACRO(19, "cpu", CPUUtilization, "CPU utilization") + +struct ProfilerFeature { +# define DECLARE(n_, str_, Name_, desc_) \ + static constexpr uint32_t Name_ = (1u << n_); \ + static constexpr bool Has##Name_(uint32_t aFeatures) { \ + return aFeatures & Name_; \ + } \ + static constexpr void Set##Name_(uint32_t& aFeatures) { \ + aFeatures |= Name_; \ + } \ + static constexpr void Clear##Name_(uint32_t& aFeatures) { \ + aFeatures &= ~Name_; \ + } + + // Define a bitfield constant, a getter, and two setters for each feature. + BASE_PROFILER_FOR_EACH_FEATURE(DECLARE) + +# undef DECLARE +}; + +namespace detail { + +// RacyFeatures is only defined in this header file so that its methods can +// be inlined into profiler_is_active(). Please do not use anything from the +// detail namespace outside the profiler. + +// Within the profiler's code, the preferred way to check profiler activeness +// and features is via ActivePS(). However, that requires locking gPSMutex. +// There are some hot operations where absolute precision isn't required, so we +// duplicate the activeness/feature state in a lock-free manner in this class. +class RacyFeatures { + public: + MFBT_API static void SetActive(uint32_t aFeatures); + + MFBT_API static void SetInactive(); + + MFBT_API static void SetPaused(); + + MFBT_API static void SetUnpaused(); + + MFBT_API static void SetSamplingPaused(); + + MFBT_API static void SetSamplingUnpaused(); + + MFBT_API static bool IsActive(); + + MFBT_API static bool IsActiveWithFeature(uint32_t aFeature); + + // True if profiler is active, and not fully paused. + // Note that periodic sampling *could* be paused! + MFBT_API static bool IsActiveAndUnpaused(); + + // True if profiler is active, and sampling is not paused (through generic + // `SetPaused()` or specific `SetSamplingPaused()`). 
+ MFBT_API static bool IsActiveAndSamplingUnpaused(); + + private: + static constexpr uint32_t Active = 1u << 31; + static constexpr uint32_t Paused = 1u << 30; + static constexpr uint32_t SamplingPaused = 1u << 29; + +// Ensure Active/Paused don't overlap with any of the feature bits. +# define NO_OVERLAP(n_, str_, Name_, desc_) \ + static_assert(ProfilerFeature::Name_ != SamplingPaused, \ + "bad feature value"); + + BASE_PROFILER_FOR_EACH_FEATURE(NO_OVERLAP); + +# undef NO_OVERLAP + + // We combine the active bit with the feature bits so they can be read or + // written in a single atomic operation. + // TODO: Could this be MFBT_DATA for better inlining optimization? + static Atomic<uint32_t, MemoryOrdering::Relaxed> sActiveAndFeatures; +}; + +MFBT_API bool IsThreadBeingProfiled(); + +} // namespace detail + +//--------------------------------------------------------------------------- +// Start and stop the profiler +//--------------------------------------------------------------------------- + +static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_ENTRIES = +# if !defined(GP_PLAT_arm_android) + MakePowerOfTwo32<1024 * 1024>(); // 1M entries = 8MB +# else + MakePowerOfTwo32<128 * 1024>(); // 128k entries = 1MB +# endif + +// Startup profiling usually need to capture more data, especially on slow +// systems. +static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_STARTUP_ENTRIES = +# if !defined(GP_PLAT_arm_android) + MakePowerOfTwo32<4 * 1024 * 1024>(); // 4M entries = 32MB +# else + MakePowerOfTwo32<256 * 1024>(); // 256k entries = 2MB +# endif + +# define BASE_PROFILER_DEFAULT_DURATION 20 +# define BASE_PROFILER_DEFAULT_INTERVAL 1 + +// Initialize the profiler. If MOZ_PROFILER_STARTUP is set the profiler will +// also be started. This call must happen before any other profiler calls +// (except profiler_start(), which will call profiler_init() if it hasn't +// already run). 
+MFBT_API void profiler_init(void* stackTop); + +# define AUTO_BASE_PROFILER_INIT \ + ::mozilla::baseprofiler::AutoProfilerInit BASE_PROFILER_RAII + +// Clean up the profiler module, stopping it if required. This function may +// also save a shutdown profile if requested. No profiler calls should happen +// after this point and all profiling stack labels should have been popped. +MFBT_API void profiler_shutdown(); + +// Start the profiler -- initializing it first if necessary -- with the +// selected options. Stops and restarts the profiler if it is already active. +// After starting the profiler is "active". The samples will be recorded in a +// circular buffer. +// "aCapacity" is the maximum number of 8-byte entries in the profiler's +// circular buffer. +// "aInterval" is the sampling interval, measured in milliseconds. +// "aFeatures" is the feature set. Features unsupported by this +// platform/configuration are ignored. +// "aFilters" is the list of thread filters. Threads that do not match any +// of the filters are not profiled. A filter matches a thread if +// (a) the thread name contains the filter as a case-insensitive +// substring, or +// (b) the filter is of the form "pid:<n>" where n is the process +// id of the process that the thread is running in. +// "aDuration" is the duration of entries in the profiler's circular buffer. +MFBT_API void profiler_start(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, + const Maybe<double>& aDuration = Nothing()); + +// Stop the profiler and discard the profile without saving it. A no-op if the +// profiler is inactive. After stopping the profiler is "inactive". +MFBT_API void profiler_stop(); + +// If the profiler is inactive, start it. If it's already active, restart it if +// the requested settings differ from the current settings. Both the check and +// the state change are performed while the profiler state is locked. 
+// The only difference to profiler_start is that the current buffer contents are +// not discarded if the profiler is already running with the requested settings. +MFBT_API void profiler_ensure_started( + PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + const Maybe<double>& aDuration = Nothing()); + +//--------------------------------------------------------------------------- +// Control the profiler +//--------------------------------------------------------------------------- + +// Register/unregister threads with the profiler. Both functions operate the +// same whether the profiler is active or inactive. +# define BASE_PROFILER_REGISTER_THREAD(name) \ + do { \ + char stackTop; \ + ::mozilla::baseprofiler::profiler_register_thread(name, &stackTop); \ + } while (0) +# define BASE_PROFILER_UNREGISTER_THREAD() \ + ::mozilla::baseprofiler::profiler_unregister_thread() +MFBT_API ProfilingStack* profiler_register_thread(const char* name, + void* guessStackTop); +MFBT_API void profiler_unregister_thread(); + +// Registers a DOM Window (the JS global `window`) with the profiler. Each +// Window _roughly_ corresponds to a single document loaded within a +// BrowsingContext. The unique IDs for both the Window and BrowsingContext are +// recorded to allow correlating different Windows loaded within the same tab or +// frame element. +// +// We register pages for each navigation but we do not register +// history.pushState or history.replaceState since they correspond to the same +// Inner Window ID. When a Browsing context is first loaded, the first url +// loaded in it will be about:blank. Because of that, this call keeps the first +// non-about:blank registration of window and discards the previous one. +// +// "aBrowsingContextID" is the ID of the browsing context that document +// belongs to. That's used to determine the tab of +// that page. 
+// "aInnerWindowID" is the ID of the `window` global object of that +// document. +// "aUrl" is the URL of the page. +// "aEmbedderInnerWindowID" is the inner window id of embedder. It's used to +// determine sub documents of a page. +MFBT_API void profiler_register_page(uint64_t aBrowsingContextID, + uint64_t aInnerWindowID, + const std::string& aUrl, + uint64_t aEmbedderInnerWindowID); +// Unregister page with the profiler. +// +// Takes an Inner Window ID and unregisters the page entry that has the same ID. +MFBT_API void profiler_unregister_page(uint64_t aRegisteredInnerWindowID); + +// Remove all registered and unregistered pages in the profiler. +void profiler_clear_all_pages(); + +class BaseProfilerCount; +MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter); +MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter); + +// Register and unregister a thread within a scope. +# define AUTO_BASE_PROFILER_REGISTER_THREAD(name) \ + ::mozilla::baseprofiler::AutoProfilerRegisterThread BASE_PROFILER_RAII(name) + +// Pause and resume the profiler. No-ops if the profiler is inactive. While +// paused the profiler will not take any samples and will not record any data +// into its buffers. The profiler remains fully initialized in this state. +// This feature will keep JavaScript profiling enabled, thus allowing toggling +// the profiler without invalidating the JIT. +MFBT_API void profiler_pause(); +MFBT_API void profiler_resume(); + +// Only pause and resume the periodic sampling loop, including stack sampling, +// counters, and profiling overheads. +MFBT_API void profiler_pause_sampling(); +MFBT_API void profiler_resume_sampling(); + +// These functions tell the profiler that a thread went to sleep so that we can +// avoid sampling it while it's sleeping. Calling profiler_thread_sleep() +// twice without an intervening profiler_thread_wake() is an error. 
All three +// functions operate the same whether the profiler is active or inactive. +MFBT_API void profiler_thread_sleep(); +MFBT_API void profiler_thread_wake(); + +// Mark a thread as asleep/awake within a scope. +# define AUTO_BASE_PROFILER_THREAD_SLEEP \ + ::mozilla::baseprofiler::AutoProfilerThreadSleep BASE_PROFILER_RAII +# define AUTO_BASE_PROFILER_THREAD_WAKE \ + ::mozilla::baseprofiler::AutoProfilerThreadWake BASE_PROFILER_RAII + +//--------------------------------------------------------------------------- +// Get information from the profiler +//--------------------------------------------------------------------------- + +// Is the profiler active? Note: the return value of this function can become +// immediately out-of-date. E.g. the profile might be active but then +// profiler_stop() is called immediately afterward. One common and reasonable +// pattern of usage is the following: +// +// if (profiler_is_active()) { +// ExpensiveData expensiveData = CreateExpensiveData(); +// PROFILER_OPERATION(expensiveData); +// } +// +// where PROFILER_OPERATION is a no-op if the profiler is inactive. In this +// case the profiler_is_active() check is just an optimization -- it prevents +// us calling CreateExpensiveData() unnecessarily in most cases, but the +// expensive data will end up being created but not used if another thread +// stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION +// calls. +inline bool profiler_is_active() { + return baseprofiler::detail::RacyFeatures::IsActive(); +} + +// Same as profiler_is_active(), but with the same extra checks that determine +// if the profiler would currently store markers. So this should be used before +// doing some potentially-expensive work that's used in a marker. 
E.g.: +// +// if (profiler_can_accept_markers()) { +// BASE_PROFILER_MARKER(name, OTHER, SomeMarkerType, expensivePayload); +// } +inline bool profiler_can_accept_markers() { + return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused(); +} + +// Is the profiler active, and is the current thread being profiled? +// (Same caveats and recommended usage as profiler_is_active().) +inline bool profiler_thread_is_being_profiled() { + return profiler_is_active() && baseprofiler::detail::IsThreadBeingProfiled(); +} + +// Is the profiler active and paused? Returns false if the profiler is inactive. +MFBT_API bool profiler_is_paused(); + +// Is the profiler active and sampling is paused? Returns false if the profiler +// is inactive. +MFBT_API bool profiler_is_sampling_paused(); + +// Is the current thread sleeping? +MFBT_API bool profiler_thread_is_sleeping(); + +// Get all the features supported by the profiler that are accepted by +// profiler_start(). The result is the same whether the profiler is active or +// not. +MFBT_API uint32_t profiler_get_available_features(); + +// Check if a profiler feature (specified via the ProfilerFeature type) is +// active. Returns false if the profiler is inactive. Note: the return value +// can become immediately out-of-date, much like the return value of +// profiler_is_active(). +MFBT_API bool profiler_feature_active(uint32_t aFeature); + +// Get the params used to start the profiler. Returns 0 and an empty vector +// (via outparams) if the profiler is inactive. It's possible that the features +// returned may be slightly different to those requested due to required +// adjustments. +MFBT_API void profiler_get_start_params( + int* aEntrySize, Maybe<double>* aDuration, double* aInterval, + uint32_t* aFeatures, Vector<const char*, 0, MallocAllocPolicy>* aFilters); + +// The number of milliseconds since the process started. Operates the same +// whether the profiler is active or inactive. 
+MFBT_API double profiler_time(); + +// Get the current process's ID. +MFBT_API int profiler_current_process_id(); + +// Get the current thread's ID. +MFBT_API int profiler_current_thread_id(); + +// Statically initialized to 0, then set once from profiler_init(), which should +// be called from the main thread before any other use of the profiler. +extern MFBT_DATA int scProfilerMainThreadId; + +inline int profiler_main_thread_id() { return scProfilerMainThreadId; } + +inline bool profiler_is_main_thread() { + return profiler_current_thread_id() == profiler_main_thread_id(); +} + +// An object of this class is passed to profiler_suspend_and_sample_thread(). +// For each stack frame, one of the Collect methods will be called. +class ProfilerStackCollector { + public: + // Some collectors need to worry about possibly overwriting previous + // generations of data. If that's not an issue, this can return Nothing, + // which is the default behaviour. + virtual Maybe<uint64_t> SamplePositionInBuffer() { return Nothing(); } + virtual Maybe<uint64_t> BufferRangeStart() { return Nothing(); } + + // This method will be called once if the thread being suspended is the main + // thread. Default behaviour is to do nothing. + virtual void SetIsMainThread() {} + + // WARNING: The target thread is suspended when the Collect methods are + // called. Do not try to allocate or acquire any locks, or you could + // deadlock. The target thread will have resumed by the time this function + // returns. + + virtual void CollectNativeLeafAddr(void* aAddr) = 0; + + virtual void CollectProfilingStackFrame( + const ProfilingStackFrame& aFrame) = 0; +}; + +// This method suspends the thread identified by aThreadId, samples its +// profiling stack, JS stack, and (optionally) native stack, passing the +// collected frames into aCollector. aFeatures dictates which profiler features +// are used. |Leaf| is the only relevant one. 
+MFBT_API void profiler_suspend_and_sample_thread( + int aThreadId, uint32_t aFeatures, ProfilerStackCollector& aCollector, + bool aSampleNative = true); + +struct ProfilerBacktraceDestructor { + MFBT_API void operator()(ProfilerBacktrace*); +}; + +using UniqueProfilerBacktrace = + UniquePtr<ProfilerBacktrace, ProfilerBacktraceDestructor>; + +// Immediately capture the current thread's call stack, store it in the provided +// buffer (usually to avoid allocations if you can construct the buffer on the +// stack). Returns false if unsuccessful, or if the profiler is inactive. +MFBT_API bool profiler_capture_backtrace_into( + ProfileChunkedBuffer& aChunkedBuffer); + +// Immediately capture the current thread's call stack, and return it in a +// ProfileChunkedBuffer (usually for later use in MarkerStack::TakeBacktrace()). +// May be null if unsuccessful, or if the profiler is inactive. +MFBT_API UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace(); + +// Immediately capture the current thread's call stack, and return it in a +// ProfilerBacktrace (usually for later use in marker function that take a +// ProfilerBacktrace). May be null if unsuccessful, or if the profiler is +// inactive. +MFBT_API UniqueProfilerBacktrace profiler_get_backtrace(); + +struct ProfilerStats { + unsigned n = 0; + double sum = 0; + double min = std::numeric_limits<double>::max(); + double max = 0; + void Count(double v) { + ++n; + sum += v; + if (v < min) { + min = v; + } + if (v > max) { + max = v; + } + } +}; + +struct ProfilerBufferInfo { + // Index of the oldest entry. + uint64_t mRangeStart; + // Index of the newest entry. + uint64_t mRangeEnd; + // Buffer capacity in number of 8-byte entries. + uint32_t mEntryCount; + // Sampling stats: Interval (us) between successive samplings. + ProfilerStats mIntervalsUs; + // Sampling stats: Total duration (us) of each sampling. (Split detail below.) 
+ ProfilerStats mOverheadsUs; + // Sampling stats: Time (us) to acquire the lock before sampling. + ProfilerStats mLockingsUs; + // Sampling stats: Time (us) to discard expired data. + ProfilerStats mCleaningsUs; + // Sampling stats: Time (us) to collect counter data. + ProfilerStats mCountersUs; + // Sampling stats: Time (us) to sample thread stacks. + ProfilerStats mThreadsUs; +}; + +// Get information about the current buffer status. +// Returns Nothing() if the profiler is inactive. +// +// This information may be useful to a user-interface displaying the current +// status of the profiler, allowing the user to get a sense for how fast the +// buffer is being written to, and how much data is visible. +MFBT_API Maybe<ProfilerBufferInfo> profiler_get_buffer_info(); + +// Uncomment the following line to display profiler runtime statistics at +// shutdown. +// # define PROFILER_RUNTIME_STATS + +# ifdef PROFILER_RUNTIME_STATS +// This class gathers durations and displays some basic stats when destroyed. +// It is intended to be used as a static variable (see `AUTO_PROFILER_STATS` +// below), to display stats at the end of the program. +class StaticBaseProfilerStats { + public: + explicit StaticBaseProfilerStats(const char* aName) : mName(aName) {} + + ~StaticBaseProfilerStats() { + // Using unsigned long long for computations and printfs. 
+ using ULL = unsigned long long; + ULL n = static_cast<ULL>(mNumberDurations); + if (n != 0) { + ULL sumNs = static_cast<ULL>(mSumDurationsNs); + printf( + "[%d] Profiler stats `%s`: %llu ns / %llu = %llu ns, max %llu ns\n", + profiler_current_process_id(), mName, sumNs, n, sumNs / n, + static_cast<ULL>(mLongestDurationNs)); + } else { + printf("[%d] Profiler stats `%s`: (nothing)\n", + profiler_current_process_id(), mName); + } + } + + void AddDurationFrom(TimeStamp aStart) { + DurationNs duration = static_cast<DurationNs>( + (TimeStamp::NowUnfuzzed() - aStart).ToMicroseconds() * 1000 + 0.5); + mSumDurationsNs += duration; + ++mNumberDurations; + // Update mLongestDurationNs if this one is longer. + for (;;) { + DurationNs longest = mLongestDurationNs; + if (MOZ_LIKELY(longest >= duration)) { + // This duration is not the longest, nothing to do. + break; + } + if (MOZ_LIKELY(mLongestDurationNs.compareExchange(longest, duration))) { + // Successfully updated `mLongestDurationNs` with the new value. + break; + } + // Otherwise someone else just updated `mLongestDurationNs`, we need to + // try again by looping. + } + } + + private: + using DurationNs = uint64_t; + using Count = uint32_t; + + Atomic<DurationNs> mSumDurationsNs{0}; + Atomic<DurationNs> mLongestDurationNs{0}; + Atomic<Count> mNumberDurations{0}; + const char* mName; +}; + +// RAII object that measure its scoped lifetime duration and reports it to a +// `StaticBaseProfilerStats`. +class MOZ_RAII AutoProfilerStats { + public: + explicit AutoProfilerStats(StaticBaseProfilerStats& aStats) + : mStats(aStats), mStart(TimeStamp::NowUnfuzzed()) {} + + ~AutoProfilerStats() { mStats.AddDurationFrom(mStart); } + + private: + StaticBaseProfilerStats& mStats; + TimeStamp mStart; +}; + +// Macro that should be used to collect basic statistics from measurements of +// block durations, from where this macro is, until the end of its enclosing +// scope. 
The name is used in the static variable name and when displaying stats +// at the end of the program; Another location could use the same name but their +// stats will not be combined, so use different name if these locations should +// be distinguished. +# define AUTO_PROFILER_STATS(name) \ + static ::mozilla::baseprofiler::StaticBaseProfilerStats sStat##name( \ + #name); \ + ::mozilla::baseprofiler::AutoProfilerStats autoStat##name(sStat##name); + +# else // PROFILER_RUNTIME_STATS + +# define AUTO_PROFILER_STATS(name) + +# endif // PROFILER_RUNTIME_STATS else + +} // namespace baseprofiler +} // namespace mozilla + +// BaseProfilerMarkers.h requires some stuff from this header. +// TODO: Move common stuff to shared header, and move this #include to the top. +# include "mozilla/BaseProfilerMarkers.h" + +namespace mozilla { +namespace baseprofiler { + +//--------------------------------------------------------------------------- +// Put profiling data into the profiler (labels and markers) +//--------------------------------------------------------------------------- + +// Insert an RAII object in this scope to enter a label stack frame. Any +// samples collected in this scope will contain this label in their stack. +// The label argument must be a static C string. It is usually of the +// form "ClassName::FunctionName". (Ideally we'd use the compiler to provide +// that for us, but __func__ gives us the function name without the class +// name.) If the label applies to only part of a function, you can qualify it +// like this: "ClassName::FunctionName:PartName". +// +// Use AUTO_BASE_PROFILER_LABEL_DYNAMIC_* if you want to add additional / +// dynamic information to the label stack frame. 
+# define AUTO_BASE_PROFILER_LABEL(label, categoryPair) \ + ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \ + label, nullptr, \ + ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair) + +// Similar to AUTO_BASE_PROFILER_LABEL, but with only one argument: the category +// pair. The label string is taken from the category pair. This is convenient +// for labels like +// AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(GRAPHICS_LayerBuilding) which would +// otherwise just repeat the string. +# define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair) \ + ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \ + "", nullptr, \ + ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair, \ + uint32_t(::mozilla::baseprofiler::ProfilingStackFrame::Flags:: \ + LABEL_DETERMINED_BY_CATEGORY_PAIR)) + +// Similar to AUTO_BASE_PROFILER_LABEL, but with an additional string. The +// inserted RAII object stores the cStr pointer in a field; it does not copy the +// string. +// +// WARNING: This means that the string you pass to this macro needs to live at +// least until the end of the current scope. Be careful using this macro with +// ns[C]String; the other AUTO_BASE_PROFILER_LABEL_DYNAMIC_* macros below are +// preferred because they avoid this problem. +// +// If the profiler samples the current thread and walks the label stack while +// this RAII object is on the stack, it will copy the supplied string into the +// profile buffer. So there's one string copy operation, and it happens at +// sample time. +// +// Compare this to the plain AUTO_BASE_PROFILER_LABEL macro, which only accepts +// literal strings: When the label stack frames generated by +// AUTO_BASE_PROFILER_LABEL are sampled, no string copy needs to be made because +// the profile buffer can just store the raw pointers to the literal strings. 
+// Consequently, AUTO_BASE_PROFILER_LABEL frames take up considerably less space +// in the profile buffer than AUTO_BASE_PROFILER_LABEL_DYNAMIC_* frames. +# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr) \ + ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \ + label, cStr, \ + ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair) + +// Similar to AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR, but takes an std::string. +// +// Note: The use of the Maybe<>s ensures the scopes for the dynamic string and +// the AutoProfilerLabel are appropriate, while also not incurring the runtime +// cost of the string assignment unless the profiler is active. Therefore, +// unlike AUTO_BASE_PROFILER_LABEL and AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR, +// this macro doesn't push/pop a label when the profiler is inactive. +# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str) \ + Maybe<std::string> autoStr; \ + Maybe<::mozilla::baseprofiler::AutoProfilerLabel> raiiObjectString; \ + if (::mozilla::baseprofiler::profiler_is_active()) { \ + autoStr.emplace(str); \ + raiiObjectString.emplace( \ + label, autoStr->c_str(), \ + ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair); \ + } + +// Similar to AUTO_BASE_PROFILER_LABEL, but accepting a JSContext* parameter, +// and a no-op if the profiler is disabled. Used to annotate functions for which +// overhead in the range of nanoseconds is noticeable. It avoids overhead from +// the TLS lookup because it can get the ProfilingStack from the JS context, and +// avoids almost all overhead in the case where the profiler is disabled. +# define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx) \ + ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \ + ctx, label, nullptr, \ + ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair) + +// Similar to AUTO_BASE_PROFILER_LABEL_FAST, but also takes an extra string and +// an additional set of flags. 
The flags parameter should carry values from the +// ProfilingStackFrame::Flags enum. +# define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, \ + categoryPair, ctx, flags) \ + ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \ + ctx, label, dynamicString, \ + ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair, flags) + +MFBT_API void profiler_add_js_marker(const char* aMarkerName, + const char* aMarkerText); + +// Returns true if any of the profiler mutexes are currently locked *on the +// current thread*. This may be used by re-entrant code that may call profiler +// functions while the same or a different profiler mutex is locked, which could +// deadlock. +bool profiler_is_locked_on_current_thread(); + +//--------------------------------------------------------------------------- +// Output profiles +//--------------------------------------------------------------------------- + +// Set a user-friendly process name, used in JSON stream. +MFBT_API void profiler_set_process_name(const std::string& aProcessName, + const std::string* aETLDplus1); + +// Get the profile encoded as a JSON string. A no-op (returning nullptr) if the +// profiler is inactive. +// If aIsShuttingDown is true, the current time is included as the process +// shutdown time in the JSON's "meta" object. +MFBT_API UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0, + bool aIsShuttingDown = false, + bool aOnlyThreads = false); + +// Write the profile for this process (excluding subprocesses) into aWriter. +// Returns false if the profiler is inactive. +MFBT_API bool profiler_stream_json_for_this_process( + SpliceableJSONWriter& aWriter, double aSinceTime = 0, + bool aIsShuttingDown = false, bool aOnlyThreads = false); + +// Get the profile and write it into a file. A no-op if the profiler is +// inactive. 
+MFBT_API void profiler_save_profile_to_file(const char* aFilename); + +//--------------------------------------------------------------------------- +// RAII classes +//--------------------------------------------------------------------------- + +class MOZ_RAII AutoProfilerInit { + public: + explicit AutoProfilerInit() { profiler_init(this); } + + ~AutoProfilerInit() { profiler_shutdown(); } + + private: +}; + +// Convenience class to register and unregister a thread with the profiler. +// Needs to be the first object on the stack of the thread. +class MOZ_RAII AutoProfilerRegisterThread final { + public: + explicit AutoProfilerRegisterThread(const char* aName) { + profiler_register_thread(aName, this); + } + + ~AutoProfilerRegisterThread() { profiler_unregister_thread(); } + + private: + AutoProfilerRegisterThread(const AutoProfilerRegisterThread&) = delete; + AutoProfilerRegisterThread& operator=(const AutoProfilerRegisterThread&) = + delete; +}; + +class MOZ_RAII AutoProfilerThreadSleep { + public: + explicit AutoProfilerThreadSleep() { profiler_thread_sleep(); } + + ~AutoProfilerThreadSleep() { profiler_thread_wake(); } + + private: +}; + +// Temporarily wake up the profiling of a thread while servicing events such as +// Asynchronous Procedure Calls (APCs). +class MOZ_RAII AutoProfilerThreadWake { + public: + explicit AutoProfilerThreadWake() + : mIssuedWake(profiler_thread_is_sleeping()) { + if (mIssuedWake) { + profiler_thread_wake(); + } + } + + ~AutoProfilerThreadWake() { + if (mIssuedWake) { + MOZ_ASSERT(!profiler_thread_is_sleeping()); + profiler_thread_sleep(); + } + } + + private: + bool mIssuedWake; +}; + +// This class creates a non-owning ProfilingStack reference. Objects of this +// class are stack-allocated, and so exist within a thread, and are thus bounded +// by the lifetime of the thread, which ensures that the references held can't +// be used after the ProfilingStack is destroyed. 
+class MOZ_RAII AutoProfilerLabel { + public: + // This is the AUTO_BASE_PROFILER_LABEL and AUTO_BASE_PROFILER_LABEL_DYNAMIC + // variant. + AutoProfilerLabel(const char* aLabel, const char* aDynamicString, + ProfilingCategoryPair aCategoryPair, uint32_t aFlags = 0) { + // Get the ProfilingStack from TLS. + Push(GetProfilingStack(), aLabel, aDynamicString, aCategoryPair, aFlags); + } + + void Push(ProfilingStack* aProfilingStack, const char* aLabel, + const char* aDynamicString, ProfilingCategoryPair aCategoryPair, + uint32_t aFlags = 0) { + // This function runs both on and off the main thread. + + mProfilingStack = aProfilingStack; + if (mProfilingStack) { + mProfilingStack->pushLabelFrame(aLabel, aDynamicString, this, + aCategoryPair, aFlags); + } + } + + ~AutoProfilerLabel() { + // This function runs both on and off the main thread. + + if (mProfilingStack) { + mProfilingStack->pop(); + } + } + + MFBT_API static ProfilingStack* GetProfilingStack(); + + private: + // We save a ProfilingStack pointer in the ctor so we don't have to redo the + // TLS lookup in the dtor. + ProfilingStack* mProfilingStack; + + public: + // See the comment on the definition in platform.cpp for details about this. + static MOZ_THREAD_LOCAL(ProfilingStack*) sProfilingStack; +}; + +// Get the MOZ_PROFILER_STARTUP* environment variables that should be +// supplied to a child process that is about to be launched, in order +// to make that child process start with the same profiler settings as +// in the current process. The given function is invoked once for +// each variable to be set. 
+MFBT_API void GetProfilerEnvVarsForChildProcess( + std::function<void(const char* key, const char* value)>&& aSetEnv); + +} // namespace baseprofiler +} // namespace mozilla + +#endif // !MOZ_GECKO_PROFILER + +#endif // BaseProfiler_h diff --git a/mozglue/baseprofiler/public/BaseProfilerCounts.h b/mozglue/baseprofiler/public/BaseProfilerCounts.h new file mode 100644 index 0000000000..fbcc713744 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilerCounts.h @@ -0,0 +1,280 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BaseProfilerCounts_h +#define BaseProfilerCounts_h + +#ifndef MOZ_GECKO_PROFILER + +# define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description) +# define BASE_PROFILER_DEFINE_COUNT(label, category, description) +# define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description) +# define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count) +# define AUTO_BASE_PROFILER_COUNT(label) +# define AUTO_BASE_PROFILER_STATIC_COUNT(label, count) +# define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label) + +#else + +# include "mozilla/Atomics.h" + +namespace mozilla { +namespace baseprofiler { + +class BaseProfilerCount; +MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter); +MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter); + +typedef Atomic<int64_t, MemoryOrdering::Relaxed> ProfilerAtomicSigned; +typedef Atomic<uint64_t, MemoryOrdering::Relaxed> ProfilerAtomicUnsigned; + +// Counter support +// There are two types of counters: +// 1) a simple counter which can be added to or subtracted from. This could +// track the number of objects of a type, the number of calls to something +// (reflow, JIT, etc). 
+// 2) a combined counter which has the above, plus a number-of-calls counter +// that is incremented by 1 for each call to modify the count. This provides +// an optional source for a 'heatmap' of access. This can be used (for +// example) to track the amount of memory allocated, and provide a heatmap of +// memory operations (allocs/frees). +// +// Counters are sampled by the profiler once per sample-period. At this time, +// all counters are global to the process. In the future, there might be more +// versions with per-thread or other discriminators. +// +// Typical usage: +// There are two ways to use counters: With heap-created counter objects, +// or using macros. Note: the macros use statics, and will be slightly +// faster/smaller, and you need to care about creating them before using +// them. They're similar to the use-pattern for the other AUTO_PROFILER* +// macros, but they do need the PROFILER_DEFINE* to be use to instantiate +// the statics. +// +// PROFILER_DEFINE_COUNT(mything, "JIT", "Some JIT byte count") +// ... +// void foo() { ... AUTO_PROFILER_COUNT(mything, number_of_bytes_used); ... } +// +// or (to also get a heatmap) +// +// PROFILER_DEFINE_COUNT_TOTAL(mything, "JIT", "Some JIT byte count") +// ... +// void foo() { +// ... +// AUTO_PROFILER_COUNT_TOTAL(mything, number_of_bytes_generated); +// ... +// } +// +// To use without statics/macros: +// +// UniquePtr<ProfilerCounter> myCounter; +// ... +// myCounter = +// MakeUnique<ProfilerCounter>("mything", "JIT", "Some JIT byte count")); +// ... +// void foo() { ... myCounter->Add(number_of_bytes_generated0; ... 
} + +class BaseProfilerCount { + public: + BaseProfilerCount(const char* aLabel, ProfilerAtomicSigned* aCounter, + ProfilerAtomicUnsigned* aNumber, const char* aCategory, + const char* aDescription) + : mLabel(aLabel), + mCategory(aCategory), + mDescription(aDescription), + mCounter(aCounter), + mNumber(aNumber) { +# define COUNTER_CANARY 0xDEADBEEF +# ifdef DEBUG + mCanary = COUNTER_CANARY; + mPrevNumber = 0; +# endif + // Can't call profiler_* here since this may be non-xul-library + } +# ifdef DEBUG + ~BaseProfilerCount() { mCanary = 0; } +# endif + + void Sample(int64_t& aCounter, uint64_t& aNumber) { + MOZ_ASSERT(mCanary == COUNTER_CANARY); + + aCounter = *mCounter; + aNumber = mNumber ? *mNumber : 0; +# ifdef DEBUG + MOZ_ASSERT(aNumber >= mPrevNumber); + mPrevNumber = aNumber; +# endif + } + + // We don't define ++ and Add() here, since the static defines directly + // increment the atomic counters, and the subclasses implement ++ and + // Add() directly. + + // These typically are static strings (for example if you use the macros + // below) + const char* mLabel; + const char* mCategory; + const char* mDescription; + // We're ok with these being un-ordered in race conditions. These are + // pointers because we want to be able to use statics and increment them + // directly. Otherwise we could just have them inline, and not need the + // constructor args. + // These can be static globals (using the macros below), though they + // don't have to be - their lifetime must be longer than the use of them + // by the profiler (see profiler_add/remove_sampled_counter()). If you're + // using a lot of these, they probably should be allocated at runtime (see + // class ProfilerCountOnly below). 
+ ProfilerAtomicSigned* mCounter; + ProfilerAtomicUnsigned* mNumber; // may be null + +# ifdef DEBUG + uint32_t mCanary; + uint64_t mPrevNumber; // value of number from the last Sample() +# endif +}; + +// Designed to be allocated dynamically, and simply incremented with obj++ +// or obj->Add(n) +class ProfilerCounter final : public BaseProfilerCount { + public: + ProfilerCounter(const char* aLabel, const char* aCategory, + const char* aDescription) + : BaseProfilerCount(aLabel, &mCounter, nullptr, aCategory, aDescription) { + // Assume we're in libxul + profiler_add_sampled_counter(this); + } + + virtual ~ProfilerCounter() { profiler_remove_sampled_counter(this); } + + BaseProfilerCount& operator++() { + Add(1); + return *this; + } + + void Add(int64_t aNumber) { mCounter += aNumber; } + + ProfilerAtomicSigned mCounter; +}; + +// Also keeps a heatmap (number of calls to ++/Add()) +class ProfilerCounterTotal final : public BaseProfilerCount { + public: + ProfilerCounterTotal(const char* aLabel, const char* aCategory, + const char* aDescription) + : BaseProfilerCount(aLabel, &mCounter, &mNumber, aCategory, + aDescription) { + // Assume we're in libxul + profiler_add_sampled_counter(this); + } + + virtual ~ProfilerCounterTotal() { profiler_remove_sampled_counter(this); } + + BaseProfilerCount& operator++() { + Add(1); + return *this; + } + + void Add(int64_t aNumber) { + mCounter += aNumber; + mNumber++; + } + + ProfilerAtomicSigned mCounter; + ProfilerAtomicUnsigned mNumber; +}; + +// Defines a counter that is sampled on each profiler tick, with a running +// count (signed), and number-of-instances. Note that because these are two +// independent Atomics, there is a possiblity that count will not include +// the last call, but number of uses will. 
// I think this is not worth
// worrying about
//
// The types below are fully qualified so that these macros can be used from
// any namespace. The variables they define are referenced unqualified by the
// AUTO_BASE_PROFILER_* macros, which therefore have to be used where these
// names are visible.
#  define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description)      \
    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0);  \
    ::mozilla::baseprofiler::ProfilerAtomicUnsigned profiler_number_##label(  \
        0);                                                                   \
    const char profiler_category_##label[] = category;                        \
    const char profiler_description_##label[] = description;                  \
    ::mozilla::UniquePtr<::mozilla::baseprofiler::BaseProfilerCount>          \
        AutoCount_##label;

// This counts, but doesn't keep track of the number of calls to
// AUTO_BASE_PROFILER_COUNT()
#  define BASE_PROFILER_DEFINE_COUNT(label, category, description)            \
    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0);  \
    const char profiler_category_##label[] = category;                        \
    const char profiler_description_##label[] = description;                  \
    ::mozilla::UniquePtr<::mozilla::baseprofiler::BaseProfilerCount>          \
        AutoCount_##label;

// This will create a static initializer if used, but avoids a possible
// allocation.
#  define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category,            \
                                                  description)                \
    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0);  \
    ::mozilla::baseprofiler::ProfilerAtomicUnsigned profiler_number_##label(  \
        0);                                                                   \
    ::mozilla::baseprofiler::BaseProfilerCount AutoCount_##label(             \
        #label, &profiler_count_##label, &profiler_number_##label, category,  \
        description);

// If we didn't care about static initializers, we could avoid the need for
// a ptr to the BaseProfilerCount object.

// XXX It would be better to do this without the if() and without the
// theoretical race to set the UniquePtr (i.e. possible leak).
+# define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count) \ + do { \ + profiler_number_##label++; /* do this first*/ \ + profiler_count_##label += count; \ + if (!AutoCount_##label) { \ + /* Ignore that we could call this twice in theory, and that we leak \ + * them \ + */ \ + AutoCount_##label.reset(new BaseProfilerCount( \ + #label, &profiler_count_##label, &profiler_number_##label, \ + profiler_category_##label, profiler_description_##label)); \ + ::mozilla::baseprofiler::profiler_add_sampled_counter( \ + AutoCount_##label.get()); \ + } \ + } while (0) + +# define AUTO_BASE_PROFILER_COUNT(label, count) \ + do { \ + profiler_count_##label += count; /* do this first*/ \ + if (!AutoCount_##label) { \ + /* Ignore that we could call this twice in theory, and that we leak \ + * them \ + */ \ + AutoCount_##label.reset(new BaseProfilerCount( \ + #label, nullptr, &profiler_number_##label, \ + profiler_category_##label, profiler_description_##label)); \ + ::mozilla::baseprofiler::profiler_add_sampled_counter( \ + AutoCount_##label.get()); \ + } \ + } while (0) + +# define AUTO_BASE_PROFILER_STATIC_COUNT(label, count) \ + do { \ + profiler_number_##label++; /* do this first*/ \ + profiler_count_##label += count; \ + } while (0) + +// if we need to force the allocation +# define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label) \ + do { \ + if (!AutoCount_##label) { \ + /* Ignore that we could call this twice in theory, and that we leak \ + * them \ + */ \ + AutoCount_##label.reset( \ + new ::mozilla::baseprofiler::BaseProfilerCount( \ + #label, &profiler_count_##label, &profiler_number_##label, \ + profiler_category_##label, profiler_description_##label)); \ + } \ + } while (0) + +} // namespace baseprofiler +} // namespace mozilla + +#endif // !MOZ_GECKO_PROFILER + +#endif // BaseProfilerCounts_h diff --git a/mozglue/baseprofiler/public/BaseProfilerDetail.h b/mozglue/baseprofiler/public/BaseProfilerDetail.h new file mode 100644 index 0000000000..9027f32bc7 --- /dev/null +++ 
b/mozglue/baseprofiler/public/BaseProfilerDetail.h @@ -0,0 +1,189 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Internal Base Profiler utilities. + +#ifndef BaseProfilerDetail_h +#define BaseProfilerDetail_h + +#include "mozilla/Atomics.h" +#include "mozilla/Maybe.h" +#include "mozilla/PlatformMutex.h" + +#ifndef MOZ_GECKO_PROFILER +# error Do not #include this header when MOZ_GECKO_PROFILER is not #defined. +#endif + +namespace mozilla { +namespace baseprofiler { + +// Implemented in platform.cpp +MFBT_API int profiler_current_thread_id(); + +namespace detail { + +// Thin shell around mozglue PlatformMutex, for Base Profiler internal use. +// It records the id of the thread that currently owns the lock, so that a +// thread can tell whether it already holds the mutex, and so that recursive +// locking is caught by assertions. +class BaseProfilerMutex : private ::mozilla::detail::MutexImpl { + public: + BaseProfilerMutex() : ::mozilla::detail::MutexImpl() {} + explicit BaseProfilerMutex(const char* aName) + : ::mozilla::detail::MutexImpl(), mName(aName) {} + + BaseProfilerMutex(const BaseProfilerMutex&) = delete; + BaseProfilerMutex& operator=(const BaseProfilerMutex&) = delete; + BaseProfilerMutex(BaseProfilerMutex&&) = delete; + BaseProfilerMutex& operator=(BaseProfilerMutex&&) = delete; + +#ifdef DEBUG + ~BaseProfilerMutex() { MOZ_ASSERT(mOwningThreadId == 0); } +#endif // DEBUG + + [[nodiscard]] bool IsLockedOnCurrentThread() const { + return mOwningThreadId == baseprofiler::profiler_current_thread_id(); + } + + void AssertCurrentThreadOwns() const { + MOZ_ASSERT(IsLockedOnCurrentThread()); + } + + void Lock() { + const int tid = baseprofiler::profiler_current_thread_id(); + MOZ_ASSERT(tid != 0); + MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking"); + ::mozilla::detail::MutexImpl::lock(); + MOZ_ASSERT(mOwningThreadId == 0, "Not unlocked properly"); +
mOwningThreadId = tid; + } + + [[nodiscard]] bool TryLock() { + const int tid = baseprofiler::profiler_current_thread_id(); + MOZ_ASSERT(tid != 0); + MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking"); + if (!::mozilla::detail::MutexImpl::tryLock()) { + // Failed to lock, nothing more to do. + return false; + } + MOZ_ASSERT(mOwningThreadId == 0, "Not unlocked properly"); + mOwningThreadId = tid; + return true; + } + + void Unlock() { + MOZ_ASSERT(IsLockedOnCurrentThread(), "Unlocking when not locked here"); + // We're still holding the mutex here, so it's safe to just reset + // `mOwningThreadId`. + mOwningThreadId = 0; + ::mozilla::detail::MutexImpl::unlock(); + } + + const char* GetName() const { return mName; } + + private: + // Thread currently owning the lock, or 0. + // Atomic because it may be read at any time independent of the mutex. + // Relaxed because threads only need to know if they own it already, so: + // - If it's their id, only *they* wrote that value with a locked mutex. + // - If it's different from their thread id it doesn't matter what other + // number it is (0 or another id) and that it can change again at any time. + Atomic<int, MemoryOrdering::Relaxed> mOwningThreadId{0}; + + // Name given at construction, or null if none was given. + const char* mName = nullptr; +}; + +// RAII class that locks a BaseProfilerMutex on construction and unlocks it on +// destruction. +class MOZ_RAII BaseProfilerAutoLock { + public: + explicit BaseProfilerAutoLock(BaseProfilerMutex& aMutex) : mMutex(aMutex) { + mMutex.Lock(); + } + + BaseProfilerAutoLock(const BaseProfilerAutoLock&) = delete; + BaseProfilerAutoLock& operator=(const BaseProfilerAutoLock&) = delete; + BaseProfilerAutoLock(BaseProfilerAutoLock&&) = delete; + BaseProfilerAutoLock& operator=(BaseProfilerAutoLock&&) = delete; + + ~BaseProfilerAutoLock() { mMutex.Unlock(); } + + private: + BaseProfilerMutex& mMutex; +}; + +// Thin shell around mozglue PlatformMutex, for Base Profiler internal use. +// Actual mutex may be disabled at construction time.
+class BaseProfilerMaybeMutex : private ::mozilla::detail::MutexImpl { + public: + explicit BaseProfilerMaybeMutex(bool aActivate) { + if (aActivate) { + mMaybeMutex.emplace(); + } + } + + BaseProfilerMaybeMutex(const BaseProfilerMaybeMutex&) = delete; + BaseProfilerMaybeMutex& operator=(const BaseProfilerMaybeMutex&) = delete; + BaseProfilerMaybeMutex(BaseProfilerMaybeMutex&&) = delete; + BaseProfilerMaybeMutex& operator=(BaseProfilerMaybeMutex&&) = delete; + + ~BaseProfilerMaybeMutex() = default; + + bool IsActivated() const { return mMaybeMutex.isSome(); } + + [[nodiscard]] bool IsActivatedAndLockedOnCurrentThread() const { + if (!IsActivated()) { + // Not activated, so we can never be locked. + return false; + } + return mMaybeMutex->IsLockedOnCurrentThread(); + } + + void AssertCurrentThreadOwns() const { +#ifdef DEBUG + if (IsActivated()) { + mMaybeMutex->AssertCurrentThreadOwns(); + } +#endif // DEBUG + } + + void Lock() { + if (IsActivated()) { + mMaybeMutex->Lock(); + } + } + + void Unlock() { + if (IsActivated()) { + mMaybeMutex->Unlock(); + } + } + + private: + Maybe<BaseProfilerMutex> mMaybeMutex; +}; + +// RAII class to lock a mutex. 
+class MOZ_RAII BaseProfilerMaybeAutoLock { + public: + explicit BaseProfilerMaybeAutoLock(BaseProfilerMaybeMutex& aMaybeMutex) + : mMaybeMutex(aMaybeMutex) { + mMaybeMutex.Lock(); + } + + BaseProfilerMaybeAutoLock(const BaseProfilerMaybeAutoLock&) = delete; + BaseProfilerMaybeAutoLock& operator=(const BaseProfilerMaybeAutoLock&) = + delete; + BaseProfilerMaybeAutoLock(BaseProfilerMaybeAutoLock&&) = delete; + BaseProfilerMaybeAutoLock& operator=(BaseProfilerMaybeAutoLock&&) = delete; + + ~BaseProfilerMaybeAutoLock() { mMaybeMutex.Unlock(); } + + private: + BaseProfilerMaybeMutex& mMaybeMutex; +}; + +} // namespace detail +} // namespace baseprofiler +} // namespace mozilla + +#endif // BaseProfilerDetail_h diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h new file mode 100644 index 0000000000..1556b7a272 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BaseProfilerMarkerTypes_h +#define BaseProfilerMarkerTypes_h + +// This header contains common marker type definitions. +// +// It #include's "mozilla/BaseProfilerMarkers.h", see that file for how to +// define other marker types, and how to add markers to the profiler buffers. +// +// If you don't need to use these common types, #include +// "mozilla/BaseProfilerMarkers.h" instead. +// +// Types in this file can be defined without relying on xpcom. +// Others are defined in "ProfilerMarkerTypes.h". + +// !!! /!\ WORK IN PROGRESS /!\ !!! +// This file contains draft marker definitions, but most are not used yet.
+// Further work is needed to complete these definitions, and use them to convert +// existing PROFILER_ADD_MARKER calls. See meta bug 1661394. + +#include "mozilla/BaseProfilerMarkers.h" + +#ifdef MOZ_GECKO_PROFILER + +namespace mozilla::baseprofiler::markers { + +// Marker type "MediaSample": its payload is the start and end times of a +// sample, both streamed as integers and displayed as microseconds. +struct MediaSampleMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("MediaSample"); + } + static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter, + int64_t aSampleStartTimeUs, + int64_t aSampleEndTimeUs) { + aWriter.IntProperty("sampleStartTimeUs", aSampleStartTimeUs); + aWriter.IntProperty("sampleEndTimeUs", aSampleEndTimeUs); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::markerChart, MS::Location::markerTable}; + schema.AddKeyLabelFormat("sampleStartTimeUs", "Sample start time", + MS::Format::microseconds); + schema.AddKeyLabelFormat("sampleEndTimeUs", "Sample end time", + MS::Format::microseconds); + return schema; + } +}; + +// Marker type "CONTENT_FULL_PAINT_TIME": it has no payload, so nothing is +// streamed and the schema only lists display locations. +struct ContentBuildMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("CONTENT_FULL_PAINT_TIME"); + } + static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter) {} + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::markerChart, MS::Location::markerTable}; + return schema; + } +}; + +} // namespace mozilla::baseprofiler::markers + +#endif // MOZ_GECKO_PROFILER + +#endif // BaseProfilerMarkerTypes_h diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkers.h b/mozglue/baseprofiler/public/BaseProfilerMarkers.h new file mode 100644 index 0000000000..c63b018f95 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilerMarkers.h @@ -0,0 +1,242 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Markers are useful to delimit something important happening such as the first +// paint. Unlike labels, which are only recorded in the profile buffer if a +// sample is collected while the label is on the label stack, markers will +// always be recorded in the profile buffer. +// +// This header contains basic definitions necessary to create marker types, and +// to add markers to the profiler buffers. +// +// If basic marker types are needed, #include +// "mozilla/BaseProfilerMarkerTypes.h" instead. +// +// But if you want to create your own marker type locally, you can #include this +// header only; look at mozilla/BaseProfilerMarkerTypes.h for examples of how to +// define types, and mozilla/BaseProfilerMarkersPrerequisites.h for some +// supporting types. +// +// To then record markers: +// - Use `baseprofiler::AddMarker(...)` from mozglue or other libraries that +// are outside of xul, especially if they may happen outside of xpcom's +// lifetime (typically startup, shutdown, or tests). +// - Otherwise #include "ProfilerMarkers.h" instead, and use +// `profiler_add_marker(...)`. +// See these functions for more details. + +#ifndef BaseProfilerMarkers_h +#define BaseProfilerMarkers_h + +#include "mozilla/BaseProfilerMarkersDetail.h" + +#ifndef MOZ_GECKO_PROFILER + +# define BASE_PROFILER_MARKER_UNTYPED(markerName, categoryName, ...) +# define BASE_PROFILER_MARKER(markerName, categoryName, options, MarkerType, \ + ...)
+# define BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, text) +# define AUTO_BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, \ + text) + +#else // ndef MOZ_GECKO_PROFILER + +# include "mozilla/ProfileChunkedBuffer.h" +# include "mozilla/TimeStamp.h" +# include "mozilla/Unused.h" + +# include <functional> +# include <string> +# include <utility> + +namespace mozilla::baseprofiler { + +// Add a marker to a given buffer. `AddMarker()` and related macros should be +// used in most cases, see below for more information about them and the +// parameters. This function may be useful when markers need to be recorded in a +// local buffer outside of the main profiler buffer. +template <typename MarkerType, typename... PayloadArguments> +ProfileBufferBlockIndex AddMarkerToBuffer( + ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName, + const MarkerCategory& aCategory, MarkerOptions&& aOptions, + MarkerType aMarkerType, const PayloadArguments&... aPayloadArguments) { + Unused << aMarkerType; // Only the empty object type is useful. + return base_profiler_markers_detail::AddMarkerToBuffer<MarkerType>( + aBuffer, aName, aCategory, std::move(aOptions), + ::mozilla::baseprofiler::profiler_capture_backtrace_into, + aPayloadArguments...); +} + +// Add a marker (without payload) to a given buffer. +inline ProfileBufferBlockIndex AddMarkerToBuffer( + ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName, + const MarkerCategory& aCategory, MarkerOptions&& aOptions = {}) { + return AddMarkerToBuffer(aBuffer, aName, aCategory, std::move(aOptions), + markers::NoPayload{}); +} + +// Add a marker to the Base Profiler buffer. +// - aName: Main name of this marker. +// - aCategory: Category for this marker. +// - aOptions: Optional settings (such as timing, inner window id, +// backtrace...), see `MarkerOptions` for details. +// - aMarkerType: Empty object that specifies the type of marker.
+// - aPayloadArguments: Arguments expected by this marker type's +// `StreamJSONMarkerData` function. +// Returns a default-constructed ProfileBufferBlockIndex, without adding +// anything, when `profiler_can_accept_markers()` is false. +template <typename MarkerType, typename... PayloadArguments> +ProfileBufferBlockIndex AddMarker( + const ProfilerString8View& aName, const MarkerCategory& aCategory, + MarkerOptions&& aOptions, MarkerType aMarkerType, + const PayloadArguments&... aPayloadArguments) { + if (!baseprofiler::profiler_can_accept_markers()) { + return {}; + } + return ::mozilla::baseprofiler::AddMarkerToBuffer( + base_profiler_markers_detail::CachedBaseCoreBuffer(), aName, aCategory, + std::move(aOptions), aMarkerType, aPayloadArguments...); +} + +// Add a marker (without payload) to the Base Profiler buffer. +inline ProfileBufferBlockIndex AddMarker(const ProfilerString8View& aName, + const MarkerCategory& aCategory, + MarkerOptions&& aOptions = {}) { + return AddMarker(aName, aCategory, std::move(aOptions), markers::NoPayload{}); +} + +} // namespace mozilla::baseprofiler + +// Same as `AddMarker()` (without payload). This macro is safe to use even if +// MOZ_GECKO_PROFILER is not #defined. +# define BASE_PROFILER_MARKER_UNTYPED(markerName, categoryName, ...) \ + do { \ + AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_UNTYPED); \ + ::mozilla::baseprofiler::AddMarker( \ + markerName, ::mozilla::baseprofiler::category::categoryName, \ + ##__VA_ARGS__); \ + } while (false) + +// Same as `AddMarker()` (with payload). This macro is safe to use even if +// MOZ_GECKO_PROFILER is not #defined. +# define BASE_PROFILER_MARKER(markerName, categoryName, options, MarkerType, \ + ...) \ + do { \ + AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_with_##MarkerType); \ + ::mozilla::baseprofiler::AddMarker( \ + markerName, ::mozilla::baseprofiler::category::categoryName, \ + options, ::mozilla::baseprofiler::markers::MarkerType{}, \ + ##__VA_ARGS__); \ + } while (false) + +namespace mozilla::baseprofiler::markers { +// Most common marker type. Others are in BaseProfilerMarkerTypes.h.
+struct TextMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("Text"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + const ProfilerString8View& aText) { + aWriter.StringProperty("name", aText); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::markerChart, MS::Location::markerTable}; + schema.SetChartLabel("{marker.data.name}"); + schema.SetTableLabel("{marker.name} - {marker.data.name}"); + schema.AddKeyLabelFormat("name", "Details", MarkerSchema::Format::string); + return schema; + } +}; + +struct Tracing { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("tracing"); + } + static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter, + const ProfilerString8View& aCategory) { + if (aCategory.Length() != 0) { + aWriter.StringProperty("category", aCategory); + } + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::markerChart, MS::Location::markerTable, + MS::Location::timelineOverview}; + schema.AddKeyLabelFormat("category", "Type", MS::Format::string); + return schema; + } +}; +} // namespace mozilla::baseprofiler::markers + +// Add a text marker. This macro is safe to use even if MOZ_GECKO_PROFILER is +// not #defined. +# define BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, text) \ + do { \ + AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_TEXT); \ + ::mozilla::baseprofiler::AddMarker( \ + markerName, ::mozilla::baseprofiler::category::categoryName, \ + options, ::mozilla::baseprofiler::markers::TextMarker{}, text); \ + } while (false) + +namespace mozilla::baseprofiler { + +// RAII object that adds a BASE_PROFILER_MARKER_TEXT when destroyed; the +// marker's timing will be the interval from construction (unless an instant or +// start time is already specified in the provided options) until destruction. 
+class MOZ_RAII AutoProfilerTextMarker { + public: + AutoProfilerTextMarker(const char* aMarkerName, + const MarkerCategory& aCategory, + MarkerOptions&& aOptions, const std::string& aText) + : mMarkerName(aMarkerName), + mCategory(aCategory), + mOptions(std::move(aOptions)), + mText(aText) { + MOZ_ASSERT(mOptions.Timing().EndTime().IsNull(), + "AutoProfilerTextMarker options shouldn't have an end time"); + if (mOptions.Timing().StartTime().IsNull()) { + mOptions.Set(MarkerTiming::InstantNow()); + } + } + + ~AutoProfilerTextMarker() { + mOptions.TimingRef().SetIntervalEnd(); + AUTO_PROFILER_STATS(AUTO_BASE_PROFILER_MARKER_TEXT); + AddMarker(ProfilerString8View::WrapNullTerminatedString(mMarkerName), + mCategory, std::move(mOptions), markers::TextMarker{}, mText); + } + + protected: + const char* mMarkerName; + MarkerCategory mCategory; + MarkerOptions mOptions; + std::string mText; +}; + +extern template MFBT_API ProfileBufferBlockIndex +AddMarker(const ProfilerString8View&, const MarkerCategory&, MarkerOptions&&, + markers::TextMarker, const std::string&); + +extern template MFBT_API ProfileBufferBlockIndex +AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&, + const MarkerCategory&, MarkerOptions&&, markers::NoPayload); + +extern template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer( + ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&, + MarkerOptions&&, markers::TextMarker, const std::string&); + +} // namespace mozilla::baseprofiler + +// Creates an AutoProfilerTextMarker RAII object. This macro is safe to use +// even if MOZ_GECKO_PROFILER is not #defined. 
+# define AUTO_BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, \ + text) \ + ::mozilla::baseprofiler::AutoProfilerTextMarker BASE_PROFILER_RAII( \ + markerName, ::mozilla::baseprofiler::category::categoryName, options, \ + text) + +#endif // ndef MOZ_GECKO_PROFILER else + +#endif // BaseProfilerMarkers_h diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h new file mode 100644 index 0000000000..b5dbe27343 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h @@ -0,0 +1,674 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BaseProfilerMarkersDetail_h +#define BaseProfilerMarkersDetail_h + +#ifndef BaseProfilerMarkers_h +# error "This header should only be #included by BaseProfilerMarkers.h" +#endif + +#include "mozilla/BaseProfilerMarkersPrerequisites.h" + +#ifdef MOZ_GECKO_PROFILER + +// ~~ HERE BE DRAGONS ~~ +// +// Everything below is internal implementation detail, you shouldn't need to +// look at it unless working on the profiler code. + +# include "mozilla/BaseProfileJSONWriter.h" +# include "mozilla/ProfileBufferEntryKinds.h" + +# include <limits> +# include <tuple> + +namespace mozilla::baseprofiler { +// Implemented in platform.cpp +MFBT_API ProfileChunkedBuffer& profiler_get_core_buffer(); +} // namespace mozilla::baseprofiler + +namespace mozilla::base_profiler_markers_detail { + +// Get the core buffer from the profiler, and cache it in a +// non-templated-function static reference.
+inline ProfileChunkedBuffer& CachedBaseCoreBuffer() { + static ProfileChunkedBuffer& coreBuffer = + baseprofiler::profiler_get_core_buffer(); + return coreBuffer; +} + +struct Streaming { + // A `MarkerDataDeserializer` is a free function that can read a serialized + // payload from an `EntryReader` and streams it as JSON object properties. + using MarkerDataDeserializer = void (*)(ProfileBufferEntryReader&, + baseprofiler::SpliceableJSONWriter&); + + // A `MarkerTypeNameFunction` is a free function that returns the name of the + // marker type. + using MarkerTypeNameFunction = Span<const char> (*)(); + + // A `MarkerSchemaFunction` is a free function that returns a + // `MarkerSchema`, which contains all the information needed to stream + // the display schema associated with a marker type. + using MarkerSchemaFunction = MarkerSchema (*)(); + + struct MarkerTypeFunctions { + MarkerDataDeserializer mMarkerDataDeserializer = nullptr; + MarkerTypeNameFunction mMarkerTypeNameFunction = nullptr; + MarkerSchemaFunction mMarkerSchemaFunction = nullptr; + }; + + // A `DeserializerTag` will be added before the payload, to help select the + // correct deserializer when reading back the payload. + using DeserializerTag = uint8_t; + + // Store a deserializer (and other marker-type-specific functions) and get its + // `DeserializerTag`. + // This is intended to be only used once per deserializer when a new marker + // type is used for the first time, so it should be called to initialize a + // `static const` tag that will be re-used by all markers of the corresponding + // payload type -- see use below. + MFBT_API static DeserializerTag TagForMarkerTypeFunctions( + MarkerDataDeserializer aDeserializer, + MarkerTypeNameFunction aMarkerTypeNameFunction, + MarkerSchemaFunction aMarkerSchemaFunction); + + // Get the `MarkerDataDeserializer` for a given `DeserializerTag`. 
+ MFBT_API static MarkerDataDeserializer DeserializerForTag( + DeserializerTag aTag); + + // Retrieve all MarkerTypeFunctions's. + MFBT_API static Span<const MarkerTypeFunctions> MarkerTypeFunctionsArray(); +}; + +// This helper will examine a marker type's `StreamJSONMarkerData` function, see +// specialization below. +template <typename T> +struct StreamFunctionTypeHelper; + +// Helper specialization that takes the expected +// `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)` function and +// provide information about the `...` parameters. +template <typename R, typename... As> +struct StreamFunctionTypeHelper<R(baseprofiler::SpliceableJSONWriter&, As...)> { + constexpr static size_t scArity = sizeof...(As); + using TupleType = + std::tuple<std::remove_cv_t<std::remove_reference_t<As>>...>; + + // Serialization function that takes the exact same parameter types + // (const-ref'd) as `StreamJSONMarkerData`. This has to be inside the helper + // because only here can we access the raw parameter pack `As...`. + // And because we're using the same argument types through + // references-to-const, permitted implicit conversions can happen. + static ProfileBufferBlockIndex Serialize( + ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName, + const MarkerCategory& aCategory, MarkerOptions&& aOptions, + Streaming::DeserializerTag aDeserializerTag, const As&... aAs) { + // Note that options are first after the entry kind, because they contain + // the thread id, which is handled first to filter markers by threads. + return aBuffer.PutObjects(ProfileBufferEntryKind::Marker, aOptions, aName, + aCategory, aDeserializerTag, aAs...); + } +}; + +// Helper for a marker type. +// A marker type is defined in a `struct` with some expected static member +// functions. See example in BaseProfilerMarkers.h. 
+template <typename MarkerType> +struct MarkerTypeSerialization { + // Definitions to access the expected + // `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)` function + // and its parameters. + using StreamFunctionType = + StreamFunctionTypeHelper<decltype(MarkerType::StreamJSONMarkerData)>; + constexpr static size_t scStreamFunctionParameterCount = + StreamFunctionType::scArity; + using StreamFunctionUserParametersTuple = + typename StreamFunctionType::TupleType; + template <size_t i> + using StreamFunctionParameter = + std::tuple_element_t<i, StreamFunctionUserParametersTuple>; + + template <typename... Ts> + static ProfileBufferBlockIndex Serialize(ProfileChunkedBuffer& aBuffer, + const ProfilerString8View& aName, + const MarkerCategory& aCategory, + MarkerOptions&& aOptions, + const Ts&... aTs) { + static_assert(!std::is_same_v<MarkerType, + ::mozilla::baseprofiler::markers::NoPayload>, + "NoPayload should have been handled in the caller."); + // Register marker type functions, and get the tag for this deserializer. + // Note that the tag is stored in a function-static object, and this + // function is static in a templated struct, so there should only be one tag + // per MarkerType. + // Making the tag class-static may have been more efficient (to avoid a + // thread-safe init check at every call), but random global static + // initialization order would make it more complex to coordinate with + // `Streaming::TagForMarkerTypeFunctions()`, and also would add a (small) + // cost for everybody, even the majority of users not using the profiler. 
+ static const Streaming::DeserializerTag tag = + Streaming::TagForMarkerTypeFunctions(Deserialize, + MarkerType::MarkerTypeName, + MarkerType::MarkerTypeDisplay); + return StreamFunctionType::Serialize(aBuffer, aName, aCategory, + std::move(aOptions), tag, aTs...); + } + + private: + // This templated function will recursively deserialize each argument expected + // by `MarkerType::StreamJSONMarkerData()` on the stack, and call it at the + // end. E.g., for `StreamJSONMarkerData(int, char)`: + // - DeserializeArguments<0>(aER, aWriter) reads an int and calls: + // - DeserializeArguments<1>(aER, aWriter, const int&) reads a char and calls: + // - MarkerType::StreamJSONMarkerData(aWriter, const int&, const char&). + // Prototyping on godbolt showed that clang and gcc can flatten these + // recursive calls into one function with successive reads followed by the one + // stream call; tested up to 40 arguments: https://godbolt.org/z/5KeeM4 + template <size_t i = 0, typename... Args> + static void DeserializeArguments(ProfileBufferEntryReader& aEntryReader, + baseprofiler::SpliceableJSONWriter& aWriter, + const Args&... aArgs) { + static_assert(sizeof...(Args) == i, + "We should have collected `i` arguments so far"); + if constexpr (i < scStreamFunctionParameterCount) { + // Deserialize the i-th argument on this stack. + auto argument = aEntryReader.ReadObject<StreamFunctionParameter<i>>(); + // Add our local argument to the next recursive call. + DeserializeArguments<i + 1>(aEntryReader, aWriter, aArgs..., argument); + } else { + // We've read all the arguments, finally call the `StreamJSONMarkerData` + // function, which should write the appropriate JSON elements for this + // marker type. Note that the MarkerType-specific "type" element is + // already written. 
+ MarkerType::StreamJSONMarkerData(aWriter, aArgs...); + } + } + + public: + static void Deserialize(ProfileBufferEntryReader& aEntryReader, + baseprofiler::SpliceableJSONWriter& aWriter) { + aWriter.StringProperty("type", MarkerType::MarkerTypeName()); + DeserializeArguments(aEntryReader, aWriter); + } +}; + +template <> +struct MarkerTypeSerialization<::mozilla::baseprofiler::markers::NoPayload> { + // Nothing! NoPayload has special handling avoiding payload work. +}; + +template <typename MarkerType, typename... Ts> +static ProfileBufferBlockIndex AddMarkerWithOptionalStackToBuffer( + ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName, + const MarkerCategory& aCategory, MarkerOptions&& aOptions, + const Ts&... aTs) { + if constexpr (std::is_same_v<MarkerType, + ::mozilla::baseprofiler::markers::NoPayload>) { + static_assert(sizeof...(Ts) == 0, + "NoPayload does not accept any payload arguments."); + // Special case for NoPayload where there is a stack or inner window id: + // Because these options would be stored in the payload 'data' object, but + // there is no such object for NoPayload, we convert the marker to another + // type (without user fields in the 'data' object), so that the stack and/or + // inner window id are not lost. + // TODO: Remove this when bug 1646714 lands. + if (aOptions.Stack().GetChunkedBuffer() || + !aOptions.InnerWindowId().IsUnspecified()) { + struct NoPayloadUserData { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("NoPayloadUserData"); + } + static void StreamJSONMarkerData( + baseprofiler::SpliceableJSONWriter& aWriter) { + // No user payload. + } + static mozilla::MarkerSchema MarkerTypeDisplay() { + using MS = mozilla::MarkerSchema; + MS schema{MS::Location::markerChart, MS::Location::markerTable}; + // No user data to display. 
+ return schema; + } + }; + return MarkerTypeSerialization<NoPayloadUserData>::Serialize( + aBuffer, aName, aCategory, std::move(aOptions)); + } + + // Note that options are first after the entry kind, because they contain + // the thread id, which is handled first to filter markers by threads. + return aBuffer.PutObjects( + ProfileBufferEntryKind::Marker, std::move(aOptions), aName, aCategory, + base_profiler_markers_detail::Streaming::DeserializerTag(0)); + } else { + return MarkerTypeSerialization<MarkerType>::Serialize( + aBuffer, aName, aCategory, std::move(aOptions), aTs...); + } +} + +// Pointer to a function that can capture a backtrace into the provided +// `ProfileChunkedBuffer`, and returns true when successful. +using BacktraceCaptureFunction = bool (*)(ProfileChunkedBuffer&); + +// Add a marker with the given name, options, and arguments to the given buffer. +// Because this may be called from either Base or Gecko Profiler functions, the +// appropriate backtrace-capturing function must also be provided. +template <typename MarkerType, typename... Ts> +ProfileBufferBlockIndex AddMarkerToBuffer( + ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName, + const MarkerCategory& aCategory, MarkerOptions&& aOptions, + BacktraceCaptureFunction aBacktraceCaptureFunction, const Ts&... aTs) { + if (aOptions.ThreadId().IsUnspecified()) { + // If yet unspecified, set thread to this thread where the marker is added. + aOptions.Set(MarkerThreadId::CurrentThread()); + } + + if (aOptions.IsTimingUnspecified()) { + // If yet unspecified, set timing to this instant of adding the marker. + aOptions.Set(MarkerTiming::InstantNow()); + } + + if (aOptions.Stack().IsCaptureNeeded()) { + // A capture was requested, let's attempt to do it here&now. This avoids a + // lot of allocations that would be necessary if capturing a backtrace + // separately. + // TODO use a local on-stack byte buffer to remove last allocation. 
+ // TODO reduce internal profiler stack levels, see bug 1659872. + ProfileBufferChunkManagerSingle chunkManager( + ProfileBufferChunkManager::scExpectedMaximumStackSize); + ProfileChunkedBuffer chunkedBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager); + aOptions.StackRef().UseRequestedBacktrace( + aBacktraceCaptureFunction(chunkedBuffer) ? &chunkedBuffer : nullptr); + // This call must be made from here, while chunkedBuffer is in scope. + return AddMarkerWithOptionalStackToBuffer<MarkerType>( + aBuffer, aName, aCategory, std::move(aOptions), aTs...); + } + + return AddMarkerWithOptionalStackToBuffer<MarkerType>( + aBuffer, aName, aCategory, std::move(aOptions), aTs...); +} + +template <typename StackCallback> +[[nodiscard]] bool DeserializeAfterKindAndStream( + ProfileBufferEntryReader& aEntryReader, + baseprofiler::SpliceableJSONWriter& aWriter, int aThreadIdOrZero, + StackCallback&& aStackCallback) { + // Each entry is made up of the following: + // ProfileBufferEntry::Kind::Marker, <- already read by caller + // options, <- next location in entries + // name, + // payload + const MarkerOptions options = aEntryReader.ReadObject<MarkerOptions>(); + if (aThreadIdOrZero != 0 && + options.ThreadId().ThreadId() != aThreadIdOrZero) { + // A specific thread is being read, we're not in it. 
+ return false; + } + // Write the information to JSON with the following schema: + // [name, startTime, endTime, phase, category, data] + aWriter.StartArrayElement(); + { + aWriter.UniqueStringElement(aEntryReader.ReadObject<ProfilerString8View>()); + + const double startTime = options.Timing().GetStartTime(); + aWriter.DoubleElement(startTime); + + const double endTime = options.Timing().GetEndTime(); + aWriter.DoubleElement(endTime); + + aWriter.IntElement(static_cast<int64_t>(options.Timing().MarkerPhase())); + + MarkerCategory category = aEntryReader.ReadObject<MarkerCategory>(); + aWriter.IntElement(static_cast<int64_t>(category.GetCategory())); + + if (const auto tag = + aEntryReader.ReadObject<mozilla::base_profiler_markers_detail:: + Streaming::DeserializerTag>(); + tag != 0) { + aWriter.StartObjectElement(JSONWriter::SingleLineStyle); + { + // Stream "common props". + + // TODO: Move this to top-level tuple, when frontend supports it. + if (!options.InnerWindowId().IsUnspecified()) { + // Here, we are converting uint64_t to double. Both Browsing Context + // and Inner Window IDs are created using + // `nsContentUtils::GenerateProcessSpecificId`, which is specifically + // designed to only use 53 of the 64 bits to be lossless when passed + // into and out of JS as a double. + aWriter.DoubleProperty( + "innerWindowID", + static_cast<double>(options.InnerWindowId().Id())); + } + + // TODO: Move this to top-level tuple, when frontend supports it. + if (ProfileChunkedBuffer* chunkedBuffer = + options.Stack().GetChunkedBuffer(); + chunkedBuffer) { + aWriter.StartObjectProperty("stack"); + { std::forward<StackCallback>(aStackCallback)(*chunkedBuffer); } + aWriter.EndObject(); + } + + // Stream the payload, including the type. 
+ mozilla::base_profiler_markers_detail::Streaming::MarkerDataDeserializer + deserializer = mozilla::base_profiler_markers_detail::Streaming:: + DeserializerForTag(tag); + MOZ_RELEASE_ASSERT(deserializer); + deserializer(aEntryReader, aWriter); + } + aWriter.EndObject(); + } + } + aWriter.EndArray(); + return true; +} + +} // namespace mozilla::base_profiler_markers_detail + +namespace mozilla { + +// ---------------------------------------------------------------------------- +// Serializer, Deserializer: ProfilerStringView<CHAR> + +// The serialization starts with a ULEB128 number that encodes both whether the +// ProfilerStringView is literal (Least Significant Bit = 0) or not (LSB = 1), +// plus the string length (excluding null terminator) in bytes, shifted left by +// 1 bit. Following that number: +// - If literal, the string pointer value. +// - If non-literal, the contents as bytes (excluding null terminator if any). +template <typename CHAR> +struct ProfileBufferEntryWriter::Serializer<ProfilerStringView<CHAR>> { + static Length Bytes(const ProfilerStringView<CHAR>& aString) { + MOZ_RELEASE_ASSERT( + aString.Length() < std::numeric_limits<Length>::max() / 2, + "Double the string length doesn't fit in Length type"); + const Length stringLength = static_cast<Length>(aString.Length()); + if (aString.IsLiteral()) { + // Literal -> Length shifted left and LSB=0, then pointer. + return ULEB128Size(stringLength << 1 | 0u) + + static_cast<ProfileChunkedBuffer::Length>(sizeof(const CHAR*)); + } + // Non-literal -> Length shifted left and LSB=1, then string size in bytes. 
+ return ULEB128Size((stringLength << 1) | 1u) + stringLength * sizeof(CHAR); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const ProfilerStringView<CHAR>& aString) { + MOZ_RELEASE_ASSERT( + aString.Length() < std::numeric_limits<Length>::max() / 2, + "Double the string length doesn't fit in Length type"); + const Length stringLength = static_cast<Length>(aString.Length()); + if (aString.IsLiteral()) { + // Literal -> Length shifted left and LSB=0, then pointer. + aEW.WriteULEB128(stringLength << 1 | 0u); + aEW.WriteObject(WrapProfileBufferRawPointer(aString.Data())); + return; + } + // Non-literal -> Length shifted left and LSB=1, then string size in bytes. + aEW.WriteULEB128(stringLength << 1 | 1u); + aEW.WriteBytes(aString.Data(), stringLength * sizeof(CHAR)); + } +}; + +template <typename CHAR> +struct ProfileBufferEntryReader::Deserializer<ProfilerStringView<CHAR>> { + static void ReadInto(ProfileBufferEntryReader& aER, + ProfilerStringView<CHAR>& aString) { + const Length lengthAndIsLiteral = aER.ReadULEB128<Length>(); + const Length stringLength = lengthAndIsLiteral >> 1; + if ((lengthAndIsLiteral & 1u) == 0u) { + // LSB==0 -> Literal string, read the string pointer. + aString.mStringView = std::basic_string_view<CHAR>( + aER.ReadObject<const CHAR*>(), stringLength); + aString.mOwnership = ProfilerStringView<CHAR>::Ownership::Literal; + return; + } + // LSB==1 -> Not a literal string, allocate a buffer to store the string + // (plus terminal, for safety), and give it to the ProfilerStringView; Note + // that this is a secret use of ProfilerStringView, which is intended to + // only be used between deserialization and JSON streaming. 
+ CHAR* buffer = new CHAR[stringLength + 1]; + aER.ReadBytes(buffer, stringLength * sizeof(CHAR)); + buffer[stringLength] = CHAR(0); + aString.mStringView = std::basic_string_view<CHAR>(buffer, stringLength); + aString.mOwnership = + ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView; + } + + static ProfilerStringView<CHAR> Read(ProfileBufferEntryReader& aER) { + const Length lengthAndIsLiteral = aER.ReadULEB128<Length>(); + const Length stringLength = lengthAndIsLiteral >> 1; + if ((lengthAndIsLiteral & 1u) == 0u) { + // LSB==0 -> Literal string, read the string pointer. + return ProfilerStringView<CHAR>( + aER.ReadObject<const CHAR*>(), stringLength, + ProfilerStringView<CHAR>::Ownership::Literal); + } + // LSB==1 -> Not a literal string, allocate a buffer to store the string + // (plus terminal, for safety), and give it to the ProfilerStringView; Note + // that this is a secret use of ProfilerStringView, which is intended to + // only be used between deserialization and JSON streaming. + CHAR* buffer = new CHAR[stringLength + 1]; + aER.ReadBytes(buffer, stringLength * sizeof(CHAR)); + buffer[stringLength] = CHAR(0); + return ProfilerStringView<CHAR>( + buffer, stringLength, + ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView); + } +}; + +// Serializer, Deserializer: MarkerCategory + +// The serialization contains both category numbers encoded as ULEB128. 
+template <> +struct ProfileBufferEntryWriter::Serializer<MarkerCategory> { + static Length Bytes(const MarkerCategory& aCategory) { + return ULEB128Size(static_cast<uint32_t>(aCategory.CategoryPair())); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const MarkerCategory& aCategory) { + aEW.WriteULEB128(static_cast<uint32_t>(aCategory.CategoryPair())); + } +}; + +template <> +struct ProfileBufferEntryReader::Deserializer<MarkerCategory> { + static void ReadInto(ProfileBufferEntryReader& aER, + MarkerCategory& aCategory) { + aCategory = Read(aER); + } + + static MarkerCategory Read(ProfileBufferEntryReader& aER) { + return MarkerCategory(static_cast<baseprofiler::ProfilingCategoryPair>( + aER.ReadULEB128<uint32_t>())); + } +}; + +// ---------------------------------------------------------------------------- +// Serializer, Deserializer: MarkerTiming + +// The serialization starts with the marker phase, followed by one or two +// timestamps as needed. +template <> +struct ProfileBufferEntryWriter::Serializer<MarkerTiming> { + static Length Bytes(const MarkerTiming& aTiming) { + MOZ_ASSERT(!aTiming.IsUnspecified()); + const auto phase = aTiming.MarkerPhase(); + switch (phase) { + case MarkerTiming::Phase::Instant: + return SumBytes(phase, aTiming.StartTime()); + case MarkerTiming::Phase::Interval: + return SumBytes(phase, aTiming.StartTime(), aTiming.EndTime()); + case MarkerTiming::Phase::IntervalStart: + return SumBytes(phase, aTiming.StartTime()); + case MarkerTiming::Phase::IntervalEnd: + return SumBytes(phase, aTiming.EndTime()); + default: + MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant || + phase == MarkerTiming::Phase::Interval || + phase == MarkerTiming::Phase::IntervalStart || + phase == MarkerTiming::Phase::IntervalEnd); + return 0; // Only to avoid build errors. 
+ } + } + + static void Write(ProfileBufferEntryWriter& aEW, + const MarkerTiming& aTiming) { + MOZ_ASSERT(!aTiming.IsUnspecified()); + const auto phase = aTiming.MarkerPhase(); + switch (phase) { + case MarkerTiming::Phase::Instant: + aEW.WriteObjects(phase, aTiming.StartTime()); + return; + case MarkerTiming::Phase::Interval: + aEW.WriteObjects(phase, aTiming.StartTime(), aTiming.EndTime()); + return; + case MarkerTiming::Phase::IntervalStart: + aEW.WriteObjects(phase, aTiming.StartTime()); + return; + case MarkerTiming::Phase::IntervalEnd: + aEW.WriteObjects(phase, aTiming.EndTime()); + return; + default: + MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant || + phase == MarkerTiming::Phase::Interval || + phase == MarkerTiming::Phase::IntervalStart || + phase == MarkerTiming::Phase::IntervalEnd); + return; + } + } +}; + +template <> +struct ProfileBufferEntryReader::Deserializer<MarkerTiming> { + static void ReadInto(ProfileBufferEntryReader& aER, MarkerTiming& aTiming) { + aTiming.mPhase = aER.ReadObject<MarkerTiming::Phase>(); + switch (aTiming.mPhase) { + case MarkerTiming::Phase::Instant: + aTiming.mStartTime = aER.ReadObject<TimeStamp>(); + aTiming.mEndTime = TimeStamp{}; + break; + case MarkerTiming::Phase::Interval: + aTiming.mStartTime = aER.ReadObject<TimeStamp>(); + aTiming.mEndTime = aER.ReadObject<TimeStamp>(); + break; + case MarkerTiming::Phase::IntervalStart: + aTiming.mStartTime = aER.ReadObject<TimeStamp>(); + aTiming.mEndTime = TimeStamp{}; + break; + case MarkerTiming::Phase::IntervalEnd: + aTiming.mStartTime = TimeStamp{}; + aTiming.mEndTime = aER.ReadObject<TimeStamp>(); + break; + default: + MOZ_RELEASE_ASSERT(aTiming.mPhase == MarkerTiming::Phase::Instant || + aTiming.mPhase == MarkerTiming::Phase::Interval || + aTiming.mPhase == + MarkerTiming::Phase::IntervalStart || + aTiming.mPhase == MarkerTiming::Phase::IntervalEnd); + break; + } + } + + static MarkerTiming Read(ProfileBufferEntryReader& aER) { + TimeStamp start; + TimeStamp 
end; + auto phase = aER.ReadObject<MarkerTiming::Phase>(); + switch (phase) { + case MarkerTiming::Phase::Instant: + start = aER.ReadObject<TimeStamp>(); + break; + case MarkerTiming::Phase::Interval: + start = aER.ReadObject<TimeStamp>(); + end = aER.ReadObject<TimeStamp>(); + break; + case MarkerTiming::Phase::IntervalStart: + start = aER.ReadObject<TimeStamp>(); + break; + case MarkerTiming::Phase::IntervalEnd: + end = aER.ReadObject<TimeStamp>(); + break; + default: + MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant || + phase == MarkerTiming::Phase::Interval || + phase == MarkerTiming::Phase::IntervalStart || + phase == MarkerTiming::Phase::IntervalEnd); + break; + } + return MarkerTiming(start, end, phase); + } +}; + +// ---------------------------------------------------------------------------- +// Serializer, Deserializer: MarkerStack + +// The serialization only contains the `ProfileChunkedBuffer` from the +// backtrace; if there is no backtrace or if it's empty, this will implicitly +// store a nullptr (see +// `ProfileBufferEntryWriter::Serializer<ProfilerChunkedBuffer*>`). +template <> +struct ProfileBufferEntryWriter::Serializer<MarkerStack> { + static Length Bytes(const MarkerStack& aStack) { + return SumBytes(aStack.GetChunkedBuffer()); + } + + static void Write(ProfileBufferEntryWriter& aEW, const MarkerStack& aStack) { + aEW.WriteObject(aStack.GetChunkedBuffer()); + } +}; + +template <> +struct ProfileBufferEntryReader::Deserializer<MarkerStack> { + static void ReadInto(ProfileBufferEntryReader& aER, MarkerStack& aStack) { + aStack = Read(aER); + } + + static MarkerStack Read(ProfileBufferEntryReader& aER) { + return MarkerStack(aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>()); + } +}; + +// ---------------------------------------------------------------------------- +// Serializer, Deserializer: MarkerOptions + +// The serialization contains all members (either trivially-copyable, or they +// provide their specialization above). 
+template <> +struct ProfileBufferEntryWriter::Serializer<MarkerOptions> { + static Length Bytes(const MarkerOptions& aOptions) { + return SumBytes(aOptions.ThreadId(), aOptions.Timing(), aOptions.Stack(), + aOptions.InnerWindowId()); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const MarkerOptions& aOptions) { + aEW.WriteObjects(aOptions.ThreadId(), aOptions.Timing(), aOptions.Stack(), + aOptions.InnerWindowId()); + } +}; + +template <> +struct ProfileBufferEntryReader::Deserializer<MarkerOptions> { + static void ReadInto(ProfileBufferEntryReader& aER, MarkerOptions& aOptions) { + aER.ReadIntoObjects(aOptions.mThreadId, aOptions.mTiming, aOptions.mStack, + aOptions.mInnerWindowId); + } + + static MarkerOptions Read(ProfileBufferEntryReader& aER) { + MarkerOptions options; + ReadInto(aER, options); + return options; + } +}; + +} // namespace mozilla + +#endif // MOZ_GECKO_PROFILER + +#endif // BaseProfilerMarkersDetail_h diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h new file mode 100644 index 0000000000..aa85b41896 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h @@ -0,0 +1,866 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This header contains basic definitions required to create marker types, and +// to add markers to the profiler buffers. +// +// In most cases, #include "mozilla/BaseProfilerMarkers.h" instead, or +// #include "mozilla/BaseProfilerMarkerTypes.h" for common marker types. 
+ +#ifndef BaseProfilerMarkersPrerequisites_h +#define BaseProfilerMarkersPrerequisites_h + +#ifdef MOZ_GECKO_PROFILER + +# include "BaseProfilingCategory.h" +# include "mozilla/Maybe.h" +# include "mozilla/ProfileChunkedBuffer.h" +# include "mozilla/TimeStamp.h" +# include "mozilla/UniquePtr.h" +# include "mozilla/Variant.h" + +# include <initializer_list> +# include <string_view> +# include <string> +# include <type_traits> +# include <utility> +# include <vector> + +// TODO: Move common stuff to shared header instead. +# include "BaseProfiler.h" + +namespace mozilla { + +// Return a NotNull<const CHAR*> pointing at the literal empty string `""`. +template <typename CHAR> +constexpr const CHAR* LiteralEmptyStringPointer() { + static_assert(std::is_same_v<CHAR, char> || std::is_same_v<CHAR, char16_t>, + "Only char and char16_t are supported in Firefox"); + if constexpr (std::is_same_v<CHAR, char>) { + return ""; + } + if constexpr (std::is_same_v<CHAR, char16_t>) { + return u""; + } +} + +// Return a string_view<CHAR> pointing at the literal empty string. +template <typename CHAR> +constexpr std::basic_string_view<CHAR> LiteralEmptyStringView() { + static_assert(std::is_same_v<CHAR, char> || std::is_same_v<CHAR, char16_t>, + "Only char and char16_t are supported in Firefox"); + // Use `operator""sv()` from <string_view>. + using namespace std::literals::string_view_literals; + if constexpr (std::is_same_v<CHAR, char>) { + return ""sv; + } + if constexpr (std::is_same_v<CHAR, char16_t>) { + return u""sv; + } +} + +// General string view, optimized for short on-stack life before serialization, +// and between deserialization and JSON-streaming. +template <typename CHAR> +class MOZ_STACK_CLASS ProfilerStringView { + public: + // Default constructor points at "" (literal empty string). + constexpr ProfilerStringView() = default; + + // Don't allow copy. 
+ ProfilerStringView(const ProfilerStringView&) = delete; + ProfilerStringView& operator=(const ProfilerStringView&) = delete; + + // Allow move. For consistency the moved-from string is always reset to "". + constexpr ProfilerStringView(ProfilerStringView&& aOther) + : mStringView(std::move(aOther.mStringView)), + mOwnership(aOther.mOwnership) { + if (mOwnership == Ownership::OwnedThroughStringView) { + // We now own the buffer, make the other point at the literal "". + aOther.mStringView = LiteralEmptyStringView<CHAR>(); + aOther.mOwnership = Ownership::Literal; + } + } + constexpr ProfilerStringView& operator=(ProfilerStringView&& aOther) { + mStringView = std::move(aOther.mStringView); + mOwnership = aOther.mOwnership; + if (mOwnership == Ownership::OwnedThroughStringView) { + // We now own the buffer, make the other point at the literal "". + aOther.mStringView = LiteralEmptyStringView<CHAR>(); + aOther.mOwnership = Ownership::Literal; + } + return *this; + } + + ~ProfilerStringView() { + if (MOZ_UNLIKELY(mOwnership == Ownership::OwnedThroughStringView)) { + // We own the buffer pointed at by mStringView, destroy it. + // This is only used between deserialization and streaming. + delete mStringView.data(); + } + } + + // Implicit construction from nullptr, points at "" (literal empty string). + constexpr MOZ_IMPLICIT ProfilerStringView(decltype(nullptr)) {} + + // Implicit constructor from a literal string. + template <size_t Np1> + constexpr MOZ_IMPLICIT ProfilerStringView(const CHAR (&aLiteralString)[Np1]) + : ProfilerStringView(aLiteralString, Np1 - 1, Ownership::Literal) {} + + // Constructor from a non-literal string. + constexpr ProfilerStringView(const CHAR* aString, size_t aLength) + : ProfilerStringView(aString, aLength, Ownership::Reference) {} + + // Implicit constructor from a string_view. 
+ constexpr MOZ_IMPLICIT ProfilerStringView( + const std::basic_string_view<CHAR>& aStringView) + : ProfilerStringView(aStringView.data(), aStringView.length(), + Ownership::Reference) {} + + // Implicit constructor from an expiring string_view. We assume that the + // pointed-at string will outlive this ProfilerStringView. + constexpr MOZ_IMPLICIT ProfilerStringView( + std::basic_string_view<CHAR>&& aStringView) + : ProfilerStringView(aStringView.data(), aStringView.length(), + Ownership::Reference) {} + + // Implicit constructor from std::string. + constexpr MOZ_IMPLICIT ProfilerStringView( + const std::basic_string<CHAR>& aString) + : ProfilerStringView(aString.data(), aString.length(), + Ownership::Reference) {} + + // Construction from a raw pointer to a null-terminated string. + // This is a named class-static function to make it more obvious where work is + // being done (to determine the string length), and encourage users to instead + // provide a length, if already known. + // TODO: Find callers and convert them to constructor instead if possible. + static constexpr ProfilerStringView WrapNullTerminatedString( + const CHAR* aString) { + return ProfilerStringView( + aString, aString ? std::char_traits<CHAR>::length(aString) : 0, + Ownership::Reference); + } + + // Implicit constructor for an object with member functions `Data()` + // `Length()`, and `IsLiteral()`, common in xpcom strings. 
+ template < + typename String, + typename DataReturnType = decltype(std::declval<const String>().Data()), + typename LengthReturnType = + decltype(std::declval<const String>().Length()), + typename IsLiteralReturnType = + decltype(std::declval<const String>().IsLiteral()), + typename = + std::enable_if_t<std::is_convertible_v<DataReturnType, const CHAR*> && + std::is_integral_v<LengthReturnType> && + std::is_same_v<IsLiteralReturnType, bool>>> + constexpr MOZ_IMPLICIT ProfilerStringView(const String& aString) + : ProfilerStringView( + static_cast<const CHAR*>(aString.Data()), aString.Length(), + aString.IsLiteral() ? Ownership::Literal : Ownership::Reference) {} + + [[nodiscard]] constexpr const std::basic_string_view<CHAR>& StringView() + const { + return mStringView; + } + + [[nodiscard]] constexpr const CHAR* Data() const { + return mStringView.data(); + } + + [[nodiscard]] constexpr size_t Length() const { return mStringView.length(); } + + [[nodiscard]] constexpr bool IsLiteral() const { + return mOwnership == Ownership::Literal; + } + [[nodiscard]] constexpr bool IsReference() const { + return mOwnership == Ownership::Reference; + } + // No `IsOwned...()` because it's a secret, only used internally! + + [[nodiscard]] operator Span<const CHAR>() const { + return Span<const CHAR>(Data(), Length()); + } + + private: + enum class Ownership { Literal, Reference, OwnedThroughStringView }; + + // Allow deserializer to store anything here. + friend ProfileBufferEntryReader::Deserializer<ProfilerStringView>; + + constexpr ProfilerStringView(const CHAR* aString, size_t aLength, + Ownership aOwnership) + : mStringView(aString ? std::basic_string_view<CHAR>(aString, aLength) + : LiteralEmptyStringView<CHAR>()), + mOwnership(aString ? aOwnership : Ownership::Literal) {} + + // String view to an outside string (literal or reference). + // We may actually own the pointed-at buffer, but it is only used internally + // between deserialization and JSON streaming. 
+ std::basic_string_view<CHAR> mStringView = LiteralEmptyStringView<CHAR>(); + + Ownership mOwnership = Ownership::Literal; +}; + +using ProfilerString8View = ProfilerStringView<char>; +using ProfilerString16View = ProfilerStringView<char16_t>; + +// This compulsory marker parameter contains the required category information. +class MarkerCategory { + public: + // Constructor from category pair (includes both super- and sub-categories). + constexpr explicit MarkerCategory( + baseprofiler::ProfilingCategoryPair aCategoryPair) + : mCategoryPair(aCategoryPair) {} + + // Returns the stored category pair. + constexpr baseprofiler::ProfilingCategoryPair CategoryPair() const { + return mCategoryPair; + } + + // Returns the super-category from the stored category pair. + baseprofiler::ProfilingCategory GetCategory() const { + return GetProfilingCategoryPairInfo(mCategoryPair).mCategory; + } + + private: + baseprofiler::ProfilingCategoryPair mCategoryPair = + baseprofiler::ProfilingCategoryPair::OTHER; +}; + +namespace baseprofiler::category { + +// Each category pair name constructs a MarkerCategory. +// E.g.: mozilla::baseprofiler::category::OTHER_Profiling +// Profiler macros will take the category name alone without namespace. +// E.g.: `PROFILER_MARKER_UNTYPED("name", OTHER_Profiling)` +# define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color) +# define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) \ + static constexpr MarkerCategory name{ProfilingCategoryPair::name}; +# define CATEGORY_ENUM_END_CATEGORY +MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY, + CATEGORY_ENUM_SUBCATEGORY, + CATEGORY_ENUM_END_CATEGORY) +# undef CATEGORY_ENUM_BEGIN_CATEGORY +# undef CATEGORY_ENUM_SUBCATEGORY +# undef CATEGORY_ENUM_END_CATEGORY + +// Import `MarkerCategory` into this namespace. 
This will allow using this type +// dynamically in macros that prepend `::mozilla::baseprofiler::category::` to +// the given category, e.g.: +// `PROFILER_MARKER_UNTYPED("name", MarkerCategory(...))` +using MarkerCategory = ::mozilla::MarkerCategory; + +} // namespace baseprofiler::category + +// The classes below are all embedded in a `MarkerOptions` object. +class MarkerOptions; + +// This marker option captures a given thread id. +// If left unspecified (by default construction) during the add-marker call, the +// current thread id will be used then. +class MarkerThreadId { + public: + // Default constructor, keeps the thread id unspecified. + constexpr MarkerThreadId() = default; + + // Constructor from a given thread id. + constexpr explicit MarkerThreadId(int aThreadId) : mThreadId(aThreadId) {} + + // Use the current thread's id. + static MarkerThreadId CurrentThread() { + return MarkerThreadId(baseprofiler::profiler_current_thread_id()); + } + + // Use the main thread's id. This can be useful to record a marker from a + // possibly-unregistered thread, and display it in the main thread track. + static MarkerThreadId MainThread() { + return MarkerThreadId(baseprofiler::profiler_main_thread_id()); + } + + [[nodiscard]] constexpr int ThreadId() const { return mThreadId; } + + [[nodiscard]] constexpr bool IsUnspecified() const { return mThreadId == 0; } + + private: + int mThreadId = 0; +}; + +// This marker option contains marker timing information. +// This class encapsulates the logic for correctly storing a marker based on its +// phase. Use the static methods to create the MarkerTiming. This is a transient +// object that is being used to enforce the constraints of the combinations of +// the data. +class MarkerTiming { + public: + // The following static methods are used to create the MarkerTiming based on + // the type that it is.
+ + static MarkerTiming InstantAt(const TimeStamp& aTime) { + MOZ_ASSERT(!aTime.IsNull(), "Time is null for an instant marker."); + return MarkerTiming{aTime, TimeStamp{}, MarkerTiming::Phase::Instant}; + } + + static MarkerTiming InstantNow() { + return InstantAt(TimeStamp::NowUnfuzzed()); + } + + static MarkerTiming Interval(const TimeStamp& aStartTime, + const TimeStamp& aEndTime) { + MOZ_ASSERT(!aStartTime.IsNull(), + "Start time is null for an interval marker."); + MOZ_ASSERT(!aEndTime.IsNull(), "End time is null for an interval marker."); + return MarkerTiming{aStartTime, aEndTime, MarkerTiming::Phase::Interval}; + } + + static MarkerTiming IntervalUntilNowFrom(const TimeStamp& aStartTime) { + return Interval(aStartTime, TimeStamp::NowUnfuzzed()); + } + + static MarkerTiming IntervalStart( + const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) { + MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval start marker."); + return MarkerTiming{aTime, TimeStamp{}, MarkerTiming::Phase::IntervalStart}; + } + + static MarkerTiming IntervalEnd( + const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) { + MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker."); + return MarkerTiming{TimeStamp{}, aTime, MarkerTiming::Phase::IntervalEnd}; + } + + // Set the interval end in this timing. + // If there was already a start time, this makes it a full interval. + void SetIntervalEnd(const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) { + MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker."); + mEndTime = aTime; + mPhase = mStartTime.IsNull() ? 
Phase::IntervalEnd : Phase::Interval; + } + + [[nodiscard]] const TimeStamp& StartTime() const { return mStartTime; } + [[nodiscard]] const TimeStamp& EndTime() const { return mEndTime; } + + enum class Phase : uint8_t { + Instant = 0, + Interval = 1, + IntervalStart = 2, + IntervalEnd = 3, + }; + + [[nodiscard]] Phase MarkerPhase() const { + MOZ_ASSERT(!IsUnspecified()); + return mPhase; + } + + // The following getter methods are used to put the value into the buffer for + // storage. + [[nodiscard]] double GetStartTime() const { + MOZ_ASSERT(!IsUnspecified()); + // If mStartTime is null (e.g., for IntervalEnd), this will output 0.0 as + // expected. + return MarkerTiming::timeStampToDouble(mStartTime); + } + + [[nodiscard]] double GetEndTime() const { + MOZ_ASSERT(!IsUnspecified()); + // If mEndTime is null (e.g., for Instant or IntervalStart), this will + // output 0.0 as expected. + return MarkerTiming::timeStampToDouble(mEndTime); + } + + [[nodiscard]] uint8_t GetPhase() const { + MOZ_ASSERT(!IsUnspecified()); + return static_cast<uint8_t>(mPhase); + } + + private: + friend ProfileBufferEntryWriter::Serializer<MarkerTiming>; + friend ProfileBufferEntryReader::Deserializer<MarkerTiming>; + friend MarkerOptions; + + // Default timing leaves it internally "unspecified", serialization getters + // and add-marker functions will default to `InstantNow()`. + constexpr MarkerTiming() = default; + + // This should only be used by internal profiler code. + [[nodiscard]] bool IsUnspecified() const { + return mStartTime.IsNull() && mEndTime.IsNull(); + } + + // Full constructor, used by static factory functions. + constexpr MarkerTiming(const TimeStamp& aStartTime, const TimeStamp& aEndTime, + Phase aPhase) + : mStartTime(aStartTime), mEndTime(aEndTime), mPhase(aPhase) {} + + static double timeStampToDouble(const TimeStamp& time) { + if (time.IsNull()) { + // The Phase lets us know not to use this value. 
+ return 0; + } + return (time - TimeStamp::ProcessCreation()).ToMilliseconds(); + } + + TimeStamp mStartTime; + TimeStamp mEndTime; + Phase mPhase = Phase::Instant; +}; + +// This marker option allows three cases: +// - By default, no stacks are captured. +// - The caller can request a stack capture, and the add-marker code will take +// care of it in the most efficient way. +// - The caller can still provide an existing backtrace, for cases where a +// marker reports something that happened elsewhere. +class MarkerStack { + public: + // Default constructor, no capture. + constexpr MarkerStack() = default; + + // Disallow copy. + MarkerStack(const MarkerStack&) = delete; + MarkerStack& operator=(const MarkerStack&) = delete; + + // Allow move. + MarkerStack(MarkerStack&& aOther) + : mIsCaptureRequested(aOther.mIsCaptureRequested), + mOptionalChunkedBufferStorage( + std::move(aOther.mOptionalChunkedBufferStorage)), + mChunkedBuffer(aOther.mChunkedBuffer) { + AssertInvariants(); + aOther.Clear(); + } + MarkerStack& operator=(MarkerStack&& aOther) { + mIsCaptureRequested = aOther.mIsCaptureRequested; + mOptionalChunkedBufferStorage = + std::move(aOther.mOptionalChunkedBufferStorage); + mChunkedBuffer = aOther.mChunkedBuffer; + AssertInvariants(); + aOther.Clear(); + return *this; + } + + // Take ownership of a backtrace. If null or empty, equivalent to NoStack(). + explicit MarkerStack(UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer) + : mIsCaptureRequested(false), + mOptionalChunkedBufferStorage( + (!aExternalChunkedBuffer || aExternalChunkedBuffer->IsEmpty()) + ? nullptr + : std::move(aExternalChunkedBuffer)), + mChunkedBuffer(mOptionalChunkedBufferStorage.get()) { + AssertInvariants(); + } + + // Use an existing backtrace stored elsewhere, which the user must guarantee + // is alive during the add-marker call. If empty, equivalent to NoStack(). 
+ explicit MarkerStack(ProfileChunkedBuffer& aExternalChunkedBuffer) + : mIsCaptureRequested(false), + mChunkedBuffer(aExternalChunkedBuffer.IsEmpty() + ? nullptr + : &aExternalChunkedBuffer) { + AssertInvariants(); + } + + // Don't capture a stack in this marker. + static MarkerStack NoStack() { return MarkerStack(false); } + + // Capture a stack when adding this marker. + static MarkerStack Capture() { + // Actual capture will be handled inside profiler_add_marker. + return MarkerStack(true); + } + + // Optionally capture a stack, useful for avoiding long-winded ternaries. + static MarkerStack MaybeCapture(bool aDoCapture) { + return MarkerStack(aDoCapture); + } + + // Use an existing backtrace stored elsewhere, which the user must guarantee + // is alive during the add-marker call. If empty, equivalent to NoStack(). + static MarkerStack UseBacktrace( + ProfileChunkedBuffer& aExternalChunkedBuffer) { + return MarkerStack(aExternalChunkedBuffer); + } + + // Take ownership of a backtrace previously captured with + // `profiler_capture_backtrace()`. If null, equivalent to NoStack(). + static MarkerStack TakeBacktrace( + UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer) { + return MarkerStack(std::move(aExternalChunkedBuffer)); + } + + [[nodiscard]] bool IsCaptureNeeded() const { + // If the chunked buffer already contains something, consider the capture + // request already fulfilled. + return mIsCaptureRequested; + } + + ProfileChunkedBuffer* GetChunkedBuffer() const { return mChunkedBuffer; } + + // Use backtrace after a request. If null, equivalent to NoStack(). + void UseRequestedBacktrace(ProfileChunkedBuffer* aExternalChunkedBuffer) { + MOZ_RELEASE_ASSERT(IsCaptureNeeded()); + mIsCaptureRequested = false; + if (aExternalChunkedBuffer && !aExternalChunkedBuffer->IsEmpty()) { + // We only need to use the provided buffer if it is not empty. 
+ mChunkedBuffer = aExternalChunkedBuffer; + } + AssertInvariants(); + } + + void Clear() { + mIsCaptureRequested = false; + mOptionalChunkedBufferStorage.reset(); + mChunkedBuffer = nullptr; + AssertInvariants(); + } + + private: + explicit MarkerStack(bool aIsCaptureRequested) + : mIsCaptureRequested(aIsCaptureRequested) { + AssertInvariants(); + } + + // This should be called after every constructor and non-const function. + void AssertInvariants() const { +# ifdef DEBUG + if (mIsCaptureRequested) { + MOZ_ASSERT(!mOptionalChunkedBufferStorage, + "We should not hold a buffer when capture is requested"); + MOZ_ASSERT(!mChunkedBuffer, + "We should not point at a buffer when capture is requested"); + } else { + if (mOptionalChunkedBufferStorage) { + MOZ_ASSERT(mChunkedBuffer == mOptionalChunkedBufferStorage.get(), + "Non-null mOptionalChunkedBufferStorage must be pointed-at " + "by mChunkedBuffer"); + } + if (mChunkedBuffer) { + MOZ_ASSERT(!mChunkedBuffer->IsEmpty(), + "Non-null mChunkedBuffer must not be empty"); + } + } +# endif // DEBUG + } + + // True if a capture is requested when marker is added to the profile buffer. + bool mIsCaptureRequested = false; + + // Optional storage for the backtrace, in case it was captured before the + // add-marker call. + UniquePtr<ProfileChunkedBuffer> mOptionalChunkedBufferStorage; + + // If not null, this points to the backtrace. It may point to a backtrace + // temporarily stored on the stack, or to mOptionalChunkedBufferStorage. + ProfileChunkedBuffer* mChunkedBuffer = nullptr; +}; + +// This marker option captures a given inner window id. +class MarkerInnerWindowId { + public: + // Default constructor, it leaves the id unspecified. + constexpr MarkerInnerWindowId() = default; + + // Constructor with a specified inner window id. + constexpr explicit MarkerInnerWindowId(uint64_t i) : mInnerWindowId(i) {} + + // Constructor with either specified inner window id or Nothing. 
+ constexpr explicit MarkerInnerWindowId(const Maybe<uint64_t>& i) + : mInnerWindowId(i.valueOr(scNoId)) {} + + // Explicit option with unspecified id. + constexpr static MarkerInnerWindowId NoId() { return MarkerInnerWindowId{}; } + + [[nodiscard]] bool IsUnspecified() const { return mInnerWindowId == scNoId; } + + [[nodiscard]] constexpr uint64_t Id() const { return mInnerWindowId; } + + private: + static constexpr uint64_t scNoId = 0; + uint64_t mInnerWindowId = scNoId; +}; + +// This class combines each of the possible marker options above. +class MarkerOptions { + public: + // Constructor from individual options (including none). + // Implicit to allow `{}` and one option type as-is. + // Options that are not provided here are defaulted. In particular, timing + // defaults to `MarkerTiming::InstantNow()` when the marker is recorded. + template <typename... Options> + MOZ_IMPLICIT MarkerOptions(Options&&... aOptions) { + (Set(std::forward<Options>(aOptions)), ...); + } + + // Disallow copy. + MarkerOptions(const MarkerOptions&) = delete; + MarkerOptions& operator=(const MarkerOptions&) = delete; + + // Allow move. + MarkerOptions(MarkerOptions&&) = default; + MarkerOptions& operator=(MarkerOptions&&) = default; + + // The embedded `MarkerTiming` hasn't been specified yet. + [[nodiscard]] bool IsTimingUnspecified() const { + return mTiming.IsUnspecified(); + } + + // Each option may be added in a chain by e.g.: + // `options.Set(MarkerThreadId(123)).Set(MarkerTiming::IntervalEnd())`. + // When passed to an add-marker function, it must be an rvalue, either created + // on the spot, or `std::move`d from storage, e.g.: + // `PROFILER_MARKER_UNTYPED("...", std::move(options).Set(...))`; + // + // Options can be read by their name (without "Marker"), e.g.: `o.ThreadId()`. 
+ // Add "Ref" for a non-const reference, e.g.: `o.ThreadIdRef() = ...;` +# define FUNCTIONS_ON_MEMBER(NAME) \ + MarkerOptions& Set(Marker##NAME&& a##NAME)& { \ + m##NAME = std::move(a##NAME); \ + return *this; \ + } \ + \ + MarkerOptions&& Set(Marker##NAME&& a##NAME)&& { \ + m##NAME = std::move(a##NAME); \ + return std::move(*this); \ + } \ + \ + const Marker##NAME& NAME() const { return m##NAME; } \ + \ + Marker##NAME& NAME##Ref() { return m##NAME; } + + FUNCTIONS_ON_MEMBER(ThreadId); + FUNCTIONS_ON_MEMBER(Timing); + FUNCTIONS_ON_MEMBER(Stack); + FUNCTIONS_ON_MEMBER(InnerWindowId); +# undef FUNCTIONS_ON_MEMBER + + private: + friend ProfileBufferEntryReader::Deserializer<MarkerOptions>; + + MarkerThreadId mThreadId; + MarkerTiming mTiming; + MarkerStack mStack; + MarkerInnerWindowId mInnerWindowId; +}; + +} // namespace mozilla + +namespace mozilla::baseprofiler::markers { + +// Default marker payload types, with no extra information, not even a marker +// type and payload. This is intended for label-only markers. +struct NoPayload final {}; + +} // namespace mozilla::baseprofiler::markers + +namespace mozilla { + +class JSONWriter; + +// This class collects all the information necessary to stream the JSON schema +// that informs the front-end how to display a type of markers. +// It will be created and populated in `MarkerTypeDisplay()` functions in each +// marker type definition, see Add/Set functions. +class MarkerSchema { + public: + enum class Location : unsigned { + markerChart, + markerTable, + // This adds markers to the main marker timeline in the header. + timelineOverview, + // In the timeline, this is a section that breaks out markers that are + // related to memory. When memory counters are enabled, this is its own + // track, otherwise it is displayed with the main thread. + timelineMemory, + // This adds markers to the IPC timeline area in the header. + timelineIPC, + // This adds markers to the FileIO timeline area in the header. 
+ timelineFileIO, + // TODO - This is not supported yet. + stackChart + }; + + // Used as constructor parameter, to explicitly specify that the location (and + // other display options) are handled as a special case in the front-end. + // In this case, *no* schema will be output for this type. + struct SpecialFrontendLocation {}; + + enum class Format { + // ---------------------------------------------------- + // String types. + + // Show the URL, and handle PII sanitization + url, + // Show the file path, and handle PII sanitization. + filePath, + // Important, do not put URL or file path information here, as it will not + // be sanitized. Please be careful with including other types of PII here as + // well. + // e.g. "Label: Some String" + string, + + // ---------------------------------------------------- + // Numeric types + + // For time data that represents a duration of time. + // e.g. "Label: 5s, 5ms, 5μs" + duration, + // Data that happened at a specific time, relative to the start of the + // profile. e.g. "Label: 15.5s, 20.5ms, 30.5μs" + time, + // The following are alternatives to display a time only in a specific unit + // of time. + seconds, // "Label: 5s" + milliseconds, // "Label: 5ms" + microseconds, // "Label: 5μs" + nanoseconds, // "Label: 5ns" + // e.g. "Label: 5.55mb, 5 bytes, 312.5kb" + bytes, + // This should be a value between 0 and 1. + // "Label: 50%" + percentage, + // The integer should be used for generic representations of numbers. + // Do not use it for time information. + // "Label: 52, 5,323, 1,234,567" + integer, + // The decimal should be used for generic representations of numbers. + // Do not use it for time information. + // "Label: 52.23, 0.0054, 123,456.78" + decimal + }; + + enum class Searchable { notSearchable, searchable }; + + // Marker schema, with a non-empty list of locations where markers should be + // shown. 
+ // Tech note: Even though `aLocations` are templated arguments, they are + // assigned to an `enum class` object, so they can only be of that enum type. + template <typename... Locations> + explicit MarkerSchema(Location aLocation, Locations... aLocations) + : mLocations{aLocation, aLocations...} {} + + // Marker schema for types that have special frontend handling. + // Nothing else should be set in this case. + // Implicit to allow quick return from MarkerTypeDisplay functions. + MOZ_IMPLICIT MarkerSchema(SpecialFrontendLocation) {} + + // Caller must specify location(s) or SpecialFrontendLocation above. + MarkerSchema() = delete; + + // Optional labels in the marker chart, the chart tooltip, and the marker + // table. If not provided, the marker "name" will be used. The given string + // can contain element keys in braces to include data elements streamed by + // `StreamJSONMarkerData()`. E.g.: "This is {text}" + +# define LABEL_SETTER(name) \ + MarkerSchema& Set##name(std::string a##name) { \ + m##name = std::move(a##name); \ + return *this; \ + } + + LABEL_SETTER(ChartLabel) + LABEL_SETTER(TooltipLabel) + LABEL_SETTER(TableLabel) + +# undef LABEL_SETTER + + MarkerSchema& SetAllLabels(std::string aText) { + // Here we set the same text in each label. + // TODO: Move to a single "label" field once the front-end allows it. + SetChartLabel(aText); + SetTooltipLabel(aText); + SetTableLabel(std::move(aText)); + return *this; + } + + // Each data element that is streamed by `StreamJSONMarkerData()` can be + // displayed as indicated by using one of the `Add...` function below. + // Each `Add...` will add a line in the full marker description. Parameters: + // - `aKey`: Element property name as streamed by `StreamJSONMarkerData()`. + // - `aLabel`: Optional prefix. Defaults to the key name. + // - `aFormat`: How to format the data element value, see `Format` above. + // - `aSearchable`: Optional, indicates if the value is used in searches, + // defaults to false. 
+ + MarkerSchema& AddKeyFormat(std::string aKey, Format aFormat) { + mData.emplace_back(mozilla::VariantType<DynamicData>{}, + DynamicData{std::move(aKey), mozilla::Nothing{}, aFormat, + mozilla::Nothing{}}); + return *this; + } + + MarkerSchema& AddKeyLabelFormat(std::string aKey, std::string aLabel, + Format aFormat) { + mData.emplace_back( + mozilla::VariantType<DynamicData>{}, + DynamicData{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat, + mozilla::Nothing{}}); + return *this; + } + + MarkerSchema& AddKeyFormatSearchable(std::string aKey, Format aFormat, + Searchable aSearchable) { + mData.emplace_back(mozilla::VariantType<DynamicData>{}, + DynamicData{std::move(aKey), mozilla::Nothing{}, aFormat, + mozilla::Some(aSearchable)}); + return *this; + } + + MarkerSchema& AddKeyLabelFormatSearchable(std::string aKey, + std::string aLabel, Format aFormat, + Searchable aSearchable) { + mData.emplace_back( + mozilla::VariantType<DynamicData>{}, + DynamicData{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat, + mozilla::Some(aSearchable)}); + return *this; + } + + // The display may also include static rows. + + MarkerSchema& AddStaticLabelValue(std::string aLabel, std::string aValue) { + mData.emplace_back(mozilla::VariantType<StaticData>{}, + StaticData{std::move(aLabel), std::move(aValue)}); + return *this; + } + + // Internal streaming function. + MFBT_API void Stream(JSONWriter& aWriter, const Span<const char>& aName) &&; + + private: + MFBT_API static Span<const char> LocationToStringSpan(Location aLocation); + MFBT_API static Span<const char> FormatToStringSpan(Format aFormat); + + // List of marker display locations. Empty for SpecialFrontendLocation. + std::vector<Location> mLocations; + // Labels for different places. + std::string mChartLabel; + std::string mTooltipLabel; + std::string mTableLabel; + // Main display, made of zero or more rows of key+label+format or label+value. 
+ private: + struct DynamicData { + std::string mKey; + mozilla::Maybe<std::string> mLabel; + Format mFormat; + mozilla::Maybe<Searchable> mSearchable; + }; + struct StaticData { + std::string mLabel; + std::string mValue; + }; + using DataRow = mozilla::Variant<DynamicData, StaticData>; + using DataRowVector = std::vector<DataRow>; + + DataRowVector mData; +}; + +} // namespace mozilla + +#endif // MOZ_GECKO_PROFILER + +#endif // BaseProfilerMarkersPrerequisites_h diff --git a/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h new file mode 100644 index 0000000000..0a104193c3 --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h @@ -0,0 +1,146 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BASE_PROFILER_SHARED_LIBRARIES_H_ +#define BASE_PROFILER_SHARED_LIBRARIES_H_ + +#include "BaseProfiler.h" + +#ifndef MOZ_GECKO_PROFILER +# error Do not #include this header when MOZ_GECKO_PROFILER is not #defined. 
+#endif
+
+#include <algorithm>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <utility>
+#include <vector>
+
+// Describes one shared library: a start/end address pair, an offset, and the
+// strings identifying it (breakpad id, names, paths, version, architecture).
+class SharedLibrary {
+ public:
+  // `aArch` may be null, in which case the architecture string is left empty
+  // (constructing a std::string from a null pointer is undefined behavior).
+  SharedLibrary(uintptr_t aStart, uintptr_t aEnd, uintptr_t aOffset,
+                const std::string& aBreakpadId, const std::string& aModuleName,
+                const std::string& aModulePath, const std::string& aDebugName,
+                const std::string& aDebugPath, const std::string& aVersion,
+                const char* aArch)
+      : mStart(aStart),
+        mEnd(aEnd),
+        mOffset(aOffset),
+        mBreakpadId(aBreakpadId),
+        mModuleName(aModuleName),
+        mModulePath(aModulePath),
+        mDebugName(aDebugName),
+        mDebugPath(aDebugPath),
+        mVersion(aVersion),
+        mArch(aArch ? aArch : "") {}
+
+  // Copies and moves are member-wise, so the compiler-generated operations
+  // are correct. Spelling out only the copy operations by hand would suppress
+  // the implicit move operations, which makes `std::sort` and vector
+  // reallocation deep-copy all seven strings of every element they relocate.
+  SharedLibrary(const SharedLibrary&) = default;
+  SharedLibrary(SharedLibrary&&) = default;
+  SharedLibrary& operator=(const SharedLibrary&) = default;
+  SharedLibrary& operator=(SharedLibrary&&) = default;
+
+  // Two entries are equal when every field matches.
+  bool operator==(const SharedLibrary& other) const {
+    return (mStart == other.mStart) && (mEnd == other.mEnd) &&
+           (mOffset == other.mOffset) && (mModuleName == other.mModuleName) &&
+           (mModulePath == other.mModulePath) &&
+           (mDebugName == other.mDebugName) &&
+           (mDebugPath == other.mDebugPath) &&
+           (mBreakpadId == other.mBreakpadId) && (mVersion == other.mVersion) &&
+           (mArch == other.mArch);
+  }
+
+  uintptr_t GetStart() const { return mStart; }
+  uintptr_t GetEnd() const { return mEnd; }
+  uintptr_t GetOffset() const { return mOffset; }
+  const std::string& GetBreakpadId() const { return mBreakpadId; }
+  const std::string& GetModuleName() const { return mModuleName; }
+  const std::string& GetModulePath() const { return mModulePath; }
+  const std::string& GetDebugName() const { return mDebugName; }
+  const std::string& GetDebugPath() const { return mDebugPath; }
+  const std::string& GetVersion() const { return mVersion; }
+  const std::string& GetArch() const { return mArch; }
+
+ private:
+  // Private: outside code must use the fully-specified constructor above.
+  SharedLibrary() : mStart{0}, mEnd{0}, mOffset{0} {}
+
+  uintptr_t mStart;
+  uintptr_t mEnd;
+  uintptr_t mOffset;
+  std::string mBreakpadId;
+  std::string mModuleName;
+  std::string mModulePath;
+  std::string mDebugName;
+  std::string mDebugPath;
+  std::string mVersion;
+  std::string mArch;
+};
+
+// Orders libraries by ascending start address; used by SortByAddress().
+static bool CompareAddresses(const SharedLibrary& first,
+                             const SharedLibrary& second) {
+  return first.GetStart() < second.GetStart();
+}
+
+// A list of SharedLibrary entries.
+class SharedLibraryInfo {
+ public:
+  static SharedLibraryInfo GetInfoForSelf();
+  static void Initialize();
+
+  SharedLibraryInfo() = default;
+
+  // `entry` is taken by value, so move it into the vector rather than copying
+  // it a second time.
+  void AddSharedLibrary(SharedLibrary entry) {
+    mEntries.push_back(std::move(entry));
+  }
+
+  // No bounds checking: `i` must be less than GetSize().
+  const SharedLibrary& GetEntry(size_t i) const { return mEntries[i]; }
+
+  // Same as GetEntry(), but the returned entry can be modified.
+  SharedLibrary& GetMutableEntry(size_t i) { return mEntries[i]; }
+
+  // Removes items in the range [first, last)
+  // i.e. element at the "last" index is not removed
+  // Both indices must satisfy first <= last <= GetSize().
+  void RemoveEntries(size_t first, size_t last) {
+    mEntries.erase(mEntries.begin() + first, mEntries.begin() + last);
+  }
+
+  // Linear search using SharedLibrary::operator==.
+  bool Contains(const SharedLibrary& searchItem) const {
+    return (mEntries.end() !=
+            std::find(mEntries.begin(), mEntries.end(), searchItem));
+  }
+
+  // Number of entries currently stored.
+  size_t GetSize() const { return mEntries.size(); }
+
+  // Sorts the entries by ascending start address.
+  void SortByAddress() {
+    std::sort(mEntries.begin(), mEntries.end(), CompareAddresses);
+  }
+
+  // Removes all entries.
+  void Clear() { mEntries.clear(); }
+
+ private:
+  std::vector<SharedLibrary> mEntries;
+};
+
+#endif  // BASE_PROFILER_SHARED_LIBRARIES_H_
diff --git a/mozglue/baseprofiler/public/BaseProfilingCategory.h b/mozglue/baseprofiler/public/BaseProfilingCategory.h
new file mode 100644
index 0000000000..6892ec40f4
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingCategory.h
@@ -0,0 +1,72 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingCategory_h
+#define BaseProfilingCategory_h
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include "mozilla/Types.h"
+
+#include <cstdint>
+
+#include "ProfilingCategoryList.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// clang-format off
+
+// An enum that lists all possible category pairs in one list.
+// This is the enum that is used in profiler stack labels. Having one list that
+// includes subcategories from all categories in one list allows assigning the
+// category pair to a stack label with just one number.
+#define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color) +#define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) name, +#define CATEGORY_ENUM_END_CATEGORY +enum class ProfilingCategoryPair : uint32_t { + MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY, + CATEGORY_ENUM_SUBCATEGORY, + CATEGORY_ENUM_END_CATEGORY) + COUNT, + LAST = COUNT - 1, +}; +#undef CATEGORY_ENUM_BEGIN_CATEGORY +#undef CATEGORY_ENUM_SUBCATEGORY +#undef CATEGORY_ENUM_END_CATEGORY + +// An enum that lists just the categories without their subcategories. +#define SUPERCATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color) name, +#define SUPERCATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) +#define SUPERCATEGORY_ENUM_END_CATEGORY +enum class ProfilingCategory : uint32_t { + MOZ_PROFILING_CATEGORY_LIST(SUPERCATEGORY_ENUM_BEGIN_CATEGORY, + SUPERCATEGORY_ENUM_SUBCATEGORY, + SUPERCATEGORY_ENUM_END_CATEGORY) + COUNT, + LAST = COUNT - 1, +}; +#undef SUPERCATEGORY_ENUM_BEGIN_CATEGORY +#undef SUPERCATEGORY_ENUM_SUBCATEGORY +#undef SUPERCATEGORY_ENUM_END_CATEGORY + +// clang-format on + +struct ProfilingCategoryPairInfo { + ProfilingCategory mCategory; + uint32_t mSubcategoryIndex; + const char* mLabel; +}; + +MFBT_API const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo( + ProfilingCategoryPair aCategoryPair); + +} // namespace baseprofiler +} // namespace mozilla + +#endif /* BaseProfilingCategory_h */ diff --git a/mozglue/baseprofiler/public/BaseProfilingStack.h b/mozglue/baseprofiler/public/BaseProfilingStack.h new file mode 100644 index 0000000000..214fc1ebbf --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilingStack.h @@ -0,0 +1,520 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BaseProfilingStack_h +#define BaseProfilingStack_h + +#include "BaseProfilingCategory.h" + +#include "mozilla/Atomics.h" + +#include "BaseProfiler.h" + +#ifndef MOZ_GECKO_PROFILER +# error Do not #include this header when MOZ_GECKO_PROFILER is not #defined. +#endif + +#include <algorithm> +#include <stdint.h> + +// This file defines the classes ProfilingStack and ProfilingStackFrame. +// The ProfilingStack manages an array of ProfilingStackFrames. +// It keeps track of the "label stack" and the JS interpreter stack. +// The two stack types are interleaved. +// +// Usage: +// +// ProfilingStack* profilingStack = ...; +// +// // For label frames: +// profilingStack->pushLabelFrame(...); +// // Execute some code. When finished, pop the frame: +// profilingStack->pop(); +// +// // For JS stack frames: +// profilingStack->pushJSFrame(...); +// // Execute some code. When finished, pop the frame: +// profilingStack->pop(); +// +// +// Concurrency considerations +// +// A thread's profiling stack (and the frames inside it) is only modified by +// that thread. However, the profiling stack can be *read* by a different +// thread, the sampler thread: Whenever the profiler wants to sample a given +// thread A, the following happens: +// (1) Thread A is suspended. +// (2) The sampler thread (thread S) reads the ProfilingStack of thread A, +// including all ProfilingStackFrames that are currently in that stack +// (profilingStack->frames[0..profilingStack->stackSize()]). +// (3) Thread A is resumed. +// +// Thread suspension is achieved using platform-specific APIs; refer to each +// platform's Sampler::SuspendAndSampleAndResumeThread implementation in +// platform-*.cpp for details. 
+// +// When the thread is suspended, the values in profilingStack->stackPointer and +// in the stack frame range +// profilingStack->frames[0..profilingStack->stackPointer] need to be in a +// consistent state, so that thread S does not read partially-constructed stack +// frames. More specifically, we have two requirements: +// (1) When adding a new frame at the top of the stack, its ProfilingStackFrame +// data needs to be put in place *before* the stackPointer is incremented, +// and the compiler + CPU need to know that this order matters. +// (2) When popping a frame from the stack and then preparing the +// ProfilingStackFrame data for the next frame that is about to be pushed, +// the decrement of the stackPointer in pop() needs to happen *before* the +// ProfilingStackFrame for the new frame is being populated, and the +// compiler + CPU need to know that this order matters. +// +// We can express the relevance of these orderings in multiple ways. +// Option A is to make stackPointer an atomic with SequentiallyConsistent +// memory ordering. This would ensure that no writes in thread A would be +// reordered across any writes to stackPointer, which satisfies requirements +// (1) and (2) at the same time. Option A is the simplest. +// Option B is to use ReleaseAcquire memory ordering both for writes to +// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores +// ensure that all writes that happened *before this write in program order* are +// not reordered to happen after this write. ReleaseAcquire ordering places no +// requirements on the ordering of writes that happen *after* this write in +// program order. +// Using release-stores for writes to stackPointer expresses requirement (1), +// and using release-stores for writes to the ProfilingStackFrame fields +// expresses requirement (2). 
+// +// Option B is more complicated than option A, but has much better performance +// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching +// from option A to option B reduced the overhead of pushing+popping a +// ProfilingStackFrame by 10 nanoseconds. +// On x86/64, release-stores require no explicit hardware barriers or lock +// instructions. +// On ARM/64, option B may be slower than option A, because the compiler will +// generate hardware barriers for every single release-store instead of just +// for the writes to stackPointer. However, the actual performance impact of +// this has not yet been measured on ARM, so we're currently using option B +// everywhere. This is something that we may want to change in the future once +// we've done measurements. + +namespace mozilla { +namespace baseprofiler { + +// A call stack can be specified to the JS engine such that all JS entry/exits +// to functions push/pop a stack frame to/from the specified stack. +// +// For more detailed information, see vm/GeckoProfiler.h. +// +class ProfilingStackFrame { + // A ProfilingStackFrame represents either a label frame or a JS frame. + + // WARNING WARNING WARNING + // + // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so + // that writes to these fields are release-writes, which ensures that + // earlier writes in this thread don't get reordered after the writes to + // these fields. In particular, the decrement of the stack pointer in + // ProfilingStack::pop() is a write that *must* happen before the values in + // this ProfilingStackFrame are changed. Otherwise, the sampler thread might + // see an inconsistent state where the stack pointer still points to a + // ProfilingStackFrame which has already been popped off the stack and whose + // fields have now been partially repopulated with new values. + // See the "Concurrency considerations" paragraph at the top of this file + // for more details. 
+ + // Descriptive label for this stack frame. Must be a static string! Can be + // an empty string, but not a null pointer. + Atomic<const char*, ReleaseAcquire> label_; + + // An additional descriptive string of this frame which is combined with + // |label_| in profiler output. Need not be (and usually isn't) static. Can + // be null. + Atomic<const char*, ReleaseAcquire> dynamicString_; + + // Stack pointer for non-JS stack frames, the script pointer otherwise. + Atomic<void*, ReleaseAcquire> spOrScript; + + // ID of the JS Realm for JS stack frames. + // Must not be used on non-JS frames; it'll contain either the default 0, + // or a leftover value from a previous JS stack frame that was using this + // ProfilingStackFrame object. + Atomic<uint64_t, ReleaseAcquire> realmID_; + + // The bytecode offset for JS stack frames. + // Must not be used on non-JS frames; it'll contain either the default 0, + // or a leftover value from a previous JS stack frame that was using this + // ProfilingStackFrame object. + Atomic<int32_t, ReleaseAcquire> pcOffsetIfJS_; + + // Bits 0...15 hold the Flags (Flags::FLAGS_BITCOUNT is 16). Bits 16...31 + // hold the category pair. + Atomic<uint32_t, ReleaseAcquire> flagsAndCategoryPair_; + + public: + ProfilingStackFrame() = default; + + // Field-by-field copy. Each value is first read out of |other| (the label + // and dynamic string through their accessors, so a label determined by the + // category pair is copied as that category label) and then stored into the + // corresponding atomic of this frame. The temporaries use the exact type of + // the field they copy; in particular the realm ID is an unsigned 64-bit + // value and must not travel through a signed local. + ProfilingStackFrame& operator=(const ProfilingStackFrame& other) { + label_ = other.label(); + dynamicString_ = other.dynamicString(); + void* spScript = other.spOrScript; + spOrScript = spScript; + int32_t offsetIfJS = other.pcOffsetIfJS_; + pcOffsetIfJS_ = offsetIfJS; + uint64_t realmID = other.realmID_; + realmID_ = realmID; + uint32_t flagsAndCategory = other.flagsAndCategoryPair_; + flagsAndCategoryPair_ = flagsAndCategory; + return *this; + } + + // Reserve up to 16 bits for flags, and 16 for category pair. + enum class Flags : uint32_t { + // The first three flags describe the kind of the frame and are + // mutually exclusive. (We still give them individual bits for + // simplicity.) + + // A regular label frame. 
These usually come from AutoProfilerLabel. + IS_LABEL_FRAME = 1 << 0, + + // A special frame indicating the start of a run of JS profiling stack + // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp + // field. These frames are needed to get correct ordering between JS + // and LABEL frames because JS frames don't carry sp information. + // SP is short for "stack pointer". + IS_SP_MARKER_FRAME = 1 << 1, + + // A JS frame. + IS_JS_FRAME = 1 << 2, + + // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME + // frames can have this flag set and unset during their lifetime. + // JS_OSR frames are ignored. + JS_OSR = 1 << 3, + + // The next three are mutually exclusive. + // By default, for profiling stack frames that have both a label and a + // dynamic string, the two strings are combined into one string of the + // form "<label> <dynamicString>" during JSON serialization. The + // following flags can be used to change this preset. + STRING_TEMPLATE_METHOD = 1 << 4, // "<label>.<dynamicString>" + STRING_TEMPLATE_GETTER = 1 << 5, // "get <label>.<dynamicString>" + STRING_TEMPLATE_SETTER = 1 << 6, // "set <label>.<dynamicString>" + + // If set, causes this stack frame to be marked as "relevantForJS" in + // the profile JSON, which will make it show up in the "JS only" call + // tree view. + RELEVANT_FOR_JS = 1 << 7, + + // If set, causes the label on this ProfilingStackFrame to be ignored + // and to be replaced by the subcategory's label. + LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8, + + // Frame dynamic string does not contain user data. + NONSENSITIVE = 1 << 9, + + // A JS Baseline Interpreter frame. 
+ IS_BLINTERP_FRAME = 1 << 10, + + FLAGS_BITCOUNT = 16, + FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1 + }; + + static_assert( + uint32_t(ProfilingCategoryPair::LAST) <= + (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)), + "Too many category pairs to fit into u32 together with the " + "reserved bits for the flags"); + + bool isLabelFrame() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME); + } + + bool isSpMarkerFrame() const { + return uint32_t(flagsAndCategoryPair_) & + uint32_t(Flags::IS_SP_MARKER_FRAME); + } + + bool isJsFrame() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME); + } + + bool isOSRFrame() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR); + } + + void setIsOSRFrame(bool isOSR) { + if (isOSR) { + flagsAndCategoryPair_ = + uint32_t(flagsAndCategoryPair_) | uint32_t(Flags::JS_OSR); + } else { + flagsAndCategoryPair_ = + uint32_t(flagsAndCategoryPair_) & ~uint32_t(Flags::JS_OSR); + } + } + + const char* label() const { + uint32_t flagsAndCategoryPair = flagsAndCategoryPair_; + if (flagsAndCategoryPair & + uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) { + auto categoryPair = ProfilingCategoryPair( + flagsAndCategoryPair >> uint32_t(Flags::FLAGS_BITCOUNT)); + return GetProfilingCategoryPairInfo(categoryPair).mLabel; + } + return label_; + } + + const char* dynamicString() const { return dynamicString_; } + + void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp, + ProfilingCategoryPair aCategoryPair, uint32_t aFlags) { + label_ = aLabel; + dynamicString_ = aDynamicString; + spOrScript = sp; + // pcOffsetIfJS_ and realmID_ are not set and must not be used on label + // frames. 
+ flagsAndCategoryPair_ = + uint32_t(Flags::IS_LABEL_FRAME) | + (uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | aFlags; + MOZ_ASSERT(isLabelFrame()); + } + + void initSpMarkerFrame(void* sp) { + label_ = ""; + dynamicString_ = nullptr; + spOrScript = sp; + // pcOffsetIfJS_ and realmID_ are not set and must not be used on sp marker + // frames. + flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) | + (uint32_t(ProfilingCategoryPair::OTHER) + << uint32_t(Flags::FLAGS_BITCOUNT)); + MOZ_ASSERT(isSpMarkerFrame()); + } + + void initJsFrame(const char* aLabel, const char* aDynamicString, + void* /* JSScript* */ aScript, int32_t aOffset, + uint64_t aRealmID) { + label_ = aLabel; + dynamicString_ = aDynamicString; + spOrScript = aScript; + pcOffsetIfJS_ = aOffset; + realmID_ = aRealmID; + flagsAndCategoryPair_ = + uint32_t(Flags::IS_JS_FRAME) | (uint32_t(ProfilingCategoryPair::JS) + << uint32_t(Flags::FLAGS_BITCOUNT)); + MOZ_ASSERT(isJsFrame()); + } + + uint32_t flags() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK); + } + + ProfilingCategoryPair categoryPair() const { + return ProfilingCategoryPair(flagsAndCategoryPair_ >> + uint32_t(Flags::FLAGS_BITCOUNT)); + } + + uint64_t realmID() const { return realmID_; } + + void* stackAddress() const { + MOZ_ASSERT(!isJsFrame()); + return spOrScript; + } + + // Note that the pointer returned might be invalid. + void* rawScript() const { + MOZ_ASSERT(isJsFrame()); + return spOrScript; + } + void setRawScript(void* aScript) { + MOZ_ASSERT(isJsFrame()); + spOrScript = aScript; + } + + int32_t pcOffset() const { + MOZ_ASSERT(isJsFrame()); + return pcOffsetIfJS_; + } + + void setPCOffset(int32_t aOffset) { + MOZ_ASSERT(isJsFrame()); + pcOffsetIfJS_ = aOffset; + } + + // The offset of a pc into a script's code can actually be 0, so to + // signify a nullptr pc, use a -1 index. This is checked against in + // pc() and setPC() to set/get the right pc. 
+ static const int32_t NullPCOffset = -1; +}; + +// Each thread has its own ProfilingStack. That thread modifies the +// ProfilingStack, pushing and popping elements as necessary. +// +// The ProfilingStack is also read periodically by the profiler's sampler +// thread. This happens only when the thread that owns the ProfilingStack is +// suspended. So there are no genuine parallel accesses. +// +// However, it is possible for pushing/popping to be interrupted by a periodic +// sample. Because of this, we need pushing/popping to be effectively atomic. +// +// - When pushing a new frame, we increment the stack pointer -- making the new +// frame visible to the sampler thread -- only after the new frame has been +// fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so +// the increment is a release-store, which ensures that this store is not +// reordered before the writes of the frame. +// +// - When popping an old frame, the only operation is the decrementing of the +// stack pointer, which is obviously atomic. +// +class ProfilingStack final { + public: + ProfilingStack() = default; + + MFBT_API ~ProfilingStack(); + + void pushLabelFrame(const char* label, const char* dynamicString, void* sp, + ProfilingCategoryPair categoryPair, uint32_t flags = 0) { + // This thread is the only one that ever changes the value of + // stackPointer. + // Store the value of the atomic in a non-atomic local variable so that + // the compiler won't generate two separate loads from the atomic for + // the size check and the frames[] array indexing operation. + uint32_t stackPointerVal = stackPointer; + + if (MOZ_UNLIKELY(stackPointerVal >= capacity)) { + ensureCapacitySlow(); + } + frames[stackPointerVal].initLabelFrame(label, dynamicString, sp, + categoryPair, flags); + + // This must happen at the end! 
The compiler will not reorder this + // update because stackPointer is Atomic<..., ReleaseAcquire>, so + // the writes above will not be reordered below the stackPointer store. + // Do the read and the write as two separate statements, in order to + // make it clear that we don't need an atomic increment, which would be + // more expensive on x86 than the separate operations done here. + // However, don't use stackPointerVal here; instead, allow the compiler + // to turn this store into a non-atomic increment instruction which + // takes up less code size. + stackPointer = stackPointer + 1; + } + + void pushSpMarkerFrame(void* sp) { + uint32_t oldStackPointer = stackPointer; + + if (MOZ_UNLIKELY(oldStackPointer >= capacity)) { + ensureCapacitySlow(); + } + frames[oldStackPointer].initSpMarkerFrame(sp); + + // This must happen at the end, see the comment in pushLabelFrame. + stackPointer = oldStackPointer + 1; + } + + void pushJsOffsetFrame(const char* label, const char* dynamicString, + void* script, int32_t offset, uint64_t aRealmID) { + // This thread is the only one that ever changes the value of + // stackPointer. Only load the atomic once. + uint32_t oldStackPointer = stackPointer; + + if (MOZ_UNLIKELY(oldStackPointer >= capacity)) { + ensureCapacitySlow(); + } + frames[oldStackPointer].initJsFrame(label, dynamicString, script, offset, + aRealmID); + + // This must happen at the end, see the comment in pushLabelFrame. + stackPointer = stackPointer + 1; + } + + void pop() { + MOZ_ASSERT(stackPointer > 0); + // Do the read and the write as two separate statements, in order to + // make it clear that we don't need an atomic decrement, which would be + // more expensive on x86 than the separate operations done here. + // This thread is the only one that ever changes the value of + // stackPointer. 
+ uint32_t oldStackPointer = stackPointer; + stackPointer = oldStackPointer - 1; + } + + uint32_t stackSize() const { return stackPointer; } + uint32_t stackCapacity() const { return capacity; } + + private: + // Out of line path for expanding the buffer, since otherwise this would get + // inlined in every DOM WebIDL call. + MFBT_API MOZ_COLD void ensureCapacitySlow(); + + // No copying. + ProfilingStack(const ProfilingStack&) = delete; + void operator=(const ProfilingStack&) = delete; + + // No moving either. + ProfilingStack(ProfilingStack&&) = delete; + void operator=(ProfilingStack&&) = delete; + + uint32_t capacity = 0; + + public: + // The pointer to the stack frames, this is read from the profiler thread and + // written from the current thread. + // + // This is effectively a unique pointer. + Atomic<ProfilingStackFrame*, SequentiallyConsistent> frames{nullptr}; + + // This may exceed the capacity, so instead use the stackSize() method to + // determine the number of valid frames in stackFrames. When this is less + // than stackCapacity(), it refers to the first free stackframe past the top + // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack + // frame). + // + // WARNING WARNING WARNING + // + // This is an atomic variable that uses ReleaseAcquire memory ordering. + // See the "Concurrency considerations" paragraph at the top of this file + // for more details. + Atomic<uint32_t, ReleaseAcquire> stackPointer{0}; +}; + +class AutoGeckoProfilerEntry; +class GeckoProfilerEntryMarker; +class GeckoProfilerBaselineOSRMarker; + +// Holds a ProfilingStack pointer together with a second pointer that equals +// it only while enable(true) is in effect and is null otherwise. +class GeckoProfilerThread { + friend class AutoGeckoProfilerEntry; + friend class GeckoProfilerEntryMarker; + friend class GeckoProfilerBaselineOSRMarker; + + ProfilingStack* profilingStack_; + + // Same as profilingStack_ if the profiler is currently active, otherwise + // null. 
+ ProfilingStack* profilingStackIfEnabled_; + + public: + MFBT_API GeckoProfilerThread(); + + uint32_t stackPointer() { + MOZ_ASSERT(infraInstalled()); + return profilingStack_->stackPointer; + } + ProfilingStackFrame* stack() { return profilingStack_->frames; } + ProfilingStack* getProfilingStack() { return profilingStack_; } + ProfilingStack* getProfilingStackIfEnabled() { + return profilingStackIfEnabled_; + } + + /* + * True if the profiler infrastructure is setup. Should be true in builds + * that include profiler support except during early startup or late + * shutdown. Unrelated to the presence of the Gecko Profiler addon. + */ + bool infraInstalled() { return profilingStack_ != nullptr; } + + MFBT_API void setProfilingStack(ProfilingStack* profilingStack, bool enabled); + void enable(bool enable) { + profilingStackIfEnabled_ = enable ? profilingStack_ : nullptr; + } +}; + +} // namespace baseprofiler +} // namespace mozilla + +#endif /* BaseProfilingStack_h */ diff --git a/mozglue/baseprofiler/public/BlocksRingBuffer.h b/mozglue/baseprofiler/public/BlocksRingBuffer.h new file mode 100644 index 0000000000..6948ab8cf4 --- /dev/null +++ b/mozglue/baseprofiler/public/BlocksRingBuffer.h @@ -0,0 +1,1000 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BlocksRingBuffer_h +#define BlocksRingBuffer_h + +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/ModuloBuffer.h" +#include "mozilla/ProfileBufferIndex.h" +#include "mozilla/ScopeExit.h" + +#include <functional> +#include <string> +#include <tuple> +#include <utility> + +namespace mozilla { + +// Thread-safe Ring buffer that can store blocks of different sizes during +// defined sessions. 
+// Each *block* contains an *entry* and the entry size: +// [ entry_size | entry ] [ entry_size | entry ] ... +// *In-session* is a period of time during which `BlocksRingBuffer` allows +// reading and writing. *Out-of-session*, the `BlocksRingBuffer` object is +// still valid, but contains no data, and gracefully denies accesses. +// +// To write an entry, the buffer reserves a block of sufficient size (to contain +// user data of predetermined size), writes the entry size, and lets the caller +// fill the entry contents using ModuloBuffer::Iterator APIs and a few entry- +// specific APIs. E.g.: +// ``` +// BlocksRingBuffer brb(PowerOfTwo<BlocksRingBuffer::Length>(1024)); +// brb.ReserveAndPut([]() { return sizeof(123); }, +// [&](ProfileBufferEntryWriter& aEW) { +// aEW.WriteObject(123); +// }); +// ``` +// Other `Put...` functions may be used as shortcuts for simple entries. +// The objects given to the caller's callbacks should only be used inside the +// callbacks and not stored elsewhere, because they keep their own references to +// the BlocksRingBuffer and therefore should not live longer. +// Different types of objects may be serialized into an entry, see `Serializer` +// for more information. +// +// When reading data, the buffer iterates over blocks (it knows how to read the +// entry size, and therefore move to the next block), and lets the caller read +// the entry inside of each block. E.g.: +// ``` +// brb.Read([](BlocksRingBuffer::Reader aR) { +// for (ProfileBufferEntryReader aER : aR) { +// /* Use ProfileBufferEntryReader functions to read serialized objects. */ +// int n = aER.ReadObject<int>(); +// } +// }); +// ``` +// Different types of objects may be deserialized from an entry, see +// `Deserializer` for more information. +// +// The caller may retrieve the `ProfileBufferBlockIndex` corresponding to an +// entry (`ProfileBufferBlockIndex` is an opaque type preventing the user from +// modifying it). 
That index may later be used to get back to that particular +// entry if it still exists. +class BlocksRingBuffer { + public: + // Using ModuloBuffer as underlying circular byte buffer. + using Buffer = ModuloBuffer<uint32_t, ProfileBufferIndex>; + using Byte = Buffer::Byte; + + // Length type for total buffer (as PowerOfTwo<Length>) and each entry. + using Length = uint32_t; + + enum class ThreadSafety { WithoutMutex, WithMutex }; + + // Default constructor starts out-of-session (nothing to read or write). + explicit BlocksRingBuffer(ThreadSafety aThreadSafety) + : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {} + + // Create a buffer of the given length. + explicit BlocksRingBuffer(ThreadSafety aThreadSafety, + PowerOfTwo<Length> aLength) + : mMutex(aThreadSafety != ThreadSafety::WithoutMutex), + mMaybeUnderlyingBuffer(Some(UnderlyingBuffer(aLength))) {} + + // Take ownership of an existing buffer. + BlocksRingBuffer(ThreadSafety aThreadSafety, + UniquePtr<Buffer::Byte[]> aExistingBuffer, + PowerOfTwo<Length> aLength) + : mMutex(aThreadSafety != ThreadSafety::WithoutMutex), + mMaybeUnderlyingBuffer( + Some(UnderlyingBuffer(std::move(aExistingBuffer), aLength))) {} + + // Use an externally-owned buffer. + BlocksRingBuffer(ThreadSafety aThreadSafety, Buffer::Byte* aExternalBuffer, + PowerOfTwo<Length> aLength) + : mMutex(aThreadSafety != ThreadSafety::WithoutMutex), + mMaybeUnderlyingBuffer( + Some(UnderlyingBuffer(aExternalBuffer, aLength))) {} + + // Destructor doesn't need to do anything special. (Clearing entries would + // only update indices and stats, which won't be accessible after the object + // is destroyed anyway.) + ~BlocksRingBuffer() = default; + + // Remove underlying buffer, if any. + void Reset() { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + ResetUnderlyingBuffer(); + } + + // Create a buffer of the given length. 
+ void Set(PowerOfTwo<Length> aLength) { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + ResetUnderlyingBuffer(); + mMaybeUnderlyingBuffer.emplace(aLength); + } + + // Take ownership of an existing buffer. + void Set(UniquePtr<Buffer::Byte[]> aExistingBuffer, + PowerOfTwo<Length> aLength) { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + ResetUnderlyingBuffer(); + mMaybeUnderlyingBuffer.emplace(std::move(aExistingBuffer), aLength); + } + + // Use an externally-owned buffer. + void Set(Buffer::Byte* aExternalBuffer, PowerOfTwo<Length> aLength) { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + ResetUnderlyingBuffer(); + mMaybeUnderlyingBuffer.emplace(aExternalBuffer, aLength); + } + + // This cannot change during the lifetime of this buffer, so there's no need + // to lock. + bool IsThreadSafe() const { return mMutex.IsActivated(); } + + [[nodiscard]] bool IsInSession() const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return !!mMaybeUnderlyingBuffer; + } + + // Lock the buffer mutex and run the provided callback. + // This can be useful when the caller needs to explicitly lock down this + // buffer, but not do anything else with it. + template <typename Callback> + auto LockAndRun(Callback&& aCallback) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return std::forward<Callback>(aCallback)(); + } + + // Buffer length in bytes. + Maybe<PowerOfTwo<Length>> BufferLength() const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return mMaybeUnderlyingBuffer.map([](const UnderlyingBuffer& aBuffer) { + return aBuffer.mBuffer.BufferLength(); + }); + ; + } + + // Size of external resources. 
+ size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + if (!mMaybeUnderlyingBuffer) { + return 0; + } + return mMaybeUnderlyingBuffer->mBuffer.SizeOfExcludingThis(aMallocSizeOf); + } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); + } + + // Snapshot of the buffer state. + struct State { + // Index to the first block. + ProfileBufferBlockIndex mRangeStart; + + // Index past the last block. Equals mRangeStart if empty. + ProfileBufferBlockIndex mRangeEnd; + + // Number of blocks that have been pushed into this buffer. + uint64_t mPushedBlockCount = 0; + + // Number of blocks that have been removed from this buffer. + // Note: Live entries = pushed - cleared. + uint64_t mClearedBlockCount = 0; + }; + + // Get a snapshot of the current state. + // When out-of-session, mFirstReadIndex==mNextWriteIndex, and + // mPushedBlockCount==mClearedBlockCount==0. + // Note that these may change right after this thread-safe call, so they + // should only be used for statistical purposes. + State GetState() const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return { + mFirstReadIndex, mNextWriteIndex, + mMaybeUnderlyingBuffer ? mMaybeUnderlyingBuffer->mPushedBlockCount : 0, + mMaybeUnderlyingBuffer ? mMaybeUnderlyingBuffer->mClearedBlockCount + : 0}; + } + + class Reader; + + // Class that can iterate through blocks and provide + // `ProfileBufferEntryReader`s. + // Created through `Reader`, lives within a lock guard lifetime. + class BlockIterator { + public: +#ifdef DEBUG + ~BlockIterator() { + // No BlockIterator should live outside of a mutexed call. + mRing->mMutex.AssertCurrentThreadOwns(); + } +#endif // DEBUG + + // Comparison with other iterator, mostly used in range-for loops. 
+ bool operator==(const BlockIterator aRhs) const { + MOZ_ASSERT(mRing == aRhs.mRing); + return mBlockIndex == aRhs.mBlockIndex; + } + bool operator!=(const BlockIterator aRhs) const { + MOZ_ASSERT(mRing == aRhs.mRing); + return mBlockIndex != aRhs.mBlockIndex; + } + + // Advance to next BlockIterator. + BlockIterator& operator++() { + mBlockIndex = NextBlockIndex(); + return *this; + } + + // Dereferencing creates a `ProfileBufferEntryReader` for the entry inside + // this block. + ProfileBufferEntryReader operator*() const { + return mRing->ReaderInBlockAt(mBlockIndex); + } + + // True if this iterator is just past the last entry. + bool IsAtEnd() const { + MOZ_ASSERT(mBlockIndex <= BufferRangeEnd()); + return mBlockIndex == BufferRangeEnd(); + } + + // Can be used as reference to come back to this entry with `ReadAt()`. + ProfileBufferBlockIndex CurrentBlockIndex() const { return mBlockIndex; } + + // Index past the end of this block, which is the start of the next block. + ProfileBufferBlockIndex NextBlockIndex() const { + MOZ_ASSERT(!IsAtEnd()); + const Length entrySize = + mRing->ReaderInBlockAt(mBlockIndex).RemainingBytes(); + return ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mBlockIndex.ConvertToProfileBufferIndex() + ULEB128Size(entrySize) + + entrySize); + } + + // Index of the first block in the whole buffer. + ProfileBufferBlockIndex BufferRangeStart() const { + return mRing->mFirstReadIndex; + } + + // Index past the last block in the whole buffer. + ProfileBufferBlockIndex BufferRangeEnd() const { + return mRing->mNextWriteIndex; + } + + private: + // Only a Reader can instantiate a BlockIterator. + friend class Reader; + + BlockIterator(const BlocksRingBuffer& aRing, + ProfileBufferBlockIndex aBlockIndex) + : mRing(WrapNotNull(&aRing)), mBlockIndex(aBlockIndex) { + // No BlockIterator should live outside of a mutexed call. 
+ mRing->mMutex.AssertCurrentThreadOwns(); + } + + // Using a non-null pointer instead of a reference, to allow copying. + // This BlockIterator should only live inside one of the thread-safe + // BlocksRingBuffer functions, for this reference to stay valid. + NotNull<const BlocksRingBuffer*> mRing; + ProfileBufferBlockIndex mBlockIndex; + }; + + // Class that can create `BlockIterator`s (e.g., for range-for), or just + // iterate through entries; lives within a lock guard lifetime. + class MOZ_RAII Reader { + public: + Reader(const Reader&) = delete; + Reader& operator=(const Reader&) = delete; + Reader(Reader&&) = delete; + Reader& operator=(Reader&&) = delete; + +#ifdef DEBUG + ~Reader() { + // No Reader should live outside of a mutexed call. + mRing.mMutex.AssertCurrentThreadOwns(); + } +#endif // DEBUG + + // Index of the first block in the whole buffer. + ProfileBufferBlockIndex BufferRangeStart() const { + return mRing.mFirstReadIndex; + } + + // Index past the last block in the whole buffer. + ProfileBufferBlockIndex BufferRangeEnd() const { + return mRing.mNextWriteIndex; + } + + // Iterators to the first and past-the-last blocks. + // Compatible with range-for (see `ForEach` below as example). + BlockIterator begin() const { + return BlockIterator(mRing, BufferRangeStart()); + } + // Note that a `BlockIterator` at the `end()` should not be dereferenced, as + // there is no actual block there! + BlockIterator end() const { return BlockIterator(mRing, BufferRangeEnd()); } + + // Get a `BlockIterator` at the given `ProfileBufferBlockIndex`, clamped to + // the stored range. Note that a `BlockIterator` at the `end()` should not + // be dereferenced, as there is no actual block there! + BlockIterator At(ProfileBufferBlockIndex aBlockIndex) const { + if (aBlockIndex < BufferRangeStart()) { + // Anything before the range (including null ProfileBufferBlockIndex) is + // clamped at the beginning. 
+ return begin(); + } + // Otherwise we at least expect the index to be valid (pointing exactly at + // a live block, or just past the end.) + mRing.AssertBlockIndexIsValidOrEnd(aBlockIndex); + return BlockIterator(mRing, aBlockIndex); + } + + // Run `aCallback(ProfileBufferEntryReader&)` on each entry from first to + // last. Callback should not store `ProfileBufferEntryReader`, as it may + // become invalid after this thread-safe call. + template <typename Callback> + void ForEach(Callback&& aCallback) const { + for (ProfileBufferEntryReader reader : *this) { + aCallback(reader); + } + } + + private: + friend class BlocksRingBuffer; + + explicit Reader(const BlocksRingBuffer& aRing) : mRing(aRing) { + // No Reader should live outside of a mutexed call. + mRing.mMutex.AssertCurrentThreadOwns(); + } + + // This Reader should only live inside one of the thread-safe + // BlocksRingBuffer functions, for this reference to stay valid. + const BlocksRingBuffer& mRing; + }; + + // Call `aCallback(BlocksRingBuffer::Reader*)` (nullptr when out-of-session), + // and return whatever `aCallback` returns. Callback should not store + // `Reader`, because it may become invalid after this call. + template <typename Callback> + auto Read(Callback&& aCallback) const { + { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + if (MOZ_LIKELY(mMaybeUnderlyingBuffer)) { + Reader reader(*this); + return std::forward<Callback>(aCallback)(&reader); + } + } + return std::forward<Callback>(aCallback)(nullptr); + } + + // Call `aCallback(ProfileBufferEntryReader&)` on each item. + // Callback should not store `ProfileBufferEntryReader`, because it may become + // invalid after this call. 
+ template <typename Callback> + void ReadEach(Callback&& aCallback) const { + Read([&](Reader* aReader) { + if (MOZ_LIKELY(aReader)) { + aReader->ForEach(aCallback); + } + }); + } + + // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at + // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if + // out-of-session, or if that entry doesn't exist anymore, or if we've reached + // just past the last entry. Return whatever `aCallback` returns. Callback + // should not store `ProfileBufferEntryReader`, because it may become invalid + // after this call. + template <typename Callback> + auto ReadAt(ProfileBufferBlockIndex aBlockIndex, Callback&& aCallback) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + MOZ_ASSERT(aBlockIndex <= mNextWriteIndex); + Maybe<ProfileBufferEntryReader> maybeEntryReader; + if (MOZ_LIKELY(mMaybeUnderlyingBuffer) && aBlockIndex >= mFirstReadIndex && + aBlockIndex < mNextWriteIndex) { + AssertBlockIndexIsValid(aBlockIndex); + maybeEntryReader.emplace(ReaderInBlockAt(aBlockIndex)); + } + return std::forward<Callback>(aCallback)(std::move(maybeEntryReader)); + } + + // Main function to write entries. + // Reserve `aCallbackBytes()` bytes, call `aCallback()` with a pointer to an + // on-stack temporary ProfileBufferEntryWriter (nullptr when out-of-session), + // and return whatever `aCallback` returns. Callback should not store + // `ProfileBufferEntryWriter`, because it may become invalid after this + // thread-safe call. Note: `aCallbackBytes` is a callback instead of a simple + // value, to delay this potentially-expensive computation until after we're + // checked that we're in-session; use `Put(Length, Callback)` below if you + // know the size already. 
  // The callback receives a `Maybe<ProfileBufferEntryWriter>&`; it is
  // `Nothing` when out-of-session, otherwise it covers exactly the reserved
  // entry bytes (the ULEB128 size prefix has already been written).
  template <typename CallbackBytes, typename Callback>
  auto ReserveAndPut(CallbackBytes aCallbackBytes, Callback&& aCallback) {
    Maybe<ProfileBufferEntryWriter> maybeEntryWriter;

    // Held until this function returns, i.e., also while `aCallback` runs.
    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);

    if (MOZ_LIKELY(mMaybeUnderlyingBuffer)) {
      const Length entryBytes = std::forward<CallbackBytes>(aCallbackBytes)();
      MOZ_RELEASE_ASSERT(entryBytes > 0);
      const Length bufferBytes =
          mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value();
      // The whole block (size prefix + entry) must fit in the buffer; a block
      // that fills the buffer exactly is accepted.
      MOZ_RELEASE_ASSERT(entryBytes <= bufferBytes - ULEB128Size(entryBytes),
                         "Entry would wrap and overwrite itself");
      // Compute block size from the requested entry size.
      const Length blockBytes = ULEB128Size(entryBytes) + entryBytes;
      // We will put this new block at the end of the current buffer.
      const ProfileBufferIndex blockIndex =
          mNextWriteIndex.ConvertToProfileBufferIndex();
      // Compute the end of this new block.
      const ProfileBufferIndex blockEnd = blockIndex + blockBytes;
      // Evict the oldest blocks, one at a time, until the live range
      // [mFirstReadIndex, blockEnd) is no longer than the buffer.
      while (blockEnd >
             mFirstReadIndex.ConvertToProfileBufferIndex() + bufferBytes) {
        // About to trample on an old block.
        ProfileBufferEntryReader reader = ReaderInBlockAt(mFirstReadIndex);
        mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
        // Move the buffer reading start past this cleared block.
        mFirstReadIndex = ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
            mFirstReadIndex.ConvertToProfileBufferIndex() +
            ULEB128Size(reader.RemainingBytes()) + reader.RemainingBytes());
      }
      // Store the new end of buffer.
      mNextWriteIndex =
          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(blockEnd);
      mMaybeUnderlyingBuffer->mPushedBlockCount += 1;
      // Finally, let aCallback write into the entry.
      mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(maybeEntryWriter,
                                                        blockIndex, blockEnd);
      MOZ_ASSERT(maybeEntryWriter.isSome(),
                 "Non-empty entry should always create an EntryWriter");
      // Write the size prefix; what remains for the callback is the entry.
      maybeEntryWriter->WriteULEB128(entryBytes);
      MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == entryBytes);
    }

#ifdef DEBUG
    // After the callback has run, check that it wrote exactly the number of
    // bytes it asked for (only relevant when a writer was created).
    auto checkAllWritten = MakeScopeExit([&]() {
      MOZ_ASSERT(!maybeEntryWriter || maybeEntryWriter->RemainingBytes() == 0);
    });
#endif  // DEBUG
    return std::forward<Callback>(aCallback)(maybeEntryWriter);
  }

  // Add a new entry of known size, call `aCallback` with a reference to a
  // temporary `Maybe<ProfileBufferEntryWriter>` (`Nothing` when
  // out-of-session), and return whatever `aCallback` returns. Callback should
  // not store the `ProfileBufferEntryWriter`, as it may become invalid after
  // this thread-safe call.
  template <typename Callback>
  auto Put(Length aBytes, Callback&& aCallback) {
    return ReserveAndPut([aBytes]() { return aBytes; },
                         std::forward<Callback>(aCallback));
  }

  // Add a new entry copied from the given buffer, return block index.
  // Returns the "empty" (default-constructed) index when out-of-session.
  ProfileBufferBlockIndex PutFrom(const void* aSrc, Length aBytes) {
    return ReserveAndPut([aBytes]() { return aBytes; },
                         [&](Maybe<ProfileBufferEntryWriter>& aEntryWriter) {
                           if (MOZ_UNLIKELY(aEntryWriter.isNothing())) {
                             // Out-of-session, return "empty" index.
                             return ProfileBufferBlockIndex{};
                           }
                           aEntryWriter->WriteBytes(aSrc, aBytes);
                           return aEntryWriter->CurrentBlockIndex();
                         });
  }

  // Add a new single entry with *all* given objects (using a Serializer for
  // each), return block index.
  // Returns the "empty" (default-constructed) index when out-of-session.
  template <typename... Ts>
  ProfileBufferBlockIndex PutObjects(const Ts&... aTs) {
    static_assert(sizeof...(Ts) > 0,
                  "PutObjects must be given at least one object.");
    return ReserveAndPut(
        // The total size is only computed once we know we are in-session.
        [&]() { return ProfileBufferEntryWriter::SumBytes(aTs...); },
        [&](Maybe<ProfileBufferEntryWriter>& aEntryWriter) {
          if (MOZ_UNLIKELY(aEntryWriter.isNothing())) {
            // Out-of-session, return "empty" index.
            return ProfileBufferBlockIndex{};
          }
          aEntryWriter->WriteObjects(aTs...);
          return aEntryWriter->CurrentBlockIndex();
        });
  }

  // Add a new entry copied from the given object, return block index.
  template <typename T>
  ProfileBufferBlockIndex PutObject(const T& aOb) {
    return PutObjects(aOb);
  }

  // Append the contents of another BlocksRingBuffer to this one.
  // Returns the index at which the copied bytes start in this buffer, or the
  // "empty" index if nothing was copied (either buffer out-of-session, or
  // `aSrc` empty).
  // NOTE(review): this locks our mutex and then `aSrc`'s; presumably callers
  // must not pass `*this` as `aSrc` -- confirm against the mutex semantics.
  ProfileBufferBlockIndex AppendContents(const BlocksRingBuffer& aSrc) {
    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);

    if (MOZ_UNLIKELY(!mMaybeUnderlyingBuffer)) {
      // We are out-of-session, could not append contents.
      return ProfileBufferBlockIndex{};
    }

    baseprofiler::detail::BaseProfilerMaybeAutoLock srcLock(aSrc.mMutex);

    if (MOZ_UNLIKELY(!aSrc.mMaybeUnderlyingBuffer)) {
      // The other BRB is out-of-session, nothing to copy, we're done.
      return ProfileBufferBlockIndex{};
    }

    // The live range of the source is copied as one contiguous run of bytes
    // (block size prefixes included).
    const ProfileBufferIndex srcStartIndex =
        aSrc.mFirstReadIndex.ConvertToProfileBufferIndex();
    const ProfileBufferIndex srcEndIndex =
        aSrc.mNextWriteIndex.ConvertToProfileBufferIndex();
    const Length bytesToCopy = static_cast<Length>(srcEndIndex - srcStartIndex);

    if (MOZ_UNLIKELY(bytesToCopy == 0)) {
      // The other BRB is empty, nothing to copy, we're done.
      return ProfileBufferBlockIndex{};
    }

    const Length bufferBytes =
        mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value();

    MOZ_RELEASE_ASSERT(bytesToCopy <= bufferBytes,
                       "Entry would wrap and overwrite itself");

    // We will put all copied blocks at the end of the current buffer.
    const ProfileBufferIndex dstStartIndex =
        mNextWriteIndex.ConvertToProfileBufferIndex();
    // Compute where the copy will end...
    const ProfileBufferIndex dstEndIndex = dstStartIndex + bytesToCopy;

    // ... and evict our oldest blocks until the copy fits (same eviction
    // scheme as in `ReserveAndPut()`).
    while (dstEndIndex >
           mFirstReadIndex.ConvertToProfileBufferIndex() + bufferBytes) {
      // About to trample on an old block.
      ProfileBufferEntryReader reader = ReaderInBlockAt(mFirstReadIndex);
      mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
      // Move the buffer reading start past this cleared block.
      mFirstReadIndex = ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
          mFirstReadIndex.ConvertToProfileBufferIndex() +
          ULEB128Size(reader.RemainingBytes()) + reader.RemainingBytes());
    }

    // Store the new end of buffer.
    mNextWriteIndex =
        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(dstEndIndex);
    // Update our pushed count with the number of live blocks we are copying.
    mMaybeUnderlyingBuffer->mPushedBlockCount +=
        aSrc.mMaybeUnderlyingBuffer->mPushedBlockCount -
        aSrc.mMaybeUnderlyingBuffer->mClearedBlockCount;

    // Raw byte copy from the source's live range into the reserved range.
    auto reader = aSrc.mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
        srcStartIndex, srcEndIndex, nullptr, nullptr);
    auto writer = mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(
        dstStartIndex, dstEndIndex);
    writer.WriteFromReader(reader, bytesToCopy);

    return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(dstStartIndex);
  }

  // Clear all entries: Move read index to the end so that these entries cannot
  // be read anymore.
  void Clear() {
    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
    ClearAllEntries();
  }

  // Clear all entries strictly before aBlockIndex, and move read index to
  // aBlockIndex so that these entries cannot be read anymore.
  // Does nothing when out-of-session, or when aBlockIndex is at or before the
  // current read index.
  void ClearBefore(ProfileBufferBlockIndex aBlockIndex) {
    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
    if (!mMaybeUnderlyingBuffer) {
      return;
    }
    // Don't accept a not-yet-written index. One-past-the-end is ok.
    MOZ_ASSERT(aBlockIndex <= mNextWriteIndex);
    if (aBlockIndex <= mFirstReadIndex) {
      // Already cleared.
      return;
    }
    if (aBlockIndex == mNextWriteIndex) {
      // Right past the end, just clear everything.
      ClearAllEntries();
      return;
    }
    // Otherwise we need to clear a subset of entries.
    AssertBlockIndexIsValid(aBlockIndex);
    // Just count skipped entries.
    Reader reader(*this);
    BlockIterator it = reader.begin();
    for (; it.CurrentBlockIndex() < aBlockIndex; ++it) {
      MOZ_ASSERT(it.CurrentBlockIndex() < reader.end().CurrentBlockIndex());
      mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
    }
    // The walk must land exactly on the requested block start.
    MOZ_ASSERT(it.CurrentBlockIndex() == aBlockIndex);
    // Move read index to given index, so there's effectively no more entries
    // before.
    mFirstReadIndex = aBlockIndex;
  }

#ifdef DEBUG
  // DEBUG-only: print the read and write indices (each followed by its value
  // masked with `buffer length - 1`), then dump the underlying buffer.
  void Dump() const {
    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
    if (!mMaybeUnderlyingBuffer) {
      printf("empty BlocksRingBuffer\n");
      return;
    }
    using ULL = unsigned long long;
    printf("start=%llu (%llu) end=%llu (%llu) - ",
           ULL(mFirstReadIndex.ConvertToProfileBufferIndex()),
           ULL(mFirstReadIndex.ConvertToProfileBufferIndex() &
               (mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value() - 1)),
           ULL(mNextWriteIndex.ConvertToProfileBufferIndex()),
           ULL(mNextWriteIndex.ConvertToProfileBufferIndex() &
               (mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value() - 1)));
    mMaybeUnderlyingBuffer->mBuffer.Dump();
  }
#endif  // DEBUG

 private:
  // In DEBUG mode, assert that `aBlockIndex` is a valid index for a live block.
  // (Not just in range, but points exactly at the start of a block.)
  // Slow, so avoid it for internal checks; this is more to check what callers
  // provide us.
+ void AssertBlockIndexIsValid(ProfileBufferBlockIndex aBlockIndex) const { +#ifdef DEBUG + mMutex.AssertCurrentThreadOwns(); + MOZ_ASSERT(aBlockIndex >= mFirstReadIndex); + MOZ_ASSERT(aBlockIndex < mNextWriteIndex); + // Quick check (default), or slow check (change '1' to '0') below: +# if 1 + // Quick check that this looks like a valid block start. + // Read the entry size at the start of the block. + const Length entryBytes = ReaderInBlockAt(aBlockIndex).RemainingBytes(); + MOZ_ASSERT(entryBytes > 0, "Empty entries are not allowed"); + MOZ_ASSERT( + entryBytes < mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value() - + ULEB128Size(entryBytes), + "Entry would wrap and overwrite itself"); + // The end of the block should be inside the live buffer range. + MOZ_ASSERT(aBlockIndex.ConvertToProfileBufferIndex() + + ULEB128Size(entryBytes) + entryBytes <= + mNextWriteIndex.ConvertToProfileBufferIndex()); +# else + // Slow check that the index is really the start of the block. + // This kills performances, as it reads from the first index until + // aBlockIndex. Only use to debug issues locally. + Reader reader(*this); + BlockIterator it = reader.begin(); + for (; it.CurrentBlockIndex() < aBlockIndex; ++it) { + MOZ_ASSERT(it.CurrentBlockIndex() < reader.end().CurrentBlockIndex()); + } + MOZ_ASSERT(it.CurrentBlockIndex() == aBlockIndex); +# endif +#endif // DEBUG + } + + // In DEBUG mode, assert that `aBlockIndex` is a valid index for a live block, + // or is just past-the-end. (Not just in range, but points exactly at the + // start of a block.) Slow, so avoid it for internal checks; this is more to + // check what callers provide us. + void AssertBlockIndexIsValidOrEnd(ProfileBufferBlockIndex aBlockIndex) const { +#ifdef DEBUG + mMutex.AssertCurrentThreadOwns(); + if (aBlockIndex == mNextWriteIndex) { + return; + } + AssertBlockIndexIsValid(aBlockIndex); +#endif // DEBUG + } + + // Create a reader for the block starting at aBlockIndex. 
  // Must be called with the mutex held, while in-session, and with an index in
  // the live range [mFirstReadIndex, mNextWriteIndex). The returned reader
  // covers only the entry bytes: the ULEB128 size prefix is skipped.
  ProfileBufferEntryReader ReaderInBlockAt(
      ProfileBufferBlockIndex aBlockIndex) const {
    mMutex.AssertCurrentThreadOwns();
    MOZ_ASSERT(mMaybeUnderlyingBuffer.isSome());
    MOZ_ASSERT(aBlockIndex >= mFirstReadIndex);
    MOZ_ASSERT(aBlockIndex < mNextWriteIndex);
    // Create a reader from the given index until the end of the buffer.
    ProfileBufferEntryReader reader =
        mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
            aBlockIndex.ConvertToProfileBufferIndex(),
            mNextWriteIndex.ConvertToProfileBufferIndex(), nullptr, nullptr);
    // Read the block size at the beginning.
    const Length entryBytes = reader.ReadULEB128<Length>();
    // Make sure we don't overshoot the buffer.
    MOZ_RELEASE_ASSERT(entryBytes <= reader.RemainingBytes());
    // Index just past this block: block start + size prefix + entry.
    ProfileBufferIndex nextBlockIndex =
        aBlockIndex.ConvertToProfileBufferIndex() + ULEB128Size(entryBytes) +
        entryBytes;
    // And reduce the reader to the entry area. Only provide a next-block-index
    // if it's not at the end of the buffer (i.e., there's an actual block
    // there).
    reader = mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
        aBlockIndex.ConvertToProfileBufferIndex() + ULEB128Size(entryBytes),
        nextBlockIndex, aBlockIndex,
        (nextBlockIndex < mNextWriteIndex.ConvertToProfileBufferIndex())
            ? ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
                  nextBlockIndex)
            : ProfileBufferBlockIndex{});
    return reader;
  }

  // Create a reader over the whole live range (all blocks, size prefixes
  // included), with no block indices attached. Returns a default-constructed
  // reader when out-of-session. The mutex must be held by the caller.
  ProfileBufferEntryReader FullBufferReader() const {
    mMutex.AssertCurrentThreadOwns();
    if (!mMaybeUnderlyingBuffer) {
      return {};
    }
    return mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
        mFirstReadIndex.ConvertToProfileBufferIndex(),
        mNextWriteIndex.ConvertToProfileBufferIndex(), nullptr, nullptr);
  }

  // Clear all entries: Move read index to the end so that these entries cannot
  // be read anymore.
  // The mutex must be held by the caller; no-op when out-of-session.
  void ClearAllEntries() {
    mMutex.AssertCurrentThreadOwns();
    if (!mMaybeUnderlyingBuffer) {
      return;
    }
    // Mark all entries pushed so far as cleared.
    mMaybeUnderlyingBuffer->mClearedBlockCount =
        mMaybeUnderlyingBuffer->mPushedBlockCount;
    // Move read index to write index, so there's effectively no more entries
    // that can be read. (Not setting both to 0, in case user is keeping
    // `ProfileBufferBlockIndex`'es to old entries.)
    mFirstReadIndex = mNextWriteIndex;
  }

  // If there is an underlying buffer, clear all entries, and discard the
  // buffer. This BlocksRingBuffer will now gracefully reject all API calls, and
  // is in a state where a new underlying buffer may be set.
  // The mutex must be held by the caller.
  void ResetUnderlyingBuffer() {
    mMutex.AssertCurrentThreadOwns();
    if (!mMaybeUnderlyingBuffer) {
      return;
    }
    ClearAllEntries();
    mMaybeUnderlyingBuffer.reset();
  }

  // Used to de/serialize a BlocksRingBuffer (e.g., containing a backtrace).
  friend ProfileBufferEntryWriter::Serializer<BlocksRingBuffer>;
  friend ProfileBufferEntryReader::Deserializer<BlocksRingBuffer>;
  friend ProfileBufferEntryWriter::Serializer<UniquePtr<BlocksRingBuffer>>;
  friend ProfileBufferEntryReader::Deserializer<UniquePtr<BlocksRingBuffer>>;

  // Mutex guarding the following members.
  mutable baseprofiler::detail::BaseProfilerMaybeMutex mMutex;

  // Byte storage plus statistics; only exists while in-session.
  struct UnderlyingBuffer {
    // Create a buffer of the given length.
    explicit UnderlyingBuffer(PowerOfTwo<Length> aLength) : mBuffer(aLength) {
      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
                 "Buffer should be able to contain more than a block size");
    }

    // Take ownership of an existing buffer.
    UnderlyingBuffer(UniquePtr<Buffer::Byte[]> aExistingBuffer,
                     PowerOfTwo<Length> aLength)
        : mBuffer(std::move(aExistingBuffer), aLength) {
      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
                 "Buffer should be able to contain more than a block size");
    }

    // Use an externally-owned buffer.
    UnderlyingBuffer(Buffer::Byte* aExternalBuffer, PowerOfTwo<Length> aLength)
        : mBuffer(aExternalBuffer, aLength) {
      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
                 "Buffer should be able to contain more than a block size");
    }

    // Only allow move-construction.
    UnderlyingBuffer(UnderlyingBuffer&&) = default;

    // Copies and move-assignment are explicitly disallowed.
    UnderlyingBuffer(const UnderlyingBuffer&) = delete;
    UnderlyingBuffer& operator=(const UnderlyingBuffer&) = delete;
    UnderlyingBuffer& operator=(UnderlyingBuffer&&) = delete;

    // Underlying circular byte buffer.
    Buffer mBuffer;

    // Statistics.
    // Number of blocks ever pushed, and number of blocks cleared (evicted or
    // explicitly cleared); their difference is the number of live blocks.
    uint64_t mPushedBlockCount = 0;
    uint64_t mClearedBlockCount = 0;
  };

  // Underlying buffer, with stats.
  // Only valid during in-session period.
  Maybe<UnderlyingBuffer> mMaybeUnderlyingBuffer;

  // Index to the first block to be read (or cleared). Initialized to 1 because
  // 0 is reserved for the "empty" ProfileBufferBlockIndex value. Kept between
  // sessions, so that stored indices from one session will be gracefully denied
  // in future sessions.
  ProfileBufferBlockIndex mFirstReadIndex =
      ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
          ProfileBufferIndex(1));
  // Index where the next new block should be allocated. Initialized to 1
  // because 0 is reserved for the "empty" ProfileBufferBlockIndex value. Kept
  // between sessions, so that stored indices from one session will be
  // gracefully denied in future sessions.
  ProfileBufferBlockIndex mNextWriteIndex =
      ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
          ProfileBufferIndex(1));
};

// ----------------------------------------------------------------------------
// BlocksRingBuffer serialization

// A BlocksRingBuffer can hide another one!
// This will be used to store marker backtraces; They can be read back into a
// UniquePtr<BlocksRingBuffer>.
// Format: len (ULEB128) | start | end | buffer (len bytes) | pushed | cleared
// len==0 marks an out-of-session buffer, or empty buffer.
template <>
struct ProfileBufferEntryWriter::Serializer<BlocksRingBuffer> {
  // Number of bytes that `Write()` will produce for `aBuffer`.
  static Length Bytes(const BlocksRingBuffer& aBuffer) {
    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(aBuffer.mMutex);
    if (aBuffer.mMaybeUnderlyingBuffer.isNothing()) {
      // Out-of-session, we only need 1 byte to store a length of 0.
      return ULEB128Size<Length>(0);
    }
    const auto start = aBuffer.mFirstReadIndex.ConvertToProfileBufferIndex();
    const auto end = aBuffer.mNextWriteIndex.ConvertToProfileBufferIndex();
    const auto len = end - start;
    if (len == 0) {
      // In-session but empty, also store a length of 0.
      return ULEB128Size<Length>(0);
    }
    // One term per field of the format described above, in the same order.
    return ULEB128Size(len) + sizeof(start) + sizeof(end) + len +
           sizeof(aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount) +
           sizeof(aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount);
  }

  // Serialize `aBuffer` into `aEW`, following the format described above.
  static void Write(ProfileBufferEntryWriter& aEW,
                    const BlocksRingBuffer& aBuffer) {
    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(aBuffer.mMutex);
    if (aBuffer.mMaybeUnderlyingBuffer.isNothing()) {
      // Out-of-session, only store a length of 0.
      aEW.WriteULEB128<Length>(0);
      return;
    }
    const auto start = aBuffer.mFirstReadIndex.ConvertToProfileBufferIndex();
    const auto end = aBuffer.mNextWriteIndex.ConvertToProfileBufferIndex();
    MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
    const auto len = static_cast<Length>(end - start);
    if (len == 0) {
      // In-session but empty, only store a length of 0.
      aEW.WriteULEB128<Length>(0);
      return;
    }
    // In-session.
    // Store buffer length, start and end indices.
    aEW.WriteULEB128<Length>(len);
    aEW.WriteObject(start);
    aEW.WriteObject(end);
    // Write all the bytes.
    auto reader = aBuffer.FullBufferReader();
    aEW.WriteFromReader(reader, reader.RemainingBytes());
    // And write stats.
    aEW.WriteObject(aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount);
    aEW.WriteObject(aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount);
  }
};

// A serialized BlocksRingBuffer can be read into an empty buffer (either
// out-of-session, or in-session with enough room).
template <>
struct ProfileBufferEntryReader::Deserializer<BlocksRingBuffer> {
  // Read a serialized buffer from `aER` into `aBuffer`. A stored length of 0
  // leaves `aBuffer` untouched.
  static void ReadInto(ProfileBufferEntryReader& aER,
                       BlocksRingBuffer& aBuffer) {
    // Expect an empty buffer, as we're going to overwrite it.
    MOZ_ASSERT(aBuffer.GetState().mRangeStart == aBuffer.GetState().mRangeEnd);
    // Read the stored buffer length.
    const auto len = aER.ReadULEB128<Length>();
    if (len == 0) {
      // 0-length means an "uninteresting" buffer, just return now.
      return;
    }
    // We have a non-empty buffer to read.
    if (aBuffer.BufferLength().isSome()) {
      // Output buffer is in-session (i.e., it already has a memory buffer
      // attached). Make sure the caller allocated enough space.
      MOZ_RELEASE_ASSERT(aBuffer.BufferLength()->Value() >= len);
    } else {
      // Output buffer is out-of-session, attach a new memory buffer.
      // NOTE(review): `PowerOfTwo<Length>(len)` presumably rounds `len` up to
      // a power of two; the assertion below checks the result is big enough.
      aBuffer.Set(PowerOfTwo<Length>(len));
      MOZ_ASSERT(aBuffer.BufferLength()->Value() >= len);
    }
    // Read start and end indices.
    const auto start = aER.ReadObject<ProfileBufferIndex>();
    aBuffer.mFirstReadIndex =
        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(start);
    const auto end = aER.ReadObject<ProfileBufferIndex>();
    aBuffer.mNextWriteIndex =
        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(end);
    MOZ_ASSERT(end - start == len);
    // Copy bytes into the buffer.
    auto writer =
        aBuffer.mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(start, end);
    writer.WriteFromReader(aER, end - start);
    MOZ_ASSERT(writer.RemainingBytes() == 0);
    // Finally copy stats.
    aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount = aER.ReadObject<decltype(
        aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount)>();
    aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount =
        aER.ReadObject<decltype(
            aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount)>();
  }

  // We cannot output a BlocksRingBuffer object (not copyable), use `ReadInto()`
  // or `aER.ReadObject<UniquePtr<BlocksRingBuffer>>()` instead.
  static BlocksRingBuffer Read(ProfileBufferEntryReader& aER) = delete;
};

// A BlocksRingBuffer is usually referenced through a UniquePtr, for
// convenience we support (de)serializing that UniquePtr directly.
// This is compatible with the non-UniquePtr serialization above, with a null
// pointer being treated like an out-of-session or empty buffer; and any of
// these would be deserialized into a null pointer.
template <>
struct ProfileBufferEntryWriter::Serializer<UniquePtr<BlocksRingBuffer>> {
  // Number of bytes that `Write()` will produce for `aBufferUPtr`.
  static Length Bytes(const UniquePtr<BlocksRingBuffer>& aBufferUPtr) {
    if (!aBufferUPtr) {
      // Null pointer, treat it like an empty buffer, i.e., write length of 0.
      return ULEB128Size<Length>(0);
    }
    // Otherwise write the pointed-at BlocksRingBuffer (which could be
    // out-of-session or empty.)
    return SumBytes(*aBufferUPtr);
  }

  // Serialize the pointed-at buffer, or a lone 0 length for a null pointer.
  static void Write(ProfileBufferEntryWriter& aEW,
                    const UniquePtr<BlocksRingBuffer>& aBufferUPtr) {
    if (!aBufferUPtr) {
      // Null pointer, treat it like an empty buffer, i.e., write length of 0.
      aEW.WriteULEB128<Length>(0);
      return;
    }
    // Otherwise write the pointed-at BlocksRingBuffer (which could be
    // out-of-session or empty.)
    aEW.WriteObject(*aBufferUPtr);
  }
};

template <>
struct ProfileBufferEntryReader::Deserializer<UniquePtr<BlocksRingBuffer>> {
  // Replace `aBuffer` with the deserialized buffer (null for a 0 length).
  static void ReadInto(ProfileBufferEntryReader& aER,
                       UniquePtr<BlocksRingBuffer>& aBuffer) {
    aBuffer = Read(aER);
  }

  // Deserialize into a newly-allocated, mutex-less BlocksRingBuffer; returns
  // a null pointer when the stored length is 0.
  static UniquePtr<BlocksRingBuffer> Read(ProfileBufferEntryReader& aER) {
    UniquePtr<BlocksRingBuffer> bufferUPtr;
    // Keep a copy of the reader before reading the length, so we can restart
    // from here below.
    ProfileBufferEntryReader readerBeforeLen = aER;
    // Read the stored buffer length.
    const auto len = aER.ReadULEB128<Length>();
    if (len == 0) {
      // 0-length means an "uninteresting" buffer, just return nullptr.
      return bufferUPtr;
    }
    // We have a non-empty buffer.
    // allocate an empty BlocksRingBuffer without mutex.
    bufferUPtr = MakeUnique<BlocksRingBuffer>(
        BlocksRingBuffer::ThreadSafety::WithoutMutex);
    // Rewind the reader before the length and deserialize the contents, using
    // the non-UniquePtr Deserializer.
    aER = readerBeforeLen;
    aER.ReadIntoObject(*bufferUPtr);
    return bufferUPtr;
  }
};

}  // namespace mozilla

#endif  // BlocksRingBuffer_h
diff --git a/mozglue/baseprofiler/public/ModuloBuffer.h b/mozglue/baseprofiler/public/ModuloBuffer.h new file mode 100644 index 0000000000..80e765279e --- /dev/null +++ b/mozglue/baseprofiler/public/ModuloBuffer.h @@ -0,0 +1,618 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ModuloBuffer_h +#define ModuloBuffer_h + +#include "mozilla/leb128iterator.h" +#include "mozilla/Maybe.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/NotNull.h" +#include "mozilla/PowerOfTwo.h" +#include "mozilla/ProfileBufferEntrySerialization.h" +#include "mozilla/UniquePtr.h" + +#include <functional> +#include <iterator> +#include <limits> +#include <type_traits> + +namespace mozilla { + +// The ModuloBuffer class is a circular buffer that holds raw byte values, with +// data-read/write helpers. +// +// OffsetT: Type of the internal offset into the buffer of bytes, it should be +// large enough to access all bytes of the buffer. It will also be used as +// Length (in bytes) of the buffer and of any subset. Default uint32_t +// IndexT: Type of the external index, it should be large enough that overflows +// should not happen during the lifetime of the ModuloBuffer. +// +// The basic usage is to create an iterator-like object with `ReaderAt(Index)` +// or `WriterAt(Index)`, and use it to read/write data blobs. Iterators +// automatically manage the wrap-around (through "Modulo", which is effectively +// an AND-masking with the PowerOfTwo buffer size.) +// +// There is zero safety: No thread safety, no checks that iterators may be +// overwriting data that's still to be read, etc. It's up to the caller to add +// adequate checks. +// The intended use is as an underlying buffer for a safer container. 
+template <typename OffsetT = uint32_t, typename IndexT = uint64_t> +class ModuloBuffer { + public: + using Byte = uint8_t; + static_assert(sizeof(Byte) == 1, "ModuloBuffer::Byte must be 1 byte"); + using Offset = OffsetT; + static_assert(!std::numeric_limits<Offset>::is_signed, + "ModuloBuffer::Offset must be an unsigned integral type"); + using Length = Offset; + using Index = IndexT; + static_assert(!std::numeric_limits<Index>::is_signed, + "ModuloBuffer::Index must be an unsigned integral type"); + static_assert(sizeof(Index) >= sizeof(Offset), + "ModuloBuffer::Index size must >= Offset"); + + // Create a buffer of the given length. + explicit ModuloBuffer(PowerOfTwo<Length> aLength) + : mMask(aLength.Mask()), + mBuffer(WrapNotNull(new Byte[aLength.Value()])), + mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {} + + // Take ownership of an existing buffer. Existing contents is ignored. + // Done by extracting the raw pointer from UniquePtr<Byte[]>, and adding + // an equivalent `delete[]` in `mBufferDeleter`. + ModuloBuffer(UniquePtr<Byte[]> aExistingBuffer, PowerOfTwo<Length> aLength) + : mMask(aLength.Mask()), + mBuffer(WrapNotNull(aExistingBuffer.release())), + mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {} + + // Use an externally-owned buffer. Existing contents is ignored. + ModuloBuffer(Byte* aExternalBuffer, PowerOfTwo<Length> aLength) + : mMask(aLength.Mask()), mBuffer(WrapNotNull(aExternalBuffer)) {} + + // Disallow copying, as we may uniquely own the resource. + ModuloBuffer(const ModuloBuffer& aOther) = delete; + ModuloBuffer& operator=(const ModuloBuffer& aOther) = delete; + + // Allow move-construction. Stealing ownership if the original had it. + // This effectively prevents copy construction, and all assignments; needed so + // that a ModuloBuffer may be initialized from a separate construction. 
+ // The moved-from ModuloBuffer still points at the resource but doesn't own + // it, so it won't try to free it; but accesses are not guaranteed, so it + // should not be used anymore. + ModuloBuffer(ModuloBuffer&& aOther) + : mMask(std::move(aOther.mMask)), + mBuffer(std::move(aOther.mBuffer)), + mBufferDeleter(std::move(aOther.mBufferDeleter)) { + // The above move leaves `aOther.mBufferDeleter` in a valid state but with + // an unspecified value, so it could theoretically still contain the + // original function, which would be bad because we don't want aOther to + // delete the resource that `this` now owns. + if (aOther.mBufferDeleter) { + // `aOther` still had a non-empty deleter, reset it. + aOther.mBufferDeleter = nullptr; + } + } + + // Disallow assignment, as we have some `const` members. + ModuloBuffer& operator=(ModuloBuffer&& aOther) = delete; + + // Destructor, deletes the resource if we uniquely own it. + ~ModuloBuffer() { + if (mBufferDeleter) { + mBufferDeleter(mBuffer); + } + } + + PowerOfTwo<Length> BufferLength() const { + return PowerOfTwo<Length>(mMask.MaskValue() + 1); + } + + // Size of external resources. + // Note: `mBufferDeleter`'s potential external data (for its captures) is not + // included, as it's hidden in the `std::function` implementation. + size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + if (!mBufferDeleter) { + // If we don't have a buffer deleter, assume we don't own the data, so + // it's probably on the stack, or should be reported by its owner. 
+ return 0; + } + return aMallocSizeOf(mBuffer); + } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); + } + + ProfileBufferEntryReader EntryReaderFromTo( + Index aStart, Index aEnd, ProfileBufferBlockIndex aBlockIndex, + ProfileBufferBlockIndex aNextBlockIndex) const { + using EntrySpan = Span<const ProfileBufferEntryReader::Byte>; + if (aStart == aEnd) { + return ProfileBufferEntryReader{}; + } + // Don't allow over-wrapping. + MOZ_ASSERT(aEnd - aStart <= mMask.MaskValue() + 1); + // Start offset in 0 .. (buffer size - 1) + Offset start = static_cast<Offset>(aStart) & mMask; + // End offset in 1 .. (buffer size) + Offset end = (static_cast<Offset>(aEnd - 1) & mMask) + 1; + if (start < end) { + // Segment doesn't cross buffer threshold, one span is enough. + return ProfileBufferEntryReader{EntrySpan(&mBuffer[start], end - start), + aBlockIndex, aNextBlockIndex}; + } + // Segment crosses buffer threshold, we need one span until the end and one + // span restarting at the beginning of the buffer. + return ProfileBufferEntryReader{ + EntrySpan(&mBuffer[start], mMask.MaskValue() + 1 - start), + EntrySpan(&mBuffer[0], end), aBlockIndex, aNextBlockIndex}; + } + + // Return an entry writer for the given range. + ProfileBufferEntryWriter EntryWriterFromTo(Index aStart, Index aEnd) const { + using EntrySpan = Span<ProfileBufferEntryReader::Byte>; + if (aStart == aEnd) { + return ProfileBufferEntryWriter{}; + } + MOZ_ASSERT(aEnd - aStart <= mMask.MaskValue() + 1); + // Start offset in 0 .. (buffer size - 1) + Offset start = static_cast<Offset>(aStart) & mMask; + // End offset in 1 .. (buffer size) + Offset end = (static_cast<Offset>(aEnd - 1) & mMask) + 1; + if (start < end) { + // Segment doesn't cross buffer threshold, one span is enough. 
+ return ProfileBufferEntryWriter{ + EntrySpan(&mBuffer[start], end - start), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)}; + } + // Segment crosses buffer threshold, we need one span until the end and one + // span restarting at the beginning of the buffer. + return ProfileBufferEntryWriter{ + EntrySpan(&mBuffer[start], mMask.MaskValue() + 1 - start), + EntrySpan(&mBuffer[0], end), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)}; + } + + // Emplace an entry writer into `aMaybeEntryWriter` for the given range. + void EntryWriterFromTo(Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter, + Index aStart, Index aEnd) const { + MOZ_ASSERT(aMaybeEntryWriter.isNothing(), + "Reference entry writer should be Nothing."); + using EntrySpan = Span<ProfileBufferEntryReader::Byte>; + if (aStart == aEnd) { + return; + } + MOZ_ASSERT(aEnd - aStart <= mMask.MaskValue() + 1); + // Start offset in 0 .. (buffer size - 1) + Offset start = static_cast<Offset>(aStart) & mMask; + // End offset in 1 .. (buffer size) + Offset end = (static_cast<Offset>(aEnd - 1) & mMask) + 1; + if (start < end) { + // Segment doesn't cross buffer threshold, one span is enough. + aMaybeEntryWriter.emplace( + EntrySpan(&mBuffer[start], end - start), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)); + } else { + // Segment crosses buffer threshold, we need one span until the end and + // one span restarting at the beginning of the buffer. 
+ aMaybeEntryWriter.emplace( + EntrySpan(&mBuffer[start], mMask.MaskValue() + 1 - start), + EntrySpan(&mBuffer[0], end), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)); + } + } + + // All ModuloBuffer operations should be done through this iterator, which has + // an effectively infinite range. The underlying wrapping-around is hidden. + // Use `ReaderAt(Index)` or `WriterAt(Index)` to create it. + // + // `const Iterator<...>` means the iterator itself cannot change, i.e., it + // cannot move, and only its const methods are available. Note that these + // const methods may still be used to modify the buffer contents (e.g.: + // `operator*()`, `Poke()`). + // + // `Iterator</*IsBufferConst=*/true>` means the buffer contents cannot be + // modified, i.e., write operations are forbidden, but the iterator may still + // move if non-const itself. + template <bool IsBufferConst> + class Iterator { + // Alias to const- or mutable-`ModuloBuffer` depending on `IsBufferConst`. + using ConstOrMutableBuffer = + std::conditional_t<IsBufferConst, const ModuloBuffer, ModuloBuffer>; + + // Implementation note about the strange enable-if's below: + // `template <bool NotIBC = !IsBufferConst> enable_if_t<NotIBC>` + // which intuitively could be simplified to: + // `enable_if_t<!IsBufferConst>` + // The former extra-templated syntax is in fact necessary to delay + // instantiation of these functions until they are actually needed. 
+ // + // If we were just doing `enable_if_t<!IsBufferConst>`, this would only + // depend on the *class* (`ModuloBuffer<...>::Iterator`), which gets + // instantiated when a `ModuloBuffer` is created with some template + // arguments; at that point, all non-templated methods get instantiated, so + // there's no "SFINAE" happening, and `enable_if_t<...>` is actually doing + // `typename enable_if<...>::type` on the spot, but there is no `type` if + // `IsBufferConst` is true, so it just fails right away. E.g.: + // error: no type named 'type' in 'std::enable_if<false, void>'; + // 'enable_if' cannot be used to disable this declaration + // note: in instantiation of template type alias 'enable_if_t' + // > std::enable_if_t<!IsBufferConst> WriteObject(const T& aObject) { + // in instantiation of template class + // 'mozilla::ModuloBuffer<...>::Iterator<true>' + // > auto it = mb.ReaderAt(1); + // + // By adding another template level `template <bool NotIsBufferConst = + // !IsBufferConst>`, the instantiation is delayed until the function is + // actually invoked somewhere, e.g. `it.Poke(...);`. + // So at that invocation point, the compiler looks for a "Poke" name in it, + // and considers potential template instantiations that could work. The + // `enable_if_t` is *now* attempted, with `NotIsBufferConst` taking its + // value from `!IsBufferConst`: + // - If `IsBufferConst` is false, `NotIsBufferConst` is true, + // `enable_if<NotIsBufferConst>` does define a `type` (`void` by default), + // so `enable_if_t` happily becomes `void`, the function exists and may be + // called. + // - Otherwise if `IsBufferConst` is true, `NotIsBufferConst` is false, + // `enable_if<NotIsBufferConst>` does *not* define a `type`, therefore + // `enable_if_t` produces an error because there is no `type`. Now "SFINAE" + // happens: This "Substitution Failure Is Not An Error" (by itself)... 
But + // then, there are no other functions named "Poke" as requested in the + // `it.Poke(...);` call, so we are now getting an error (can't find + // function), as expected because `it` had `IsBufferConst`==true. (But at + // least the compiler waited until this invocation attempt before outputting + // an error.) + // + // C++ is fun! + + public: + // These definitions are expected by std functions, to recognize this as an + // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits + using difference_type = Index; + using value_type = Byte; + using pointer = std::conditional_t<IsBufferConst, const Byte*, Byte*>; + using reference = std::conditional_t<IsBufferConst, const Byte&, Byte&>; + using iterator_category = std::random_access_iterator_tag; + + // Can always copy/assign from the same kind of iterator. + Iterator(const Iterator& aRhs) = default; + Iterator& operator=(const Iterator& aRhs) = default; + + // Can implicitly copy an Iterator-to-mutable (reader+writer) to + // Iterator-to-const (reader-only), but not the reverse. + template <bool IsRhsBufferConst, + typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>> + MOZ_IMPLICIT Iterator(const Iterator<IsRhsBufferConst>& aRhs) + : mModuloBuffer(aRhs.mModuloBuffer), mIndex(aRhs.mIndex) {} + + // Can implicitly assign from an Iterator-to-mutable (reader+writer) to + // Iterator-to-const (reader-only), but not the reverse. + template <bool IsRhsBufferConst, + typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>> + Iterator& operator=(const Iterator<IsRhsBufferConst>& aRhs) { + mModuloBuffer = aRhs.mModuloBuffer; + mIndex = aRhs.mIndex; + return *this; + } + + // Current location of the iterator in the `Index` range. + // Note that due to wrapping, multiple indices may effectively point at the + // same byte in the buffer. + Index CurrentIndex() const { return mIndex; } + + // Location comparison in the `Index` range. 
I.e., two `Iterator`s may look + // unequal, but refer to the same buffer location. + // Must be on the same buffer. + bool operator==(const Iterator& aRhs) const { + MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer); + return mIndex == aRhs.mIndex; + } + bool operator!=(const Iterator& aRhs) const { + MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer); + return mIndex != aRhs.mIndex; + } + bool operator<(const Iterator& aRhs) const { + MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer); + return mIndex < aRhs.mIndex; + } + bool operator<=(const Iterator& aRhs) const { + MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer); + return mIndex <= aRhs.mIndex; + } + bool operator>(const Iterator& aRhs) const { + MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer); + return mIndex > aRhs.mIndex; + } + bool operator>=(const Iterator& aRhs) const { + MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer); + return mIndex >= aRhs.mIndex; + } + + // Movement in the `Index` range. + Iterator& operator++() { + ++mIndex; + return *this; + } + Iterator operator++(int) { + Iterator here(*mModuloBuffer, mIndex); + ++mIndex; + return here; + } + Iterator& operator--() { + --mIndex; + return *this; + } + Iterator operator--(int) { + Iterator here(*mModuloBuffer, mIndex); + --mIndex; + return here; + } + Iterator& operator+=(Length aLength) { + mIndex += aLength; + return *this; + } + Iterator operator+(Length aLength) const { + return Iterator(*mModuloBuffer, mIndex + aLength); + } + friend Iterator operator+(Length aLength, const Iterator& aIt) { + return aIt + aLength; + } + Iterator& operator-=(Length aLength) { + mIndex -= aLength; + return *this; + } + Iterator operator-(Length aLength) const { + return Iterator(*mModuloBuffer, mIndex - aLength); + } + + // Distance from `aRef` to here in the `Index` range. + // May be negative (as 2's complement) if `aRef > *this`. 
+ Index operator-(const Iterator& aRef) const { + MOZ_ASSERT(mModuloBuffer == aRef.mModuloBuffer); + return mIndex - aRef.mIndex; + } + + // Dereference a single byte (read-only if `IsBufferConst` is true). + reference operator*() const { + return mModuloBuffer->mBuffer[OffsetInBuffer()]; + } + + // Random-access dereference. + reference operator[](Length aLength) const { return *(*this + aLength); } + + // Write data (if `IsBufferConst` is false) but don't move iterator. + template <bool NotIsBufferConst = !IsBufferConst> + std::enable_if_t<NotIsBufferConst> Poke(const void* aSrc, + Length aLength) const { + // Don't allow data larger than the buffer. + MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value()); + // Offset inside the buffer (corresponding to our Index). + Offset offset = OffsetInBuffer(); + // Compute remaining bytes between this offset and the end of the buffer. + Length remaining = mModuloBuffer->BufferLength().Value() - offset; + if (MOZ_LIKELY(remaining >= aLength)) { + // Enough space to write everything before the end. + memcpy(&mModuloBuffer->mBuffer[offset], aSrc, aLength); + } else { + // Not enough space. Write as much as possible before the end. + memcpy(&mModuloBuffer->mBuffer[offset], aSrc, remaining); + // And then continue from the beginning of the buffer. + memcpy(&mModuloBuffer->mBuffer[0], + static_cast<const Byte*>(aSrc) + remaining, + (aLength - remaining)); + } + } + + // Write object data (if `IsBufferConst` is false) but don't move iterator. + // Note that this copies bytes from the object, with the intent to read them + // back later. Restricted to trivially-copyable types, which support this + // without Undefined Behavior! 
+ template <typename T, bool NotIsBufferConst = !IsBufferConst> + std::enable_if_t<NotIsBufferConst> PokeObject(const T& aObject) const { + static_assert(std::is_trivially_copyable<T>::value, + "PokeObject<T> - T must be trivially copyable"); + return Poke(&aObject, sizeof(T)); + } + + // Write data (if `IsBufferConst` is false) and move iterator ahead. + template <bool NotIsBufferConst = !IsBufferConst> + std::enable_if_t<NotIsBufferConst> Write(const void* aSrc, Length aLength) { + Poke(aSrc, aLength); + mIndex += aLength; + } + + // Write object data (if `IsBufferConst` is false) and move iterator ahead. + // Note that this copies bytes from the object, with the intent to read them + // back later. Restricted to trivially-copyable types, which support this + // without Undefined Behavior! + template <typename T, bool NotIsBufferConst = !IsBufferConst> + std::enable_if_t<NotIsBufferConst> WriteObject(const T& aObject) { + static_assert(std::is_trivially_copyable<T>::value, + "WriteObject<T> - T must be trivially copyable"); + return Write(&aObject, sizeof(T)); + } + + // Number of bytes needed to represent `aValue` in unsigned LEB128. + template <typename T> + static unsigned ULEB128Size(T aValue) { + return ::mozilla::ULEB128Size(aValue); + } + + // Write number as unsigned LEB128 (if `IsBufferConst` is false) and move + // iterator ahead. + template <typename T, bool NotIsBufferConst = !IsBufferConst> + std::enable_if_t<NotIsBufferConst> WriteULEB128(T aValue) { + ::mozilla::WriteULEB128(aValue, *this); + } + + // Read data but don't move iterator. + void Peek(void* aDst, Length aLength) const { + // Don't allow data larger than the buffer. + MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value()); + // Offset inside the buffer (corresponding to our Index). + Offset offset = OffsetInBuffer(); + // Compute remaining bytes between this offset and the end of the buffer. 
+ Length remaining = mModuloBuffer->BufferLength().Value() - offset; + if (MOZ_LIKELY(remaining >= aLength)) { + // Can read everything we need before the end of the buffer. + memcpy(aDst, &mModuloBuffer->mBuffer[offset], aLength); + } else { + // Read as much as possible before the end of the buffer. + memcpy(aDst, &mModuloBuffer->mBuffer[offset], remaining); + // And then continue from the beginning of the buffer. + memcpy(static_cast<Byte*>(aDst) + remaining, &mModuloBuffer->mBuffer[0], + (aLength - remaining)); + } + } + + // Read data into an object but don't move iterator. + // Note that this overwrites `aObject` with bytes from the buffer. + // Restricted to trivially-copyable types, which support this without + // Undefined Behavior! + template <typename T> + void PeekIntoObject(T& aObject) const { + static_assert(std::is_trivially_copyable<T>::value, + "PeekIntoObject<T> - T must be trivially copyable"); + Peek(&aObject, sizeof(T)); + } + + // Read data as an object but don't move iterator. + // Note that this creates an default `T` first, and then overwrites it with + // bytes from the buffer. Restricted to trivially-copyable types, which + // support this without Undefined Behavior! + template <typename T> + T PeekObject() const { + static_assert(std::is_trivially_copyable<T>::value, + "PeekObject<T> - T must be trivially copyable"); + T object; + PeekIntoObject(object); + return object; + } + + // Read data and move iterator ahead. + void Read(void* aDst, Length aLength) { + Peek(aDst, aLength); + mIndex += aLength; + } + + // Read data into a mutable iterator and move both iterators ahead. + void ReadInto(Iterator</* IsBufferConst */ false>& aDst, Length aLength) { + // Don't allow data larger than the buffer. + MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value()); + MOZ_ASSERT(aLength <= aDst.mModuloBuffer->BufferLength().Value()); + // Offset inside the buffer (corresponding to our Index). 
+ Offset offset = OffsetInBuffer(); + // Compute remaining bytes between this offset and the end of the buffer. + Length remaining = mModuloBuffer->BufferLength().Value() - offset; + if (MOZ_LIKELY(remaining >= aLength)) { + // Can read everything we need before the end of the buffer. + aDst.Write(&mModuloBuffer->mBuffer[offset], aLength); + } else { + // Read as much as possible before the end of the buffer. + aDst.Write(&mModuloBuffer->mBuffer[offset], remaining); + // And then continue from the beginning of the buffer. + aDst.Write(&mModuloBuffer->mBuffer[0], (aLength - remaining)); + } + mIndex += aLength; + } + + // Read data into an object and move iterator ahead. + // Note that this overwrites `aObject` with bytes from the buffer. + // Restricted to trivially-copyable types, which support this without + // Undefined Behavior! + template <typename T> + void ReadIntoObject(T& aObject) { + static_assert(std::is_trivially_copyable<T>::value, + "ReadIntoObject<T> - T must be trivially copyable"); + Read(&aObject, sizeof(T)); + } + + // Read data as an object and move iterator ahead. + // Note that this creates an default `T` first, and then overwrites it with + // bytes from the buffer. Restricted to trivially-copyable types, which + // support this without Undefined Behavior! + template <typename T> + T ReadObject() { + static_assert(std::is_trivially_copyable<T>::value, + "ReadObject<T> - T must be trivially copyable"); + T object; + ReadIntoObject(object); + return object; + } + + // Read an unsigned LEB128 number and move iterator ahead. + template <typename T> + T ReadULEB128() { + return ::mozilla::ReadULEB128<T>(*this); + } + + private: + // Only a ModuloBuffer can instantiate its iterator. + friend class ModuloBuffer; + + Iterator(ConstOrMutableBuffer& aBuffer, Index aIndex) + : mModuloBuffer(WrapNotNull(&aBuffer)), mIndex(aIndex) {} + + // Convert the Iterator's mIndex into an offset inside the byte buffer. 
+ Offset OffsetInBuffer() const { + return static_cast<Offset>(mIndex) & mModuloBuffer->mMask; + } + + // ModuloBuffer that this Iterator operates on. + // Using a non-null pointer instead of a reference, to allow re-assignment + // of an Iterator variable. + NotNull<ConstOrMutableBuffer*> mModuloBuffer; + + // Position of this iterator in the wider `Index` range. (Will be wrapped + // around as needed when actually accessing bytes from the buffer.) + Index mIndex; + }; + + // Shortcut to iterator to const (read-only) data. + using Reader = Iterator<true>; + // Shortcut to iterator to non-const (read/write) data. + using Writer = Iterator<false>; + + // Create an iterator to const data at the given index. + Reader ReaderAt(Index aIndex) const { return Reader(*this, aIndex); } + + // Create an iterator to non-const data at the given index. + Writer WriterAt(Index aIndex) { return Writer(*this, aIndex); } + +#ifdef DEBUG + void Dump() const { + Length len = BufferLength().Value(); + if (len > 128) { + len = 128; + } + for (Length i = 0; i < len; ++i) { + printf("%02x ", mBuffer[i]); + } + printf("\n"); + } +#endif // DEBUG + + private: + // Mask used to convert an index to an offset in `mBuffer` + const PowerOfTwoMask<Offset> mMask; + + // Buffer data. `const NotNull<...>` shows that `mBuffer is `const`, and + // `Byte* const` shows that the pointer cannot be changed to point at + // something else, but the pointed-at `Byte`s are writable. + const NotNull<Byte* const> mBuffer; + + // Function used to release the buffer resource (if needed). 
+ std::function<void(Byte*)> mBufferDeleter; +}; + +} // namespace mozilla + +#endif // ModuloBuffer_h diff --git a/mozglue/baseprofiler/public/PowerOfTwo.h b/mozglue/baseprofiler/public/PowerOfTwo.h new file mode 100644 index 0000000000..7d396c15e6 --- /dev/null +++ b/mozglue/baseprofiler/public/PowerOfTwo.h @@ -0,0 +1,322 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// PowerOfTwo is a value type that always holds a power of 2. +// It has the same size as its underlying unsigned type, but offers the +// guarantee of being a power of 2, which permits some optimizations when +// involved in modulo operations (using masking instead of actual modulo). +// +// PowerOfTwoMask contains a mask corresponding to a power of 2. +// E.g., 2^8 is 256 or 0x100, the corresponding mask is 2^8-1 or 255 or 0xFF. +// It should be used instead of PowerOfTwo in situations where most operations +// would be modulo, this saves having to recompute the mask from the stored +// power of 2. +// +// One common use would be for ring-buffer containers with a power-of-2 size, +// where an index is usually converted to an in-buffer offset by `i % size`. +// Instead, the container could store a PowerOfTwo or PowerOfTwoMask, and do +// `i % p2` or `i & p2m`, which is more efficient than for arbitrary sizes. +// +// Shortcuts for common 32- and 64-bit values: PowerOfTwo32, etc. +// +// To create constexpr constants, use MakePowerOfTwo<Type, Value>(), etc. 
+ +#ifndef PowerOfTwo_h +#define PowerOfTwo_h + +#include "mozilla/MathAlgorithms.h" + +#include <limits> + +namespace mozilla { + +// Compute the smallest power of 2 greater than or equal to aInput, except if +// that would overflow in which case the highest possible power of 2 if chosen. +// 0->1, 1->1, 2->2, 3->4, ... 2^31->2^31, 2^31+1->2^31 (for uint32_t), etc. +template <typename T> +T FriendlyRoundUpPow2(T aInput) { + // This is the same code as `RoundUpPow2()`, except we handle any type (that + // CeilingLog2 supports) and allow the greater-than-max-power case. + constexpr T max = T(1) << (sizeof(T) * CHAR_BIT - 1); + if (aInput >= max) { + return max; + } + return T(1) << CeilingLog2(aInput); +} + +namespace detail { +// Same function name `CountLeadingZeroes` with uint32_t and uint64_t overloads. +inline uint_fast8_t CountLeadingZeroes(uint32_t aValue) { + MOZ_ASSERT(aValue != 0); + return detail::CountLeadingZeroes32(aValue); +} +inline uint_fast8_t CountLeadingZeroes(uint64_t aValue) { + MOZ_ASSERT(aValue != 0); + return detail::CountLeadingZeroes64(aValue); +} +// Refuse anything else. +template <typename T> +inline uint_fast8_t CountLeadingZeroes(T aValue) = delete; +} // namespace detail + +// Compute the smallest 2^N-1 mask where aInput can fit. +// I.e., `x & mask == x`, but `x & (mask >> 1) != x`. +// Or looking at binary, we want a mask with as many leading zeroes as the +// input, by right-shifting a full mask: (8-bit examples) +// input: 00000000 00000001 00000010 00010110 01111111 10000000 +// N leading 0s: ^^^^^^^^ 8 ^^^^^^^ 7 ^^^^^^ 6 ^^^ 3 ^ 1 0 +// full mask: 11111111 11111111 11111111 11111111 11111111 11111111 +// full mask >> N: 00000000 00000001 00000011 00011111 01111111 11111111 +template <typename T> +T RoundUpPow2Mask(T aInput) { + // Special case, as CountLeadingZeroes(0) is undefined. (And even if that was + // defined, shifting by the full type size is also undefined!) 
+ if (aInput == 0) { + return 0; + } + return T(-1) >> detail::CountLeadingZeroes(aInput); +} + +template <typename T> +class PowerOfTwoMask; + +template <typename T, T Mask> +constexpr PowerOfTwoMask<T> MakePowerOfTwoMask(); + +template <typename T> +class PowerOfTwo; + +template <typename T, T Value> +constexpr PowerOfTwo<T> MakePowerOfTwo(); + +// PowerOfTwoMask will always contain a mask for a power of 2, which is useful +// for power-of-2 modulo operations (e.g., to keep an index inside a power-of-2 +// container). +// Use this instead of PowerOfTwo if masking is the primary use of the value. +// +// Note that this class can store a "full" mask where all bits are set, so it +// works for mask corresponding to the power of 2 that would overflow `T` +// (e.g., 2^32 for uint32_t gives a mask of 2^32-1, which fits in a uint32_t). +// For this reason there is no API that computes the power of 2 corresponding to +// the mask; But this can be done explicitly with `MaskValue() + 1`, which may +// be useful for computing things like distance-to-the-end by doing +// `MaskValue() + 1 - offset`, which works fine with unsigned number types. +template <typename T> +class PowerOfTwoMask { + static_assert(!std::numeric_limits<T>::is_signed, + "PowerOfTwoMask must use an unsigned type"); + + public: + // Construct a power of 2 mask where the given value can fit. + // Cannot be constexpr because of `RoundUpPow2Mask()`. + explicit PowerOfTwoMask(T aInput) : mMask(RoundUpPow2Mask(aInput)) {} + + // Compute the mask corresponding to a PowerOfTwo. + // This saves having to compute the nearest 2^N-1. + // Not a conversion constructor, as that could be ambiguous whether we'd want + // the mask corresponding to the power of 2 (2^N -> 2^N-1), or the mask that + // can *contain* the PowerOfTwo value (2^N -> 2^(N+1)-1). + // Note: Not offering reverse PowerOfTwoMark-to-PowerOfTwo conversion, because + // that could result in an unexpected 0 result for the largest possible mask. 
+ template <typename U> + static constexpr PowerOfTwoMask<U> MaskForPowerOfTwo( + const PowerOfTwo<U>& aP2) { + return PowerOfTwoMask(aP2); + } + + // Allow smaller unsigned types as input. + // Bigger or signed types must be explicitly converted by the caller. + template <typename U> + explicit constexpr PowerOfTwoMask(U aInput) + : mMask(RoundUpPow2Mask(static_cast<T>(aInput))) { + static_assert(!std::numeric_limits<T>::is_signed, + "PowerOfTwoMask does not accept signed types"); + static_assert(sizeof(U) <= sizeof(T), + "PowerOfTwoMask does not accept bigger types"); + } + + constexpr T MaskValue() const { return mMask; } + + // `x & aPowerOfTwoMask` just works. + template <typename U> + friend U operator&(U aNumber, PowerOfTwoMask aP2M) { + return static_cast<U>(aNumber & aP2M.MaskValue()); + } + + // `aPowerOfTwoMask & x` just works. + template <typename U> + friend constexpr U operator&(PowerOfTwoMask aP2M, U aNumber) { + return static_cast<U>(aP2M.MaskValue() & aNumber); + } + + // `x % aPowerOfTwoMask(2^N-1)` is equivalent to `x % 2^N` but is more + // optimal by doing `x & (2^N-1)`. + // Useful for templated code doing modulo with a template argument type. + template <typename U> + friend constexpr U operator%(U aNumerator, PowerOfTwoMask aDenominator) { + return aNumerator & aDenominator.MaskValue(); + } + + constexpr bool operator==(const PowerOfTwoMask& aRhs) const { + return mMask == aRhs.mMask; + } + constexpr bool operator!=(const PowerOfTwoMask& aRhs) const { + return mMask != aRhs.mMask; + } + + private: + // Trust `PowerOfTwo` to call the private Trusted constructor below. + friend class PowerOfTwo<T>; + + // Trust `MakePowerOfTwoMask()` to call the private Trusted constructor below. + template <typename U, U Mask> + friend constexpr PowerOfTwoMask<U> MakePowerOfTwoMask(); + + struct Trusted { + T mMask; + }; + // Construct the mask corresponding to a PowerOfTwo. + // This saves having to compute the nearest 2^N-1. 
+ // Note: Not a public PowerOfTwo->PowerOfTwoMask conversion constructor, as + // that could be ambiguous whether we'd want the mask corresponding to the + // power of 2 (2^N -> 2^N-1), or the mask that can *contain* the PowerOfTwo + // value (2^N -> 2^(N+1)-1). + explicit constexpr PowerOfTwoMask(const Trusted& aP2) : mMask(aP2.mMask) {} + + T mMask = 0; +}; + +// Make a PowerOfTwoMask constant, statically-checked. +template <typename T, T Mask> +constexpr PowerOfTwoMask<T> MakePowerOfTwoMask() { + static_assert(Mask == T(-1) || IsPowerOfTwo(Mask + 1), + "MakePowerOfTwoMask<T, Mask>: Mask must be 2^N-1"); + using Trusted = typename PowerOfTwoMask<T>::Trusted; + return PowerOfTwoMask<T>(Trusted{Mask}); +} + +// PowerOfTwo will always contain a power of 2. +template <typename T> +class PowerOfTwo { + static_assert(!std::numeric_limits<T>::is_signed, + "PowerOfTwo must use an unsigned type"); + + public: + // Construct a power of 2 that can fit the given value, or the highest power + // of 2 possible. + // Caller should explicitly check/assert `Value() <= aInput` if they want to. + // Cannot be constexpr because of `FriendlyRoundUpPow2()`. + explicit PowerOfTwo(T aInput) : mValue(FriendlyRoundUpPow2(aInput)) {} + + // Allow smaller unsigned types as input. + // Bigger or signed types must be explicitly converted by the caller. + template <typename U> + explicit PowerOfTwo(U aInput) + : mValue(FriendlyRoundUpPow2(static_cast<T>(aInput))) { + static_assert(!std::numeric_limits<T>::is_signed, + "PowerOfTwo does not accept signed types"); + static_assert(sizeof(U) <= sizeof(T), + "PowerOfTwo does not accept bigger types"); + } + + constexpr T Value() const { return mValue; } + + // Binary mask corresponding to the power of 2, useful for modulo. + // E.g., `x & powerOfTwo(y).Mask()` == `x % powerOfTwo(y)`. + // Consider PowerOfTwoMask class instead of PowerOfTwo if masking is the + // primary use case. 
+ constexpr T MaskValue() const { return mValue - 1; } + + // PowerOfTwoMask corresponding to this power of 2, useful for modulo. + constexpr PowerOfTwoMask<T> Mask() const { + using Trusted = typename PowerOfTwoMask<T>::Trusted; + return PowerOfTwoMask<T>(Trusted{MaskValue()}); + } + + // `x % aPowerOfTwo` works optimally. + // Useful for templated code doing modulo with a template argument type. + // Use PowerOfTwoMask class instead if masking is the primary use case. + template <typename U> + friend constexpr U operator%(U aNumerator, PowerOfTwo aDenominator) { + return aNumerator & aDenominator.MaskValue(); + } + + constexpr bool operator==(const PowerOfTwo& aRhs) const { + return mValue == aRhs.mValue; + } + constexpr bool operator!=(const PowerOfTwo& aRhs) const { + return mValue != aRhs.mValue; + } + constexpr bool operator<(const PowerOfTwo& aRhs) const { + return mValue < aRhs.mValue; + } + constexpr bool operator<=(const PowerOfTwo& aRhs) const { + return mValue <= aRhs.mValue; + } + constexpr bool operator>(const PowerOfTwo& aRhs) const { + return mValue > aRhs.mValue; + } + constexpr bool operator>=(const PowerOfTwo& aRhs) const { + return mValue >= aRhs.mValue; + } + + private: + // Trust `MakePowerOfTwo()` to call the private Trusted constructor below. + template <typename U, U Value> + friend constexpr PowerOfTwo<U> MakePowerOfTwo(); + + struct Trusted { + T mValue; + }; + // Construct a PowerOfTwo with the given trusted value. + // This saves having to compute the nearest 2^N. + // Note: Not offering PowerOfTwoMark-to-PowerOfTwo conversion, because that + // could result in an unexpected 0 result for the largest possible mask. + explicit constexpr PowerOfTwo(const Trusted& aP2) : mValue(aP2.mValue) {} + + // The smallest power of 2 is 2^0 == 1. + T mValue = 1; +}; + +// Make a PowerOfTwo constant, statically-checked. 
+template <typename T, T Value> +constexpr PowerOfTwo<T> MakePowerOfTwo() { + static_assert(IsPowerOfTwo(Value), + "MakePowerOfTwo<T, Value>: Value must be 2^N"); + using Trusted = typename PowerOfTwo<T>::Trusted; + return PowerOfTwo<T>(Trusted{Value}); +} + +// Shortcuts for the most common types and functions. + +using PowerOfTwoMask32 = PowerOfTwoMask<uint32_t>; +using PowerOfTwo32 = PowerOfTwo<uint32_t>; +using PowerOfTwoMask64 = PowerOfTwoMask<uint64_t>; +using PowerOfTwo64 = PowerOfTwo<uint64_t>; + +template <uint32_t Mask> +constexpr PowerOfTwoMask32 MakePowerOfTwoMask32() { + return MakePowerOfTwoMask<uint32_t, Mask>(); +} + +template <uint32_t Value> +constexpr PowerOfTwo32 MakePowerOfTwo32() { + return MakePowerOfTwo<uint32_t, Value>(); +} + +template <uint64_t Mask> +constexpr PowerOfTwoMask64 MakePowerOfTwoMask64() { + return MakePowerOfTwoMask<uint64_t, Mask>(); +} + +template <uint64_t Value> +constexpr PowerOfTwo64 MakePowerOfTwo64() { + return MakePowerOfTwo<uint64_t, Value>(); +} + +} // namespace mozilla + +#endif // PowerOfTwo_h diff --git a/mozglue/baseprofiler/public/ProfileBufferChunk.h b/mozglue/baseprofiler/public/ProfileBufferChunk.h new file mode 100644 index 0000000000..24a516bcaf --- /dev/null +++ b/mozglue/baseprofiler/public/ProfileBufferChunk.h @@ -0,0 +1,543 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfileBufferChunk_h +#define ProfileBufferChunk_h + +#include "mozilla/MemoryReporting.h" +#include "mozilla/ProfileBufferIndex.h" +#include "mozilla/Span.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" + +#if defined(MOZ_MEMORY) +# include "mozmemory.h" +#endif + +#include <algorithm> +#include <limits> +#include <type_traits> + +#ifdef DEBUG +# include <cstdio> +#endif + +namespace mozilla { + +// Represents a single chunk of memory, with a link to the next chunk (or null). +// +// A chunk is made of an internal header (which contains a public part) followed +// by user-accessible bytes. +// +// +---------------+---------+----------------------------------------------+ +// | public Header | private | memory containing user blocks | +// +---------------+---------+----------------------------------------------+ +// <---------------BufferBytes()------------------> +// <------------------------------ChunkBytes()------------------------------> +// +// The chunk can reserve "blocks", but doesn't know the internal contents of +// each block, it only knows where the first one starts, and where the last one +// ends (which is where the next one will begin, if not already out of range). +// It is up to the user to add structure to each block so that they can be +// distinguished when later read. +// +// +---------------+---------+----------------------------------------------+ +// | public Header | private | [1st block]...[last full block] | +// +---------------+---------+----------------------------------------------+ +// ChunkHeader().mOffsetFirstBlock ^ ^ +// ChunkHeader().mOffsetPastLastBlock --' +// +// It is possible to attempt to reserve more than the remaining space, in which +// case only what is available is returned. The caller is responsible for using +// another chunk, reserving a block "tail" in it, and using both parts to +// constitute a full block. (This initial tail may be empty in some chunks.) 
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private | tail][1st block]...[last full block][head... |
+// +---------------+---------+----------------------------------------------+
+//  ChunkHeader().mOffsetFirstBlock ^                              ^
+//                            ChunkHeader().mOffsetPastLastBlock --'
+//
+// Each Chunk has an internal state (checked in DEBUG builds) that directs how
+// to use it during creation, initialization, use, end of life, recycling, and
+// destruction. See `State` below for details.
+// In particular:
+// - `ReserveInitialBlockAsTail()` must be called before the first
+//   `ReserveBlock()` after construction or recycling, even with a size of 0
+//   (no actual tail),
+// - `MarkDone()` and `MarkRecycled()` must be called as appropriate.
+class ProfileBufferChunk {
+ public:
+  using Byte = uint8_t;
+  using Length = uint32_t;
+
+  using SpanOfBytes = Span<Byte>;
+
+  // Hint about the size of the metadata (public and private headers).
+  // `Create()` below takes the minimum *buffer* size, so the minimum total
+  // Chunk size is at least `SizeofChunkMetadata() + aMinBufferBytes`.
+  [[nodiscard]] static constexpr Length SizeofChunkMetadata() {
+    return static_cast<Length>(sizeof(InternalHeader));
+  }
+
+  // Allocate space for a chunk with a given minimum size, and construct it.
+  // The actual size may be higher, to match the actual space taken in the
+  // memory pool.
+  // `aMinBufferBytes` is the minimum number of user-accessible bytes; values
+  // below 1 are raised to 1.
+  [[nodiscard]] static UniquePtr<ProfileBufferChunk> Create(
+      Length aMinBufferBytes) {
+    // We need at least one byte, to cover the always-present `mBuffer` byte.
+    aMinBufferBytes = std::max(aMinBufferBytes, Length(1));
+    // Trivial struct with the same alignment as `ProfileBufferChunk`, and size
+    // equal to that alignment, because typically the sizeof of an object is
+    // a multiple of its alignment.
+    struct alignas(alignof(InternalHeader)) ChunkStruct {
+      Byte c[alignof(InternalHeader)];
+    };
+    static_assert(std::is_trivial_v<ChunkStruct>,
+                  "ChunkStruct must be trivial to avoid any construction");
+    // Allocate an array of that struct, enough to contain the expected
+    // `ProfileBufferChunk` (with its header+buffer).
+    // The division rounds up to a whole number of `ChunkStruct` elements.
+    size_t count = (sizeof(InternalHeader) + aMinBufferBytes +
+                    (alignof(InternalHeader) - 1)) /
+                   alignof(InternalHeader);
+#if defined(MOZ_MEMORY)
+    // Potentially expand the array to use more of the effective allocation.
+    count = (malloc_good_size(count * sizeof(ChunkStruct)) +
+             (sizeof(ChunkStruct) - 1)) /
+            sizeof(ChunkStruct);
+#endif
+    auto chunkStorage = MakeUnique<ChunkStruct[]>(count);
+    MOZ_ASSERT(reinterpret_cast<uintptr_t>(chunkStorage.get()) %
+                   alignof(InternalHeader) ==
+               0);
+    // After the allocation, compute the actual chunk size (including header).
+    const size_t chunkBytes = count * sizeof(ChunkStruct);
+    MOZ_ASSERT(chunkBytes >= sizeof(ProfileBufferChunk),
+               "Not enough space to construct a ProfileBufferChunk");
+    // The buffer size is stored as a `Length`, so the whole chunk must fit.
+    MOZ_ASSERT(chunkBytes <=
+               static_cast<size_t>(std::numeric_limits<Length>::max()));
+    // Compute the size of the user-accessible buffer inside the chunk.
+    const Length bufferBytes =
+        static_cast<Length>(chunkBytes - sizeof(InternalHeader));
+    MOZ_ASSERT(bufferBytes >= aMinBufferBytes,
+               "Not enough space for minimum buffer size");
+    // Construct the header at the beginning of the allocated array, with the
+    // known buffer size.
+    new (chunkStorage.get()) ProfileBufferChunk(bufferBytes);
+    // We now have a proper `ProfileBufferChunk` object, create the appropriate
+    // UniquePtr for it.
+    // `release()` hands the storage over, so it is owned by `chunk` only.
+    UniquePtr<ProfileBufferChunk> chunk{
+        reinterpret_cast<ProfileBufferChunk*>(chunkStorage.release())};
+    // Sanity check: the last buffer byte is the last byte of the allocation.
+    MOZ_ASSERT(
+        size_t(reinterpret_cast<const char*>(
+                   &chunk.get()->BufferSpan()[bufferBytes - 1]) -
+               reinterpret_cast<const char*>(chunk.get())) == chunkBytes - 1,
+        "Buffer span spills out of chunk allocation");
+    return chunk;
+  }
+
+#ifdef DEBUG
+  // DEBUG-only destructor: checks that the chunk is not destroyed while it is
+  // still `InUse` or `Full` (i.e., before `MarkDone()`).
+  ~ProfileBufferChunk() {
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Done ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+  }
+#endif
+
+  // Must be called with the first block tail (may be empty), which will be
+  // skipped if the reader starts with this ProfileBufferChunk.
+  // Returns a span over the first `aTailSize` bytes of the buffer; both block
+  // offsets are set to `aTailSize`. `aTailSize` is not range-checked here.
+  [[nodiscard]] SpanOfBytes ReserveInitialBlockAsTail(Length aTailSize) {
+#ifdef DEBUG
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+    mInternalHeader.mState = InternalHeader::State::InUse;
+#endif
+    mInternalHeader.mHeader.mOffsetFirstBlock = aTailSize;
+    mInternalHeader.mHeader.mOffsetPastLastBlock = aTailSize;
+    return SpanOfBytes(&mBuffer, aTailSize);
+  }
+
+  // Result of `ReserveBlock()`.
+  struct ReserveReturn {
+    // Bytes reserved in this chunk (may be shorter than requested).
+    SpanOfBytes mSpan;
+    // Index of the first reserved byte (range start + offset in this chunk).
+    ProfileBufferBlockIndex mBlockRangeIndex;
+  };
+
+  // Reserve a block of up to `aBlockSize` bytes, and return a Span to it, and
+  // its starting index. The actual size may be smaller, if the block cannot fit
+  // in the remaining space.
+  [[nodiscard]] ReserveReturn ReserveBlock(Length aBlockSize) {
+    // Blocks may only be reserved while the chunk is `InUse`.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse);
+    MOZ_ASSERT(RangeStart() != 0,
+               "Expected valid range start before first Reserve()");
+    Header& header = mInternalHeader.mHeader;
+    // The new block starts right where the previous one ended.
+    const Length startOffset = header.mOffsetPastLastBlock;
+    // A request that reaches (or exceeds) the end of the buffer is clamped to
+    // what is left, and fills the chunk.
+    const Length available = RemainingBytes();
+    const bool usesAllRemaining = aBlockSize >= available;
+#ifdef DEBUG
+    if (usesAllRemaining) {
+      mInternalHeader.mState = InternalHeader::State::Full;
+    }
+#endif
+    const Length grantedSize =
+        MOZ_UNLIKELY(usesAllRemaining) ? available : aBlockSize;
+    header.mOffsetPastLastBlock = startOffset + grantedSize;
+    ++header.mBlockCount;
+    return {SpanOfBytes(&mBuffer + startOffset, grantedSize),
+            ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                header.mRangeStart + startOffset)};
+  }
+
+  // Mark this chunk as "done": no more blocks will be stored in it, either
+  // because it is full or because the profiler has no more data to add. The
+  // contents stay readable, and the time of the call is recorded in the
+  // header.
+  void MarkDone() {
+#ifdef DEBUG
+    // Only a chunk that accepted blocks (`InUse` or `Full`) can become done.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse ||
+               mInternalHeader.mState == InternalHeader::State::Full);
+    mInternalHeader.mState = InternalHeader::State::Done;
+#endif
+    Header& header = mInternalHeader.mHeader;
+    header.mDoneTimeStamp = TimeStamp::NowUnfuzzed();
+  }
+
+  // A "Done" chunk may be recycled, to avoid allocating a new one.
+  void MarkRecycled() {
+#ifdef DEBUG
+    // We also allow Created and already-Recycled chunks to be recycled, this
+    // way it's easier to recycle chunks when their state is not easily
+    // trackable.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Done ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+    mInternalHeader.mState = InternalHeader::State::Recycled;
+#endif
+    // Reset all header fields, in case this recycled chunk gets read.
+    mInternalHeader.mHeader.Reset();
+  }
+
+  // Public header, meant to uniquely identify a chunk, it may be shared with
+  // other processes to coordinate global memory handling.
+  struct Header {
+    // `aBufferBytes` is the fixed size of the buffer that follows the headers.
+    explicit Header(Length aBufferBytes) : mBufferBytes(aBufferBytes) {}
+
+    // Reset all members to their as-new values (apart from the buffer size,
+    // which cannot change), ready for re-use.
+    void Reset() {
+      mOffsetFirstBlock = 0;
+      mOffsetPastLastBlock = 0;
+      mDoneTimeStamp = TimeStamp{};
+      mBlockCount = 0;
+      mRangeStart = 0;
+      mProcessId = 0;
+    }
+
+    // Note: Part of the ordering of members below is to avoid unnecessary
+    // padding.
+
+    // Members managed by the ProfileBufferChunk.
+
+    // Offset of the first block (past the initial tail block, which may be 0).
+    Length mOffsetFirstBlock = 0;
+    // Offset past the last byte of the last reserved block.
+    // It may be past mBufferBytes when last block continues in the next
+    // ProfileBufferChunk. It may be before mBufferBytes if ProfileBufferChunk
+    // is marked "Done" before the end is reached.
+    Length mOffsetPastLastBlock = 0;
+    // Timestamp when buffer is "Done" (which happens when the last block is
+    // written). This will be used to find and discard the oldest
+    // ProfileBufferChunk.
+    TimeStamp mDoneTimeStamp;
+    // Number of bytes in the buffer, set once at construction time.
+    const Length mBufferBytes;
+    // Number of reserved blocks (including final one even if partial, but
+    // excluding initial tail).
+    Length mBlockCount = 0;
+
+    // Meta-data set by the user.
+
+    // Index of the first byte of this ProfileBufferChunk, relative to all
+    // Chunks for this process. Index 0 is reserved as nullptr-like index,
+    // mRangeStart should be set to a non-0 value before the first
+    // `ReserveBlock()`.
+    ProfileBufferIndex mRangeStart = 0;
+    // Process writing to this ProfileBufferChunk.
+    int mProcessId = 0;
+
+    // A bit of spare space (necessary here because of the alignment due to
+    // other members), may be later repurposed for extra data.
+    const int mPADDING = 0;
+  };
+
+  // Read-only access to the public header.
+  [[nodiscard]] const Header& ChunkHeader() const {
+    return mInternalHeader.mHeader;
+  }
+
+  // Size of the user-accessible buffer (headers excluded).
+  [[nodiscard]] Length BufferBytes() const {
+    return ChunkHeader().mBufferBytes;
+  }
+
+  // Total size of the chunk (buffer + header).
+  [[nodiscard]] Length ChunkBytes() const {
+    return static_cast<Length>(sizeof(InternalHeader)) + BufferBytes();
+  }
+
+  // Size of external resources, in this case all the following chunks.
+  [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    const ProfileBufferChunk* const next = GetNext();
+    return next ? next->SizeOfIncludingThis(aMallocSizeOf) : 0;
+  }
+
+  // Size of this chunk and all following ones.
+  [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    // Just in case `aMallocSizeOf` falls back on just `sizeof`, make sure we
+    // account for at least the actual Chunk requested allocation size.
+    return std::max<size_t>(aMallocSizeOf(this), ChunkBytes()) +
+           SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  // Number of buffer bytes between the end of the last block and the end of
+  // the buffer.
+  [[nodiscard]] Length RemainingBytes() const {
+    return BufferBytes() - OffsetPastLastBlock();
+  }
+
+  // See `Header::mOffsetFirstBlock`.
+  [[nodiscard]] Length OffsetFirstBlock() const {
+    return ChunkHeader().mOffsetFirstBlock;
+  }
+
+  // See `Header::mOffsetPastLastBlock`.
+  [[nodiscard]] Length OffsetPastLastBlock() const {
+    return ChunkHeader().mOffsetPastLastBlock;
+  }
+
+  // See `Header::mBlockCount`.
+  [[nodiscard]] Length BlockCount() const { return ChunkHeader().mBlockCount; }
+
+  // See `Header::mProcessId`.
+  [[nodiscard]] int ProcessId() const { return ChunkHeader().mProcessId; }
+
+  void SetProcessId(int aProcessId) {
+    mInternalHeader.mHeader.mProcessId = aProcessId;
+  }
+
+  // Global range index at the start of this Chunk.
+  [[nodiscard]] ProfileBufferIndex RangeStart() const {
+    return ChunkHeader().mRangeStart;
+  }
+
+  void SetRangeStart(ProfileBufferIndex aRangeStart) {
+    mInternalHeader.mHeader.mRangeStart = aRangeStart;
+  }
+
+  // Get a read-only Span to the buffer. It is up to the caller to decipher the
+  // contents, based on known offsets and the internal block structure.
+  [[nodiscard]] Span<const Byte> BufferSpan() const {
+    return Span<const Byte>(&mBuffer, BufferBytes());
+  }
+
+  // Read one byte of already-reserved data; `aOffset` must be before the end
+  // of the last reserved block (checked in DEBUG builds only).
+  [[nodiscard]] Byte ByteAt(Length aOffset) const {
+    MOZ_ASSERT(aOffset < OffsetPastLastBlock());
+    return *(&mBuffer + aOffset);
+  }
+
+  // Next chunk in the chain, or null. The chain keeps ownership.
+  [[nodiscard]] ProfileBufferChunk* GetNext() {
+    return mInternalHeader.mNext.get();
+  }
+  [[nodiscard]] const ProfileBufferChunk* GetNext() const {
+    return mInternalHeader.mNext.get();
+  }
+
+  // Detach and return all following chunks (may be null).
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> ReleaseNext() {
+    return std::move(mInternalHeader.mNext);
+  }
+
+  // Insert the chain `aChunk` right after this chunk; the chunks that used to
+  // follow this one are appended to the end of `aChunk`. Null is a no-op.
+  void InsertNext(UniquePtr<ProfileBufferChunk>&& aChunk) {
+    if (!aChunk) {
+      return;
+    }
+    aChunk->SetLast(ReleaseNext());
+    mInternalHeader.mNext = std::move(aChunk);
+  }
+
+  // Find the last chunk in this chain (it may be `this`).
+  [[nodiscard]] ProfileBufferChunk* Last() {
+    ProfileBufferChunk* chunk = this;
+    for (;;) {
+      ProfileBufferChunk* next = chunk->GetNext();
+      if (!next) {
+        return chunk;
+      }
+      chunk = next;
+    }
+  }
+  [[nodiscard]] const ProfileBufferChunk* Last() const {
+    const ProfileBufferChunk* chunk = this;
+    for (;;) {
+      const ProfileBufferChunk* next = chunk->GetNext();
+      if (!next) {
+        return chunk;
+      }
+      chunk = next;
+    }
+  }
+
+  // Append the chain `aChunk` after the last chunk of this chain. Null is a
+  // no-op.
+  void SetLast(UniquePtr<ProfileBufferChunk>&& aChunk) {
+    if (!aChunk) {
+      return;
+    }
+    Last()->mInternalHeader.mNext = std::move(aChunk);
+  }
+
+  // Join two possibly-null chunk lists.
+  [[nodiscard]] static UniquePtr<ProfileBufferChunk> Join(
+      UniquePtr<ProfileBufferChunk>&& aFirst,
+      UniquePtr<ProfileBufferChunk>&& aLast) {
+    if (aFirst) {
+      aFirst->SetLast(std::move(aLast));
+      return std::move(aFirst);
+    }
+    return std::move(aLast);
+  }
+
+#ifdef DEBUG
+  // Print the chunk metadata on one line, followed by a hex + ASCII dump of
+  // the buffer bytes in [0, OffsetPastLastBlock()), `columns` bytes per row.
+  // `[` marks the first block offset, `]` the offset past the last block, and
+  // `#` is used when both are at the same byte.
+  void Dump(std::FILE* aFile = stdout) const {
+    // The range start is printed through `unsigned long long`, so that it is
+    // not truncated if the index type is wider than `unsigned`.
+    fprintf(aFile,
+            "Chunk[%p] chunkSize=%u bufferSize=%u state=%s rangeStart=%llu "
+            "firstBlockOffset=%u offsetPastLastBlock=%u blockCount=%u",
+            this, unsigned(ChunkBytes()), unsigned(BufferBytes()),
+            mInternalHeader.StateString(),
+            static_cast<unsigned long long>(RangeStart()),
+            unsigned(OffsetFirstBlock()), unsigned(OffsetPastLastBlock()),
+            unsigned(BlockCount()));
+    const auto len = OffsetPastLastBlock();
+    constexpr unsigned columns = 16;
+    // ASCII rendering of the current row, reset at the start of each row.
+    unsigned char ascii[columns + 1];
+    ascii[columns] = '\0';
+    for (Length i = 0; i < len; ++i) {
+      if (i % columns == 0) {
+        fprintf(aFile, "\n %4u=0x%03x:", unsigned(i), unsigned(i));
+        for (unsigned a = 0; a < columns; ++a) {
+          ascii[a] = ' ';
+        }
+      }
+      unsigned char sep = ' ';
+      if (i == OffsetFirstBlock()) {
+        if (i == OffsetPastLastBlock()) {
+          sep = '#';
+        } else {
+          sep = '[';
+        }
+      } else if (i == OffsetPastLastBlock()) {
+        sep = ']';
+      }
+      unsigned char c = *(&mBuffer + i);
+      fprintf(aFile, "%c%02x", sep, c);
+
+      if (i == len - 1) {
+        if (i + 1 == OffsetPastLastBlock()) {
+          // Special case when last block ends right at the end.
+          fprintf(aFile, "]");
+        } else {
+          fprintf(aFile, " ");
+        }
+      } else if (i % columns == columns - 1) {
+        fprintf(aFile, " ");
+      }
+
+      ascii[i % columns] = (c >= ' ' && c <= '~') ? c : '.';
+
+      if (i % columns == columns - 1) {
+        fprintf(aFile, " %s", ascii);
+      }
+    }
+
+    // Complete rows printed their ASCII column inside the loop. Only a
+    // trailing partial row (1 to columns-1 bytes) still needs padding and its
+    // ASCII column. The previous test (`< columns - 1`) skipped a 15-byte
+    // remainder, repeated the ASCII column after a complete last row, and
+    // printed the uninitialized `ascii` array for an empty chunk.
+    if (len % columns != 0) {
+      for (Length i = len % columns; i < columns; ++i) {
+        // One 3-character cell per missing byte, matching "%c%02x" above.
+        fprintf(aFile, "   ");
+      }
+      fprintf(aFile, " %s", ascii);
+    }
+
+    fprintf(aFile, "\n");
+  }
+#endif  // DEBUG
+
+ private:
+  // ProfileBufferChunk constructor. Use static `Create()` to allocate and
+  // construct a ProfileBufferChunk.
+  explicit ProfileBufferChunk(Length aBufferBytes)
+      : mInternalHeader(aBufferBytes) {}
+
+  // This internal header starts with the public `Header`, and adds some data
+  // only necessary for local handling.
+  // This encapsulation is also necessary to perform placement-new in
+  // `Create()`.
+  struct InternalHeader {
+    explicit InternalHeader(Length aBufferBytes) : mHeader(aBufferBytes) {}
+
+    Header mHeader;
+    // Owning link to the next chunk in the chain, or null.
+    UniquePtr<ProfileBufferChunk> mNext;
+
+#ifdef DEBUG
+    enum class State {
+      Created,  // Self-set. Just constructed, waiting for initial block tail.
+      InUse,    // Ready to accept blocks.
+      Full,     // Self-set. Blocks reach the end (or further).
+      Done,     // Blocks won't be added anymore.
+      Recycled  // Still full of data, but expecting an initial block tail.
+    };
+
+    State mState = State::Created;
+    // Transition table: (X=unexpected)
+    // Method \ State            Created   InUse       Full  Done      Recycled
+    // ReserveInitialBlockAsTail InUse     X           X     X         InUse
+    // ReserveBlock              X         InUse/Full  X     X         X
+    // MarkDone                  X         Done        Done  X         X
+    // MarkRecycled              Recycled  X           X     Recycled  Recycled
+    // destructor                ok        X           X     ok        ok
+
+    // Name of the current state, for `Dump()`.
+    const char* StateString() const {
+      switch (mState) {
+        case State::Created:
+          return "Created";
+        case State::InUse:
+          return "InUse";
+        case State::Full:
+          return "Full";
+        case State::Done:
+          return "Done";
+        case State::Recycled:
+          return "Recycled";
+        default:
+          return "?";
+      }
+    }
+#else  // DEBUG
+    const char* StateString() const { return "(non-DEBUG)"; }
+#endif
+  };
+
+  InternalHeader mInternalHeader;
+
+  // KEEP THIS LAST!
+  // First byte of the buffer. Note that ProfileBufferChunk::Create allocates a
+  // bigger block, such that `mBuffer` is the first of `mBufferBytes` available
+  // bytes.
+  // The initialization is not strictly needed, because bytes should only be
+  // read after they have been written and `mOffsetPastLastBlock` has been
+  // updated. However:
+  // - Reviewbot complains that it's not initialized.
+  // - It's cheap to initialize one byte.
+  // - In the worst case (reading does happen), zero is not a valid entry size
+  //   and should get caught in entry readers.
+  Byte mBuffer = '\0';
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunk_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
new file mode 100644
index 0000000000..e7f12bf21f
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
@@ -0,0 +1,134 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef ProfileBufferChunkManager_h
+#define ProfileBufferChunkManager_h
+
+#include "mozilla/ProfileBufferChunk.h"
+#include "mozilla/ScopeExit.h"
+
+#include <functional>
+
+namespace mozilla {
+
+// Manages the ProfileBufferChunks for this process.
+// The main user of this class is the buffer that needs chunks to store its
+// data.
+// The main ProfileBufferChunkManager responsibilities are:
+// - It can create new chunks, they are called "unreleased".
+// - Later these chunks are returned here, and become "released".
+// - The manager is free to destroy or recycle the oldest released chunks
+//   (usually to reclaim memory), and will inform the user through a provided
+//   callback.
+// - The user may access still-alive released chunks.
+class ProfileBufferChunkManager {
+ public:
+  // In DEBUG builds, the destructor checks that the user has deregistered
+  // (see `DeregisteredFrom()` below); otherwise it is defaulted.
+  virtual ~ProfileBufferChunkManager()
+#ifdef DEBUG
+  {
+    MOZ_ASSERT(!mUser, "Still registered when being destroyed");
+  }
+#else
+      = default;
+#endif
+
+  // Expected maximum size needed to store one stack sample.
+  // Most ChunkManager sub-classes will require chunk sizes, this can serve as
+  // a minimum recommendation to hold most backtraces.
+  constexpr static ProfileBufferChunk::Length scExpectedMaximumStackSize =
+      128 * 1024;
+
+  // Estimated maximum buffer size.
+  [[nodiscard]] virtual size_t MaxTotalSize() const = 0;
+
+  // Create or recycle a chunk right now. May return null in case of allocation
+  // failure.
+  // Note that the chunk-destroyed callback may be invoked during this call;
+  // user should be careful with reentrancy issues.
+  [[nodiscard]] virtual UniquePtr<ProfileBufferChunk> GetChunk() = 0;
+
+  // `aChunkReceiver` may be called with a new or recycled chunk, or nullptr.
+  // (See `FulfillChunkRequests()` regarding when the callback may happen.)
+  virtual void RequestChunk(
+      std::function<void(UniquePtr<ProfileBufferChunk>)>&& aChunkReceiver) = 0;
+
+  // This method may be invoked at any time on any thread (and not necessarily
+  // by the main user of this class), to do the work necessary to respond to a
+  // previous `RequestChunk()`.
+  // It is optional: If it is never called, or called too late, the user is
+  // responsible for directly calling `GetChunk()` when a chunk is really
+  // needed (or it should at least fail gracefully).
+  // The idea is to fulfill chunk requests on a separate thread, and most
+  // importantly outside of profiler calls, to avoid doing expensive memory
+  // allocations during these calls.
+  virtual void FulfillChunkRequests() = 0;
+
+  // One chunk is released by the user, the ProfileBufferChunkManager should
+  // keep it as long as possible (depending on local or global memory/time
+  // limits). Note that the chunk-destroyed callback may be invoked during this
+  // call; user should be careful with reentrancy issues.
+  virtual void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) = 0;
+
+  // `aChunkDestroyedCallback` will be called whenever the contents of a
+  // previously-released chunk are about to be destroyed or recycled.
+  // Note that it may be called during other functions above, or at other times
+  // from the same or other threads; user should be careful with reentrancy
+  // issues.
+  virtual void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&&
+          aChunkDestroyedCallback) = 0;
+
+  // Give away all released chunks that have not yet been destroyed.
+  [[nodiscard]] virtual UniquePtr<ProfileBufferChunk>
+  GetExtantReleasedChunks() = 0;
+
+  // Let a callback see all released chunks that have not yet been destroyed, if
+  // any. Return whatever the callback returns.
+  // The callback runs between `PeekExtantReleasedChunksAndLock()` and
+  // `UnlockAfterPeekExtantReleasedChunks()`; the latter is invoked by a scope
+  // guard when this function exits.
+  template <typename Callback>
+  [[nodiscard]] auto PeekExtantReleasedChunks(Callback&& aCallback) {
+    const ProfileBufferChunk* chunks = PeekExtantReleasedChunksAndLock();
+    auto unlock =
+        MakeScopeExit([&]() { UnlockAfterPeekExtantReleasedChunks(); });
+    return std::forward<Callback>(aCallback)(chunks);
+  }
+
+  // Chunks that were still unreleased will never be released.
+  virtual void ForgetUnreleasedChunks() = 0;
+
+  // Memory reporting, excluding/including the manager object itself.
+  [[nodiscard]] virtual size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const = 0;
+  [[nodiscard]] virtual size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const = 0;
+
+ protected:
+  // Derived classes to implement `PeekExtantReleasedChunks` through these:
+  virtual const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() = 0;
+  virtual void UnlockAfterPeekExtantReleasedChunks() = 0;
+
+#ifdef DEBUG
+ public:
+  // DEBUG checks ensuring that this manager and its users avoid UAFs.
+  // Derived classes should assert that mUser is not null in their functions.
+
+  // Record the (single, non-null) user of this manager.
+  void RegisteredWith(const void* aUser) {
+    MOZ_ASSERT(!mUser);
+    MOZ_ASSERT(aUser);
+    mUser = aUser;
+  }
+
+  // Forget the user; it must be the one that was registered.
+  void DeregisteredFrom(const void* aUser) {
+    MOZ_ASSERT(mUser == aUser);
+    mUser = nullptr;
+  }
+
+ protected:
+  // Currently-registered user, or null. Only used as an identity token here.
+  const void* mUser = nullptr;
+#endif  // DEBUG
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunkManager_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
new file mode 100644
index 0000000000..c91b38cbdb
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
@@ -0,0 +1,172 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef ProfileBufferChunkManagerSingle_h
+#define ProfileBufferChunkManagerSingle_h
+
+#include "mozilla/ProfileBufferChunkManager.h"
+
+#ifdef DEBUG
+#  include "mozilla/Atomics.h"
+#endif  // DEBUG
+
+namespace mozilla {
+
+// Chunk manager that handles a single Chunk.
+// The first `GetChunk()`/`RequestChunk()` call hands out that one chunk; every
+// later call yields nullptr. The chunk may still be released back here, but it
+// is never destroyed or recycled by the manager.
+// Unlike other managers, this one may be `Reset()`, to allow another round of
+// small-data gathering.
+// The main use is with short-lived ProfileChunkedBuffers that collect little
+// data that can fit in one chunk, e.g., capturing one stack.
+// It is not thread-safe.
+class ProfileBufferChunkManagerSingle final : public ProfileBufferChunkManager {
+ public:
+  using Length = ProfileBufferChunk::Length;
+
+  // Manage a preallocated chunk. Null is accepted (to gracefully handle OOM),
+  // in which case there is nothing to hand out.
+  explicit ProfileBufferChunkManagerSingle(UniquePtr<ProfileBufferChunk> aChunk)
+      : mInitialChunk(std::move(aChunk)),
+        mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {
+    MOZ_ASSERT(!mInitialChunk || !mInitialChunk->GetNext(),
+               "Expected at most one chunk");
+  }
+
+  // Allocate the managed chunk here.
+  // aChunkMinBufferBytes: Minimum number of user-available bytes in the Chunk.
+  // Note that Chunks use a bit more memory for their header.
+  explicit ProfileBufferChunkManagerSingle(Length aChunkMinBufferBytes)
+      : mInitialChunk(ProfileBufferChunk::Create(aChunkMinBufferBytes)),
+        mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {}
+
+#ifdef DEBUG
+  ~ProfileBufferChunkManagerSingle() { MOZ_ASSERT(mVirtuallyLocked == false); }
+#endif  // DEBUG
+
+  // Make this manager ready for another round. `aPossibleChunk` (probably
+  // coming from the ProfileChunkedBuffer that just used it) becomes the chunk
+  // to hand out; if it is null, the current initial chunk is kept, or else the
+  // released chunk is reused.
+  void Reset(UniquePtr<ProfileBufferChunk> aPossibleChunk) {
+    if (!aPossibleChunk) {
+      if (!mInitialChunk) {
+        MOZ_ASSERT(!!mReleasedChunk, "Can't reset properly!");
+        mInitialChunk = std::move(mReleasedChunk);
+      }
+    } else {
+      mInitialChunk = std::move(aPossibleChunk);
+      mReleasedChunk = nullptr;
+    }
+
+    if (!mInitialChunk) {
+      // No chunk at all: nothing can be handed out.
+      mBufferBytes = 0;
+      return;
+    }
+    mInitialChunk->MarkRecycled();
+    mBufferBytes = mInitialChunk->BufferBytes();
+  }
+
+  // Buffer size of the one managed chunk (0 if there is none).
+  [[nodiscard]] size_t MaxTotalSize() const final { return mBufferBytes; }
+
+  // Only the very first call to either `GetChunk` or `RequestChunk` can
+  // provide the chunk (if there is one at all); later calls give null.
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return std::move(mInitialChunk);
+  }
+
+  void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&&
+                        aChunkReceiver) final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // The request is served immediately, with whatever `GetChunk()` gives.
+    UniquePtr<ProfileBufferChunk> chunk = GetChunk();
+    std::move(aChunkReceiver)(std::move(chunk));
+  }
+
+  void FulfillChunkRequests() final {
+    // Requests are served directly in `RequestChunk()`, nothing is pending.
+  }
+
+  // Keep the (single) released chunk; null is ignored.
+  void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    if (aChunk) {
+      MOZ_ASSERT(!mReleasedChunk, "Unexpected 2nd released chunk");
+      MOZ_ASSERT(!aChunk->GetNext(), "Only expected one released chunk");
+      mReleasedChunk = std::move(aChunk);
+    }
+  }
+
+  void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback)
+      final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // This manager never invokes the chunk-destroyed callback, but it is
+    // stored anyway, in case the caller expects it to live as long as this
+    // manager.
+    mChunkDestroyedCallback = std::move(aChunkDestroyedCallback);
+  }
+
+  // Give away the released chunk, if any.
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return std::move(mReleasedChunk);
+  }
+
+  // Unreleased chunks are not tracked here, so there is nothing to forget.
+  void ForgetUnreleasedChunks() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+  }
+
+  // Memory taken by the chunks currently held here (initial and/or released).
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    const auto sizeOfChunk =
+        [&](const UniquePtr<ProfileBufferChunk>& aHeldChunk) -> size_t {
+      return aHeldChunk ? aHeldChunk->SizeOfIncludingThis(aMallocSizeOf) : 0;
+    };
+    size_t total = sizeOfChunk(mInitialChunk);
+    total += sizeOfChunk(mReleasedChunk);
+    // Note: Missing size of std::function external resources (if any).
+    return total;
+  }
+
+  [[nodiscard]] size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+ protected:
+  // This manager is not thread-safe, so no real lock is taken; DEBUG builds
+  // only check that lock and unlock calls are correctly paired.
+  const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final {
+    MOZ_ASSERT(mVirtuallyLocked.compareExchange(false, true));
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return mReleasedChunk.get();
+  }
+  void UnlockAfterPeekExtantReleasedChunks() final {
+    MOZ_ASSERT(mVirtuallyLocked.compareExchange(true, false));
+  }
+
+ private:
+  // The chunk to hand out, given away at the first Get/Request.
+  UniquePtr<ProfileBufferChunk> mInitialChunk;
+
+  // Storage for the released chunk (which should probably not happen, as it
+  // means the chunk is full).
+  UniquePtr<ProfileBufferChunk> mReleasedChunk;
+
+  // Buffer size of the one managed chunk. Kept here because the chunk itself
+  // may have been moved out, and so be inaccessible from this manager.
+  Length mBufferBytes;
+
+  // The chunk-destroyed callback will never actually be called, but we keep it
+  // here in case the caller expects it to live as long as this manager.
+ std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback; + +#ifdef DEBUG + mutable Atomic<bool> mVirtuallyLocked{false}; +#endif // DEBUG +}; + +} // namespace mozilla + +#endif // ProfileBufferChunkManagerSingle_h diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h new file mode 100644 index 0000000000..5b1af6d66c --- /dev/null +++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h @@ -0,0 +1,428 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfileBufferChunkManagerWithLocalLimit_h +#define ProfileBufferChunkManagerWithLocalLimit_h + +#include "BaseProfiler.h" +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/ProfileBufferChunkManager.h" +#include "mozilla/ProfileBufferControlledChunkManager.h" + +#include <utility> + +namespace mozilla { + +// Manages the Chunks for this process in a thread-safe manner, with a maximum +// size per process. +// +// "Unreleased" chunks are not owned here, only "released" chunks can be +// destroyed or recycled when reaching the memory limit, so it is theoretically +// possible to break that limit, if: +// - The user of this class doesn't release their chunks, AND/OR +// - The limit is too small (e.g., smaller than 2 or 3 chunks, which should be +// the usual number of unreleased chunks in flight). +// In this case, it just means that we will use more memory than allowed, +// potentially risking OOMs. Hopefully this shouldn't happen in real code, +// assuming that the user is doing the right thing and releasing chunks ASAP, +// and that the memory limit is reasonably large. 
+class ProfileBufferChunkManagerWithLocalLimit final + : public ProfileBufferChunkManager, + public ProfileBufferControlledChunkManager { + public: + using Length = ProfileBufferChunk::Length; + + // MaxTotalBytes: Maximum number of bytes allocated in all local Chunks. + // ChunkMinBufferBytes: Minimum number of user-available bytes in each Chunk. + // Note that Chunks use a bit more memory for their header. + explicit ProfileBufferChunkManagerWithLocalLimit(size_t aMaxTotalBytes, + Length aChunkMinBufferBytes) + : mMaxTotalBytes(aMaxTotalBytes), + mChunkMinBufferBytes(aChunkMinBufferBytes) {} + + ~ProfileBufferChunkManagerWithLocalLimit() { + if (mUpdateCallback) { + // Signal the end of this callback. + std::move(mUpdateCallback)(Update(nullptr)); + } + } + + [[nodiscard]] size_t MaxTotalSize() const final { + // `mMaxTotalBytes` is `const` so there is no need to lock the mutex. + return mMaxTotalBytes; + } + + [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final { + AUTO_PROFILER_STATS(Local_GetChunk); + + ChunkAndUpdate chunkAndUpdate = [&]() { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + return GetChunk(lock); + }(); + + baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex); + if (mUpdateCallback && !chunkAndUpdate.second.IsNotUpdate()) { + mUpdateCallback(std::move(chunkAndUpdate.second)); + } + + return std::move(chunkAndUpdate.first); + } + + void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&& + aChunkReceiver) final { + AUTO_PROFILER_STATS(Local_RequestChunk); + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + if (mChunkReceiver) { + // We already have a chunk receiver, meaning a request is pending. + return; + } + // Store the chunk receiver. This indicates that a request is pending, and + // it will be handled in the next `FulfillChunkRequests()` call. 
+ mChunkReceiver = std::move(aChunkReceiver); + } + + void FulfillChunkRequests() final { + AUTO_PROFILER_STATS(Local_FulfillChunkRequests); + std::function<void(UniquePtr<ProfileBufferChunk>)> chunkReceiver; + ChunkAndUpdate chunkAndUpdate = [&]() -> ChunkAndUpdate { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + if (!mChunkReceiver) { + // No receiver means no pending request, we're done. + return {}; + } + // Otherwise there is a request, extract the receiver to call below. + std::swap(chunkReceiver, mChunkReceiver); + MOZ_ASSERT(!mChunkReceiver, "mChunkReceiver should have been emptied"); + // And allocate the requested chunk. This may fail, it's fine, we're + // letting the receiver know about it. + AUTO_PROFILER_STATS(Local_FulfillChunkRequests_GetChunk); + return GetChunk(lock); + }(); + + if (chunkReceiver) { + { + baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex); + if (mUpdateCallback && !chunkAndUpdate.second.IsNotUpdate()) { + mUpdateCallback(std::move(chunkAndUpdate.second)); + } + } + + // Invoke callback outside of lock, so that it can use other chunk manager + // functions if needed. + // Note that this means there could be a race, where another request + // happens now and even gets fulfilled before this one is! It should be + // rare, and shouldn't be a problem anyway, the user will still get their + // requested chunks, new/recycled chunks look the same so their order + // doesn't matter. 
+ std::move(chunkReceiver)(std::move(chunkAndUpdate.first)); + } + } + + void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final { + if (!aChunk) { + return; + } + + MOZ_RELEASE_ASSERT(!aChunk->GetNext(), "ReleaseChunk only accepts 1 chunk"); + MOZ_RELEASE_ASSERT(!aChunk->ChunkHeader().mDoneTimeStamp.IsNull(), + "Released chunk should have a 'Done' timestamp"); + + Update update = [&]() { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + MOZ_ASSERT(mUser, "Not registered yet"); + // Keep a pointer to the first newly-released chunk, so we can use it to + // prepare an update (after `aChunk` is moved-from). + const ProfileBufferChunk* const newlyReleasedChunk = aChunk.get(); + // Transfer the chunk size from the unreleased bucket to the released one. + mUnreleasedBufferBytes -= aChunk->BufferBytes(); + mReleasedBufferBytes += aChunk->BufferBytes(); + if (!mReleasedChunks) { + // No other released chunks at the moment, we're starting the list. + MOZ_ASSERT(mReleasedBufferBytes == aChunk->BufferBytes()); + mReleasedChunks = std::move(aChunk); + } else { + // Insert aChunk in mReleasedChunks to keep done-timestamp order. + const TimeStamp& releasedChunkDoneTimeStamp = + aChunk->ChunkHeader().mDoneTimeStamp; + if (releasedChunkDoneTimeStamp < + mReleasedChunks->ChunkHeader().mDoneTimeStamp) { + // aChunk is the oldest -> Insert at the beginning. + aChunk->SetLast(std::move(mReleasedChunks)); + mReleasedChunks = std::move(aChunk); + } else { + // Go through the already-released chunk list, and insert aChunk + // before the first younger released chunk, or at the end. + ProfileBufferChunk* chunk = mReleasedChunks.get(); + for (;;) { + ProfileBufferChunk* const nextChunk = chunk->GetNext(); + if (!nextChunk || releasedChunkDoneTimeStamp < + nextChunk->ChunkHeader().mDoneTimeStamp) { + // Either we're at the last released chunk, or the next released + // chunk is younger -> Insert right after this released chunk. 
+ chunk->InsertNext(std::move(aChunk)); + break; + } + chunk = nextChunk; + } + } + } + + return Update(mUnreleasedBufferBytes, mReleasedBufferBytes, + mReleasedChunks.get(), newlyReleasedChunk); + }(); + + baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex); + if (mUpdateCallback && !update.IsNotUpdate()) { + mUpdateCallback(std::move(update)); + } + } + + void SetChunkDestroyedCallback( + std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback) + final { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + MOZ_ASSERT(mUser, "Not registered yet"); + mChunkDestroyedCallback = std::move(aChunkDestroyedCallback); + } + + [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final { + UniquePtr<ProfileBufferChunk> chunks; + size_t unreleasedBufferBytes = [&]() { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + MOZ_ASSERT(mUser, "Not registered yet"); + mReleasedBufferBytes = 0; + chunks = std::move(mReleasedChunks); + return mUnreleasedBufferBytes; + }(); + + baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex); + if (mUpdateCallback) { + mUpdateCallback(Update(unreleasedBufferBytes, 0, nullptr, nullptr)); + } + + return chunks; + } + + void ForgetUnreleasedChunks() final { + Update update = [&]() { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + MOZ_ASSERT(mUser, "Not registered yet"); + mUnreleasedBufferBytes = 0; + return Update(0, mReleasedBufferBytes, mReleasedChunks.get(), nullptr); + }(); + baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex); + if (mUpdateCallback) { + mUpdateCallback(std::move(update)); + } + } + + [[nodiscard]] size_t SizeOfExcludingThis( + MallocSizeOf aMallocSizeOf) const final { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + return SizeOfExcludingThis(aMallocSizeOf, lock); + } + + [[nodiscard]] size_t SizeOfIncludingThis( + MallocSizeOf aMallocSizeOf) const final { + baseprofiler::detail::BaseProfilerAutoLock 
lock(mMutex); + MOZ_ASSERT(mUser, "Not registered yet"); + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf, lock); + } + + void SetUpdateCallback(UpdateCallback&& aUpdateCallback) final { + { + baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex); + if (mUpdateCallback) { + // Signal the end of the previous callback. + std::move(mUpdateCallback)(Update(nullptr)); + mUpdateCallback = nullptr; + } + } + + if (aUpdateCallback) { + Update initialUpdate = [&]() { + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + return Update(mUnreleasedBufferBytes, mReleasedBufferBytes, + mReleasedChunks.get(), nullptr); + }(); + + baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex); + MOZ_ASSERT(!mUpdateCallback, "Only one update callback allowed"); + mUpdateCallback = std::move(aUpdateCallback); + mUpdateCallback(std::move(initialUpdate)); + } + } + + void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) final { + MOZ_ASSERT(!aDoneTimeStamp.IsNull()); + baseprofiler::detail::BaseProfilerAutoLock lock(mMutex); + for (;;) { + if (!mReleasedChunks) { + // We don't own any released chunks (anymore), we're done. + break; + } + if (mReleasedChunks->ChunkHeader().mDoneTimeStamp > aDoneTimeStamp) { + // The current chunk is strictly after the given timestamp, we're done. + break; + } + // We've found a chunk at or before the timestamp, discard it. + DiscardOldestReleasedChunk(lock); + } + } + + protected: + const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final { + mMutex.Lock(); + MOZ_ASSERT(mUser, "Not registered yet"); + return mReleasedChunks.get(); + } + void UnlockAfterPeekExtantReleasedChunks() final { mMutex.Unlock(); } + + private: + void MaybeRecycleChunk( + UniquePtr<ProfileBufferChunk>&& chunk, + const baseprofiler::detail::BaseProfilerAutoLock& aLock) { + // Try to recycle big-enough chunks. 
(All chunks should have the same size, + // but it's a cheap test and may allow future adjustments based on actual + // data rate.) + if (chunk->BufferBytes() >= mChunkMinBufferBytes) { + // We keep up to two recycled chunks at any time. + if (!mRecycledChunks) { + mRecycledChunks = std::move(chunk); + } else if (!mRecycledChunks->GetNext()) { + mRecycledChunks->InsertNext(std::move(chunk)); + } + } + } + + UniquePtr<ProfileBufferChunk> TakeRecycledChunk( + const baseprofiler::detail::BaseProfilerAutoLock& aLock) { + UniquePtr<ProfileBufferChunk> recycled; + if (mRecycledChunks) { + recycled = std::exchange(mRecycledChunks, mRecycledChunks->ReleaseNext()); + recycled->MarkRecycled(); + } + return recycled; + } + + void DiscardOldestReleasedChunk( + const baseprofiler::detail::BaseProfilerAutoLock& aLock) { + MOZ_ASSERT(!!mReleasedChunks); + UniquePtr<ProfileBufferChunk> oldest = + std::exchange(mReleasedChunks, mReleasedChunks->ReleaseNext()); + mReleasedBufferBytes -= oldest->BufferBytes(); + if (mChunkDestroyedCallback) { + // Inform the user that we're going to destroy this chunk. + mChunkDestroyedCallback(*oldest); + } + MaybeRecycleChunk(std::move(oldest), aLock); + } + + using ChunkAndUpdate = std::pair<UniquePtr<ProfileBufferChunk>, Update>; + [[nodiscard]] ChunkAndUpdate GetChunk( + const baseprofiler::detail::BaseProfilerAutoLock& aLock) { + MOZ_ASSERT(mUser, "Not registered yet"); + // After this function, the total memory consumption will be the sum of: + // - Bytes from released (i.e., full) chunks, + // - Bytes from unreleased (still in use) chunks, + // - Bytes from the chunk we want to create/recycle. (Note that we don't + // count the extra bytes of chunk header, and of extra allocation ability, + // for the new chunk, as it's assumed to be negligible compared to the + // total memory limit.) 
+ // If this total is higher than the local limit, we'll want to destroy + // the oldest released chunks until we're under the limit; if any, we may + // recycle one of them to avoid a deallocation followed by an allocation. + while (mReleasedBufferBytes + mUnreleasedBufferBytes + + mChunkMinBufferBytes >= + mMaxTotalBytes && + !!mReleasedChunks) { + // We have reached the local limit, discard the oldest released chunk. + DiscardOldestReleasedChunk(aLock); + } + + // Extract the recycled chunk, if any. + ChunkAndUpdate chunkAndUpdate{TakeRecycledChunk(aLock), Update()}; + UniquePtr<ProfileBufferChunk>& chunk = chunkAndUpdate.first; + + if (!chunk) { + // No recycled chunk -> Create a chunk now. (This could still fail.) + chunk = ProfileBufferChunk::Create(mChunkMinBufferBytes); + } + + if (chunk) { + // We do have a chunk (recycled or new), record its size as "unreleased". + mUnreleasedBufferBytes += chunk->BufferBytes(); + + chunkAndUpdate.second = + Update(mUnreleasedBufferBytes, mReleasedBufferBytes, + mReleasedChunks.get(), nullptr); + } + + return chunkAndUpdate; + } + + [[nodiscard]] size_t SizeOfExcludingThis( + MallocSizeOf aMallocSizeOf, + const baseprofiler::detail::BaseProfilerAutoLock&) const { + MOZ_ASSERT(mUser, "Not registered yet"); + size_t size = 0; + if (mReleasedChunks) { + size += mReleasedChunks->SizeOfIncludingThis(aMallocSizeOf); + } + if (mRecycledChunks) { + size += mRecycledChunks->SizeOfIncludingThis(aMallocSizeOf); + } + // Note: Missing size of std::function external resources (if any). + return size; + } + + // Maximum number of bytes that should be used by all unreleased and released + // chunks. Note that only released chunks can be destroyed here, so it is the + // responsibility of the user to properly release their chunks when possible. + const size_t mMaxTotalBytes; + + // Minimum number of bytes that new chunks should be able to store. + // Used when calling `ProfileBufferChunk::Create()`.
+ const Length mChunkMinBufferBytes; + + // Mutex guarding the following members. + mutable baseprofiler::detail::BaseProfilerMutex mMutex; + + // Number of bytes currently held in chunks that have been given away (through + // `GetChunk` or `RequestChunk`) and not released yet. + size_t mUnreleasedBufferBytes = 0; + + // Number of bytes currently held in chunks that have been released and stored + // in `mReleasedChunks` below. + size_t mReleasedBufferBytes = 0; + + // List of all released chunks. The oldest one should be at the start of the + // list, and may be destroyed or recycled when the memory limit is reached. + UniquePtr<ProfileBufferChunk> mReleasedChunks; + + // This may hold chunks that were released then slated for destruction, they + // will be reused next time an allocation would have been needed. + UniquePtr<ProfileBufferChunk> mRecycledChunks; + + // Optional callback used to notify the user when a chunk is about to be + // destroyed or recycled. (The data content is always destroyed, but the chunk + // container may be reused.) + std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback; + + // Callback set from `RequestChunk()`, until it is serviced in + // `FulfillChunkRequests()`. There can only be one request in flight. + std::function<void(UniquePtr<ProfileBufferChunk>)> mChunkReceiver; + + // Separate mutex guarding mUpdateCallback, so that it may be invoked outside + // of the main buffer `mMutex`. 
+ mutable baseprofiler::detail::BaseProfilerMutex mUpdateCallbackMutex; + + UpdateCallback mUpdateCallback; +}; + +} // namespace mozilla + +#endif // ProfileBufferChunkManagerWithLocalLimit_h diff --git a/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h new file mode 100644 index 0000000000..45b39b163c --- /dev/null +++ b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h @@ -0,0 +1,203 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfileBufferControlledChunkManager_h +#define ProfileBufferControlledChunkManager_h + +#include "mozilla/ProfileBufferChunk.h" + +#include <functional> +#include <vector> + +namespace mozilla { + +// A "Controlled" chunk manager will provide updates about chunks that it +// creates, releases, and destroys; and it can destroy released chunks as +// requested. +class ProfileBufferControlledChunkManager { + public: + using Length = ProfileBufferChunk::Length; + + virtual ~ProfileBufferControlledChunkManager() = default; + + // Minimum amount of chunk metadata to be transferred between processes. + struct ChunkMetadata { + // Timestamp when chunk was marked "done", which is used to: + // - determine its age, so the oldest one will be destroyed first, + // - uniquely identify this chunk in this process. (The parent process is + // responsible for associating this timestamp to its process id.) + TimeStamp mDoneTimeStamp; + // Size of this chunk's buffer. 
+ Length mBufferBytes; + + ChunkMetadata(TimeStamp aDoneTimeStamp, Length aBufferBytes) + : mDoneTimeStamp(aDoneTimeStamp), mBufferBytes(aBufferBytes) {} + }; + + // Class collecting all information necessary to describe updates that + // happened in a chunk manager. + // An update can be folded into a previous update. + class Update { + public: + // Construct a "not-an-Update" object, which should only be used after a + // real update is folded into it. + Update() = default; + + // Construct a "final" Update, which marks the end of all updates from a + // chunk manager. + explicit Update(decltype(nullptr)) : mUnreleasedBytes(FINAL) {} + + // Construct an Update from the given data and released chunks. + // The chunk pointers may be null, and it doesn't matter if + // `aNewlyReleasedChunks` is already linked to `aExistingReleasedChunks` or + // not. + Update(size_t aUnreleasedBytes, size_t aReleasedBytes, + const ProfileBufferChunk* aExistingReleasedChunks, + const ProfileBufferChunk* aNewlyReleasedChunks) + : mUnreleasedBytes(aUnreleasedBytes), + mReleasedBytes(aReleasedBytes), + mOldestDoneTimeStamp( + aExistingReleasedChunks + ? aExistingReleasedChunks->ChunkHeader().mDoneTimeStamp + : TimeStamp{}) { + MOZ_RELEASE_ASSERT( + !IsNotUpdate(), + "Empty update should only be constructed with default constructor"); + MOZ_RELEASE_ASSERT( + !IsFinal(), + "Final update should only be constructed with nullptr constructor"); + for (const ProfileBufferChunk* chunk = aNewlyReleasedChunks; chunk; + chunk = chunk->GetNext()) { + mNewlyReleasedChunks.emplace_back(ChunkMetadata{ + chunk->ChunkHeader().mDoneTimeStamp, chunk->BufferBytes()}); + } + } + + // Construct an Update from raw data. + // This may be used to re-construct an Update that was previously + // serialized. 
+ Update(size_t aUnreleasedBytes, size_t aReleasedBytes, + TimeStamp aOldestDoneTimeStamp, + std::vector<ChunkMetadata>&& aNewlyReleasedChunks) + : mUnreleasedBytes(aUnreleasedBytes), + mReleasedBytes(aReleasedBytes), + mOldestDoneTimeStamp(aOldestDoneTimeStamp), + mNewlyReleasedChunks(std::move(aNewlyReleasedChunks)) {} + + // Clear the Update completely and return it to a "not-an-Update" state. + void Clear() { + mUnreleasedBytes = NO_UPDATE; + mReleasedBytes = 0; + mOldestDoneTimeStamp = TimeStamp{}; + mNewlyReleasedChunks.clear(); + } + + bool IsNotUpdate() const { return mUnreleasedBytes == NO_UPDATE; } + + bool IsFinal() const { return mUnreleasedBytes == FINAL; } + + size_t UnreleasedBytes() const { + MOZ_RELEASE_ASSERT(!IsNotUpdate(), + "Cannot access UnreleasedBytes from empty update"); + MOZ_RELEASE_ASSERT(!IsFinal(), + "Cannot access UnreleasedBytes from final update"); + return mUnreleasedBytes; + } + + size_t ReleasedBytes() const { + MOZ_RELEASE_ASSERT(!IsNotUpdate(), + "Cannot access ReleasedBytes from empty update"); + MOZ_RELEASE_ASSERT(!IsFinal(), + "Cannot access ReleasedBytes from final update"); + return mReleasedBytes; + } + + TimeStamp OldestDoneTimeStamp() const { + MOZ_RELEASE_ASSERT(!IsNotUpdate(), + "Cannot access OldestDoneTimeStamp from empty update"); + MOZ_RELEASE_ASSERT(!IsFinal(), + "Cannot access OldestDoneTimeStamp from final update"); + return mOldestDoneTimeStamp; + } + + const std::vector<ChunkMetadata>& NewlyReleasedChunksRef() const { + MOZ_RELEASE_ASSERT( + !IsNotUpdate(), + "Cannot access NewlyReleasedChunksRef from empty update"); + MOZ_RELEASE_ASSERT( + !IsFinal(), "Cannot access NewlyReleasedChunksRef from final update"); + return mNewlyReleasedChunks; + } + + // Fold a later update into this one. 
+ void Fold(Update&& aNewUpdate) { + MOZ_ASSERT( + !IsFinal() || aNewUpdate.IsFinal(), + "There shouldn't be another non-final update after the final update"); + + if (IsNotUpdate() || aNewUpdate.IsFinal()) { + // We were empty, or the new update is the final update, we just switch + // to that new update. + *this = std::move(aNewUpdate); + return; + } + + mUnreleasedBytes = aNewUpdate.mUnreleasedBytes; + mReleasedBytes = aNewUpdate.mReleasedBytes; + if (!aNewUpdate.mOldestDoneTimeStamp.IsNull()) { + MOZ_ASSERT(mOldestDoneTimeStamp.IsNull() || + mOldestDoneTimeStamp <= aNewUpdate.mOldestDoneTimeStamp); + mOldestDoneTimeStamp = aNewUpdate.mOldestDoneTimeStamp; + auto it = mNewlyReleasedChunks.begin(); + while (it != mNewlyReleasedChunks.end() && + it->mDoneTimeStamp < mOldestDoneTimeStamp) { + it = mNewlyReleasedChunks.erase(it); + } + } + if (!aNewUpdate.mNewlyReleasedChunks.empty()) { + mNewlyReleasedChunks.reserve(mNewlyReleasedChunks.size() + + aNewUpdate.mNewlyReleasedChunks.size()); + mNewlyReleasedChunks.insert(mNewlyReleasedChunks.end(), + aNewUpdate.mNewlyReleasedChunks.begin(), + aNewUpdate.mNewlyReleasedChunks.end()); + } + } + + private: + static const size_t NO_UPDATE = size_t(-1); + static const size_t FINAL = size_t(-2); + + size_t mUnreleasedBytes = NO_UPDATE; + size_t mReleasedBytes = 0; + TimeStamp mOldestDoneTimeStamp; + std::vector<ChunkMetadata> mNewlyReleasedChunks; + }; + + using UpdateCallback = std::function<void(Update&&)>; + + // This *may* be set (or reset) by an object that needs to know about all + // chunk updates that happen in this manager. The main use will be to + // coordinate the global memory usage of Firefox. + // If a non-empty callback is given, it will be immediately invoked with the + // current state. + // When the callback is about to be destroyed (by overwriting it here, or in + // the class destructor), it will be invoked one last time with a final + // update (see `Update::IsFinal()`).
+ // Note that the callback (even the first current-state callback) will be + // invoked from inside a locked scope, so it should *not* call other functions + // of the chunk manager. A side benefit of this locking is that it guarantees + // that no two invocations can overlap. + virtual void SetUpdateCallback(UpdateCallback&& aUpdateCallback) = 0; + + // This is a request to destroy all chunks at or before the given timestamp. + // This timestamp should be one that was given in a previous UpdateCallback + // call. Obviously, only released chunks can be destroyed. + virtual void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) = 0; +}; + +} // namespace mozilla + +#endif // ProfileBufferControlledChunkManager_h diff --git a/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h new file mode 100644 index 0000000000..c8280a92d7 --- /dev/null +++ b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h @@ -0,0 +1,94 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfileBufferEntryKinds_h +#define ProfileBufferEntryKinds_h + +#include <cstdint> + +namespace mozilla { + +// This is equal to sizeof(double), which is the largest non-char variant in +// |u|. +static constexpr size_t ProfileBufferEntryNumChars = 8; + +// NOTE! If you add entries, you need to verify if they need to be added to the +// switch statement in DuplicateLastSample!
+// This will evaluate the MACRO with (KIND, TYPE, SIZE) +#define FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(MACRO) \ + MACRO(CategoryPair, int, sizeof(int)) \ + MACRO(CollectionStart, double, sizeof(double)) \ + MACRO(CollectionEnd, double, sizeof(double)) \ + MACRO(Label, const char*, sizeof(const char*)) \ + MACRO(FrameFlags, uint64_t, sizeof(uint64_t)) \ + MACRO(DynamicStringFragment, char*, ProfileBufferEntryNumChars) \ + MACRO(JitReturnAddr, void*, sizeof(void*)) \ + MACRO(InnerWindowID, uint64_t, sizeof(uint64_t)) \ + MACRO(LineNumber, int, sizeof(int)) \ + MACRO(ColumnNumber, int, sizeof(int)) \ + MACRO(NativeLeafAddr, void*, sizeof(void*)) \ + MACRO(Pause, double, sizeof(double)) \ + MACRO(Resume, double, sizeof(double)) \ + MACRO(PauseSampling, double, sizeof(double)) \ + MACRO(ResumeSampling, double, sizeof(double)) \ + MACRO(Responsiveness, double, sizeof(double)) \ + MACRO(ThreadId, int, sizeof(int)) \ + MACRO(Time, double, sizeof(double)) \ + MACRO(TimeBeforeCompactStack, double, sizeof(double)) \ + MACRO(CounterId, void*, sizeof(void*)) \ + MACRO(CounterKey, uint64_t, sizeof(uint64_t)) \ + MACRO(Number, uint64_t, sizeof(uint64_t)) \ + MACRO(Count, int64_t, sizeof(int64_t)) \ + MACRO(ProfilerOverheadTime, double, sizeof(double)) \ + MACRO(ProfilerOverheadDuration, double, sizeof(double)) + +// The `Kind` is a single byte identifying the type of data that is actually +// stored in a `ProfileBufferEntry`, as per the list in +// `FOR_EACH_PROFILE_BUFFER_ENTRY_KIND`. +// +// This byte is also used to identify entries in ProfileChunkedBuffer blocks, +// for both "legacy" entries that do contain a `ProfileBufferEntry`, and for +// new types of entries that may carry more data of different types. 
+// TODO: Eventually each type of "legacy" entry should be replaced with newer, +// more efficient kinds of entries (e.g., stack frames could be stored in one +// bigger entry, instead of multiple `ProfileBufferEntry`s); then we could +// discard `ProfileBufferEntry` and move this enum to a more appropriate spot. +using ProfileBufferEntryKindUnderlyingType = uint8_t; + +enum class ProfileBufferEntryKind : ProfileBufferEntryKindUnderlyingType { + INVALID = 0, +#define KIND(KIND, TYPE, SIZE) KIND, + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(KIND) +#undef KIND + + // Any value under `LEGACY_LIMIT` represents a `ProfileBufferEntry`. + LEGACY_LIMIT, + + // Any value starting here does *not* represent a `ProfileBufferEntry` and + // requires separate decoding and handling. + + // Markers and their data. + Marker = LEGACY_LIMIT, + + // Entry with "running times", such as CPU usage measurements. + // Optional between TimeBeforeCompactStack and CompactStack. + RunningTimes, + + // Optional between TimeBeforeCompactStack and CompactStack. + UnresponsiveDurationMs, + + // Collection of legacy stack entries, must follow a ThreadId and + // TimeBeforeCompactStack (which are not included in the CompactStack; + // TimeBeforeCompactStack is equivalent to Time, but indicates that a + // CompactStack follows shortly afterwards). + CompactStack, + + MODERN_LIMIT +}; + +} // namespace mozilla + +#endif // ProfileBufferEntryKinds_h diff --git a/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h new file mode 100644 index 0000000000..267b99f10d --- /dev/null +++ b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h @@ -0,0 +1,1166 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfileBufferEntrySerialization_h +#define ProfileBufferEntrySerialization_h + +#include "mozilla/Assertions.h" +#include "mozilla/leb128iterator.h" +#include "mozilla/Likely.h" +#include "mozilla/Maybe.h" +#include "mozilla/ProfileBufferIndex.h" +#include "mozilla/Span.h" +#include "mozilla/Tuple.h" +#include "mozilla/UniquePtrExtensions.h" +#include "mozilla/Unused.h" +#include "mozilla/Variant.h" + +#include <string> +#include <tuple> + +namespace mozilla { + +class ProfileBufferEntryWriter; + +// Iterator-like class used to read from an entry. +// An entry may be split in two memory segments (e.g., the ends of a ring +// buffer, or two chunks of a chunked buffer); it doesn't deal with this +// underlying buffer, but only with one or two spans pointing at the space +// where the entry lives. +class ProfileBufferEntryReader { + public: + using Byte = uint8_t; + using Length = uint32_t; + + using SpanOfConstBytes = Span<const Byte>; + + // Class to be specialized for types to be read from a profile buffer entry. + // See common specializations at the bottom of this header. + // The following static functions must be provided: + // static void ReadInto(EntryReader& aER, T& aT) + // { + // /* Call `aER.ReadX(...)` function to deserialize into aT, be sure to + // read exactly `Bytes(aT)`! */ + // } + // static T Read(EntryReader& aER) { + // /* Call `aER.ReadX(...)` function to deserialize and return a `T`, be + // sure to read exactly `Bytes(returned value)`! */ + // } + template <typename T> + struct Deserializer; + + ProfileBufferEntryReader() = default; + + // Reader over one Span.
+ ProfileBufferEntryReader(SpanOfConstBytes aSpan, + ProfileBufferBlockIndex aCurrentBlockIndex, + ProfileBufferBlockIndex aNextBlockIndex) + : mCurrentSpan(aSpan), + mNextSpanOrEmpty(aSpan.Last(0)), + mCurrentBlockIndex(aCurrentBlockIndex), + mNextBlockIndex(aNextBlockIndex) { + // 2nd internal Span points at the end of the 1st internal Span, to enforce + // invariants. + CheckInvariants(); + } + + // Reader over two Spans, the second one must not be empty. + ProfileBufferEntryReader(SpanOfConstBytes aSpanHead, + SpanOfConstBytes aSpanTail, + ProfileBufferBlockIndex aCurrentBlockIndex, + ProfileBufferBlockIndex aNextBlockIndex) + : mCurrentSpan(aSpanHead), + mNextSpanOrEmpty(aSpanTail), + mCurrentBlockIndex(aCurrentBlockIndex), + mNextBlockIndex(aNextBlockIndex) { + MOZ_RELEASE_ASSERT(!mNextSpanOrEmpty.IsEmpty()); + if (MOZ_UNLIKELY(mCurrentSpan.IsEmpty())) { + // First span is already empty, skip it. + mCurrentSpan = mNextSpanOrEmpty; + mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0); + } + CheckInvariants(); + } + + // Allow copying, which is needed when used as an iterator in some std + // functions (e.g., string assignment), and to occasionally backtrack. + // Be aware that the main profile buffer APIs give a reference to an entry + // reader, and expect that reader to advance to the end of the entry, so don't + // just advance copies! + ProfileBufferEntryReader(const ProfileBufferEntryReader&) = default; + ProfileBufferEntryReader& operator=(const ProfileBufferEntryReader&) = + default; + + // Don't =default moving, as it doesn't bring any benefit in this class. 
+ + [[nodiscard]] Length RemainingBytes() const { + return mCurrentSpan.LengthBytes() + mNextSpanOrEmpty.LengthBytes(); + } + + void SetRemainingBytes(Length aBytes) { + MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes()); + if (aBytes <= mCurrentSpan.LengthBytes()) { + mCurrentSpan = mCurrentSpan.First(aBytes); + mNextSpanOrEmpty = mCurrentSpan.Last(0); + } else { + mNextSpanOrEmpty = + mNextSpanOrEmpty.First(aBytes - mCurrentSpan.LengthBytes()); + } + } + + [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const { + return mCurrentBlockIndex; + } + + [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const { + return mNextBlockIndex; + } + + // Create a reader of size zero, pointing at aOffset past the current position + // of this Reader, so it can be used as end iterator. + [[nodiscard]] ProfileBufferEntryReader EmptyIteratorAtOffset( + Length aOffset) const { + MOZ_RELEASE_ASSERT(aOffset <= RemainingBytes()); + if (MOZ_LIKELY(aOffset < mCurrentSpan.LengthBytes())) { + // aOffset is before the end of mCurrentSpan. + return ProfileBufferEntryReader(mCurrentSpan.Subspan(aOffset, 0), + mCurrentBlockIndex, mNextBlockIndex); + } + // aOffset is right at the end of mCurrentSpan, or inside mNextSpanOrEmpty. + return ProfileBufferEntryReader( + mNextSpanOrEmpty.Subspan(aOffset - mCurrentSpan.LengthBytes(), 0), + mCurrentBlockIndex, mNextBlockIndex); + } + + // Be like a limited input iterator, with only `*`, prefix-`++`, `==`, `!=`. + // These definitions are expected by std functions, to recognize this as an + // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits + using difference_type = std::make_signed_t<Length>; + using value_type = Byte; + using pointer = const Byte*; + using reference = const Byte&; + using iterator_category = std::input_iterator_tag; + + [[nodiscard]] const Byte& operator*() { + // Assume the caller will read from the returned reference (and not just + // take the address). 
+ MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1); + return *(mCurrentSpan.Elements()); + } + + ProfileBufferEntryReader& operator++() { + MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1); + if (MOZ_LIKELY(mCurrentSpan.LengthBytes() > 1)) { + // More than 1 byte left in mCurrentSpan, just eat it. + mCurrentSpan = mCurrentSpan.From(1); + } else { + // mCurrentSpan will be empty, move mNextSpanOrEmpty to mCurrentSpan. + mCurrentSpan = mNextSpanOrEmpty; + mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0); + } + CheckInvariants(); + return *this; + } + + ProfileBufferEntryReader& operator+=(Length aBytes) { + MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes()); + if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) { + // All bytes are in mCurrentSpan. + // Update mCurrentSpan past the read bytes. + mCurrentSpan = mCurrentSpan.From(aBytes); + if (mCurrentSpan.IsEmpty() && !mNextSpanOrEmpty.IsEmpty()) { + // Don't leave mCurrentSpan empty, move non-empty mNextSpanOrEmpty into + // mCurrentSpan. + mCurrentSpan = mNextSpanOrEmpty; + mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0); + } + } else { + // mCurrentSpan does not hold enough bytes. + // This should only happen at most once: Only for double spans, and when + // data crosses the gap. + const Length tail = + aBytes - static_cast<Length>(mCurrentSpan.LengthBytes()); + // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call + // will go back to the true case above. + mCurrentSpan = mNextSpanOrEmpty.From(tail); + mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0); + } + CheckInvariants(); + return *this; + } + + [[nodiscard]] bool operator==(const ProfileBufferEntryReader& aOther) const { + return mCurrentSpan.Elements() == aOther.mCurrentSpan.Elements(); + } + [[nodiscard]] bool operator!=(const ProfileBufferEntryReader& aOther) const { + return mCurrentSpan.Elements() != aOther.mCurrentSpan.Elements(); + } + + // Read an unsigned LEB128 number and move iterator ahead. 
+ template <typename T> + [[nodiscard]] T ReadULEB128() { + return ::mozilla::ReadULEB128<T>(*this); + } + + // Read a sequence of bytes, like memcpy. + void ReadBytes(void* aDest, Length aBytes) { + MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes()); + if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) { + // All bytes are in mCurrentSpan. + memcpy(aDest, mCurrentSpan.Elements(), aBytes); + // Update mCurrentSpan past the read bytes. + mCurrentSpan = mCurrentSpan.From(aBytes); + if (mCurrentSpan.IsEmpty() && !mNextSpanOrEmpty.IsEmpty()) { + // Don't leave mCurrentSpan empty, move non-empty mNextSpanOrEmpty into + // mCurrentSpan. + mCurrentSpan = mNextSpanOrEmpty; + mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0); + } + } else { + // mCurrentSpan does not hold enough bytes. + // This should only happen at most once: Only for double spans, and when + // data crosses the gap. + // Split data between the end of mCurrentSpan and the beginning of + // mNextSpanOrEmpty. + memcpy(aDest, mCurrentSpan.Elements(), mCurrentSpan.LengthBytes()); + const Length tail = + aBytes - static_cast<Length>(mCurrentSpan.LengthBytes()); + memcpy(reinterpret_cast<Byte*>(aDest) + mCurrentSpan.LengthBytes(), + mNextSpanOrEmpty.Elements(), tail); + // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call + // will go back to the true case above. + mCurrentSpan = mNextSpanOrEmpty.From(tail); + mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0); + } + CheckInvariants(); + } + + template <typename T> + void ReadIntoObject(T& aObject) { + Deserializer<T>::ReadInto(*this, aObject); + } + + // Read into one or more objects, sequentially. + // `EntryReader::ReadIntoObjects()` with nothing is implicitly allowed, this + // could be useful for generic programming. + template <typename... Ts> + void ReadIntoObjects(Ts&... aTs) { + (ReadIntoObject(aTs), ...); + } + + // Read data as an object and move iterator ahead. 
template <typename T>
[[nodiscard]] T ReadObject() {
  T ob = Deserializer<T>::Read(*this);
  return ob;
}

private:
// The writer reads both spans directly in `WriteFromReader()`.
friend class ProfileBufferEntryWriter;

// Invariants:
// - mCurrentSpan cannot be empty unless mNextSpanOrEmpty is also empty. So
//   mCurrentSpan always points at the next byte to read or the end.
// - If mNextSpanOrEmpty is empty, it points at the end of mCurrentSpan. So
//   when reaching the end of mCurrentSpan, we can blindly move
//   mNextSpanOrEmpty to mCurrentSpan and keep the invariants.
SpanOfConstBytes mCurrentSpan;
SpanOfConstBytes mNextSpanOrEmpty;
ProfileBufferBlockIndex mCurrentBlockIndex;
ProfileBufferBlockIndex mNextBlockIndex;

// Assert the two invariants listed above.
void CheckInvariants() const {
  MOZ_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
  MOZ_ASSERT(!mNextSpanOrEmpty.IsEmpty() ||
             (mNextSpanOrEmpty == mCurrentSpan.Last(0)));
}
};

// Iterator-like class used to write into an entry.
// An entry may be split in two memory segments (e.g., the ends of a ring
// buffer, or two chunks of a chunked buffer); it doesn't deal with this
// underlying buffer, but only with one or two spans pointing at the space
// reserved for the entry.
class ProfileBufferEntryWriter {
 public:
  using Byte = uint8_t;
  using Length = uint32_t;

  using SpanOfBytes = Span<Byte>;

  // Class to be specialized for types to be written in an entry.
  // See common specializations at the bottom of this header.
  // The following static functions must be provided:
  //   static Length Bytes(const T& aT) {
  //     /* Return number of bytes that will be written. */
  //   }
  //   static void Write(ProfileBufferEntryWriter& aEW,
  //                     const T& aT) {
  //     /* Call `aEW.WriteX(...)` functions to serialize aT, be sure to write
  //        exactly `Bytes(aT)` bytes! */
  //   }
  template <typename T>
  struct Serializer;

  // Default writer: all members take their default-constructed values.
  ProfileBufferEntryWriter() = default;

  // Writer over a single span.
  ProfileBufferEntryWriter(SpanOfBytes aSpan,
                           ProfileBufferBlockIndex aCurrentBlockIndex,
                           ProfileBufferBlockIndex aNextBlockIndex)
      : mCurrentSpan(aSpan),
        mCurrentBlockIndex(aCurrentBlockIndex),
        mNextBlockIndex(aNextBlockIndex) {}

  // Writer over two spans, written in order: `aSpanHead` then `aSpanTail`.
  ProfileBufferEntryWriter(SpanOfBytes aSpanHead, SpanOfBytes aSpanTail,
                           ProfileBufferBlockIndex aCurrentBlockIndex,
                           ProfileBufferBlockIndex aNextBlockIndex)
      : mCurrentSpan(aSpanHead),
        mNextSpanOrEmpty(aSpanTail),
        mCurrentBlockIndex(aCurrentBlockIndex),
        mNextBlockIndex(aNextBlockIndex) {
    // Either:
    // - mCurrentSpan is not empty, OR
    // - mCurrentSpan is empty, in which case mNextSpanOrEmpty must be empty
    //   as well.
    MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
  }

  // Disable copying and moving, so we can't have multiple writing heads.
  ProfileBufferEntryWriter(const ProfileBufferEntryWriter&) = delete;
  ProfileBufferEntryWriter& operator=(const ProfileBufferEntryWriter&) = delete;
  ProfileBufferEntryWriter(ProfileBufferEntryWriter&&) = delete;
  ProfileBufferEntryWriter& operator=(ProfileBufferEntryWriter&&) = delete;

  // Reset to an empty writer: both spans empty, both block indices null.
  void Set() {
    mCurrentSpan = SpanOfBytes{};
    mNextSpanOrEmpty = SpanOfBytes{};
    mCurrentBlockIndex = nullptr;
    mNextBlockIndex = nullptr;
  }

  // Re-target this writer at a single span.
  void Set(SpanOfBytes aSpan, ProfileBufferBlockIndex aCurrentBlockIndex,
           ProfileBufferBlockIndex aNextBlockIndex) {
    mCurrentSpan = aSpan;
    mNextSpanOrEmpty = SpanOfBytes{};
    mCurrentBlockIndex = aCurrentBlockIndex;
    mNextBlockIndex = aNextBlockIndex;
  }

  // Re-target this writer at two spans, written in order: `aSpan0` then
  // `aSpan1`.
  void Set(SpanOfBytes aSpan0, SpanOfBytes aSpan1,
           ProfileBufferBlockIndex aCurrentBlockIndex,
           ProfileBufferBlockIndex aNextBlockIndex) {
    mCurrentSpan = aSpan0;
    mNextSpanOrEmpty = aSpan1;
    mCurrentBlockIndex = aCurrentBlockIndex;
    mNextBlockIndex = aNextBlockIndex;
    // Either:
    // - mCurrentSpan is not empty, OR
    // - mCurrentSpan is empty, in which case mNextSpanOrEmpty must be empty
    //   as well.
    MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
  }

  // Number of bytes that can still be written, across both spans.
  [[nodiscard]] Length RemainingBytes() const {
    return mCurrentSpan.LengthBytes() + mNextSpanOrEmpty.LengthBytes();
  }

  [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
    return mCurrentBlockIndex;
  }

  [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
    return mNextBlockIndex;
  }

  // Be like a limited output iterator, with only `*` and prefix-`++`.
  // These definitions are expected by std functions, to recognize this as an
  // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
  using value_type = Byte;
  using pointer = Byte*;
  using reference = Byte&;
  using iterator_category = std::output_iterator_tag;

  // Reference to the next byte to write. Unlike the reader, mCurrentSpan may
  // be empty while mNextSpanOrEmpty still has room, so fall back to the
  // latter in that case.
  [[nodiscard]] Byte& operator*() {
    MOZ_RELEASE_ASSERT(RemainingBytes() >= 1);
    return *(
        (MOZ_LIKELY(!mCurrentSpan.IsEmpty()) ? mCurrentSpan : mNextSpanOrEmpty)
            .Elements());
  }

  // Prefix increment: advance the write position by one byte.
  ProfileBufferEntryWriter& operator++() {
    if (MOZ_LIKELY(mCurrentSpan.LengthBytes() >= 1)) {
      // There is at least 1 byte in mCurrentSpan, eat it.
      mCurrentSpan = mCurrentSpan.From(1);
    } else {
      // mCurrentSpan is empty, move mNextSpanOrEmpty (past the first byte) to
      // mCurrentSpan.
      MOZ_RELEASE_ASSERT(mNextSpanOrEmpty.LengthBytes() >= 1);
      mCurrentSpan = mNextSpanOrEmpty.From(1);
      mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
    }
    return *this;
  }

  // Advance the write position by `aBytes` without writing anything.
  ProfileBufferEntryWriter& operator+=(Length aBytes) {
    // Note: This is a rare operation. The code below is a copy of `WriteBytes`
    // but without the `memcpy`s.
    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
      // Data fits in mCurrentSpan.
      // Update mCurrentSpan. It may become empty, so in case of a double span,
      // the next call will go to the false case below.
      mCurrentSpan = mCurrentSpan.From(aBytes);
    } else {
      // Data does not fully fit in mCurrentSpan.
      // This should only happen at most once: Only for double spans, and when
      // data crosses the gap or starts there.
      const Length tail =
          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
      // will go back to the true case above.
      mCurrentSpan = mNextSpanOrEmpty.From(tail);
      mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
    }
    return *this;
  }

  // Number of bytes needed to represent `aValue` in unsigned LEB128.
  template <typename T>
  [[nodiscard]] static unsigned ULEB128Size(T aValue) {
    return ::mozilla::ULEB128Size(aValue);
  }

  // Write number as unsigned LEB128 and move iterator ahead.
  template <typename T>
  void WriteULEB128(T aValue) {
    ::mozilla::WriteULEB128(aValue, *this);
  }

  // Number of bytes needed to serialize objects.
  // This is the sum of `Serializer<T>::Bytes()` over all arguments; it is 0
  // when called with no arguments.
  template <typename... Ts>
  [[nodiscard]] static Length SumBytes(const Ts&... aTs) {
    return (0 + ... + Serializer<Ts>::Bytes(aTs));
  }

  // Write a sequence of bytes, like memcpy.
  // Release-asserts that `aBytes` fits in the remaining space.
  void WriteBytes(const void* aSrc, Length aBytes) {
    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
      // Data fits in mCurrentSpan.
      memcpy(mCurrentSpan.Elements(), aSrc, aBytes);
      // Update mCurrentSpan. It may become empty, so in case of a double span,
      // the next call will go to the false case below.
      mCurrentSpan = mCurrentSpan.From(aBytes);
    } else {
      // Data does not fully fit in mCurrentSpan.
      // This should only happen at most once: Only for double spans, and when
      // data crosses the gap or starts there.
      // Split data between the end of mCurrentSpan and the beginning of
      // mNextSpanOrEmpty. (mCurrentSpan could be empty, it's ok to do a memcpy
      // because Span::Elements() is never null.)
      memcpy(mCurrentSpan.Elements(), aSrc, mCurrentSpan.LengthBytes());
      const Length tail =
          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
      memcpy(mNextSpanOrEmpty.Elements(),
             reinterpret_cast<const Byte*>(aSrc) + mCurrentSpan.LengthBytes(),
             tail);
      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
      // will go back to the true case above.
      mCurrentSpan = mNextSpanOrEmpty.From(tail);
      mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
    }
  }

  // Copy `aBytes` bytes from `aReader` into this writer, and advance both.
  // The reader's data may itself be split over its two spans, so it is copied
  // in up to two pieces.
  void WriteFromReader(ProfileBufferEntryReader& aReader, Length aBytes) {
    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
    MOZ_RELEASE_ASSERT(aBytes <= aReader.RemainingBytes());
    // Part that comes from the reader's current span.
    Length read0 = std::min(
        aBytes, static_cast<Length>(aReader.mCurrentSpan.LengthBytes()));
    if (read0 != 0) {
      WriteBytes(aReader.mCurrentSpan.Elements(), read0);
    }
    // Remainder, if any, comes from the start of the reader's next span.
    Length read1 = aBytes - read0;
    if (read1 != 0) {
      WriteBytes(aReader.mNextSpanOrEmpty.Elements(), read1);
    }
    // The spans were read directly above, so move the reader explicitly.
    aReader += aBytes;
  }

  // Write a single object by using the appropriate Serializer.
  template <typename T>
  void WriteObject(const T& aObject) {
    Serializer<T>::Write(*this, aObject);
  }

  // Write one or more objects, sequentially.
  // Allow `ProfileBufferEntryWriter::WriteObjects()` with nothing, this could
  // be useful for generic programming.
  template <typename... Ts>
  void WriteObjects(const Ts&... aTs) {
    (WriteObject(aTs), ...);
  }

 private:
  // The two spans covering the memory still to be written.
  SpanOfBytes mCurrentSpan;
  SpanOfBytes mNextSpanOrEmpty;
  ProfileBufferBlockIndex mCurrentBlockIndex;
  ProfileBufferBlockIndex mNextBlockIndex;
};

// ============================================================================
// Serializer and Deserializer ready-to-use specializations.

// ----------------------------------------------------------------------------
// Trivially-copyable types (default)

// The default implementation works for all trivially-copyable types (e.g.,
// PODs).
+// +// Usage: `aEW.WriteObject(123);`. +// +// Raw pointers, though trivially-copyable, are explictly forbidden when writing +// (to avoid unexpected leaks/UAFs), instead use one of +// `WrapProfileBufferLiteralCStringPointer`, `WrapProfileBufferUnownedCString`, +// or `WrapProfileBufferRawPointer` as needed. +template <typename T> +struct ProfileBufferEntryWriter::Serializer { + static_assert(std::is_trivially_copyable<T>::value, + "Serializer only works with trivially-copyable types by " + "default, use/add specialization for other types."); + + static constexpr Length Bytes(const T&) { return sizeof(T); } + + static void Write(ProfileBufferEntryWriter& aEW, const T& aT) { + static_assert(!std::is_pointer<T>::value, + "Serializer won't write raw pointers by default, use " + "WrapProfileBufferRawPointer or other."); + aEW.WriteBytes(&aT, sizeof(T)); + } +}; + +// Usage: `aER.ReadObject<int>();` or `int x; aER.ReadIntoObject(x);`. +template <typename T> +struct ProfileBufferEntryReader::Deserializer { + static_assert(std::is_trivially_copyable<T>::value, + "Deserializer only works with trivially-copyable types by " + "default, use/add specialization for other types."); + + static void ReadInto(ProfileBufferEntryReader& aER, T& aT) { + aER.ReadBytes(&aT, sizeof(T)); + } + + static T Read(ProfileBufferEntryReader& aER) { + // Note that this creates a default `T` first, and then overwrites it with + // bytes from the buffer. Trivially-copyable types support this without UB. + T ob; + ReadInto(aER, ob); + return ob; + } +}; + +// ---------------------------------------------------------------------------- +// Strip const/volatile/reference from types. + +// Automatically strip `const`. 
+template <typename T> +struct ProfileBufferEntryWriter::Serializer<const T> + : public ProfileBufferEntryWriter::Serializer<T> {}; + +template <typename T> +struct ProfileBufferEntryReader::Deserializer<const T> + : public ProfileBufferEntryReader::Deserializer<T> {}; + +// Automatically strip `volatile`. +template <typename T> +struct ProfileBufferEntryWriter::Serializer<volatile T> + : public ProfileBufferEntryWriter::Serializer<T> {}; + +template <typename T> +struct ProfileBufferEntryReader::Deserializer<volatile T> + : public ProfileBufferEntryReader::Deserializer<T> {}; + +// Automatically strip `lvalue-reference`. +template <typename T> +struct ProfileBufferEntryWriter::Serializer<T&> + : public ProfileBufferEntryWriter::Serializer<T> {}; + +template <typename T> +struct ProfileBufferEntryReader::Deserializer<T&> + : public ProfileBufferEntryReader::Deserializer<T> {}; + +// Automatically strip `rvalue-reference`. +template <typename T> +struct ProfileBufferEntryWriter::Serializer<T&&> + : public ProfileBufferEntryWriter::Serializer<T> {}; + +template <typename T> +struct ProfileBufferEntryReader::Deserializer<T&&> + : public ProfileBufferEntryReader::Deserializer<T> {}; + +// ---------------------------------------------------------------------------- +// ProfileBufferBlockIndex + +// ProfileBufferBlockIndex, serialized as the underlying value. 
+template <> +struct ProfileBufferEntryWriter::Serializer<ProfileBufferBlockIndex> { + static constexpr Length Bytes(const ProfileBufferBlockIndex& aBlockIndex) { + return sizeof(ProfileBufferBlockIndex); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const ProfileBufferBlockIndex& aBlockIndex) { + aEW.WriteBytes(&aBlockIndex, sizeof(aBlockIndex)); + } +}; + +template <> +struct ProfileBufferEntryReader::Deserializer<ProfileBufferBlockIndex> { + static void ReadInto(ProfileBufferEntryReader& aER, + ProfileBufferBlockIndex& aBlockIndex) { + aER.ReadBytes(&aBlockIndex, sizeof(aBlockIndex)); + } + + static ProfileBufferBlockIndex Read(ProfileBufferEntryReader& aER) { + ProfileBufferBlockIndex blockIndex; + ReadInto(aER, blockIndex); + return blockIndex; + } +}; + +// ---------------------------------------------------------------------------- +// Literal C string pointer + +// Wrapper around a pointer to a literal C string. +template <size_t NonTerminalCharacters> +struct ProfileBufferLiteralCStringPointer { + const char* mCString; +}; + +// Wrap a pointer to a literal C string. +template <size_t CharactersIncludingTerminal> +ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal - 1> +WrapProfileBufferLiteralCStringPointer( + const char (&aCString)[CharactersIncludingTerminal]) { + return {aCString}; +} + +// Literal C strings, serialized as the raw pointer because it is unique and +// valid for the whole program lifetime. +// +// Usage: `aEW.WriteObject(WrapProfileBufferLiteralCStringPointer("hi"));`. 
+// +// No deserializer is provided for this type, instead it must be deserialized as +// a raw pointer: `aER.ReadObject<const char*>();` +template <size_t CharactersIncludingTerminal> +struct ProfileBufferEntryReader::Deserializer< + ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>> { + static constexpr Length Bytes( + const ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>&) { + // We're only storing a pointer, its size is independent from the pointer + // value. + return sizeof(const char*); + } + + static void Write( + ProfileBufferEntryWriter& aEW, + const ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>& + aWrapper) { + // Write the pointer *value*, not the string contents. + aEW.WriteBytes(aWrapper.mCString, sizeof(aWrapper.mCString)); + } +}; + +// ---------------------------------------------------------------------------- +// C string contents + +// Wrapper around a pointer to a C string whose contents will be serialized. +struct ProfileBufferUnownedCString { + const char* mCString; +}; + +// Wrap a pointer to a C string whose contents will be serialized. +inline ProfileBufferUnownedCString WrapProfileBufferUnownedCString( + const char* aCString) { + return {aCString}; +} + +// The contents of a (probably) unowned C string are serialized as the number of +// characters (encoded as ULEB128) and all the characters in the string. The +// terminal '\0' is omitted. +// +// Usage: `aEW.WriteObject(WrapProfileBufferUnownedCString(str.c_str()))`. 
+// +// No deserializer is provided for this pointer type, instead it must be +// deserialized as one of the other string types that manages its contents, +// e.g.: `aER.ReadObject<std::string>();` +template <> +struct ProfileBufferEntryWriter::Serializer<ProfileBufferUnownedCString> { + static Length Bytes(const ProfileBufferUnownedCString& aS) { + const auto len = strlen(aS.mCString); + return ULEB128Size(len) + len; + } + + static void Write(ProfileBufferEntryWriter& aEW, + const ProfileBufferUnownedCString& aS) { + const auto len = strlen(aS.mCString); + aEW.WriteULEB128(len); + aEW.WriteBytes(aS.mCString, len); + } +}; + +// ---------------------------------------------------------------------------- +// Raw pointers + +// Wrapper around a pointer to be serialized as the raw pointer value. +template <typename T> +struct ProfileBufferRawPointer { + T* mRawPointer; +}; + +// Wrap a pointer to be serialized as the raw pointer value. +template <typename T> +ProfileBufferRawPointer<T> WrapProfileBufferRawPointer(T* aRawPointer) { + return {aRawPointer}; +} + +// Raw pointers are serialized as the raw pointer value. +// +// Usage: `aEW.WriteObject(WrapProfileBufferRawPointer(ptr));` +// +// The wrapper is compulsory when writing pointers (to avoid unexpected +// leaks/UAFs), but reading can be done straight into a raw pointer object, +// e.g.: `aER.ReadObject<Foo*>;`. +template <typename T> +struct ProfileBufferEntryWriter::Serializer<ProfileBufferRawPointer<T>> { + template <typename U> + static constexpr Length Bytes(const U&) { + return sizeof(T*); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const ProfileBufferRawPointer<T>& aWrapper) { + aEW.WriteBytes(&aWrapper.mRawPointer, sizeof(aWrapper.mRawPointer)); + } +}; + +// Usage: `aER.ReadObject<Foo*>;` or `Foo* p; aER.ReadIntoObject(p);`, no +// wrapper necessary. 
+template <typename T> +struct ProfileBufferEntryReader::Deserializer<ProfileBufferRawPointer<T>> { + static void ReadInto(ProfileBufferEntryReader& aER, + ProfileBufferRawPointer<T>& aPtr) { + aER.ReadBytes(&aPtr.mRawPointer, sizeof(aPtr)); + } + + static ProfileBufferRawPointer<T> Read(ProfileBufferEntryReader& aER) { + ProfileBufferRawPointer<T> rawPointer; + ReadInto(aER, rawPointer); + return rawPointer; + } +}; + +// ---------------------------------------------------------------------------- +// std::string contents + +// std::string contents are serialized as the number of characters (encoded as +// ULEB128) and all the characters in the string. The terminal '\0' is omitted. +// +// Usage: `std::string s = ...; aEW.WriteObject(s);` +template <typename CHAR> +struct ProfileBufferEntryWriter::Serializer<std::basic_string<CHAR>> { + static Length Bytes(const std::basic_string<CHAR>& aS) { + const Length len = static_cast<Length>(aS.length()); + return ULEB128Size(len) + len; + } + + static void Write(ProfileBufferEntryWriter& aEW, + const std::basic_string<CHAR>& aS) { + const Length len = static_cast<Length>(aS.length()); + aEW.WriteULEB128(len); + aEW.WriteBytes(aS.c_str(), len * sizeof(CHAR)); + } +}; + +// Usage: `std::string s = aEW.ReadObject<std::string>(s);` or +// `std::string s; aER.ReadIntoObject(s);` +template <typename CHAR> +struct ProfileBufferEntryReader::Deserializer<std::basic_string<CHAR>> { + static void ReadCharsInto(ProfileBufferEntryReader& aER, + std::basic_string<CHAR>& aS, size_t aLength) { + // Assign to `aS` by using iterators. + // (`aER+0` so we get the same iterator type as `aER+len`.) 
+ aS.assign(aER, aER.EmptyIteratorAtOffset(aLength)); + aER += aLength; + } + + static void ReadInto(ProfileBufferEntryReader& aER, + std::basic_string<CHAR>& aS) { + ReadCharsInto( + aER, aS, + aER.ReadULEB128<typename std::basic_string<CHAR>::size_type>()); + } + + static std::basic_string<CHAR> ReadChars(ProfileBufferEntryReader& aER, + size_t aLength) { + // Construct a string by using iterators. + // (`aER+0` so we get the same iterator type as `aER+len`.) + std::basic_string<CHAR> s(aER, aER.EmptyIteratorAtOffset(aLength)); + aER += aLength; + return s; + } + + static std::basic_string<CHAR> Read(ProfileBufferEntryReader& aER) { + return ReadChars( + aER, aER.ReadULEB128<typename std::basic_string<CHAR>::size_type>()); + } +}; + +// ---------------------------------------------------------------------------- +// mozilla::UniqueFreePtr<CHAR> + +// UniqueFreePtr<CHAR>, which points at a string allocated with `malloc` +// (typically generated by `strdup()`), is serialized as the number of +// *bytes* (encoded as ULEB128) and all the characters in the string. The +// null terminator is omitted. +// `CHAR` can be any type that has a specialization for +// `std::char_traits<CHAR>::length(const CHAR*)`. +// +// Note: A nullptr pointer will be serialized like an empty string, so when +// deserializing it will result in an allocated buffer only containing a +// single null terminator. +template <typename CHAR> +struct ProfileBufferEntryWriter::Serializer<UniqueFreePtr<CHAR>> { + static Length Bytes(const UniqueFreePtr<CHAR>& aS) { + if (!aS) { + // Null pointer, store it as if it was an empty string (so: 0 bytes). + return ULEB128Size(0u); + } + // Note that we store the size in *bytes*, not in number of characters. 
+ const auto bytes = std::char_traits<CHAR>::length(aS.get()) * sizeof(CHAR); + return ULEB128Size(bytes) + bytes; + } + + static void Write(ProfileBufferEntryWriter& aEW, + const UniqueFreePtr<CHAR>& aS) { + if (!aS) { + // Null pointer, store it as if it was an empty string (so we write a + // length of 0 bytes). + aEW.WriteULEB128(0u); + return; + } + // Note that we store the size in *bytes*, not in number of characters. + const auto bytes = std::char_traits<CHAR>::length(aS.get()) * sizeof(CHAR); + aEW.WriteULEB128(bytes); + aEW.WriteBytes(aS.get(), bytes); + } +}; + +template <typename CHAR> +struct ProfileBufferEntryReader::Deserializer<UniqueFreePtr<CHAR>> { + static void ReadInto(ProfileBufferEntryReader& aER, UniqueFreePtr<CHAR>& aS) { + aS = Read(aER); + } + + static UniqueFreePtr<CHAR> Read(ProfileBufferEntryReader& aER) { + // Read the number of *bytes* that follow. + const auto bytes = aER.ReadULEB128<size_t>(); + // We need a buffer of the non-const character type. + using NC_CHAR = std::remove_const_t<CHAR>; + // We allocate the required number of bytes, plus one extra character for + // the null terminator. + NC_CHAR* buffer = static_cast<NC_CHAR*>(malloc(bytes + sizeof(NC_CHAR))); + // Copy the characters into the buffer. + aER.ReadBytes(buffer, bytes); + // And append a null terminator. + buffer[bytes / sizeof(NC_CHAR)] = NC_CHAR(0); + return UniqueFreePtr<CHAR>(buffer); + } +}; + +// ---------------------------------------------------------------------------- +// std::tuple + +// std::tuple is serialized as a sequence of each recursively-serialized item. +// +// This is equivalent to manually serializing each item, so reading/writing +// tuples is equivalent to reading/writing their elements in order, e.g.: +// ``` +// std::tuple<int, std::string> is = ...; +// aEW.WriteObject(is); // Write the tuple, equivalent to: +// aEW.WriteObject(/* int */ std::get<0>(is), /* string */ std::get<1>(is)); +// ... 
+// // Reading back can be done directly into a tuple: +// auto is = aER.ReadObject<std::tuple<int, std::string>>(); +// // Or each item could be read separately: +// auto i = aER.ReadObject<int>(); auto s = aER.ReadObject<std::string>(); +// ``` +template <typename... Ts> +struct ProfileBufferEntryWriter::Serializer<std::tuple<Ts...>> { + private: + template <size_t... Is> + static Length TupleBytes(const std::tuple<Ts...>& aTuple, + std::index_sequence<Is...>) { + return (0 + ... + SumBytes(std::get<Is>(aTuple))); + } + + template <size_t... Is> + static void TupleWrite(ProfileBufferEntryWriter& aEW, + const std::tuple<Ts...>& aTuple, + std::index_sequence<Is...>) { + (aEW.WriteObject(std::get<Is>(aTuple)), ...); + } + + public: + static Length Bytes(const std::tuple<Ts...>& aTuple) { + // Generate a 0..N-1 index pack, we'll add the sizes of each item. + return TupleBytes(aTuple, std::index_sequence_for<Ts...>()); + } + + static void Write(ProfileBufferEntryWriter& aEW, + const std::tuple<Ts...>& aTuple) { + // Generate a 0..N-1 index pack, we'll write each item. + TupleWrite(aEW, aTuple, std::index_sequence_for<Ts...>()); + } +}; + +template <typename... Ts> +struct ProfileBufferEntryReader::Deserializer<std::tuple<Ts...>> { + static void ReadInto(ProfileBufferEntryReader& aER, + std::tuple<Ts...>& aTuple) { + aER.ReadBytes(&aTuple, Bytes(aTuple)); + } + + static std::tuple<Ts...> Read(ProfileBufferEntryReader& aER) { + // Note that this creates default `Ts` first, and then overwrites them. + std::tuple<Ts...> ob; + ReadInto(aER, ob); + return ob; + } +}; + +// ---------------------------------------------------------------------------- +// mozilla::Tuple + +// Tuple is serialized as a sequence of each recursively-serialized +// item. 
+// +// This is equivalent to manually serializing each item, so reading/writing +// tuples is equivalent to reading/writing their elements in order, e.g.: +// ``` +// Tuple<int, std::string> is = ...; +// aEW.WriteObject(is); // Write the Tuple, equivalent to: +// aEW.WriteObject(/* int */ std::get<0>(is), /* string */ std::get<1>(is)); +// ... +// // Reading back can be done directly into a Tuple: +// auto is = aER.ReadObject<Tuple<int, std::string>>(); +// // Or each item could be read separately: +// auto i = aER.ReadObject<int>(); auto s = aER.ReadObject<std::string>(); +// ``` +template <typename... Ts> +struct ProfileBufferEntryWriter::Serializer<Tuple<Ts...>> { + private: + template <size_t... Is> + static Length TupleBytes(const Tuple<Ts...>& aTuple, + std::index_sequence<Is...>) { + return (0 + ... + SumBytes(Get<Is>(aTuple))); + } + + template <size_t... Is> + static void TupleWrite(ProfileBufferEntryWriter& aEW, + const Tuple<Ts...>& aTuple, + std::index_sequence<Is...>) { + (aEW.WriteObject(Get<Is>(aTuple)), ...); + } + + public: + static Length Bytes(const Tuple<Ts...>& aTuple) { + // Generate a 0..N-1 index pack, we'll add the sizes of each item. + return TupleBytes(aTuple, std::index_sequence_for<Ts...>()); + } + + static void Write(ProfileBufferEntryWriter& aEW, const Tuple<Ts...>& aTuple) { + // Generate a 0..N-1 index pack, we'll write each item. + TupleWrite(aEW, aTuple, std::index_sequence_for<Ts...>()); + } +}; + +template <typename... Ts> +struct ProfileBufferEntryReader::Deserializer<Tuple<Ts...>> { + static void ReadInto(ProfileBufferEntryReader& aER, Tuple<Ts...>& aTuple) { + aER.ReadBytes(&aTuple, Bytes(aTuple)); + } + + static Tuple<Ts...> Read(ProfileBufferEntryReader& aER) { + // Note that this creates default `Ts` first, and then overwrites them. + Tuple<Ts...> ob; + ReadInto(aER, ob); + return ob; + } +}; + +// ---------------------------------------------------------------------------- +// mozilla::Span + +// Span. 
// All elements are serialized in sequence.
// The caller is assumed to know the number of elements (they may manually
// write&read it before the span if needed).
// Similar to tuples, reading/writing spans is equivalent to reading/writing
// their elements in order.
template <class T, size_t N>
struct ProfileBufferEntryWriter::Serializer<Span<T, N>> {
  // Sum of the serialized sizes of all elements; the element count itself is
  // not included.
  static Length Bytes(const Span<T, N>& aSpan) {
    Length bytes = 0;
    for (const T& element : aSpan) {
      bytes += SumBytes(element);
    }
    return bytes;
  }

  // Serialize each element in order, without any count or separator.
  static void Write(ProfileBufferEntryWriter& aEW, const Span<T, N>& aSpan) {
    for (const T& element : aSpan) {
      aEW.WriteObject(element);
    }
  }
};

template <class T, size_t N>
struct ProfileBufferEntryReader::Deserializer<Span<T, N>> {
  // Read elements back into span pointing at a pre-allocated buffer.
  // Exactly as many elements as the span holds are deserialized.
  static void ReadInto(ProfileBufferEntryReader& aER, Span<T, N>& aSpan) {
    for (T& element : aSpan) {
      aER.ReadIntoObject(element);
    }
  }

  // A Span does not own its data, this would probably leak so we forbid this.
  static Span<T, N> Read(ProfileBufferEntryReader& aER) = delete;
};

// ----------------------------------------------------------------------------
// mozilla::Maybe

// Maybe<T> is serialized as one byte containing either 'm' (Nothing),
// or 'M' followed by the recursively-serialized `T` object.
template <typename T>
struct ProfileBufferEntryWriter::Serializer<Maybe<T>> {
  static Length Bytes(const Maybe<T>& aMaybe) {
    // 1 byte to store nothing/something flag, then object size if present.
    return aMaybe.isNothing() ? 1 : (1 + SumBytes(aMaybe.ref()));
  }

  static void Write(ProfileBufferEntryWriter& aEW, const Maybe<T>& aMaybe) {
    // 'm'/'M' is just an arbitrary 1-byte value to distinguish states.
    if (aMaybe.isNothing()) {
      aEW.WriteObject<char>('m');
    } else {
      aEW.WriteObject<char>('M');
      // Use the Serializer for the contained type.
      aEW.WriteObject(aMaybe.ref());
    }
  }
};

template <typename T>
struct ProfileBufferEntryReader::Deserializer<Maybe<T>> {
  // Deserialize into an existing Maybe: It is reset on 'm'; for any other
  // flag byte (asserted to be 'M') the contained object is deserialized in
  // place.
  static void ReadInto(ProfileBufferEntryReader& aER, Maybe<T>& aMaybe) {
    char c = aER.ReadObject<char>();
    if (c == 'm') {
      aMaybe.reset();
    } else {
      MOZ_ASSERT(c == 'M');
      // If aMaybe is empty, create a default `T` first, to be overwritten.
      // Otherwise we'll just overwrite whatever was already there.
      if (aMaybe.isNothing()) {
        aMaybe.emplace();
      }
      // Use the Deserializer for the contained type.
      aER.ReadIntoObject(aMaybe.ref());
    }
  }

  // Deserialize into a new Maybe. Note that, unlike `ReadInto()`, only an 'M'
  // flag byte leads to deserializing an object; any other byte results in
  // Nothing.
  static Maybe<T> Read(ProfileBufferEntryReader& aER) {
    Maybe<T> maybe;
    char c = aER.ReadObject<char>();
    MOZ_ASSERT(c == 'M' || c == 'm');
    if (c == 'M') {
      // Note that this creates a default `T` inside the Maybe first, and then
      // overwrites it.
      maybe = Some(T{});
      // Use the Deserializer for the contained type.
      aER.ReadIntoObject(maybe.ref());
    }
    return maybe;
  }
};

// ----------------------------------------------------------------------------
// mozilla::Variant

// Variant is serialized as the tag (0-based index of the stored type, encoded
// as ULEB128), and the recursively-serialized object.
template <typename... Ts>
struct ProfileBufferEntryWriter::Serializer<Variant<Ts...>> {
 public:
  // Size of the ULEB128-encoded tag, plus the serialized size of the stored
  // alternative.
  static Length Bytes(const Variant<Ts...>& aVariantTs) {
    return aVariantTs.match([](auto aIndex, const auto& aAlternative) {
      return ULEB128Size(aIndex) + SumBytes(aAlternative);
    });
  }

  // Write the tag first, then the stored alternative.
  static void Write(ProfileBufferEntryWriter& aEW,
                    const Variant<Ts...>& aVariantTs) {
    aVariantTs.match([&aEW](auto aIndex, const auto& aAlternative) {
      aEW.WriteULEB128(aIndex);
      aEW.WriteObject(aAlternative);
    });
  }
};

template <typename... Ts>
struct ProfileBufferEntryReader::Deserializer<Variant<Ts...>> {
 private:
  // Called from the fold expression in `VariantReadInto()`, only the selected
  // variant will deserialize the object.
  template <size_t I>
  static void VariantIReadInto(ProfileBufferEntryReader& aER,
                               Variant<Ts...>& aVariantTs, unsigned aTag) {
    if (I == aTag) {
      // Ensure the variant contains the target type. Note that this may create
      // a default object.
      if (!aVariantTs.template is<I>()) {
        aVariantTs = Variant<Ts...>(VariantIndex<I>{});
      }
      aER.ReadIntoObject(aVariantTs.template as<I>());
    }
  }

  // Read the tag, then try each index in turn. If the tag matches none of the
  // indices, nothing more is read and `aVariantTs` is left unchanged.
  template <size_t... Is>
  static void VariantReadInto(ProfileBufferEntryReader& aER,
                              Variant<Ts...>& aVariantTs,
                              std::index_sequence<Is...>) {
    unsigned tag = aER.ReadULEB128<unsigned>();
    (VariantIReadInto<Is>(aER, aVariantTs, tag), ...);
  }

 public:
  static void ReadInto(ProfileBufferEntryReader& aER,
                       Variant<Ts...>& aVariantTs) {
    // Generate a 0..N-1 index pack, the selected variant will deserialize
    // itself.
    VariantReadInto(aER, aVariantTs, std::index_sequence_for<Ts...>());
  }

  static Variant<Ts...> Read(ProfileBufferEntryReader& aER) {
    // Note that this creates a default `Variant` of the first type, and then
    // overwrites it. Consider using `ReadInto` for more control if needed.
    Variant<Ts...> variant(VariantIndex<0>{});
    ReadInto(aER, variant);
    return variant;
  }
};

}  // namespace mozilla

#endif  // ProfileBufferEntrySerialization_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferIndex.h b/mozglue/baseprofiler/public/ProfileBufferIndex.h
new file mode 100644
index 0000000000..5cda6bd89e
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferIndex.h
@@ -0,0 +1,97 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef ProfileBufferIndex_h +#define ProfileBufferIndex_h + +#include "mozilla/Attributes.h" + +#include <cstddef> +#include <cstdint> + +namespace mozilla { + +// Generic index into a Profiler buffer, mostly for internal usage. +// Intended to appear infinite (it should effectively never wrap). +// 0 (zero) is reserved as nullptr-like value; it may indicate failure result, +// or it may point at the earliest available block. +using ProfileBufferIndex = uint64_t; + +// Externally-opaque class encapsulating a block index, i.e. a +// ProfileBufferIndex that is guaranteed to point at the start of a Profile +// buffer block (until it is destroyed, but then that index cannot be reused and +// functions should gracefully handle expired blocks). +// Users may get these from Profile buffer functions, to later access previous +// blocks; they should avoid converting and operating on their value. +class ProfileBufferBlockIndex { + public: + // Default constructor with internal 0 value, for which Profile buffers must + // guarantee that it is before any valid entries; All public APIs should + // fail gracefully, doing and/or returning Nothing. + ProfileBufferBlockIndex() : mBlockIndex(0) {} + + // Implicit conversion from literal `nullptr` to internal 0 value, to allow + // convenient init/reset/comparison with 0 index. + MOZ_IMPLICIT ProfileBufferBlockIndex(std::nullptr_t) : mBlockIndex(0) {} + + // Explicit conversion to bool, works in `if` and other tests. + // Only returns false for default `ProfileBufferBlockIndex{}` value. + explicit operator bool() const { return mBlockIndex != 0; } + + // Comparison operators. Default `ProfileBufferBlockIndex{}` value is always + // the lowest. 
+ [[nodiscard]] bool operator==(const ProfileBufferBlockIndex& aRhs) const { + return mBlockIndex == aRhs.mBlockIndex; + } + [[nodiscard]] bool operator!=(const ProfileBufferBlockIndex& aRhs) const { + return mBlockIndex != aRhs.mBlockIndex; + } + [[nodiscard]] bool operator<(const ProfileBufferBlockIndex& aRhs) const { + return mBlockIndex < aRhs.mBlockIndex; + } + [[nodiscard]] bool operator<=(const ProfileBufferBlockIndex& aRhs) const { + return mBlockIndex <= aRhs.mBlockIndex; + } + [[nodiscard]] bool operator>(const ProfileBufferBlockIndex& aRhs) const { + return mBlockIndex > aRhs.mBlockIndex; + } + [[nodiscard]] bool operator>=(const ProfileBufferBlockIndex& aRhs) const { + return mBlockIndex >= aRhs.mBlockIndex; + } + + // Explicit conversion to ProfileBufferIndex, mostly used by internal Profile + // buffer code. + [[nodiscard]] ProfileBufferIndex ConvertToProfileBufferIndex() const { + return mBlockIndex; + } + + // Explicit creation from ProfileBufferIndex, mostly used by internal + // Profile buffer code. + [[nodiscard]] static ProfileBufferBlockIndex CreateFromProfileBufferIndex( + ProfileBufferIndex aIndex) { + return ProfileBufferBlockIndex(aIndex); + } + + private: + // Private to prevent easy construction from any value. Use + // `CreateFromProfileBufferIndex()` instead. + // The main reason for this indirection is to make it harder to create these + // objects, because only the profiler code should need to do it. Ideally, this + // class should be used wherever a block index should be stored, but there is + // so much code that uses `uint64_t` that it would be a big task to change + // them all. So for now we allow conversions to/from numbers, but it's as ugly + // as possible to make sure it doesn't get too common; and if one day we want + // to tackle a global change, it should be easy to find all these locations + // thanks to the explicit conversion functions. 
+ explicit ProfileBufferBlockIndex(ProfileBufferIndex aBlockIndex) + : mBlockIndex(aBlockIndex) {} + + ProfileBufferIndex mBlockIndex; +}; + +} // namespace mozilla + +#endif // ProfileBufferIndex_h diff --git a/mozglue/baseprofiler/public/ProfileChunkedBuffer.h b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h new file mode 100644 index 0000000000..d4d55eafcb --- /dev/null +++ b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h @@ -0,0 +1,1872 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ProfileChunkedBuffer_h +#define ProfileChunkedBuffer_h + +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/NotNull.h" +#include "mozilla/ProfileBufferChunkManager.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileBufferEntrySerialization.h" +#include "mozilla/RefCounted.h" +#include "mozilla/RefPtr.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/Unused.h" + +#include <cstdio> +#include <utility> + +namespace mozilla { + +namespace detail { + +// Internal accessor pointing at a position inside a chunk. +// It can handle two groups of chunks (typically the extant chunks stored in +// the store manager, and the current chunk). +// The main operations are: +// - ReadEntrySize() to read an entry size, 0 means failure. +// - operator+=(Length) to skip a number of bytes. +// - EntryReader() creates an entry reader at the current position for a given +// size (it may fail with an empty reader), and skips the entry. +// Note that there is no "past-the-end" position -- as soon as InChunkPointer +// reaches the end, it becomes effectively null. 
+class InChunkPointer { + public: + using Byte = ProfileBufferChunk::Byte; + using Length = ProfileBufferChunk::Length; + + // Nullptr-like InChunkPointer, may be used as end iterator. + InChunkPointer() + : mChunk(nullptr), mNextChunkGroup(nullptr), mOffsetInChunk(0) {} + + // InChunkPointer over one or two chunk groups, pointing at the given + // block index (if still in range). + // This constructor should only be used with *trusted* block index values! + InChunkPointer(const ProfileBufferChunk* aChunk, + const ProfileBufferChunk* aNextChunkGroup, + ProfileBufferBlockIndex aBlockIndex) + : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) { + if (mChunk) { + mOffsetInChunk = mChunk->OffsetFirstBlock(); + Adjust(); + } else if (mNextChunkGroup) { + mChunk = mNextChunkGroup; + mNextChunkGroup = nullptr; + mOffsetInChunk = mChunk->OffsetFirstBlock(); + Adjust(); + } else { + mOffsetInChunk = 0; + } + + // Try to advance to given position. + if (!AdvanceToGlobalRangePosition(aBlockIndex)) { + // Block does not exist anymore (or block doesn't look valid), reset the + // in-chunk pointer. + mChunk = nullptr; + mNextChunkGroup = nullptr; + } + } + + // InChunkPointer over one or two chunk groups, will start at the first + // block (if any). This may be slow, so avoid using it too much. + InChunkPointer(const ProfileBufferChunk* aChunk, + const ProfileBufferChunk* aNextChunkGroup, + ProfileBufferIndex aIndex = ProfileBufferIndex(0)) + : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) { + if (mChunk) { + mOffsetInChunk = mChunk->OffsetFirstBlock(); + Adjust(); + } else if (mNextChunkGroup) { + mChunk = mNextChunkGroup; + mNextChunkGroup = nullptr; + mOffsetInChunk = mChunk->OffsetFirstBlock(); + Adjust(); + } else { + mOffsetInChunk = 0; + } + + // Try to advance to given position. + if (!AdvanceToGlobalRangePosition(aIndex)) { + // Block does not exist anymore, reset the in-chunk pointer. 
+ mChunk = nullptr; + mNextChunkGroup = nullptr; + } + } + + // Compute the current position in the global range. + // 0 if null (including if we're reached the end). + [[nodiscard]] ProfileBufferIndex GlobalRangePosition() const { + if (IsNull()) { + return 0; + } + return mChunk->RangeStart() + mOffsetInChunk; + } + + // Move InChunkPointer forward to the block at the given global block + // position, which is assumed to be valid exactly -- but it may be obsolete. + // 0 stays where it is (if valid already). + // MOZ_ASSERTs if the index is invalid. + [[nodiscard]] bool AdvanceToGlobalRangePosition( + ProfileBufferBlockIndex aBlockIndex) { + if (IsNull()) { + // Pointer is null already. (Not asserting because it's acceptable.) + return false; + } + if (!aBlockIndex) { + // Special null position, just stay where we are. + return ShouldPointAtValidBlock(); + } + if (aBlockIndex.ConvertToProfileBufferIndex() < GlobalRangePosition()) { + // Past the requested position, stay where we are (assuming the current + // position was valid). + return ShouldPointAtValidBlock(); + } + for (;;) { + if (aBlockIndex.ConvertToProfileBufferIndex() < + mChunk->RangeStart() + mChunk->OffsetPastLastBlock()) { + // Target position is in this chunk's written space, move to it. + mOffsetInChunk = + aBlockIndex.ConvertToProfileBufferIndex() - mChunk->RangeStart(); + return ShouldPointAtValidBlock(); + } + // Position is after this chunk, try next chunk. + GoToNextChunk(); + if (IsNull()) { + return false; + } + // Skip whatever block tail there is, we don't allow pointing in the + // middle of a block. + mOffsetInChunk = mChunk->OffsetFirstBlock(); + if (aBlockIndex.ConvertToProfileBufferIndex() < GlobalRangePosition()) { + // Past the requested position, meaning that the given position was in- + // between blocks -> Failure. 
+ MOZ_ASSERT(false, "AdvanceToGlobalRangePosition - In-between blocks"); + return false; + } + } + } + + // Move InChunkPointer forward to the block at or after the given global + // range position. + // 0 stays where it is (if valid already). + [[nodiscard]] bool AdvanceToGlobalRangePosition( + ProfileBufferIndex aPosition) { + if (aPosition == 0) { + // Special position '0', just stay where we are. + // Success if this position is already valid. + return !IsNull(); + } + for (;;) { + ProfileBufferIndex currentPosition = GlobalRangePosition(); + if (currentPosition == 0) { + // Pointer is null. + return false; + } + if (aPosition <= currentPosition) { + // At or past the requested position, stay where we are. + return true; + } + if (aPosition < mChunk->RangeStart() + mChunk->OffsetPastLastBlock()) { + // Target position is in this chunk's written space, move to it. + for (;;) { + // Skip the current block. + mOffsetInChunk += ReadEntrySize(); + if (mOffsetInChunk >= mChunk->OffsetPastLastBlock()) { + // Reached the end of the chunk, this can happen for the last + // block, let's just continue to the next chunk. + break; + } + if (aPosition <= mChunk->RangeStart() + mOffsetInChunk) { + // We're at or after the position, return at this block position. + return true; + } + } + } + // Position is after this chunk, try next chunk. + GoToNextChunk(); + if (IsNull()) { + return false; + } + // Skip whatever block tail there is, we don't allow pointing in the + // middle of a block. + mOffsetInChunk = mChunk->OffsetFirstBlock(); + } + } + + [[nodiscard]] Byte ReadByte() { + MOZ_ASSERT(!IsNull()); + MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock()); + Byte byte = mChunk->ByteAt(mOffsetInChunk); + if (MOZ_UNLIKELY(++mOffsetInChunk == mChunk->OffsetPastLastBlock())) { + Adjust(); + } + return byte; + } + + // Read and skip a ULEB128-encoded size. + // 0 means failure (0-byte entries are not allowed.) 
+ // Note that this doesn't guarantee that there are actually that many bytes + // available to read! (EntryReader() below may gracefully fail.) + [[nodiscard]] Length ReadEntrySize() { + ULEB128Reader<Length> reader; + if (IsNull()) { + return 0; + } + for (;;) { + const bool isComplete = reader.FeedByteIsComplete(ReadByte()); + if (MOZ_UNLIKELY(IsNull())) { + // End of chunks, so there's no actual entry after this anyway. + return 0; + } + if (MOZ_LIKELY(isComplete)) { + if (MOZ_UNLIKELY(reader.Value() > mChunk->BufferBytes())) { + // Don't allow entries larger than a chunk. + return 0; + } + return reader.Value(); + } + } + } + + InChunkPointer& operator+=(Length aLength) { + MOZ_ASSERT(!IsNull()); + mOffsetInChunk += aLength; + Adjust(); + return *this; + } + + [[nodiscard]] ProfileBufferEntryReader EntryReader(Length aLength) { + if (IsNull() || aLength == 0) { + return ProfileBufferEntryReader(); + } + + MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock()); + + // We should be pointing at the entry, past the entry size. + const ProfileBufferIndex entryIndex = GlobalRangePosition(); + // Verify that there's enough space before for the size (starting at index + // 1 at least). + MOZ_ASSERT(entryIndex >= 1u + ULEB128Size(aLength)); + + const Length remaining = mChunk->OffsetPastLastBlock() - mOffsetInChunk; + Span<const Byte> mem0 = mChunk->BufferSpan(); + mem0 = mem0.From(mOffsetInChunk); + if (aLength <= remaining) { + // Move to the end of this block, which could make this null if we have + // reached the end of all buffers. + *this += aLength; + return ProfileBufferEntryReader( + mem0.To(aLength), + // Block starts before the entry size. + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + entryIndex - ULEB128Size(aLength)), + // Block ends right after the entry (could be null for last entry). 
+ ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + GlobalRangePosition())); + } + + // We need to go to the next chunk for the 2nd part of this block. + GoToNextChunk(); + if (IsNull()) { + return ProfileBufferEntryReader(); + } + + Span<const Byte> mem1 = mChunk->BufferSpan(); + const Length tail = aLength - remaining; + MOZ_ASSERT(tail <= mChunk->BufferBytes()); + MOZ_ASSERT(tail == mChunk->OffsetFirstBlock()); + // We are in the correct chunk, move the offset to the end of the block. + mOffsetInChunk = tail; + // And adjust as needed, which could make this null if we have reached the + // end of all buffers. + Adjust(); + return ProfileBufferEntryReader( + mem0, mem1.To(tail), + // Block starts before the entry size. + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + entryIndex - ULEB128Size(aLength)), + // Block ends right after the entry (could be null for last entry). + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + GlobalRangePosition())); + } + + [[nodiscard]] bool IsNull() const { return !mChunk; } + + [[nodiscard]] bool operator==(const InChunkPointer& aOther) const { + if (IsNull() || aOther.IsNull()) { + return IsNull() && aOther.IsNull(); + } + return mChunk == aOther.mChunk && mOffsetInChunk == aOther.mOffsetInChunk; + } + + [[nodiscard]] bool operator!=(const InChunkPointer& aOther) const { + return !(*this == aOther); + } + + [[nodiscard]] Byte operator*() const { + MOZ_ASSERT(!IsNull()); + MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock()); + return mChunk->ByteAt(mOffsetInChunk); + } + + InChunkPointer& operator++() { + MOZ_ASSERT(!IsNull()); + MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock()); + if (MOZ_UNLIKELY(++mOffsetInChunk == mChunk->OffsetPastLastBlock())) { + mOffsetInChunk = 0; + GoToNextChunk(); + Adjust(); + } + return *this; + } + + private: + void GoToNextChunk() { + MOZ_ASSERT(!IsNull()); + const ProfileBufferIndex expectedNextRangeStart = + mChunk->RangeStart() + mChunk->BufferBytes(); + + 
mChunk = mChunk->GetNext(); + if (!mChunk) { + // Reached the end of the current chunk group, try the next one (which + // may be null too, especially on the 2nd try). + mChunk = mNextChunkGroup; + mNextChunkGroup = nullptr; + } + + if (mChunk && mChunk->RangeStart() == 0) { + // Reached a chunk without a valid (non-null) range start, assume there + // are only unused chunks from here on. + mChunk = nullptr; + } + + MOZ_ASSERT(!mChunk || mChunk->RangeStart() == expectedNextRangeStart, + "We don't handle discontinuous buffers (yet)"); + // Non-DEBUG fallback: Stop reading past discontinuities. + // (They should be rare, only happening on temporary OOMs.) + // TODO: Handle discontinuities (by skipping over incomplete blocks). + if (mChunk && mChunk->RangeStart() != expectedNextRangeStart) { + mChunk = nullptr; + } + } + + // We want `InChunkPointer` to always point at a valid byte (or be null). + // After some operations, `mOffsetInChunk` may point past the end of the + // current `mChunk`, in which case we need to adjust our position to be inside + // the appropriate chunk. E.g., if we're 10 bytes after the end of the current + // chunk, we should end up at offset 10 in the next chunk. + // Note that we may "fall off" the last chunk and make this `InChunkPointer` + // effectively null. + void Adjust() { + while (mChunk && mOffsetInChunk >= mChunk->OffsetPastLastBlock()) { + // TODO: Try to adjust offset between chunks relative to mRangeStart + // differences. But we don't handle discontinuities yet. + if (mOffsetInChunk < mChunk->BufferBytes()) { + mOffsetInChunk -= mChunk->BufferBytes(); + } else { + mOffsetInChunk -= mChunk->OffsetPastLastBlock(); + } + GoToNextChunk(); + } + } + + // Check if the current position is likely to point at a valid block. + // (Size should be reasonable, and block should fully fit inside buffer.) + // MOZ_ASSERTs on failure, to catch incorrect uses of block indices (which + // should only point at valid blocks if still in range). 
Non-asserting build + // fallback should still be handled. + [[nodiscard]] bool ShouldPointAtValidBlock() const { + if (IsNull()) { + // Pointer is null, no blocks here. + MOZ_ASSERT(false, "ShouldPointAtValidBlock - null pointer"); + return false; + } + // Use a copy, so we don't modify `*this`. + InChunkPointer pointer = *this; + // Try to read the entry size. + Length entrySize = pointer.ReadEntrySize(); + if (entrySize == 0) { + // Entry size of zero means we read 0 or a way-too-big value. + MOZ_ASSERT(false, "ShouldPointAtValidBlock - invalid size"); + return false; + } + // See if the last byte of the entry is still inside the buffer. + pointer += entrySize - 1; + MOZ_ASSERT(!IsNull(), "ShouldPointAtValidBlock - past end of buffer"); + return !IsNull(); + } + + const ProfileBufferChunk* mChunk; + const ProfileBufferChunk* mNextChunkGroup; + Length mOffsetInChunk; +}; + +} // namespace detail + +// Thread-safe buffer that can store blocks of different sizes during defined +// sessions, using Chunks (from a ChunkManager) as storage. +// +// Each *block* contains an *entry* and the entry size: +// [ entry_size | entry ] [ entry_size | entry ] ... +// +// *In-session* is a period of time during which `ProfileChunkedBuffer` allows +// reading and writing. +// *Out-of-session*, the `ProfileChunkedBuffer` object is still valid, but +// contains no data, and gracefully denies accesses. +// +// To write an entry, the buffer reserves a block of sufficient size (to contain +// user data of predetermined size), writes the entry size, and lets the caller +// fill the entry contents using a ProfileBufferEntryWriter. E.g.: +// ``` +// ProfileChunkedBuffer cb(...); +// cb.ReserveAndPut([]() { return sizeof(123); }, +// [&](Maybe<ProfileBufferEntryWriter>& aEW) { +// if (aEW) { aEW->WriteObject(123); } +// }); +// ``` +// Other `Put...` functions may be used as shortcuts for simple entries. 
+// The objects given to the caller's callbacks should only be used inside the +// callbacks and not stored elsewhere, because they keep their own references to +// chunk memory and therefore should not live longer. +// Different type of objects may be serialized into an entry, see +// `ProfileBufferEntryWriter::Serializer` for more information. +// +// When reading data, the buffer iterates over blocks (it knows how to read the +// entry size, and therefore move to the next block), and lets the caller read +// the entry inside of each block. E.g.: +// ``` +// cb.ReadEach([](ProfileBufferEntryReader& aER) { +// /* Use ProfileBufferEntryReader functions to read serialized objects. */ +// int n = aER.ReadObject<int>(); +// }); +// ``` +// Different type of objects may be deserialized from an entry, see +// `ProfileBufferEntryReader::Deserializer` for more information. +// +// Writers may retrieve the block index corresponding to an entry +// (`ProfileBufferBlockIndex` is an opaque type preventing the user from easily +// modifying it). That index may later be used with `ReadAt` to get back to the +// entry in that particular block -- if it still exists. +class ProfileChunkedBuffer { + public: + using Byte = ProfileBufferChunk::Byte; + using Length = ProfileBufferChunk::Length; + + enum class ThreadSafety { WithoutMutex, WithMutex }; + + // Default constructor starts out-of-session (nothing to read or write). + explicit ProfileChunkedBuffer(ThreadSafety aThreadSafety) + : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {} + + // Start in-session with external chunk manager. + ProfileChunkedBuffer(ThreadSafety aThreadSafety, + ProfileBufferChunkManager& aChunkManager) + : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) { + SetChunkManager(aChunkManager); + } + + // Start in-session with owned chunk manager. 
+ ProfileChunkedBuffer(ThreadSafety aThreadSafety, + UniquePtr<ProfileBufferChunkManager>&& aChunkManager) + : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) { + SetChunkManager(std::move(aChunkManager)); + } + + ~ProfileChunkedBuffer() { + // Do proper clean-up by resetting the chunk manager. + ResetChunkManager(); + } + + // This cannot change during the lifetime of this buffer, so there's no need + // to lock. + [[nodiscard]] bool IsThreadSafe() const { return mMutex.IsActivated(); } + + [[nodiscard]] bool IsInSession() const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return !!mChunkManager; + } + + // Stop using the current chunk manager. + // If we own the current chunk manager, it will be destroyed. + // This will always clear currently-held chunks, if any. + void ResetChunkManager() { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + Unused << ResetChunkManager(lock); + } + + // Set the current chunk manager. + // The caller is responsible for keeping the chunk manager alive as along as + // it's used here (until the next (Re)SetChunkManager, or + // ~ProfileChunkedBuffer). + void SetChunkManager(ProfileBufferChunkManager& aChunkManager) { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + Unused << ResetChunkManager(lock); + SetChunkManager(aChunkManager, lock); + } + + // Set the current chunk manager, and keep ownership of it. + void SetChunkManager(UniquePtr<ProfileBufferChunkManager>&& aChunkManager) { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + Unused << ResetChunkManager(lock); + mOwnedChunkManager = std::move(aChunkManager); + if (mOwnedChunkManager) { + SetChunkManager(*mOwnedChunkManager, lock); + } + } + + // Stop using the current chunk manager, and return it if owned here. 
+ [[nodiscard]] UniquePtr<ProfileBufferChunkManager> ExtractChunkManager() { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return ResetChunkManager(lock); + } + + // Clear the contents of this buffer, ready to receive new chunks. + // Note that memory is not freed: No chunks are destroyed, they are all + // receycled. + // Also the range doesn't reset, instead it continues at some point after the + // previous range. This may be useful if the caller may be keeping indexes + // into old chunks that have now been cleared, using these indexes will fail + // gracefully (instead of potentially pointing into new data). + void Clear() { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + if (MOZ_UNLIKELY(!mChunkManager)) { + // Out-of-session. + return; + } + + mRangeStart = mRangeEnd = mNextChunkRangeStart; + mPushedBlockCount = 0; + mClearedBlockCount = 0; + mFailedPutBytes = 0; + + // Recycle all released chunks as "next" chunks. This will reduce the number + // of future allocations. Also, when using ProfileBufferChunkManagerSingle, + // this retrieves the one chunk if it was released. + UniquePtr<ProfileBufferChunk> releasedChunks = + mChunkManager->GetExtantReleasedChunks(); + if (releasedChunks) { + // Released chunks should be in the "Done" state, they need to be marked + // "recycled" before they can be reused. + for (ProfileBufferChunk* chunk = releasedChunks.get(); chunk; + chunk = chunk->GetNext()) { + chunk->MarkRecycled(); + } + mNextChunks = ProfileBufferChunk::Join(std::move(mNextChunks), + std::move(releasedChunks)); + } + + if (mCurrentChunk) { + // We already have a current chunk (empty or in-use), mark it "done" and + // then "recycled", ready to be reused. + mCurrentChunk->MarkDone(); + mCurrentChunk->MarkRecycled(); + } else { + if (!mNextChunks) { + // No current chunk, and no next chunks to recycle, nothing more to do. + // The next "Put" operation will try to allocate a chunk as needed. 
+ return; + } + + // No current chunk, take a next chunk. + mCurrentChunk = std::exchange(mNextChunks, mNextChunks->ReleaseNext()); + } + + // Here, there was already a current chunk, or one has just been taken. + // Make sure it's ready to receive new entries. + InitializeCurrentChunk(lock); + } + + // Buffer maximum length in bytes. + Maybe<size_t> BufferLength() const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + if (!mChunkManager) { + return Nothing{}; + } + return Some(mChunkManager->MaxTotalSize()); + } + + [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return SizeOfExcludingThis(aMallocSizeOf, lock); + } + + [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf, lock); + } + + // Snapshot of the buffer state. + struct State { + // Index to/before the first block. + ProfileBufferIndex mRangeStart = 1; + + // Index past the last block. Equals mRangeStart if empty. + ProfileBufferIndex mRangeEnd = 1; + + // Number of blocks that have been pushed into this buffer. + uint64_t mPushedBlockCount = 0; + + // Number of blocks that have been removed from this buffer. + // Note: Live entries = pushed - cleared. + uint64_t mClearedBlockCount = 0; + + // Number of bytes that could not be put into this buffer. + uint64_t mFailedPutBytes = 0; + }; + + // Get a snapshot of the current state. + // When out-of-session, mFirstReadIndex==mNextWriteIndex, and + // mPushedBlockCount==mClearedBlockCount==0. + // Note that these may change right after this thread-safe call, so they + // should only be used for statistical purposes. 
+ [[nodiscard]] State GetState() const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return {mRangeStart, mRangeEnd, mPushedBlockCount, mClearedBlockCount, + mFailedPutBytes}; + } + + [[nodiscard]] bool IsEmpty() const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return mRangeStart == mRangeEnd; + } + + // True if this buffer is already locked on this thread. + // This should be used if some functions may call an already-locked buffer, + // e.g.: Put -> memory hook -> profiler_add_native_allocation_marker -> Put. + [[nodiscard]] bool IsThreadSafeAndLockedOnCurrentThread() const { + return mMutex.IsActivatedAndLockedOnCurrentThread(); + } + + // Lock the buffer mutex and run the provided callback. + // This can be useful when the caller needs to explicitly lock down this + // buffer, but not do anything else with it. + template <typename Callback> + auto LockAndRun(Callback&& aCallback) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + return std::forward<Callback>(aCallback)(); + } + + // Reserve a block that can hold an entry of the given `aCallbackEntryBytes()` + // size, write the entry size (ULEB128-encoded), and invoke and return + // `aCallback(Maybe<ProfileBufferEntryWriter>&)`. + // Note: `aCallbackEntryBytes` is a callback instead of a simple value, to + // delay this potentially-expensive computation until after we're checked that + // we're in-session; use `Put(Length, Callback)` below if you know the size + // already. + template <typename CallbackEntryBytes, typename Callback> + auto ReserveAndPut(CallbackEntryBytes&& aCallbackEntryBytes, + Callback&& aCallback) + -> decltype(std::forward<Callback>(aCallback)( + std::declval<Maybe<ProfileBufferEntryWriter>&>())) { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + + // This can only be read in the 2nd lambda below after it has been written + // by the first lambda. 
+ Length entryBytes; + + return ReserveAndPutRaw( + [&]() { + entryBytes = std::forward<CallbackEntryBytes>(aCallbackEntryBytes)(); + MOZ_ASSERT(entryBytes != 0, "Empty entries are not allowed"); + return ULEB128Size(entryBytes) + entryBytes; + }, + [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) { + if (aMaybeEntryWriter.isSome()) { + aMaybeEntryWriter->WriteULEB128(entryBytes); + MOZ_ASSERT(aMaybeEntryWriter->RemainingBytes() == entryBytes); + } + return std::forward<Callback>(aCallback)(aMaybeEntryWriter); + }, + lock); + } + + template <typename Callback> + auto Put(Length aEntryBytes, Callback&& aCallback) { + return ReserveAndPut([aEntryBytes]() { return aEntryBytes; }, + std::forward<Callback>(aCallback)); + } + + // Add a new entry copied from the given buffer, return block index. + ProfileBufferBlockIndex PutFrom(const void* aSrc, Length aBytes) { + return ReserveAndPut( + [aBytes]() { return aBytes; }, + [aSrc, aBytes](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) { + if (aMaybeEntryWriter.isNothing()) { + return ProfileBufferBlockIndex{}; + } + aMaybeEntryWriter->WriteBytes(aSrc, aBytes); + return aMaybeEntryWriter->CurrentBlockIndex(); + }); + } + + // Add a new single entry with *all* given object (using a Serializer for + // each), return block index. + template <typename... Ts> + ProfileBufferBlockIndex PutObjects(const Ts&... aTs) { + static_assert(sizeof...(Ts) > 0, + "PutObjects must be given at least one object."); + return ReserveAndPut( + [&]() { return ProfileBufferEntryWriter::SumBytes(aTs...); }, + [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) { + if (aMaybeEntryWriter.isNothing()) { + return ProfileBufferBlockIndex{}; + } + aMaybeEntryWriter->WriteObjects(aTs...); + return aMaybeEntryWriter->CurrentBlockIndex(); + }); + } + + // Add a new entry copied from the given object, return block index. 
+ template <typename T> + ProfileBufferBlockIndex PutObject(const T& aOb) { + return PutObjects(aOb); + } + + // Get *all* chunks related to this buffer, including extant chunks in its + // ChunkManager, and yet-unused new/recycled chunks. + // We don't expect this buffer to be used again, though it's still possible + // and will allocate the first buffer when needed. + [[nodiscard]] UniquePtr<ProfileBufferChunk> GetAllChunks() { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + if (MOZ_UNLIKELY(!mChunkManager)) { + // Out-of-session. + return nullptr; + } + UniquePtr<ProfileBufferChunk> chunks = + mChunkManager->GetExtantReleasedChunks(); + Unused << HandleRequestedChunk_IsPending(lock); + if (MOZ_LIKELY(!!mCurrentChunk)) { + mCurrentChunk->MarkDone(); + chunks = + ProfileBufferChunk::Join(std::move(chunks), std::move(mCurrentChunk)); + } + chunks = + ProfileBufferChunk::Join(std::move(chunks), std::move(mNextChunks)); + mChunkManager->ForgetUnreleasedChunks(); + mRangeStart = mRangeEnd = mNextChunkRangeStart; + return chunks; + } + + class Reader; + + // Class that can iterate through blocks and provide + // `ProfileBufferEntryReader`s. + // Created through `Reader`, lives within a lock guard lifetime. + class BlockIterator { + public: +#ifdef DEBUG + ~BlockIterator() { + // No BlockIterator should live outside of a mutexed call. + mBuffer->mMutex.AssertCurrentThreadOwns(); + } +#endif // DEBUG + + // Comparison with other iterator, mostly used in range-for loops. + [[nodiscard]] bool operator==(const BlockIterator& aRhs) const { + MOZ_ASSERT(mBuffer == aRhs.mBuffer); + return mCurrentBlockIndex == aRhs.mCurrentBlockIndex; + } + [[nodiscard]] bool operator!=(const BlockIterator& aRhs) const { + MOZ_ASSERT(mBuffer == aRhs.mBuffer); + return mCurrentBlockIndex != aRhs.mCurrentBlockIndex; + } + + // Advance to next BlockIterator. 
+ BlockIterator& operator++() { + mBuffer->mMutex.AssertCurrentThreadOwns(); + mCurrentBlockIndex = + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mNextBlockPointer.GlobalRangePosition()); + mCurrentEntry = + mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize()); + return *this; + } + + // Dereferencing creates a `ProfileBufferEntryReader` object for the entry + // inside this block. + // (Note: It would be possible to return a `const + // ProfileBufferEntryReader&`, but not useful in practice, because in most + // case the user will want to read, which is non-const.) + [[nodiscard]] ProfileBufferEntryReader operator*() const { + return mCurrentEntry; + } + + // True if this iterator is just past the last entry. + [[nodiscard]] bool IsAtEnd() const { + return mCurrentEntry.RemainingBytes() == 0; + } + + // Can be used as reference to come back to this entry with `GetEntryAt()`. + [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const { + return mCurrentBlockIndex; + } + + // Index past the end of this block, which is the start of the next block. + [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const { + MOZ_ASSERT(!IsAtEnd()); + return ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mNextBlockPointer.GlobalRangePosition()); + } + + // Index of the first block in the whole buffer. + [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const { + mBuffer->mMutex.AssertCurrentThreadOwns(); + return ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mBuffer->mRangeStart); + } + + // Index past the last block in the whole buffer. + [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const { + mBuffer->mMutex.AssertCurrentThreadOwns(); + return ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mBuffer->mRangeEnd); + } + + private: + // Only a Reader can instantiate a BlockIterator. 
+ friend class Reader; + + BlockIterator(const ProfileChunkedBuffer& aBuffer, + const ProfileBufferChunk* aChunks0, + const ProfileBufferChunk* aChunks1, + ProfileBufferBlockIndex aBlockIndex) + : mNextBlockPointer(aChunks0, aChunks1, aBlockIndex), + mCurrentBlockIndex( + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mNextBlockPointer.GlobalRangePosition())), + mCurrentEntry( + mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize())), + mBuffer(WrapNotNull(&aBuffer)) { + // No BlockIterator should live outside of a mutexed call. + mBuffer->mMutex.AssertCurrentThreadOwns(); + } + + detail::InChunkPointer mNextBlockPointer; + + ProfileBufferBlockIndex mCurrentBlockIndex; + + ProfileBufferEntryReader mCurrentEntry; + + // Using a non-null pointer instead of a reference, to allow copying. + // This BlockIterator should only live inside one of the thread-safe + // ProfileChunkedBuffer functions, for this reference to stay valid. + NotNull<const ProfileChunkedBuffer*> mBuffer; + }; + + // Class that can create `BlockIterator`s (e.g., for range-for), or just + // iterate through entries; lives within a lock guard lifetime. + class MOZ_RAII Reader { + public: + Reader(const Reader&) = delete; + Reader& operator=(const Reader&) = delete; + Reader(Reader&&) = delete; + Reader& operator=(Reader&&) = delete; + +#ifdef DEBUG + ~Reader() { + // No Reader should live outside of a mutexed call. + mBuffer.mMutex.AssertCurrentThreadOwns(); + } +#endif // DEBUG + + // Index of the first block in the whole buffer. + [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const { + mBuffer.mMutex.AssertCurrentThreadOwns(); + return ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mBuffer.mRangeStart); + } + + // Index past the last block in the whole buffer. 
+ [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const { + mBuffer.mMutex.AssertCurrentThreadOwns(); + return ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + mBuffer.mRangeEnd); + } + + // Iterators to the first and past-the-last blocks. + // Compatible with range-for (see `ForEach` below as example). + [[nodiscard]] BlockIterator begin() const { + return BlockIterator(mBuffer, mChunks0, mChunks1, nullptr); + } + // Note that a `BlockIterator` at the `end()` should not be dereferenced, as + // there is no actual block there! + [[nodiscard]] BlockIterator end() const { + return BlockIterator(mBuffer, nullptr, nullptr, nullptr); + } + + // Get a `BlockIterator` at the given `ProfileBufferBlockIndex`, clamped to + // the stored range. Note that a `BlockIterator` at the `end()` should not + // be dereferenced, as there is no actual block there! + [[nodiscard]] BlockIterator At(ProfileBufferBlockIndex aBlockIndex) const { + if (aBlockIndex < BufferRangeStart()) { + // Anything before the range (including null ProfileBufferBlockIndex) is + // clamped at the beginning. + return begin(); + } + // Otherwise we at least expect the index to be valid (pointing exactly at + // a live block, or just past the end.) + return BlockIterator(mBuffer, mChunks0, mChunks1, aBlockIndex); + } + + // Run `aCallback(ProfileBufferEntryReader&)` on each entry from first to + // last. Callback should not store `ProfileBufferEntryReader`, as it may + // become invalid after this thread-safe call. + template <typename Callback> + void ForEach(Callback&& aCallback) const { + for (ProfileBufferEntryReader reader : *this) { + aCallback(reader); + } + } + + // If this reader only points at one chunk with some data, this data will be + // exposed as a single entry. 
+ [[nodiscard]] ProfileBufferEntryReader SingleChunkDataAsEntry() { + const ProfileBufferChunk* onlyNonEmptyChunk = nullptr; + for (const ProfileBufferChunk* chunkList : {mChunks0, mChunks1}) { + for (const ProfileBufferChunk* chunk = chunkList; chunk; + chunk = chunk->GetNext()) { + if (chunk->OffsetFirstBlock() != chunk->OffsetPastLastBlock()) { + if (onlyNonEmptyChunk) { + // More than one non-empty chunk. + return ProfileBufferEntryReader(); + } + onlyNonEmptyChunk = chunk; + } + } + } + if (!onlyNonEmptyChunk) { + // No non-empty chunks. + return ProfileBufferEntryReader(); + } + // Here, we have found one chunk that had some data. + return ProfileBufferEntryReader( + onlyNonEmptyChunk->BufferSpan().FromTo( + onlyNonEmptyChunk->OffsetFirstBlock(), + onlyNonEmptyChunk->OffsetPastLastBlock()), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + onlyNonEmptyChunk->RangeStart()), + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + onlyNonEmptyChunk->RangeStart() + + (onlyNonEmptyChunk->OffsetPastLastBlock() - + onlyNonEmptyChunk->OffsetFirstBlock()))); + } + + private: + friend class ProfileChunkedBuffer; + + explicit Reader(const ProfileChunkedBuffer& aBuffer, + const ProfileBufferChunk* aChunks0, + const ProfileBufferChunk* aChunks1) + : mBuffer(aBuffer), mChunks0(aChunks0), mChunks1(aChunks1) { + // No Reader should live outside of a mutexed call. + mBuffer.mMutex.AssertCurrentThreadOwns(); + } + + // This Reader should only live inside one of the thread-safe + // ProfileChunkedBuffer functions, for this reference to stay valid. + const ProfileChunkedBuffer& mBuffer; + const ProfileBufferChunk* mChunks0; + const ProfileBufferChunk* mChunks1; + }; + + // In in-session, call `aCallback(ProfileChunkedBuffer::Reader&)` and return + // true. Callback should not store `Reader`, because it may become invalid + // after this call. + // If out-of-session, return false (callback is not invoked). 
+ template <typename Callback> + [[nodiscard]] auto Read(Callback&& aCallback) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + if (MOZ_UNLIKELY(!mChunkManager)) { + // Out-of-session. + return std::forward<Callback>(aCallback)(static_cast<Reader*>(nullptr)); + } + return mChunkManager->PeekExtantReleasedChunks( + [&](const ProfileBufferChunk* aOldestChunk) { + Reader reader(*this, aOldestChunk, mCurrentChunk.get()); + return std::forward<Callback>(aCallback)(&reader); + }); + } + + // Invoke `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])` + // on each entry, it must read or at least skip everything. Either/both chunk + // pointers may be null. + template <typename Callback> + static void ReadEach(const ProfileBufferChunk* aChunks0, + const ProfileBufferChunk* aChunks1, + Callback&& aCallback) { + static_assert(std::is_invocable_v<Callback, ProfileBufferEntryReader&> || + std::is_invocable_v<Callback, ProfileBufferEntryReader&, + ProfileBufferBlockIndex>, + "ReadEach callback must take ProfileBufferEntryReader& and " + "optionally a ProfileBufferBlockIndex"); + detail::InChunkPointer p{aChunks0, aChunks1}; + while (!p.IsNull()) { + // The position right before an entry size *is* a block index. 
+ const ProfileBufferBlockIndex blockIndex = + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + p.GlobalRangePosition()); + Length entrySize = p.ReadEntrySize(); + if (entrySize == 0) { + return; + } + ProfileBufferEntryReader entryReader = p.EntryReader(entrySize); + if (entryReader.RemainingBytes() == 0) { + return; + } + MOZ_ASSERT(entryReader.RemainingBytes() == entrySize); + if constexpr (std::is_invocable_v<Callback, ProfileBufferEntryReader&, + ProfileBufferBlockIndex>) { + aCallback(entryReader, blockIndex); + } else { + Unused << blockIndex; + aCallback(entryReader); + } + MOZ_ASSERT(entryReader.RemainingBytes() == 0); + } + } + + // Invoke `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])` + // on each entry, it must read or at least skip everything. + template <typename Callback> + void ReadEach(Callback&& aCallback) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + if (MOZ_UNLIKELY(!mChunkManager)) { + // Out-of-session. + return; + } + mChunkManager->PeekExtantReleasedChunks( + [&](const ProfileBufferChunk* aOldestChunk) { + ReadEach(aOldestChunk, mCurrentChunk.get(), + std::forward<Callback>(aCallback)); + }); + } + + // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at + // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if + // out-of-session, or if that entry doesn't exist anymore, or if we've reached + // just past the last entry. Return whatever `aCallback` returns. Callback + // should not store `ProfileBufferEntryReader`, because it may become invalid + // after this call. + // Either/both chunk pointers may be null. 
+ template <typename Callback> + [[nodiscard]] static auto ReadAt(ProfileBufferBlockIndex aMinimumBlockIndex, + const ProfileBufferChunk* aChunks0, + const ProfileBufferChunk* aChunks1, + Callback&& aCallback) { + static_assert( + std::is_invocable_v<Callback, Maybe<ProfileBufferEntryReader>&&>, + "ReadAt callback must take a Maybe<ProfileBufferEntryReader>&&"); + Maybe<ProfileBufferEntryReader> maybeEntryReader; + if (detail::InChunkPointer p{aChunks0, aChunks1}; !p.IsNull()) { + // If the pointer position is before the given position, try to advance. + if (p.GlobalRangePosition() >= + aMinimumBlockIndex.ConvertToProfileBufferIndex() || + p.AdvanceToGlobalRangePosition( + aMinimumBlockIndex.ConvertToProfileBufferIndex())) { + MOZ_ASSERT(p.GlobalRangePosition() >= + aMinimumBlockIndex.ConvertToProfileBufferIndex()); + // Here we're pointing at the start of a block, try to read the entry + // size. (Entries cannot be empty, so 0 means failure.) + if (Length entrySize = p.ReadEntrySize(); entrySize != 0) { + maybeEntryReader.emplace(p.EntryReader(entrySize)); + if (maybeEntryReader->RemainingBytes() == 0) { + // An empty entry reader means there was no complete block at the + // given index. + maybeEntryReader.reset(); + } else { + MOZ_ASSERT(maybeEntryReader->RemainingBytes() == entrySize); + } + } + } + } +#ifdef DEBUG + auto assertAllRead = MakeScopeExit([&]() { + MOZ_ASSERT(!maybeEntryReader || maybeEntryReader->RemainingBytes() == 0); + }); +#endif // DEBUG + return std::forward<Callback>(aCallback)(std::move(maybeEntryReader)); + } + + // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at + // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if + // out-of-session, or if that entry doesn't exist anymore, or if we've reached + // just past the last entry. Return whatever `aCallback` returns. Callback + // should not store `ProfileBufferEntryReader`, because it may become invalid + // after this call. 
+ template <typename Callback> + [[nodiscard]] auto ReadAt(ProfileBufferBlockIndex aBlockIndex, + Callback&& aCallback) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + if (MOZ_UNLIKELY(!mChunkManager)) { + // Out-of-session. + return std::forward<Callback>(aCallback)(Nothing{}); + } + return mChunkManager->PeekExtantReleasedChunks( + [&](const ProfileBufferChunk* aOldestChunk) { + return ReadAt(aBlockIndex, aOldestChunk, mCurrentChunk.get(), + std::forward<Callback>(aCallback)); + }); + } + + // Append the contents of another ProfileChunkedBuffer to this one. + ProfileBufferBlockIndex AppendContents(const ProfileChunkedBuffer& aSrc) { + ProfileBufferBlockIndex firstBlockIndex; + // If we start failing, we'll stop writing. + bool failed = false; + aSrc.ReadEach([&](ProfileBufferEntryReader& aER) { + if (failed) { + return; + } + failed = + !Put(aER.RemainingBytes(), [&](Maybe<ProfileBufferEntryWriter>& aEW) { + if (aEW.isNothing()) { + return false; + } + if (!firstBlockIndex) { + firstBlockIndex = aEW->CurrentBlockIndex(); + } + aEW->WriteFromReader(aER, aER.RemainingBytes()); + return true; + }); + }); + return failed ? 
nullptr : firstBlockIndex; + } + +#ifdef DEBUG + void Dump(std::FILE* aFile = stdout) const { + baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex); + fprintf(aFile, + "ProfileChunkedBuffer[%p] State: range %u-%u pushed=%u cleared=%u " + "(live=%u) failed-puts=%u bytes", + this, unsigned(mRangeStart), unsigned(mRangeEnd), + unsigned(mPushedBlockCount), unsigned(mClearedBlockCount), + unsigned(mPushedBlockCount) - unsigned(mClearedBlockCount), + unsigned(mFailedPutBytes)); + if (MOZ_UNLIKELY(!mChunkManager)) { + fprintf(aFile, " - Out-of-session\n"); + return; + } + fprintf(aFile, " - chunks:\n"); + bool hasChunks = false; + mChunkManager->PeekExtantReleasedChunks( + [&](const ProfileBufferChunk* aOldestChunk) { + for (const ProfileBufferChunk* chunk = aOldestChunk; chunk; + chunk = chunk->GetNext()) { + fprintf(aFile, "R "); + chunk->Dump(aFile); + hasChunks = true; + } + }); + if (mCurrentChunk) { + fprintf(aFile, "C "); + mCurrentChunk->Dump(aFile); + hasChunks = true; + } + for (const ProfileBufferChunk* chunk = mNextChunks.get(); chunk; + chunk = chunk->GetNext()) { + fprintf(aFile, "N "); + chunk->Dump(aFile); + hasChunks = true; + } + switch (mRequestedChunkHolder->GetState()) { + case RequestedChunkRefCountedHolder::State::Unused: + fprintf(aFile, " - No request pending.\n"); + break; + case RequestedChunkRefCountedHolder::State::Requested: + fprintf(aFile, " - Request pending.\n"); + break; + case RequestedChunkRefCountedHolder::State::Fulfilled: + fprintf(aFile, " - Request fulfilled.\n"); + break; + } + if (!hasChunks) { + fprintf(aFile, " No chunks.\n"); + } + } +#endif // DEBUG + + private: + // Used to de/serialize a ProfileChunkedBuffer (e.g., containing a backtrace). 
+ friend ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>; + friend ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer>; + friend ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>>; + friend ProfileBufferEntryReader::Deserializer< + UniquePtr<ProfileChunkedBuffer>>; + + [[nodiscard]] UniquePtr<ProfileBufferChunkManager> ResetChunkManager( + const baseprofiler::detail::BaseProfilerMaybeAutoLock&) { + UniquePtr<ProfileBufferChunkManager> chunkManager; + if (mChunkManager) { + mRequestedChunkHolder = nullptr; + mChunkManager->ForgetUnreleasedChunks(); +#ifdef DEBUG + mChunkManager->DeregisteredFrom(this); +#endif + mChunkManager = nullptr; + chunkManager = std::move(mOwnedChunkManager); + if (mCurrentChunk) { + mCurrentChunk->MarkDone(); + mCurrentChunk = nullptr; + } + mNextChunks = nullptr; + mNextChunkRangeStart = mRangeEnd; + mRangeStart = mRangeEnd; + mPushedBlockCount = 0; + mClearedBlockCount = 0; + mFailedPutBytes = 0; + } + return chunkManager; + } + + void SetChunkManager( + ProfileBufferChunkManager& aChunkManager, + const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) { + MOZ_ASSERT(!mChunkManager); + mChunkManager = &aChunkManager; +#ifdef DEBUG + mChunkManager->RegisteredWith(this); +#endif + + mChunkManager->SetChunkDestroyedCallback( + [this](const ProfileBufferChunk& aChunk) { + for (;;) { + ProfileBufferIndex rangeStart = mRangeStart; + if (MOZ_LIKELY(rangeStart <= aChunk.RangeStart())) { + if (MOZ_LIKELY(mRangeStart.compareExchange( + rangeStart, + aChunk.RangeStart() + aChunk.BufferBytes()))) { + break; + } + } + } + mClearedBlockCount += aChunk.BlockCount(); + }); + + // We start with one chunk right away, and request a following one now + // so it should be available before the current chunk is full. 
+ SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock); + mRequestedChunkHolder = MakeRefPtr<RequestedChunkRefCountedHolder>(); + RequestChunk(aLock); + } + + [[nodiscard]] size_t SizeOfExcludingThis( + MallocSizeOf aMallocSizeOf, + const baseprofiler::detail::BaseProfilerMaybeAutoLock&) const { + if (MOZ_UNLIKELY(!mChunkManager)) { + // Out-of-session. + return 0; + } + size_t size = mChunkManager->SizeOfIncludingThis(aMallocSizeOf); + if (mCurrentChunk) { + size += mCurrentChunk->SizeOfIncludingThis(aMallocSizeOf); + } + if (mNextChunks) { + size += mNextChunks->SizeOfIncludingThis(aMallocSizeOf); + } + return size; + } + + void InitializeCurrentChunk( + const baseprofiler::detail::BaseProfilerMaybeAutoLock&) { + MOZ_ASSERT(!!mCurrentChunk); + mCurrentChunk->SetRangeStart(mNextChunkRangeStart); + mNextChunkRangeStart += mCurrentChunk->BufferBytes(); + Unused << mCurrentChunk->ReserveInitialBlockAsTail(0); + } + + void SetAndInitializeCurrentChunk( + UniquePtr<ProfileBufferChunk>&& aChunk, + const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) { + mCurrentChunk = std::move(aChunk); + if (mCurrentChunk) { + InitializeCurrentChunk(aLock); + } + } + + void RequestChunk( + const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) { + if (HandleRequestedChunk_IsPending(aLock)) { + // There is already a pending request, don't start a new one. + return; + } + + // Ensure the `RequestedChunkHolder` knows we're starting a request. + mRequestedChunkHolder->StartRequest(); + + // Request a chunk, the callback carries a `RefPtr` of the + // `RequestedChunkHolder`, so it's guaranteed to live until it's invoked, + // even if this `ProfileChunkedBuffer` changes its `ChunkManager` or is + // destroyed. 
+ mChunkManager->RequestChunk( + [requestedChunkHolder = RefPtr<RequestedChunkRefCountedHolder>( + mRequestedChunkHolder)](UniquePtr<ProfileBufferChunk> aChunk) { + requestedChunkHolder->AddRequestedChunk(std::move(aChunk)); + }); + } + + [[nodiscard]] bool HandleRequestedChunk_IsPending( + const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) { + MOZ_ASSERT(!!mChunkManager); + MOZ_ASSERT(!!mRequestedChunkHolder); + + if (mRequestedChunkHolder->GetState() == + RequestedChunkRefCountedHolder::State::Unused) { + return false; + } + + // A request is either in-flight or fulfilled. + Maybe<UniquePtr<ProfileBufferChunk>> maybeChunk = + mRequestedChunkHolder->GetChunkIfFulfilled(); + if (maybeChunk.isNothing()) { + // Request is still pending. + return true; + } + + // Since we extracted the provided chunk, the holder should now be unused. + MOZ_ASSERT(mRequestedChunkHolder->GetState() == + RequestedChunkRefCountedHolder::State::Unused); + + // Request has been fulfilled. + UniquePtr<ProfileBufferChunk>& chunk = *maybeChunk; + if (chunk) { + // Try to use as current chunk if needed. + if (!mCurrentChunk) { + SetAndInitializeCurrentChunk(std::move(chunk), aLock); + // We've just received a chunk and made it current, request a next chunk + // for later. + MOZ_ASSERT(!mNextChunks); + RequestChunk(aLock); + return true; + } + + if (!mNextChunks) { + mNextChunks = std::move(chunk); + } else { + mNextChunks->InsertNext(std::move(chunk)); + } + } + + return false; + } + + // Get a pointer to the next chunk available + [[nodiscard]] ProfileBufferChunk* GetOrCreateCurrentChunk( + const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) { + ProfileBufferChunk* current = mCurrentChunk.get(); + if (MOZ_UNLIKELY(!current)) { + // No current chunk ready. + MOZ_ASSERT(!mNextChunks, + "There shouldn't be next chunks when there is no current one"); + // See if a request has recently been fulfilled, ignore pending status. 
+ Unused << HandleRequestedChunk_IsPending(aLock); + current = mCurrentChunk.get(); + if (MOZ_UNLIKELY(!current)) { + // There was no pending chunk, try to get one right now. + // This may still fail, but we can't do anything else about it, the + // caller must handle the nullptr case. + // Attempt a request for later. + SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock); + current = mCurrentChunk.get(); + } + } + return current; + } + + // Get a pointer to the next chunk available + [[nodiscard]] ProfileBufferChunk* GetOrCreateNextChunk( + const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) { + MOZ_ASSERT(!!mCurrentChunk, + "Why ask for a next chunk when there isn't even a current one?"); + ProfileBufferChunk* next = mNextChunks.get(); + if (MOZ_UNLIKELY(!next)) { + // No next chunk ready, see if a request has recently been fulfilled, + // ignore pending status. + Unused << HandleRequestedChunk_IsPending(aLock); + next = mNextChunks.get(); + if (MOZ_UNLIKELY(!next)) { + // There was no pending chunk, try to get one right now. + mNextChunks = mChunkManager->GetChunk(); + next = mNextChunks.get(); + // This may still fail, but we can't do anything else about it, the + // caller must handle the nullptr case. + if (MOZ_UNLIKELY(!next)) { + // Attempt a request for later. + RequestChunk(aLock); + } + } + } + return next; + } + + // Reserve a block of `aCallbackBlockBytes()` size, and invoke and return + // `aCallback(Maybe<ProfileBufferEntryWriter>&)`. Note that this is the "raw" + // version that doesn't write the entry size at the beginning of the block. + // Note: `aCallbackBlockBytes` is a callback instead of a simple value, to + // delay this potentially-expensive computation until after we're checked that + // we're in-session; use `Put(Length, Callback)` below if you know the size + // already. 
template <typename CallbackBlockBytes, typename Callback>
auto ReserveAndPutRaw(CallbackBlockBytes&& aCallbackBlockBytes,
                      Callback&& aCallback,
                      baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock,
                      uint64_t aBlockCount = 1) {
  // `aLock` must be the caller's lock on this buffer; it is passed along to
  // the chunk-handling helpers. `aBlockCount` is added to the pushed-blocks
  // statistic when the reservation succeeds.

  // The entry writer that will point into one or two chunks to write
  // into, empty by default (failure).
  Maybe<ProfileBufferEntryWriter> maybeEntryWriter;

  // The current chunk will be filled if we need to write more than its
  // remaining space.
  bool currentChunkFilled = false;

  // If the current chunk gets filled, we may or may not initialize the next
  // chunk!
  bool nextChunkInitialized = false;

  if (MOZ_LIKELY(mChunkManager)) {
    // In-session.

    // Only now do we compute the requested size.
    const Length blockBytes =
        std::forward<CallbackBlockBytes>(aCallbackBlockBytes)();

    if (ProfileBufferChunk* current = GetOrCreateCurrentChunk(aLock);
        MOZ_LIKELY(current)) {
      if (blockBytes <= current->RemainingBytes()) {
        // Block fits in current chunk with only one span.
        // (An exact fit also fills the chunk.)
        currentChunkFilled = blockBytes == current->RemainingBytes();
        const auto [mem0, blockIndex] = current->ReserveBlock(blockBytes);
        MOZ_ASSERT(mem0.LengthBytes() == blockBytes);
        maybeEntryWriter.emplace(
            mem0, blockIndex,
            ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
                blockIndex.ConvertToProfileBufferIndex() + blockBytes));
        MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
        mRangeEnd += blockBytes;
        mPushedBlockCount += aBlockCount;
      } else {
        // Block doesn't fit fully in current chunk, it needs to overflow into
        // the next one.
        // Whether or not we can write this entry, the current chunk is now
        // considered full, so it will be released. (Otherwise we could refuse
        // this entry, but later accept a smaller entry into this chunk, which
        // would be somewhat inconsistent.)
        currentChunkFilled = true;
        // Make sure the next chunk is available (from a previous request),
        // otherwise create one on the spot.
        if (ProfileBufferChunk* next = GetOrCreateNextChunk(aLock);
            MOZ_LIKELY(next)) {
          // Here, we know we have a current and a next chunk.
          // Reserve head of block at the end of the current chunk.
          const auto [mem0, blockIndex] =
              current->ReserveBlock(current->RemainingBytes());
          MOZ_ASSERT(mem0.LengthBytes() < blockBytes);
          MOZ_ASSERT(current->RemainingBytes() == 0);
          // Set the next chunk range, and reserve the needed space for the
          // tail of the block.
          next->SetRangeStart(mNextChunkRangeStart);
          mNextChunkRangeStart += next->BufferBytes();
          const auto mem1 = next->ReserveInitialBlockAsTail(
              blockBytes - mem0.LengthBytes());
          MOZ_ASSERT(next->RemainingBytes() != 0);
          // Remember that the next chunk has its range set already, so that
          // it isn't initialized a second time when it becomes current.
          nextChunkInitialized = true;
          // Block is split in two spans.
          maybeEntryWriter.emplace(
              mem0, mem1, blockIndex,
              ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
                  blockIndex.ConvertToProfileBufferIndex() + blockBytes));
          MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
          mRangeEnd += blockBytes;
          mPushedBlockCount += aBlockCount;
        } else {
          // Cannot get a new chunk. Record put failure.
          mFailedPutBytes += blockBytes;
        }
      }
    } else {
      // Cannot get a current chunk. Record put failure.
      mFailedPutBytes += blockBytes;
    }
  }  // end of `if (MOZ_LIKELY(mChunkManager))`

  // Here, we either have a `Nothing` (failure), or a non-empty entry writer
  // pointing at the start of the block.

  // After we invoke the callback and return, we may need to handle the
  // current chunk being filled.
  auto handleFilledChunk = MakeScopeExit([&]() {
    // If the entry writer was not already empty, the callback *must* have
    // filled the full entry.
    MOZ_ASSERT(!maybeEntryWriter || maybeEntryWriter->RemainingBytes() == 0);

    if (currentChunkFilled) {
      // Extract current (now filled) chunk.
      UniquePtr<ProfileBufferChunk> filled = std::move(mCurrentChunk);

      if (mNextChunks) {
        // Cycle to the next chunk.
        mCurrentChunk =
            std::exchange(mNextChunks, mNextChunks->ReleaseNext());

        // Make sure it is initialized (it is now the current chunk).
        if (!nextChunkInitialized) {
          InitializeCurrentChunk(aLock);
        }
      }

      // And finally mark filled chunk done and release it.
      filled->MarkDone();
      mChunkManager->ReleaseChunk(std::move(filled));

      // Request another chunk if needed.
      // In most cases, here we should have one current chunk and no next
      // chunk, so we want to do a request so there hopefully will be a next
      // chunk available when the current one gets filled.
      // But we also do a request if we don't even have a current chunk (if
      // it's too late, it's ok because the next `ReserveAndPutRaw` will just
      // allocate one on the spot.)
      // And if we already have a next chunk, there's no need for more now.
      if (!mCurrentChunk || !mNextChunks) {
        RequestChunk(aLock);
      }
    }
  });

  // The scope-exit above runs after the callback has been invoked and its
  // result computed.
  return std::forward<Callback>(aCallback)(maybeEntryWriter);
}

// Reserve a block of `aBlockBytes` size, and invoke and return
// `aCallback(Maybe<ProfileBufferEntryWriter>&)`. Note that this is the "raw"
// version that doesn't write the entry size at the beginning of the block.
// This overload takes the lock itself, and `aBlockCount` is added to the
// pushed-blocks statistic on success.
template <typename Callback>
auto ReserveAndPutRaw(Length aBlockBytes, Callback&& aCallback,
                      uint64_t aBlockCount) {
  baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
  return ReserveAndPutRaw([aBlockBytes]() { return aBlockBytes; },
                          std::forward<Callback>(aCallback), lock,
                          aBlockCount);
}

// Mutex guarding the following members.
mutable baseprofiler::detail::BaseProfilerMaybeMutex mMutex;

// Pointer to the current Chunk Manager (or null when out-of-session.)
// It may be owned locally (see below) or externally.
ProfileBufferChunkManager* mChunkManager = nullptr;

// Only non-null when we own the current Chunk Manager.
+ UniquePtr<ProfileBufferChunkManager> mOwnedChunkManager; + + UniquePtr<ProfileBufferChunk> mCurrentChunk; + + UniquePtr<ProfileBufferChunk> mNextChunks; + + // Class used to transfer requested chunks from a `ChunkManager` to a + // `ProfileChunkedBuffer`. + // It needs to be ref-counted because the request may be fulfilled + // asynchronously, and either side may be destroyed during the request. + // It cannot use the `ProfileChunkedBuffer` mutex, because that buffer and its + // mutex could be destroyed during the request. + class RequestedChunkRefCountedHolder + : public external::AtomicRefCounted<RequestedChunkRefCountedHolder> { + public: + MOZ_DECLARE_REFCOUNTED_TYPENAME(RequestedChunkRefCountedHolder) + + enum class State { Unused, Requested, Fulfilled }; + + // Get the current state. Note that it may change after the function + // returns, so it should be used carefully, e.g., `ProfileChunkedBuffer` can + // see if a request is pending or fulfilled, to avoid starting another + // request. + [[nodiscard]] State GetState() const { + baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex); + return mState; + } + + // Must be called by `ProfileChunkedBuffer` when it requests a chunk. + // There cannot be more than one request in-flight. + void StartRequest() { + baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex); + MOZ_ASSERT(mState == State::Unused, "Already requested or fulfilled"); + mState = State::Requested; + } + + // Must be called by the `ChunkManager` with a chunk. + // If the `ChunkManager` cannot provide a chunk (because of memory limits, + // or it gets destroyed), it must call this anyway with a nullptr. 
    void AddRequestedChunk(UniquePtr<ProfileBufferChunk>&& aChunk) {
      // All request state is read and written under `mRequestMutex`.
      baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
      // A chunk may only be delivered in answer to a pending request.
      MOZ_ASSERT(mState == State::Requested);
      mState = State::Fulfilled;
      // Takes ownership of the chunk; `aChunk` may be null (see the
      // `Some(nullptr)` case documented on `GetChunkIfFulfilled()`).
      mRequestedChunk = std::move(aChunk);
    }

    // The `ProfileChunkedBuffer` can try to extract the provided chunk after a
    // request:
    // - Nothing -> Request is not fulfilled yet.
    // - Some(nullptr) -> The `ChunkManager` was not able to provide a chunk.
    // - Some(chunk) -> Requested chunk.
    // When a fulfilled request is extracted, the state goes back to `Unused`.
    [[nodiscard]] Maybe<UniquePtr<ProfileBufferChunk>> GetChunkIfFulfilled() {
      Maybe<UniquePtr<ProfileBufferChunk>> maybeChunk;
      baseprofiler::detail::BaseProfilerAutoLock lock(mRequestMutex);
      // Must only be called while a request is pending or already answered.
      MOZ_ASSERT(mState == State::Requested || mState == State::Fulfilled);
      if (mState == State::Fulfilled) {
        mState = State::Unused;
        // Hand over ownership of the stored chunk (which may be null).
        maybeChunk.emplace(std::move(mRequestedChunk));
      }
      return maybeChunk;
    }

   private:
    // Mutex guarding the following members.
    mutable baseprofiler::detail::BaseProfilerMutex mRequestMutex;
    State mState = State::Unused;
    UniquePtr<ProfileBufferChunk> mRequestedChunk;
  };

  // Requested-chunk holder, kept alive when in-session, but may also live
  // longer if a request is in-flight.
  RefPtr<RequestedChunkRefCountedHolder> mRequestedChunkHolder;

  // Range start of the next chunk to become current. Starting at 1 because
  // 0 is a reserved index similar to nullptr.
  ProfileBufferIndex mNextChunkRangeStart = 1;

  // Index to the first block.
  // Atomic because it may be increased when a Chunk is destroyed, and the
  // callback may be invoked from anywhere, including from inside one of our
  // locked sections, so we cannot protect it with a mutex.
  Atomic<ProfileBufferIndex, MemoryOrdering::ReleaseAcquire> mRangeStart{1};

  // Index past the last block. Equals mRangeStart if empty.
  ProfileBufferIndex mRangeEnd = 1;

  // Number of blocks that have been pushed into this buffer.
  uint64_t mPushedBlockCount = 0;

  // Number of blocks that have been removed from this buffer.
  // Note: Live entries = pushed - cleared.
  // Atomic because it may be updated when a Chunk is destroyed, and the
  // callback may be invoked from anywhere, including from inside one of our
  // locked sections, so we cannot protect it with a mutex.
  Atomic<uint64_t, MemoryOrdering::ReleaseAcquire> mClearedBlockCount{0};

  // Number of bytes that could not be put into this buffer.
  uint64_t mFailedPutBytes = 0;
};

// ----------------------------------------------------------------------------
// ProfileChunkedBuffer serialization

// A ProfileChunkedBuffer can hide another one!
// This will be used to store marker backtraces; They can be read back into a
// UniquePtr<ProfileChunkedBuffer>.
// Format: len (ULEB128) | start | buffer (len bytes) | pushed | cleared
// len==0 marks an out-of-session buffer, or empty buffer; in that case nothing
// else is stored after the length.
// Note: The end index is not stored, the reader rebuilds it from `start` and
// the bytes it copies.
template <>
struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer> {
  // Number of bytes that `Write()` will output for `aBuffer`.
  // This must stay in sync with `Write()` below, field by field.
  static Length Bytes(const ProfileChunkedBuffer& aBuffer) {
    return aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
      if (!aReader) {
        // Out-of-session, we only need 1 byte to store a length of 0.
        return ULEB128Size<Length>(0);
      }
      ProfileBufferEntryReader reader = aReader->SingleChunkDataAsEntry();
      const ProfileBufferIndex start =
          reader.CurrentBlockIndex().ConvertToProfileBufferIndex();
      const ProfileBufferIndex end =
          reader.NextBlockIndex().ConvertToProfileBufferIndex();
      // The stored length must be representable as a `Length`.
      MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
      const Length len = static_cast<Length>(end - start);
      if (len == 0) {
        // In-session but empty, also store a length of 0.
        return ULEB128Size<Length>(0);
      }
      // In-session, non-empty: length + start index + data bytes + two stats.
      // NOTE(review): `Write()` emits both stats as `uint64_t`, whereas
      // `sizeof(aBuffer.mClearedBlockCount)` is the size of the Atomic wrapper;
      // this presumably equals `sizeof(uint64_t)` -- TODO confirm.
      return static_cast<Length>(ULEB128Size(len) + sizeof(start) + len +
                                 sizeof(aBuffer.mPushedBlockCount) +
                                 sizeof(aBuffer.mClearedBlockCount));
    });
  }

  // Write `aBuffer` into `aEW`, using the format described above.
  static void Write(ProfileBufferEntryWriter& aEW,
                    const ProfileChunkedBuffer& aBuffer) {
    aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
      if (!aReader) {
        // Out-of-session, only store a length of 0.
        aEW.WriteULEB128<Length>(0);
        return;
      }
      ProfileBufferEntryReader reader = aReader->SingleChunkDataAsEntry();
      const ProfileBufferIndex start =
          reader.CurrentBlockIndex().ConvertToProfileBufferIndex();
      const ProfileBufferIndex end =
          reader.NextBlockIndex().ConvertToProfileBufferIndex();
      MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
      const Length len = static_cast<Length>(end - start);
      // The destination entry must have room for at least the data bytes.
      MOZ_ASSERT(len <= aEW.RemainingBytes());
      if (len == 0) {
        // In-session but empty, only store a length of 0.
        aEW.WriteULEB128<Length>(0);
        return;
      }
      // In-session.
      // Store buffer length, and start index.
      aEW.WriteULEB128(len);
      aEW.WriteObject(start);
      // Write all the bytes.
      aEW.WriteFromReader(reader, reader.RemainingBytes());
      // And write stats.
      aEW.WriteObject(static_cast<uint64_t>(aBuffer.mPushedBlockCount));
      aEW.WriteObject(static_cast<uint64_t>(aBuffer.mClearedBlockCount));
      // Note: Failed pushes are not important to serialize.
    });
  }
};

// A serialized ProfileChunkedBuffer can be read into an empty buffer (either
// out-of-session, or in-session with enough room).
template <>
struct ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer> {
  // Read a serialized buffer from `aER` into `aBuffer`.
  // If the stored length is 0, `aBuffer` is left untouched.
  static void ReadInto(ProfileBufferEntryReader& aER,
                       ProfileChunkedBuffer& aBuffer) {
    // Expect an empty buffer, as we're going to overwrite it.
    MOZ_ASSERT(aBuffer.GetState().mRangeStart == aBuffer.GetState().mRangeEnd);
    // Read the stored buffer length.
    const auto len = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
    if (len == 0) {
      // 0-length means an "uninteresting" buffer, just return now.
      return;
    }
    // We have a non-empty buffer to read.

    // Read the start index (the end index is not part of the serialized data).
    const auto start = aER.ReadObject<ProfileBufferIndex>();
    aBuffer.mRangeStart = start;
    // For now, set the end to be the start (the buffer is still empty). It will
    // be updated in `ReserveAndPutRaw()` below.
    aBuffer.mRangeEnd = start;

    if (aBuffer.IsInSession()) {
      // Output buffer is in-session (i.e., it already has a memory buffer
      // attached). Make sure the caller allocated enough space.
      MOZ_RELEASE_ASSERT(aBuffer.BufferLength().value() >= len);
    } else {
      // Output buffer is out-of-session, set a new chunk manager that will
      // provide a single chunk of just the right size.
      aBuffer.SetChunkManager(MakeUnique<ProfileBufferChunkManagerSingle>(len));
      MOZ_ASSERT(aBuffer.BufferLength().value() >= len);
    }

    // Copy bytes into the buffer.
    aBuffer.ReserveAndPutRaw(
        len,
        [&](Maybe<ProfileBufferEntryWriter>& aEW) {
          // Space was checked/allocated above, so a writer must be available.
          MOZ_RELEASE_ASSERT(aEW.isSome());
          aEW->WriteFromReader(aER, len);
        },
        0);
    // Finally copy stats, in the same order as they were written.
    aBuffer.mPushedBlockCount = aER.ReadObject<uint64_t>();
    aBuffer.mClearedBlockCount = aER.ReadObject<uint64_t>();
    // Failed puts are not important to keep.
    aBuffer.mFailedPutBytes = 0;
  }

  // We cannot output a ProfileChunkedBuffer object (not copyable), use
  // `ReadInto()` or `aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>()`
  // instead.
  static ProfileChunkedBuffer Read(ProfileBufferEntryReader& aER) = delete;
};

// A ProfileChunkedBuffer is usually referenced through a UniquePtr, for
// convenience we support (de)serializing that UniquePtr directly.
// This is compatible with the non-UniquePtr serialization above, with a null
// pointer being treated like an out-of-session or empty buffer; and any of
// these would be deserialized into a null pointer.
template <>
struct ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>> {
  // Number of bytes that `Write()` will output for `aBufferUPtr`.
  static Length Bytes(const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
    if (!aBufferUPtr) {
      // Null pointer, treat it like an empty buffer, i.e., only account for a
      // length of 0.
      return ULEB128Size<Length>(0);
    }
    // Otherwise measure the pointed-at ProfileChunkedBuffer (which could be
    // out-of-session or empty.)
    return SumBytes(*aBufferUPtr);
  }

  // Write the pointed-at buffer, or a lone 0 length for a null pointer.
  static void Write(ProfileBufferEntryWriter& aEW,
                    const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
    if (!aBufferUPtr) {
      // Null pointer, treat it like an empty buffer, i.e., write length of 0.
      aEW.WriteULEB128<Length>(0);
      return;
    }
    // Otherwise write the pointed-at ProfileChunkedBuffer (which could be
    // out-of-session or empty.)
    aEW.WriteObject(*aBufferUPtr);
  }
};

// Serialization of a raw pointer to ProfileChunkedBuffer.
// Use Deserializer<UniquePtr<ProfileChunkedBuffer>> to read it back.
// Note: Despite its name, the `aBufferUPtr` parameter is a raw pointer here.
template <>
struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer*> {
  // Number of bytes that `Write()` will output for `aBufferUPtr`.
  static Length Bytes(ProfileChunkedBuffer* aBufferUPtr) {
    if (!aBufferUPtr) {
      // Null pointer, treat it like an empty buffer, i.e., only account for a
      // length of 0.
      return ULEB128Size<Length>(0);
    }
    // Otherwise measure the pointed-at ProfileChunkedBuffer (which could be
    // out-of-session or empty.)
    return SumBytes(*aBufferUPtr);
  }

  // Write the pointed-at buffer, or a lone 0 length for a null pointer.
  static void Write(ProfileBufferEntryWriter& aEW,
                    ProfileChunkedBuffer* aBufferUPtr) {
    if (!aBufferUPtr) {
      // Null pointer, treat it like an empty buffer, i.e., write length of 0.
      aEW.WriteULEB128<Length>(0);
      return;
    }
    // Otherwise write the pointed-at ProfileChunkedBuffer (which could be
    // out-of-session or empty.)
    aEW.WriteObject(*aBufferUPtr);
  }
};

// Deserialization into a UniquePtr<ProfileChunkedBuffer>: a stored length of 0
// gives a null pointer, anything else gives a newly-allocated buffer.
template <>
struct ProfileBufferEntryReader::Deserializer<UniquePtr<ProfileChunkedBuffer>> {
  // Replace `aBuffer` with whatever `Read()` produces (possibly null).
  static void ReadInto(ProfileBufferEntryReader& aER,
                       UniquePtr<ProfileChunkedBuffer>& aBuffer) {
    aBuffer = Read(aER);
  }

  static UniquePtr<ProfileChunkedBuffer> Read(ProfileBufferEntryReader& aER) {
    UniquePtr<ProfileChunkedBuffer> bufferUPtr;
    // Keep a copy of the reader before reading the length, so we can restart
    // from here below.
    ProfileBufferEntryReader readerBeforeLen = aER;
    // Read the stored buffer length.
    const auto len = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
    if (len == 0) {
      // 0-length means an "uninteresting" buffer, just return nullptr.
      return bufferUPtr;
    }
    // We have a non-empty buffer.
    // Allocate an empty ProfileChunkedBuffer without mutex.
    bufferUPtr = MakeUnique<ProfileChunkedBuffer>(
        ProfileChunkedBuffer::ThreadSafety::WithoutMutex);
    // Rewind the reader before the length and deserialize the contents, using
    // the non-UniquePtr Deserializer (which reads the length again itself).
    aER = readerBeforeLen;
    aER.ReadIntoObject(*bufferUPtr);
    return bufferUPtr;
  }
};

}  // namespace mozilla

#endif  // ProfileChunkedBuffer_h
diff --git a/mozglue/baseprofiler/public/ProfilingCategoryList.h b/mozglue/baseprofiler/public/ProfilingCategoryList.h
new file mode 100644
index 0000000000..437f24aaa1
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfilingCategoryList.h
@@ -0,0 +1,122 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef baseprofiler_ProfilingCategoryList_h
#define baseprofiler_ProfilingCategoryList_h

// Profiler sub-categories are applied to each sampled stack to describe the
// type of workload that the CPU is busy with. Only one sub-category can be
// assigned so be mindful that these are non-overlapping. The active category is
// set by pushing a label to the profiling stack, or by the unwinder in cases
// such as JITs. A profile sample in arbitrary C++/Rust will typically be
// categorized based on the top of the label stack.
//
// The list of available color names for categories is:
//    transparent
//    blue
//    green
//    grey
//    lightblue
//    magenta
//    orange
//    purple
//    yellow
//
// NOTE(review): The TEST and IPC categories below use "darkgray" and
// "lightgreen", which are not in the list above -- confirm whether the list is
// incomplete or whether these two colors are unsupported.
//
// Usage: MOZ_PROFILING_CATEGORY_LIST is an X-macro; each category expands to
// one BEGIN_CATEGORY(name, label, color), followed by one
// SUBCATEGORY(category, name, label) per sub-category, and one END_CATEGORY.

// clang-format off

#define MOZ_PROFILING_CATEGORY_LIST(BEGIN_CATEGORY, SUBCATEGORY, END_CATEGORY) \
  BEGIN_CATEGORY(IDLE, "Idle", "transparent") \
    SUBCATEGORY(IDLE, IDLE, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(OTHER, "Other", "grey") \
    SUBCATEGORY(OTHER, OTHER, "Other") \
    SUBCATEGORY(OTHER, OTHER_PreferenceRead, "Preference Read") \
    SUBCATEGORY(OTHER, OTHER_Profiling, "Profiling") \
  END_CATEGORY \
  BEGIN_CATEGORY(TEST, "Test", "darkgray") \
    SUBCATEGORY(TEST, TEST, "Test") \
  END_CATEGORY \
  BEGIN_CATEGORY(LAYOUT, "Layout", "purple") \
    SUBCATEGORY(LAYOUT, LAYOUT, "Other") \
    SUBCATEGORY(LAYOUT, LAYOUT_FrameConstruction, "Frame construction") \
    SUBCATEGORY(LAYOUT, LAYOUT_Reflow, "Reflow") \
    SUBCATEGORY(LAYOUT, LAYOUT_CSSParsing, "CSS parsing") \
    SUBCATEGORY(LAYOUT, LAYOUT_SelectorQuery, "Selector query") \
    SUBCATEGORY(LAYOUT, LAYOUT_StyleComputation, "Style computation") \
  END_CATEGORY \
  BEGIN_CATEGORY(JS, "JavaScript", "yellow") \
    SUBCATEGORY(JS, JS, "Other") \
    SUBCATEGORY(JS, JS_Parsing, "Parsing") \
    SUBCATEGORY(JS, JS_BaselineCompilation, "JIT Compile (baseline)") \
    SUBCATEGORY(JS, JS_IonCompilation, "JIT Compile (ion)") \
    SUBCATEGORY(JS, JS_Interpreter, "Interpreter") \
    SUBCATEGORY(JS, JS_BaselineInterpret, "JIT (baseline-interpreter)") \
    SUBCATEGORY(JS, JS_Baseline, "JIT (baseline)") \
    SUBCATEGORY(JS, JS_IonMonkey, "JIT (ion)") \
  END_CATEGORY \
  BEGIN_CATEGORY(GCCC, "GC / CC", "orange") \
    SUBCATEGORY(GCCC, GCCC, "Other") \
    SUBCATEGORY(GCCC, GCCC_MinorGC, "Minor GC") \
    SUBCATEGORY(GCCC, GCCC_MajorGC, "Major GC (Other)") \
    SUBCATEGORY(GCCC, GCCC_MajorGC_Mark, "Major GC (Mark)") \
    SUBCATEGORY(GCCC, GCCC_MajorGC_Sweep, "Major GC (Sweep)") \
    SUBCATEGORY(GCCC, GCCC_MajorGC_Compact, "Major GC (Compact)") \
    SUBCATEGORY(GCCC, GCCC_UnmarkGray, "Unmark Gray") \
    SUBCATEGORY(GCCC, GCCC_Barrier, "Barrier") \
    SUBCATEGORY(GCCC, GCCC_FreeSnowWhite, "CC (Free Snow White)") \
    SUBCATEGORY(GCCC, GCCC_BuildGraph, "CC (Build Graph)") \
    SUBCATEGORY(GCCC, GCCC_ScanRoots, "CC (Scan Roots)") \
    SUBCATEGORY(GCCC, GCCC_CollectWhite, "CC (Collect White)") \
    SUBCATEGORY(GCCC, GCCC_Finalize, "CC (Finalize)") \
  END_CATEGORY \
  BEGIN_CATEGORY(NETWORK, "Network", "lightblue") \
    SUBCATEGORY(NETWORK, NETWORK, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(GRAPHICS, "Graphics", "green") \
    SUBCATEGORY(GRAPHICS, GRAPHICS, "Other") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_DisplayListBuilding, "DisplayList building") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_DisplayListMerging, "DisplayList merging") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_LayerBuilding, "Layer building") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_TileAllocation, "Tile allocation") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_WRDisplayList, "WebRender display list") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_Rasterization, "Rasterization") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_FlushingAsyncPaints, "Flushing async paints") \
    SUBCATEGORY(GRAPHICS, GRAPHICS_ImageDecoding, "Image decoding") \
  END_CATEGORY \
  BEGIN_CATEGORY(DOM, "DOM", "blue") \
    SUBCATEGORY(DOM, DOM, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(JAVA_ANDROID, "Android", "yellow") \
    SUBCATEGORY(JAVA_ANDROID, JAVA_ANDROID, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(JAVA_ANDROIDX, "AndroidX", "orange") \
    SUBCATEGORY(JAVA_ANDROIDX, JAVA_ANDROIDX, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(JAVA_LANGUAGE, "Java", "blue") \
    SUBCATEGORY(JAVA_LANGUAGE, JAVA_LANGUAGE, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(JAVA_MOZILLA, "Mozilla", "green") \
    SUBCATEGORY(JAVA_MOZILLA, JAVA_MOZILLA, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(JAVA_KOTLIN, "Kotlin", "purple") \
    SUBCATEGORY(JAVA_KOTLIN, JAVA_KOTLIN, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(JAVA_BLOCKED, "Blocked", "lightblue") \
    SUBCATEGORY(JAVA_BLOCKED, JAVA_BLOCKED, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(IPC, "IPC", "lightgreen") \
    SUBCATEGORY(IPC, IPC, "Other") \
  END_CATEGORY \
  BEGIN_CATEGORY(MEDIA, "Media", "orange") \
    SUBCATEGORY(MEDIA, MEDIA, "Other") \
    SUBCATEGORY(MEDIA, MEDIA_CUBEB, "Cubeb") \
    SUBCATEGORY(MEDIA, MEDIA_PLAYBACK, "Playback") \
    SUBCATEGORY(MEDIA, MEDIA_RT, "Real-time rendering") \
  END_CATEGORY

// clang-format on

#endif  // baseprofiler_ProfilingCategoryList_h
diff --git a/mozglue/baseprofiler/public/leb128iterator.h b/mozglue/baseprofiler/public/leb128iterator.h
new file mode 100644
index 0000000000..636baf916f
--- /dev/null
+++ b/mozglue/baseprofiler/public/leb128iterator.h
@@ -0,0 +1,207 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// LEB128 utilities that can read/write unsigned LEB128 numbers from/to
// iterators.
//
// LEB128 = Little Endian Base 128, where small numbers take few bytes, but
// large numbers are still allowed, which is ideal when serializing numbers that
// are likely to be small.
// Each byte contains 7 bits from the number, starting at the "little end", the
// top bit is 0 for the last byte, 1 otherwise.
+// Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc. +// +// Iterators only need to provide: +// - `*it` to return a reference to the next byte to be read from or written to. +// - `++it` to advance the iterator after a byte is written. +// +// The caller must always provide sufficient space to write any number, by: +// - pre-allocating a large enough buffer, or +// - allocating more space when `++it` reaches the end and/or `*it` is invoked +// after the end, or +// - moving the underlying pointer to an appropriate location (e.g., wrapping +// around a circular buffer). +// The caller must also provide enough bytes to read a full value (i.e., at +// least one byte should have its top bit unset), and a type large enough to +// hold the stored value. +// +// Note: There are insufficient checks for validity! These functions are +// intended to be used together, i.e., the user should only `ReadULEB128()` from +// a sufficiently-large buffer that the same user filled with `WriteULEB128()`. +// Using with externally-sourced data (e.g., DWARF) is *not* recommended. +// +// https://en.wikipedia.org/wiki/LEB128 + +#ifndef leb128iterator_h +#define leb128iterator_h + +#include "mozilla/Assertions.h" +#include "mozilla/Likely.h" + +#include <climits> +#include <cstdint> +#include <limits> +#include <type_traits> + +namespace mozilla { + +// Number of bytes needed to represent `aValue`. +template <typename T> +constexpr uint_fast8_t ULEB128Size(T aValue) { + static_assert(!std::numeric_limits<T>::is_signed, + "ULEB128Size only takes unsigned types"); + // We need one output byte per 7 bits of non-zero value. So we just remove + // 7 least significant bits at a time until the value becomes zero. + // Note the special case of 0, which still needs 1 output byte; this is done + // by starting the first loop before we check for 0. 
+ uint_fast8_t size = 0; + for (;;) { + size += 1; + aValue >>= 7; + // Expecting small values, so it should be more likely that `aValue == 0`. + if (MOZ_LIKELY(aValue == 0)) { + return size; + } + } +} + +// Maximum number of bytes needed to represent any value of type `T`. +template <typename T> +constexpr uint_fast8_t ULEB128MaxSize() { + return ULEB128Size<T>(std::numeric_limits<T>::max()); +} + +// Write `aValue` in LEB128 to `aIterator`. +// The iterator will be moved past the last byte. +template <typename T, typename It> +void WriteULEB128(T aValue, It& aIterator) { + static_assert(!std::numeric_limits<T>::is_signed, + "WriteULEB128 only takes unsigned types"); + using IteratorValue = std::remove_reference_t<decltype(*aIterator)>; + static_assert(sizeof(IteratorValue) == 1, + "WriteULEB128 expects an iterator to single bytes"); + // 0. Don't test for 0 yet, as we want to output one byte for it. + for (;;) { + // 1. Extract the 7 least significant bits. + const uint_fast8_t byte = aValue & 0x7Fu; + // 2. Remove them from `aValue`. + aValue >>= 7; + // 3. Write the 7 bits, and set the 8th bit if `aValue` is not 0 yet + // (meaning there will be more bytes after this one.) + // Expecting small values, so it should be more likely that `aValue == 0`. + // Note: No absolute need to force-cast to IteratorValue, because we have + // only changed the bottom 8 bits above. However the compiler could warn + // about a narrowing conversion from potentially-multibyte uint_fast8_t down + // to whatever single-byte type `*iterator* expects, so we make it explicit. + *aIterator = static_cast<IteratorValue>( + MOZ_LIKELY(aValue == 0) ? byte : (byte | 0x80u)); + // 4. Always advance the iterator to the next byte. + ++aIterator; + // 5. We're done if `aValue` is 0. + // Expecting small values, so it should be more likely that `aValue == 0`. + if (MOZ_LIKELY(aValue == 0)) { + return; + } + } +} + +// Read an LEB128 value from `aIterator`. 
+// The iterator will be moved past the last byte. +template <typename T, typename It> +T ReadULEB128(It& aIterator) { + static_assert(!std::numeric_limits<T>::is_signed, + "ReadULEB128 must return an unsigned type"); + using IteratorValue = std::remove_reference_t<decltype(*aIterator)>; + static_assert(sizeof(IteratorValue) == 1, + "ReadULEB128 expects an iterator to single bytes"); + // Incoming bits will be added to `result`... + T result = 0; + // ... starting with the least significant bits. + uint_fast8_t shift = 0; + for (;;) { + // 1. Read one byte from the iterator. + // `static_cast` just in case IteratorValue is not implicitly convertible to + // uint_fast8_t. It wouldn't matter if the sign was extended, we're only + // dealing with the bottom 8 bits below. + const uint_fast8_t byte = static_cast<uint_fast8_t>(*aIterator); + // 2. Always advance the iterator. + ++aIterator; + // 3. Extract the 7 bits of value, and shift them in place into `result`. + result |= static_cast<T>(byte & 0x7fu) << shift; + // 4. If the 8th bit is *not* set, this was the last byte. + // Expecting small values, so it should be more likely that the bit is off. + if (MOZ_LIKELY((byte & 0x80u) == 0)) { + return result; + } + // There are more bytes to read. + // 5. Next byte will contain more significant bits above the past 7. + shift += 7; + // Safety check that we're not going to shift by >= than the type size, + // which is Undefined Behavior in C++. + MOZ_ASSERT(shift < CHAR_BIT * sizeof(T)); + } +} + +// constexpr ULEB128 reader class. +// Mostly useful when dealing with non-trivial byte feeds. +template <typename T> +class ULEB128Reader { + static_assert(!std::numeric_limits<T>::is_signed, + "ULEB128Reader must handle an unsigned type"); + + public: + constexpr ULEB128Reader() = default; + + // Don't allow copy/assignment, it doesn't make sense for a stateful parser. 
+ constexpr ULEB128Reader(const ULEB128Reader&) = delete; + constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete; + + // Feed a byte into the parser. + // Returns true if this was the last byte. + [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) { + MOZ_ASSERT(!IsComplete()); + // Extract the 7 bits of value, and shift them in place into the value. + mValue |= static_cast<T>(aByte & 0x7fu) << mShift; + // If the 8th bit is *not* set, this was the last byte. + // Expecting small values, so it should be more likely that the bit is off. + if (MOZ_LIKELY((aByte & 0x80u) == 0)) { + mShift = mCompleteShift; + return true; + } + // There are more bytes to read. + // Next byte will contain more significant bits above the past 7. + mShift += 7; + // Safety check that we're not going to shift by >= than the type size, + // which is Undefined Behavior in C++. + MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T)); + return false; + } + + constexpr void Reset() { + mValue = 0; + mShift = 0; + } + + [[nodiscard]] constexpr bool IsComplete() const { + return mShift == mCompleteShift; + } + + [[nodiscard]] constexpr T Value() const { + MOZ_ASSERT(IsComplete()); + return mValue; + } + + private: + // Special value of `mShift` indicating that parsing is complete. + constexpr static unsigned mCompleteShift = 0x10000u; + + T mValue = 0; + unsigned mShift = 0; +}; + +} // namespace mozilla + +#endif // leb128iterator_h |