diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /tools/profiler/core | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/profiler/core')
39 files changed, 22053 insertions, 0 deletions
diff --git a/tools/profiler/core/EHABIStackWalk.cpp b/tools/profiler/core/EHABIStackWalk.cpp new file mode 100644 index 0000000000..e3099b89ec --- /dev/null +++ b/tools/profiler/core/EHABIStackWalk.cpp @@ -0,0 +1,597 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI, as described in: + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf + * + * This handles only the ARM-defined "personality routines" (chapter + * 9), and don't track the value of FP registers, because profiling + * needs only chain of PC/SP values. + * + * Because the exception handling info may not be accurate for all + * possible places where an async signal could occur (e.g., in a + * prologue or epilogue), this bounds-checks all stack accesses. + * + * This file uses "struct" for structures in the exception tables and + * "class" otherwise. We should avoid violating the C++11 + * standard-layout rules in the former. 
+ */
+
+#include "EHABIStackWalk.h"
+
+#include "shared-libraries.h"
+#include "platform.h"
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+
+#include <algorithm>
+#include <elf.h>
+#include <stdint.h>
+#include <string.h>  // memcmp, used by the EHTable constructor
+#include <vector>
+#include <string>
+
+#ifndef PT_ARM_EXIDX
+#  define PT_ARM_EXIDX 0x70000001
+#endif
+
+namespace mozilla {
+
+// A 31-bit self-relative offset as stored in the exception tables: the low
+// 31 bits are a signed offset from the address of the word itself, and the
+// top bit is a separate flag.
+struct PRel31 {
+  uint32_t mBits;
+  bool topBit() const { return mBits & 0x80000000; }
+  uint32_t value() const { return mBits & 0x7fffffff; }
+  // Sign-extends the low 31 bits.  The left shift is done on the unsigned
+  // value so that moving bit 30 into the sign position is not a signed
+  // overflow; the arithmetic right shift then replicates it.
+  int32_t offset() const { return static_cast<int32_t>(mBits << 1) >> 1; }
+  // Address this entry refers to: the entry's own address plus offset().
+  const void* compute() const {
+    return reinterpret_cast<const char*>(this) + offset();
+  }
+
+ private:
+  PRel31(const PRel31& copied) = delete;
+  PRel31() = delete;
+};
+
+// One index-table entry: the start of the code range it covers, and either
+// inline unwind data or a reference to it (see the EHInterp constructor).
+struct EHEntry {
+  PRel31 startPC;
+  PRel31 exidx;
+
+ private:
+  EHEntry(const EHEntry& copied) = delete;
+  EHEntry() = delete;
+};
+
+class EHState {
+  // Note that any core register can be used as a "frame pointer" to
+  // influence the unwinding process, so this must track all of them.
+  uint32_t mRegs[16];
+
+ public:
+  bool unwind(const EHEntry* aEntry, const void* stackBase);
+  uint32_t& operator[](int i) { return mRegs[i]; }
+  const uint32_t& operator[](int i) const { return mRegs[i]; }
+  explicit EHState(const mcontext_t&);
+};
+
+enum { R_SP = 13, R_LR = 14, R_PC = 15 };
+
+// The exception index table of one loaded ELF object, together with the
+// address range covered by its executable segments.
+class EHTable {
+  uint32_t mStartPC;
+  uint32_t mEndPC;
+  uint32_t mBaseAddress;
+  const EHEntry* mEntriesBegin;
+  const EHEntry* mEntriesEnd;
+  std::string mName;
+
+ public:
+  EHTable(const void* aELF, size_t aSize, const std::string& aName);
+  const EHEntry* lookup(uint32_t aPC) const;
+  bool isValid() const { return mEntriesEnd != mEntriesBegin; }
+  const std::string& name() const { return mName; }
+  uint32_t startPC() const { return mStartPC; }
+  uint32_t endPC() const { return mEndPC; }
+  uint32_t baseAddress() const { return mBaseAddress; }
+};
+
+// All known EHTables, sorted by start address, plus a parallel vector of
+// those start addresses used for lookup.
+class EHAddrSpace {
+  std::vector<uint32_t> mStarts;
+  std::vector<EHTable> mTables;
+  static mozilla::Atomic<const EHAddrSpace*> sCurrent;
+
+ public:
+  explicit EHAddrSpace(const std::vector<EHTable>& aTables);
+  const EHTable* lookup(uint32_t aPC) const;
+  static void Update();
+  static const EHAddrSpace* Get();
+};
+
+void EHABIStackWalkInit() { EHAddrSpace::Update(); }
+
+// Walks the stack starting from aContext, storing up to aNumFrames PC/SP
+// pairs into aPCs/aSPs.  Returns the number of frames stored.  The frame
+// described by aContext itself is always recorded (if aNumFrames > 0), even
+// when no unwind information is available.
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, const size_t aNumFrames) {
+  const EHAddrSpace* space = EHAddrSpace::Get();
+  EHState state(aContext);
+  size_t count = 0;
+
+  while (count < aNumFrames) {
+    uint32_t pc = state[R_PC], sp = state[R_SP];
+
+    // ARM instructions are always aligned to 2 or 4 bytes.
+    // The last bit of the pc / lr indicates ARM or Thumb mode.
+    // We're only interested in the instruction address, so we mask off that
+    // bit.
+    constexpr uint32_t instrAddrMask = ~1;
+    uint32_t instrAddress = pc & instrAddrMask;
+
+    aPCs[count] = reinterpret_cast<void*>(instrAddress);
+    aSPs[count] = reinterpret_cast<void*>(sp);
+    count++;
+
+    if (!space) break;
+    // TODO: cache these lookups.  Binary-searching libxul is
+    // expensive (possibly more expensive than doing the actual
+    // unwind), and even a small cache should help.
+    const EHTable* table = space->lookup(pc);
+    if (!table) break;
+    const EHEntry* entry = table->lookup(pc);
+    if (!entry) break;
+    if (!state.unwind(entry, stackBase)) break;
+  }
+
+  return count;
+}
+
+// Interpreter for the unwind instruction stream attached to one EHEntry.
+// It mutates the EHState it was given; any bounds or decoding problem sets
+// mFailed, which makes unwind() return false.
+class EHInterp {
+ public:
+  // Note that stackLimit is exclusive and stackBase is inclusive
+  // (i.e, stackLimit < SP <= stackBase), following the convention
+  // set by the AAPCS spec.
+  EHInterp(EHState& aState, const EHEntry* aEntry, uint32_t aStackLimit,
+           uint32_t aStackBase)
+      : mState(aState),
+        mStackLimit(aStackLimit),
+        mStackBase(aStackBase),
+        mNextWord(0),
+        mWordsLeft(0),
+        mFailed(false) {
+    const PRel31& exidx = aEntry->exidx;
+    uint32_t firstWord;
+
+    if (exidx.mBits == 1) {  // EXIDX_CANTUNWIND
+      mFailed = true;
+      return;
+    }
+    if (exidx.topBit()) {
+      // The unwind data is stored inline in the index entry.
+      firstWord = exidx.mBits;
+    } else {
+      // The index entry points at the unwind data.
+      mNextWord = reinterpret_cast<const uint32_t*>(exidx.compute());
+      firstWord = *mNextWord++;
+    }
+
+    switch (firstWord >> 24) {
+      case 0x80:  // short
+        mWord = firstWord << 8;
+        mBytesLeft = 3;
+        break;
+      case 0x81:
+      case 0x82:  // long; catch descriptor size ignored
+        mWord = firstWord << 16;
+        mBytesLeft = 2;
+        mWordsLeft = (firstWord >> 16) & 0xff;
+        break;
+      default:
+        // unknown personality
+        mFailed = true;
+    }
+  }
+
+  bool unwind();
+
+ private:
+  // TODO: GCC has been observed not CSEing repeated reads of
+  // mState[R_SP] with writes to mFailed between them, suggesting that
+  // it hasn't determined that they can't alias and is thus missing
+  // optimization opportunities.  So, we may want to flatten EHState
+  // into this class; this may also make the code simpler.
+  EHState& mState;
+  uint32_t mStackLimit;
+  uint32_t mStackBase;
+  const uint32_t* mNextWord;
+  uint32_t mWord;
+  uint8_t mWordsLeft;
+  uint8_t mBytesLeft;
+  bool mFailed;
+
+  // I_* are instruction opcodes; M_* is the mask to apply before comparing
+  // against the I_* value of the same name.
+  enum {
+    I_ADDSP = 0x00,  // 0sxxxxxx (subtract if s)
+    M_ADDSP = 0x80,
+    I_POPMASK = 0x80,  // 1000iiii iiiiiiii (if any i set)
+    M_POPMASK = 0xf0,
+    I_MOVSP = 0x90,  // 1001nnnn
+    M_MOVSP = 0xf0,
+    I_POPN = 0xa0,  // 1010lnnn
+    M_POPN = 0xf0,
+    I_FINISH = 0xb0,    // 10110000
+    I_POPLO = 0xb1,     // 10110001 0000iiii (if any i set)
+    I_ADDSPBIG = 0xb2,  // 10110010 uleb128
+    I_POPFDX = 0xb3,    // 10110011 sssscccc
+    I_POPFDX8 = 0xb8,   // 10111nnn
+    M_POPFDX8 = 0xf8,
+    // "Intel Wireless MMX" extensions omitted.
+    I_POPFDD = 0xc8,  // 1100100h sssscccc
+    M_POPFDD = 0xfe,
+    I_POPFDD8 = 0xd0,  // 11010nnn
+    M_POPFDD8 = 0xf8
+  };
+
+  // Returns the next instruction byte, refilling mWord from the table when
+  // the current word is exhausted.  Running off the end of the stream
+  // yields I_FINISH.
+  uint8_t next() {
+    if (mBytesLeft == 0) {
+      if (mWordsLeft == 0) {
+        return I_FINISH;
+      }
+      mWordsLeft--;
+      mWord = *mNextWord++;
+      mBytesLeft = 4;
+    }
+    mBytesLeft--;
+    mWord = (mWord << 8) | (mWord >> 24);  // rotate
+    return mWord;
+  }
+
+  uint32_t& vSP() { return mState[R_SP]; }
+  uint32_t* ptrSP() { return reinterpret_cast<uint32_t*>(vSP()); }
+
+  void checkStackBase() {
+    if (vSP() > mStackBase) mFailed = true;
+  }
+  void checkStackLimit() {
+    if (vSP() <= mStackLimit) mFailed = true;
+  }
+  void checkStackAlign() {
+    if ((vSP() & 3) != 0) mFailed = true;
+  }
+  void checkStack() {
+    checkStackBase();
+    checkStackLimit();
+    checkStackAlign();
+  }
+
+  // Pops the registers first..last selected by mask (bit 0 = first).  A
+  // popped SP is applied only after all other registers have been loaded.
+  void popRange(uint8_t first, uint8_t last, uint16_t mask) {
+    bool hasSP = false;
+    uint32_t tmpSP;
+    if (mask == 0) mFailed = true;
+    for (uint8_t r = first; r <= last; ++r) {
+      if (mask & 1) {
+        if (r == R_SP) {
+          hasSP = true;
+          tmpSP = *ptrSP();
+        } else
+          mState[r] = *ptrSP();
+        vSP() += 4;
+        checkStackBase();
+        if (mFailed) return;
+      }
+      mask >>= 1;
+    }
+    if (hasSP) {
+      vSP() = tmpSP;
+      checkStack();
+    }
+  }
+};
+
+bool EHState::unwind(const EHEntry* aEntry, const void* stackBasePtr) {
+  // The unwinding program cannot set SP to less than the initial value.
+  uint32_t stackLimit = mRegs[R_SP] - 4;
+  uint32_t stackBase = reinterpret_cast<uint32_t>(stackBasePtr);
+  EHInterp interp(*this, aEntry, stackLimit, stackBase);
+  return interp.unwind();
+}
+
+// Runs the unwind program.  Returns true once a Finish instruction has been
+// executed, false if any check failed or an unhandled instruction was met.
+bool EHInterp::unwind() {
+  mState[R_PC] = 0;
+  checkStack();
+  while (!mFailed) {
+    uint8_t insn = next();
+#ifdef DEBUG_EHABI_UNWIND
+    LOG("unwind insn = %02x", (unsigned)insn);
+#endif
+    // Try to put the common cases first.
+
+    // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
+    // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4
+    if ((insn & M_ADDSP) == I_ADDSP) {
+      uint32_t offset = ((insn & 0x3f) << 2) + 4;
+      if (insn & 0x40) {
+        vSP() -= offset;
+        checkStackLimit();
+      } else {
+        vSP() += offset;
+        checkStackBase();
+      }
+      continue;
+    }
+
+    // 10100nnn: Pop r4-r[4+nnn]
+    // 10101nnn: Pop r4-r[4+nnn], r14
+    if ((insn & M_POPN) == I_POPN) {
+      uint8_t n = (insn & 0x07) + 1;
+      bool lr = insn & 0x08;
+      uint32_t* ptr = ptrSP();
+      vSP() += (n + (lr ? 1 : 0)) * 4;
+      checkStackBase();
+      // Bail out before touching memory: if the new vsp is past the stack
+      // base, some of the words below would be read from outside the stack.
+      if (mFailed) return false;
+      for (uint8_t r = 4; r < 4 + n; ++r) mState[r] = *ptr++;
+      if (lr) mState[R_LR] = *ptr++;
+      continue;
+    }
+
+    // 10110000: Finish
+    if (insn == I_FINISH) {
+      if (mState[R_PC] == 0) {
+        mState[R_PC] = mState[R_LR];
+        // Non-standard change (bug 916106): Prevent the caller from
+        // re-using LR.  Since the caller is by definition not a leaf
+        // routine, it will have to restore LR from somewhere to
+        // return to its own caller, so we can safely zero it here.
+        // This makes a difference only if an error in unwinding
+        // (e.g., caused by starting from within a prologue/epilogue)
+        // causes us to load a pointer to a leaf routine as LR; if we
+        // don't do something, we'll go into an infinite loop of
+        // "returning" to that same function.
+        mState[R_LR] = 0;
+      }
+      return true;
+    }
+
+    // 1001nnnn: Set vsp = r[nnnn]
+    if ((insn & M_MOVSP) == I_MOVSP) {
+      vSP() = mState[insn & 0x0f];
+      checkStack();
+      continue;
+    }
+
+    // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD)
+    // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD)
+    if ((insn & M_POPFDD) == I_POPFDD) {
+      uint8_t n = (next() & 0x0f) + 1;
+      // Note: if the 16+ssss+cccc > 31, the encoding is reserved.
+      // As the space is currently unused, we don't try to check.
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD)
+    if ((insn & M_POPFDD8) == I_POPFDD8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
+    if (insn == I_ADDSPBIG) {
+      uint32_t acc = 0;
+      uint8_t shift = 0;
+      uint8_t byte;
+      do {
+        if (shift >= 32) return false;
+        byte = next();
+        // Shift as unsigned: at shift == 28 the high payload bits would
+        // otherwise be shifted out of a (promoted) signed int.
+        acc |= static_cast<uint32_t>(byte & 0x7f) << shift;
+        shift += 7;
+      } while (byte & 0x80);
+      uint32_t offset = 0x204 + (acc << 2);
+      // The calculations above could have overflowed.
+      // But the one we care about is this:
+      if (vSP() + offset < vSP()) mFailed = true;
+      vSP() += offset;
+      // ...so that this is the only other check needed:
+      checkStackBase();
+      continue;
+    }
+
+    // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4}
+    if ((insn & M_POPMASK) == I_POPMASK) {
+      popRange(4, 15, ((insn & 0x0f) << 8) | next());
+      continue;
+    }
+
+    // 10110001 0000iiii (i not all 0): Pop under mask {r3-r0}
+    if (insn == I_POPLO) {
+      popRange(0, 3, next() & 0x0f);
+      continue;
+    }
+
+    // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX)
+    if (insn == I_POPFDX) {
+      uint8_t n = (next() & 0x0f) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX)
+    if ((insn & M_POPFDX8) == I_POPFDX8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // unhandled instruction
+#ifdef DEBUG_EHABI_UNWIND
+    LOG("Unhandled EHABI instruction 0x%02x", insn);
+#endif
+    mFailed = true;
+  }
+  return false;
+}
+
+// Orders tables by start address; used to sort EHAddrSpace::mTables.
+bool operator<(const EHTable& lhs, const EHTable& rhs) {
+  return lhs.startPC() < rhs.startPC();
+}
+
+// Async signal unsafe.
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables)
+    : mTables(aTables) {
+  // Keep the tables ordered by start address and mirror those addresses in
+  // mStarts, which is what lookup() searches.
+  std::sort(mTables.begin(), mTables.end());
+  DebugOnly<uint32_t> prevEnd = 0;
+  for (const EHTable& table : mTables) {
+    // Debug builds verify that consecutive tables do not overlap.
+    MOZ_ASSERT(table.startPC() >= prevEnd);
+    mStarts.push_back(table.startPC());
+    prevEnd = table.endPC();
+  }
+}
+
+// Returns the table whose [startPC, endPC) range contains aPC, or null.
+const EHTable* EHAddrSpace::lookup(uint32_t aPC) const {
+  // First start address strictly greater than aPC; the candidate table is
+  // the one just before it.
+  const auto next = std::upper_bound(mStarts.begin(), mStarts.end(), aPC);
+  if (next == mStarts.begin()) return nullptr;
+
+  const EHTable& candidate = mTables[(next - mStarts.begin()) - 1];
+  if (aPC >= candidate.endPC()) return nullptr;
+  return &candidate;
+}
+
+// Returns the last entry whose start address is <= aPC, or null when aPC is
+// outside this table.
+const EHEntry* EHTable::lookup(uint32_t aPC) const {
+  MOZ_ASSERT(aPC >= mStartPC);
+  if (aPC >= mEndPC) return nullptr;
+
+  const EHEntry* lo = mEntriesBegin;
+  const EHEntry* hi = mEntriesEnd;
+  MOZ_ASSERT(lo < hi);
+  if (aPC < reinterpret_cast<uint32_t>(lo->startPC.compute())) {
+    return nullptr;
+  }
+
+  // Binary search; invariant: lo->startPC <= aPC, and the answer is in
+  // [lo, hi).
+  while (hi - lo > 1) {
+#ifdef EHABI_UNWIND_MORE_ASSERTS
+    if ((hi - 1)->startPC.compute() < lo->startPC.compute()) {
+      MOZ_CRASH("unsorted exidx");
+    }
+#endif
+    const EHEntry* mid = lo + (hi - lo) / 2;
+    const uint32_t midPC = reinterpret_cast<uint32_t>(mid->startPC.compute());
+    if (aPC < midPC) {
+      hi = mid;
+    } else {
+      lo = mid;
+    }
+  }
+  return lo;
+}
+
+#if MOZ_LITTLE_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2LSB;
+#elif MOZ_BIG_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2MSB;
+#else
+#  error "No endian?"
+#endif
+
+// Async signal unsafe: std::vector::reserve, std::string copy ctor.
+// Parses the ELF image mapped at aELF (aSize bytes) and records its
+// executable address range and the location of its exception index table.
+// On any validation failure the entry pointers stay null, so isValid()
+// returns false.
+EHTable::EHTable(const void* aELF, size_t aSize, const std::string& aName)
+    : mStartPC(~0),  // largest uint32_t
+      mEndPC(0),
+      mBaseAddress(0),  // keep baseAddress() defined on the early returns
+      mEntriesBegin(nullptr),
+      mEntriesEnd(nullptr),
+      mName(aName) {
+  const uint32_t fileHeaderAddr = reinterpret_cast<uint32_t>(aELF);
+
+  if (aSize < sizeof(Elf32_Ehdr)) return;
+
+  const Elf32_Ehdr& file = *(reinterpret_cast<Elf32_Ehdr*>(fileHeaderAddr));
+  if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 ||
+      file.e_ident[EI_CLASS] != ELFCLASS32 ||
+      file.e_ident[EI_DATA] != hostEndian ||
+      file.e_ident[EI_VERSION] != EV_CURRENT || file.e_machine != EM_ARM ||
+      file.e_version != EV_CURRENT)
+    // e_flags?
+    return;
+
+  // The program header table must lie inside the mapping and its entries
+  // must be at least as large as the structure read below.  Computed in 64
+  // bits so that a bogus header cannot wrap the sum.  The assertion keeps
+  // debug builds loud; release builds reject the object instead of reading
+  // out of bounds.
+  const uint64_t phdrTableEnd =
+      static_cast<uint64_t>(file.e_phoff) +
+      static_cast<uint64_t>(file.e_phnum) * file.e_phentsize;
+  MOZ_ASSERT(phdrTableEnd <= aSize);
+  if (phdrTableEnd > aSize || file.e_phentsize < sizeof(Elf32_Phdr)) return;
+
+  const Elf32_Phdr* exidxHdr = nullptr;
+  const Elf32_Phdr* zeroHdr = nullptr;
+  for (unsigned i = 0; i < file.e_phnum; ++i) {
+    const Elf32_Phdr& phdr = *(reinterpret_cast<Elf32_Phdr*>(
+        fileHeaderAddr + file.e_phoff + i * file.e_phentsize));
+    if (phdr.p_type == PT_ARM_EXIDX) {
+      exidxHdr = &phdr;
+    } else if (phdr.p_type == PT_LOAD) {
+      // The segment mapped from file offset 0 tells us where the file
+      // header sits in the address space, and hence the load bias.
+      if (phdr.p_offset == 0) {
+        zeroHdr = &phdr;
+      }
+      if (phdr.p_flags & PF_X) {
+        mStartPC = std::min(mStartPC, phdr.p_vaddr);
+        mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz);
+      }
+    }
+  }
+  if (!exidxHdr) return;
+  if (!zeroHdr) return;
+  mBaseAddress = fileHeaderAddr - zeroHdr->p_vaddr;
+  mStartPC += mBaseAddress;
+  mEndPC += mBaseAddress;
+  mEntriesBegin =
+      reinterpret_cast<const EHEntry*>(mBaseAddress + exidxHdr->p_vaddr);
+  mEntriesEnd = reinterpret_cast<const EHEntry*>(
+      mBaseAddress + exidxHdr->p_vaddr + exidxHdr->p_memsz);
+}
+
+mozilla::Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr);
+
+// Async signal safe; can fail if Update() hasn't returned yet.
+const EHAddrSpace* EHAddrSpace::Get() { return sCurrent; }
+
+// Collect unwinding information from loaded objects.  Calls after the
+// first have no effect.  Async signal unsafe.
+void EHAddrSpace::Update() {
+  const EHAddrSpace* current = sCurrent;
+  if (current) return;
+
+  SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
+  std::vector<EHTable> tables;
+
+  for (size_t i = 0; i < info.GetSize(); ++i) {
+    const SharedLibrary& lib = info.GetEntry(i);
+    // FIXME: This isn't correct if the start address isn't p_offset 0, because
+    // the start address will not point at the file header.  But this is worked
+    // around by magic number checks in the EHTable constructor.
+    EHTable tab(reinterpret_cast<const void*>(lib.GetStart()),
+                lib.GetEnd() - lib.GetStart(), lib.GetNativeDebugPath());
+    if (tab.isValid()) tables.push_back(tab);
+  }
+  const EHAddrSpace* space = new EHAddrSpace(tables);
+
+  // Publish only if nobody else has; if the exchange fails, a concurrent
+  // caller already installed an address space and ours is redundant.
+  if (!sCurrent.compareExchange(nullptr, space)) {
+    delete space;
+  }
+}
+
+// Copies the sixteen core registers out of the signal context.
+EHState::EHState(const mcontext_t& context) {
+  // __linux__ is predefined in every language mode; the bare `linux` macro
+  // is a GNU extension that strict ISO modes (-std=c++NN) do not define.
+#if defined(__linux__)
+  mRegs[0] = context.arm_r0;
+  mRegs[1] = context.arm_r1;
+  mRegs[2] = context.arm_r2;
+  mRegs[3] = context.arm_r3;
+  mRegs[4] = context.arm_r4;
+  mRegs[5] = context.arm_r5;
+  mRegs[6] = context.arm_r6;
+  mRegs[7] = context.arm_r7;
+  mRegs[8] = context.arm_r8;
+  mRegs[9] = context.arm_r9;
+  mRegs[10] = context.arm_r10;
+  mRegs[11] = context.arm_fp;
+  mRegs[12] = context.arm_ip;
+  mRegs[13] = context.arm_sp;
+  mRegs[14] = context.arm_lr;
+  mRegs[15] = context.arm_pc;
+#else
+#  error "Unhandled OS for ARM EHABI unwinding"
+#endif
+}
+
+}  // namespace mozilla
diff --git a/tools/profiler/core/EHABIStackWalk.h b/tools/profiler/core/EHABIStackWalk.h
new file mode 100644
index 0000000000..61286290b8
--- /dev/null
+++ b/tools/profiler/core/EHABIStackWalk.h
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI; see the comment at the top of
+ * the .cpp file for details.
+ */
+
+#ifndef mozilla_EHABIStackWalk_h__
+#define mozilla_EHABIStackWalk_h__
+
+#include <stddef.h>
+#include <ucontext.h>
+
+namespace mozilla {
+
+void EHABIStackWalkInit();
+
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, size_t aNumFrames);
+
+}  // namespace mozilla
+
+#endif
diff --git a/tools/profiler/core/MicroGeckoProfiler.cpp b/tools/profiler/core/MicroGeckoProfiler.cpp
new file mode 100644
index 0000000000..bedb755742
--- /dev/null
+++ b/tools/profiler/core/MicroGeckoProfiler.cpp
@@ -0,0 +1,203 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "GeckoProfiler.h"
+
+#include "mozilla/Maybe.h"
+#include "nsPrintfCString.h"
+#include "public/GeckoTraceEvent.h"
+
+// Standard headers for names used directly below: std::min,
+// std::numeric_limits, std::tuple/std::get, fprintf and getenv.
+#include <algorithm>
+#include <limits>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tuple>
+
+using namespace mozilla;
+using webrtc::trace_event_internal::TraceValueUnion;
+
+// Thin wrapper over profiler_register_thread(); a no-op when the profiler
+// is compiled out.
+void uprofiler_register_thread(const char* name, void* stacktop) {
+#ifdef MOZ_GECKO_PROFILER
+  profiler_register_thread(name, stacktop);
+#endif  // MOZ_GECKO_PROFILER
+}
+
+// Thin wrapper over profiler_unregister_thread(); a no-op when the profiler
+// is compiled out.
+void uprofiler_unregister_thread() {
+#ifdef MOZ_GECKO_PROFILER
+  profiler_unregister_thread();
+#endif  // MOZ_GECKO_PROFILER
+}
+
+#ifdef MOZ_GECKO_PROFILER
+namespace {
+// Maps a trace-event phase character to a marker timing: 'B' starts an
+// interval, 'E' ends one, 'I' is an instant.  Any other phase gives Nothing.
+Maybe<MarkerTiming> ToTiming(char phase) {
+  switch (phase) {
+    case 'B':
+      return Some(MarkerTiming::IntervalStart());
+    case 'E':
+      return Some(MarkerTiming::IntervalEnd());
+    case 'I':
+      return Some(MarkerTiming::InstantNow());
+    default:
+      return Nothing();
+  }
+}
+
+// One optional name/value argument of a trace event.  mName and mValue are
+// only meaningful when mPassed is true.
+struct TraceOption {
+  bool mPassed = false;
+  ProfilerString8View mName;
+  Variant<int64_t, bool, double, ProfilerString8View> mValue = AsVariant(false);
+};
+
+// Marker type for trace events carrying up to MAX_NUM_ARGS arguments.
+struct TraceMarker {
+  static constexpr int MAX_NUM_ARGS = 2;
+  using OptionsType = std::tuple<TraceOption, TraceOption>;
+  static constexpr mozilla::Span<const char> MarkerTypeName() {
+    return MakeStringSpan("TraceEvent");
+  }
+  // Writes each passed argument as a "nameN"/"valN" property pair.
+  static void StreamJSONMarkerData(
+      mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
+      const OptionsType& aArgs) {
+    auto writeValue = [&](const auto& aName, const auto& aVariant) {
+      aVariant.match(
+          [&](const int64_t& aValue) { aWriter.IntProperty(aName, aValue); },
+          [&](const bool& aValue) { aWriter.BoolProperty(aName, aValue); },
+          [&](const double& aValue) { aWriter.DoubleProperty(aName, aValue); },
+          [&](const ProfilerString8View& aValue) {
+            aWriter.StringProperty(aName, aValue);
+          });
+    };
+    if (const auto& arg = std::get<0>(aArgs); arg.mPassed) {
+      aWriter.StringProperty("name1", arg.mName);
+      writeValue("val1", arg.mValue);
+    }
+    if (const auto& arg = std::get<1>(aArgs); arg.mPassed) {
+      aWriter.StringProperty("name2", arg.mName);
+      writeValue("val2", arg.mValue);
+    }
+  }
+  static mozilla::MarkerSchema MarkerTypeDisplay() {
+    using MS = MarkerSchema;
+    MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable};
+    schema.SetChartLabel("{marker.name}");
+    schema.SetTableLabel(
+        "{marker.name}  {marker.data.name1} {marker.data.val1}  "
+        "{marker.data.name2} {marker.data.val2}");
+    schema.AddKeyLabelFormatSearchable("name1", "Key 1", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("val1", "Value 1", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("name2", "Key 2", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    schema.AddKeyLabelFormatSearchable("val2", "Value 2", MS::Format::String,
+                                       MS::Searchable::Searchable);
+    return schema;
+  }
+};
+}  // namespace
+
+namespace mozilla {
+template <>
+struct ProfileBufferEntryWriter::Serializer<TraceOption> {
+  static Length Bytes(const TraceOption& aOption) {
+    // 1 byte to store passed flag, then object size if passed.
+    return aOption.mPassed ? (1 + SumBytes(aOption.mName, aOption.mValue)) : 1;
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW, const TraceOption& aOption) {
+    // 'T'/'t' is just an arbitrary 1-byte value to distinguish states.
+    if (aOption.mPassed) {
+      aEW.WriteObject<char>('T');
+      // Use the Serializer for the name/value pair.
+      aEW.WriteObject(aOption.mName);
+      aEW.WriteObject(aOption.mValue);
+    } else {
+      aEW.WriteObject<char>('t');
+    }
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<TraceOption> {
+  // Mirror of Serializer<TraceOption>::Write: a 'T' tag is followed by the
+  // name and value, a 't' tag by nothing.
+  static void ReadInto(ProfileBufferEntryReader& aER, TraceOption& aOption) {
+    char c = aER.ReadObject<char>();
+    if ((aOption.mPassed = (c == 'T'))) {
+      aER.ReadIntoObject(aOption.mName);
+      aER.ReadIntoObject(aOption.mValue);
+    } else {
+      MOZ_ASSERT(c == 't');
+    }
+  }
+
+  static TraceOption Read(ProfileBufferEntryReader& aER) {
+    TraceOption option;
+    ReadInto(aER, option);
+    return option;
+  }
+};
+}  // namespace mozilla
+#endif  // MOZ_GECKO_PROFILER
+
+// Records a trace event as a profiler marker.  `phase` is 'B', 'E' or 'I'
+// (see ToTiming); other phases are dropped.  At most
+// TraceMarker::MAX_NUM_ARGS of the num_args arguments are recorded; each is
+// described by arg_names[i], arg_types[i] and arg_values[i].  A no-op when
+// the profiler is compiled out.
+void uprofiler_simple_event_marker(const char* name, char phase, int num_args,
+                                   const char** arg_names,
+                                   const unsigned char* arg_types,
+                                   const unsigned long long* arg_values) {
+#ifdef MOZ_GECKO_PROFILER
+  if (!profiler_thread_is_being_profiled_for_markers()) {
+    return;
+  }
+  Maybe<MarkerTiming> timing = ToTiming(phase);
+  if (!timing) {
+    if (getenv("MOZ_LOG_UNKNOWN_TRACE_EVENT_PHASES")) {
+      fprintf(stderr, "XXX UProfiler: phase not handled: '%c'\n", phase);
+    }
+    return;
+  }
+  MOZ_ASSERT(num_args <= TraceMarker::MAX_NUM_ARGS);
+  // The pointer table below names one slot per tuple element; keep the
+  // tuple arity and MAX_NUM_ARGS in lock-step.
+  static_assert(
+      static_cast<int>(std::tuple_size<TraceMarker::OptionsType>::value) ==
+          TraceMarker::MAX_NUM_ARGS,
+      "OptionsType must have exactly MAX_NUM_ARGS elements");
+  TraceMarker::OptionsType tuple;
+  TraceOption* args[TraceMarker::MAX_NUM_ARGS] = {&std::get<0>(tuple),
+                                                  &std::get<1>(tuple)};
+  for (int i = 0; i < std::min(num_args, TraceMarker::MAX_NUM_ARGS); ++i) {
+    auto& arg = *args[i];
+    arg.mPassed = true;
+    arg.mName = ProfilerString8View::WrapNullTerminatedString(arg_names[i]);
+    // arg_values[i] is reinterpreted as a TraceValueUnion; arg_types[i]
+    // selects which member to read.
+    switch (arg_types[i]) {
+      case TRACE_VALUE_TYPE_UINT:
+        MOZ_ASSERT(arg_values[i] <= std::numeric_limits<int64_t>::max());
+        arg.mValue = AsVariant(static_cast<int64_t>(
+            reinterpret_cast<const TraceValueUnion*>(&arg_values[i])->as_uint));
+        break;
+      case TRACE_VALUE_TYPE_INT:
+        arg.mValue = AsVariant(static_cast<int64_t>(
+            reinterpret_cast<const TraceValueUnion*>(&arg_values[i])->as_int));
+        break;
+      case TRACE_VALUE_TYPE_BOOL:
+        arg.mValue = AsVariant(
+            reinterpret_cast<const TraceValueUnion*>(&arg_values[i])->as_bool);
+        break;
+      case TRACE_VALUE_TYPE_DOUBLE:
+        arg.mValue =
+            AsVariant(reinterpret_cast<const TraceValueUnion*>(&arg_values[i])
+                          ->as_double);
+        break;
+      case TRACE_VALUE_TYPE_POINTER:
+        arg.mValue = AsVariant(ProfilerString8View(nsPrintfCString(
+            "%p", reinterpret_cast<const TraceValueUnion*>(&arg_values[i])
+                      ->as_pointer)));
+        break;
+      case TRACE_VALUE_TYPE_STRING:
+        arg.mValue = AsVariant(ProfilerString8View::WrapNullTerminatedString(
+            reinterpret_cast<const TraceValueUnion*>(&arg_values[i])
+                ->as_string));
+        break;
+      case TRACE_VALUE_TYPE_COPY_STRING:
+        arg.mValue = AsVariant(ProfilerString8View(
+            nsCString(reinterpret_cast<const TraceValueUnion*>(&arg_values[i])
+                          ->as_string)));
+        break;
+      default:
+        MOZ_ASSERT_UNREACHABLE("Unexpected trace value type");
+        arg.mValue = AsVariant(ProfilerString8View(
+            nsPrintfCString("Unexpected type: %u", arg_types[i])));
+        break;
+    }
+  }
+  profiler_add_marker(ProfilerString8View::WrapNullTerminatedString(name),
+                      geckoprofiler::category::MEDIA_RT, {timing.extract()},
+                      TraceMarker{}, tuple);
+#endif  // MOZ_GECKO_PROFILER
+}
diff --git a/tools/profiler/core/PageInformation.cpp b/tools/profiler/core/PageInformation.cpp
new file mode 100644
index 0000000000..83d2d508a1
--- /dev/null
+++ b/tools/profiler/core/PageInformation.cpp
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PageInformation.h"
+
+#include "mozilla/ProfileJSONWriter.h"
+
+// Stores the identifying data of a page; every field is copied as given.
+PageInformation::PageInformation(uint64_t aTabID, uint64_t aInnerWindowID,
+                                 const nsCString& aUrl,
+                                 uint64_t aEmbedderInnerWindowID,
+                                 bool aIsPrivateBrowsing)
+    : mTabID(aTabID),
+      mInnerWindowID(aInnerWindowID),
+      mUrl(aUrl),
+      mEmbedderInnerWindowID(aEmbedderInnerWindowID),
+      mIsPrivateBrowsing(aIsPrivateBrowsing) {}
+
+bool PageInformation::Equals(PageInformation* aOtherPageInfo) const {
+  // Inner window IDs are unique per page, so comparing them is sufficient;
+  // the tab ID and url need not be looked at.
+  const uint64_t otherInnerWindowID = aOtherPageInfo->InnerWindowID();
+  return otherInnerWindowID == InnerWindowID();
+}
+
+// Emits this page as one JSON object element.
+void PageInformation::StreamJSON(SpliceableJSONWriter& aWriter) const {
+  aWriter.StartObjectElement();
+
+  // The uint64_t IDs are written as doubles.  Both tab and inner window IDs
+  // come from `nsContentUtils::GenerateProcessSpecificId`, which is
+  // specifically designed to use only 53 of the 64 bits so that the value
+  // survives a round trip through a JS double without loss.
+  aWriter.DoubleProperty("tabID", TabID());
+  aWriter.DoubleProperty("innerWindowID", InnerWindowID());
+  aWriter.StringProperty("url", Url());
+  aWriter.DoubleProperty("embedderInnerWindowID", EmbedderInnerWindowID());
+  aWriter.BoolProperty("isPrivateBrowsing", IsPrivateBrowsing());
+
+  aWriter.EndObject();
+}
+
+// Reports only the allocation holding this object itself.
+size_t PageInformation::SizeOfIncludingThis(
+    mozilla::MallocSizeOf aMallocSizeOf) const {
+  const size_t selfSize = aMallocSizeOf(this);
+  return selfSize;
+}
diff --git a/tools/profiler/core/PageInformation.h b/tools/profiler/core/PageInformation.h
new file mode 100644
index 0000000000..6c9039b9a4
--- /dev/null
+++ b/tools/profiler/core/PageInformation.h
@@ -0,0 +1,68 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PageInformation_h
+#define PageInformation_h
+
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+#include "nsISupportsImpl.h"
+#include "nsString.h"
+
+namespace mozilla {
+namespace baseprofiler {
+class SpliceableJSONWriter;
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+// This class contains information that's relevant to a single page only
+// while the page information is important and registered with the profiler,
+// but regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+// When the page gets unregistered, we keep the profiler buffer position
+// to determine if we are still using this page. If not, we unregister
+// it in the next page registration.
+class PageInformation final {
+ public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(PageInformation)
+  PageInformation(uint64_t aTabID, uint64_t aInnerWindowID,
+                  const nsCString& aUrl, uint64_t aEmbedderInnerWindowID,
+                  bool aIsPrivateBrowsing);
+
+  size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+  bool Equals(PageInformation* aOtherPageInfo) const;
+  void StreamJSON(mozilla::baseprofiler::SpliceableJSONWriter& aWriter) const;
+
+  // Read-only accessors for the values supplied at construction.
+  uint64_t InnerWindowID() const { return mInnerWindowID; }
+  uint64_t TabID() const { return mTabID; }
+  const nsCString& Url() const { return mUrl; }
+  uint64_t EmbedderInnerWindowID() const { return mEmbedderInnerWindowID; }
+  bool IsPrivateBrowsing() const { return mIsPrivateBrowsing; }
+
+  // Empty until NotifyUnregistered() has been called.
+  mozilla::Maybe<uint64_t> BufferPositionWhenUnregistered() const {
+    return mBufferPositionWhenUnregistered;
+  }
+
+  // Records the buffer position at which the page was unregistered.
+  void NotifyUnregistered(uint64_t aBufferPosition) {
+    mBufferPositionWhenUnregistered = mozilla::Some(aBufferPosition);
+  }
+
+ private:
+  // Identity of the page; fixed at construction.
+  const uint64_t mTabID;
+  const uint64_t mInnerWindowID;
+  const nsCString mUrl;
+  const uint64_t mEmbedderInnerWindowID;
+  const bool mIsPrivateBrowsing;
+
+  // Holds the buffer position when page is unregistered.
+  // It's used to determine if we still use this page in the profiler or
+  // not.
+  mozilla::Maybe<uint64_t> mBufferPositionWhenUnregistered;
+
+  // Private destructor: presumably instances are meant to be destroyed only
+  // through the refcounting declared above -- confirm against the macro.
+  virtual ~PageInformation() = default;
+};
+
+#endif  // PageInformation_h
diff --git a/tools/profiler/core/PlatformMacros.h b/tools/profiler/core/PlatformMacros.h
new file mode 100644
index 0000000000..c72e94c128
--- /dev/null
+++ b/tools/profiler/core/PlatformMacros.h
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef PLATFORM_MACROS_H
+#define PLATFORM_MACROS_H
+
+// Define platform selection macros in a consistent way.  Don't add anything
+// else to this file, so it can remain freestanding.  The primary factorisation
+// is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined
+// too, since they are sometimes convenient.
+//
+// Note: "GP" is short for "Gecko Profiler".
+
+// Start from a clean slate: every macro that can be defined below is
+// undefined first, so exactly one PLAT/ARCH/OS triple is set afterwards.
+#undef GP_PLAT_x86_android
+#undef GP_PLAT_amd64_android
+#undef GP_PLAT_arm_android
+#undef GP_PLAT_arm64_android
+#undef GP_PLAT_x86_linux
+#undef GP_PLAT_amd64_linux
+#undef GP_PLAT_arm_linux
+#undef GP_PLAT_arm64_linux
+#undef GP_PLAT_mips64_linux
+#undef GP_PLAT_amd64_darwin
+#undef GP_PLAT_arm64_darwin
+#undef GP_PLAT_amd64_freebsd
+#undef GP_PLAT_arm64_freebsd
+#undef GP_PLAT_x86_windows
+#undef GP_PLAT_amd64_windows
+#undef GP_PLAT_arm64_windows
+
+#undef GP_ARCH_x86
+#undef GP_ARCH_amd64
+#undef GP_ARCH_arm
+#undef GP_ARCH_arm64
+#undef GP_ARCH_mips64
+
+#undef GP_OS_android
+#undef GP_OS_linux
+#undef GP_OS_darwin
+#undef GP_OS_freebsd
+#undef GP_OS_windows
+
+// We test __ANDROID__ before __linux__ because __linux__ is defined on both
+// Android and Linux, whereas GP_OS_android is not defined on vanilla Linux.
+
+#if defined(__ANDROID__) && defined(__i386__)
+#  define GP_PLAT_x86_android 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_android 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__arm__)
+#  define GP_PLAT_arm_android 1
+#  define GP_ARCH_arm 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_android 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_android 1
+
+#elif defined(__linux__) && defined(__i386__)
+#  define GP_PLAT_x86_linux 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_linux 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__arm__)
+#  define GP_PLAT_arm_linux 1
+#  define GP_ARCH_arm 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_linux 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__mips64)
+#  define GP_PLAT_mips64_linux 1
+#  define GP_ARCH_mips64 1
+#  define GP_OS_linux 1
+
+#elif defined(__APPLE__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_darwin 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_darwin 1
+
+#elif defined(__APPLE__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_darwin 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_darwin 1
+
+#elif defined(__FreeBSD__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_freebsd 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_freebsd 1
+
+#elif defined(__FreeBSD__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_freebsd 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_freebsd 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+    (defined(_M_IX86) || defined(__i386__))
+#  define GP_PLAT_x86_windows 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_windows 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+    (defined(_M_X64) || defined(__x86_64__))
+#  define GP_PLAT_amd64_windows 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_windows 1
+
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+#  define GP_PLAT_arm64_windows 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_windows 1
+
+#else
+#  error "Unsupported platform"
+#endif
+
+#endif /* ndef PLATFORM_MACROS_H */
diff --git a/tools/profiler/core/PowerCounters-linux.cpp b/tools/profiler/core/PowerCounters-linux.cpp
new file mode 100644
index 0000000000..006cea4867
--- /dev/null
+++ b/tools/profiler/core/PowerCounters-linux.cpp
@@ -0,0 +1,287 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PowerCounters.h"
+#include "nsXULAppAPI.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Logging.h"
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <cerrno>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+#include <linux/perf_event.h>
+
+// From the kernel rapl_scale() function:
+//
+// > users must then scale back: count * 1/(1e9*2^32) to get Joules
+#define PERF_EVENT_SCALE_NANOJOULES 2.3283064365386962890625e-1
+#define SCALE_NANOJOULES_TO_PICOWATTHOUR 3.6
+#define SYSFS_PERF_POWER_TYPE_PATH "/sys/bus/event_source/devices/power/type"
+
+static mozilla::LazyLogModule sRaplEventLog("profiler.rapl");
+#define RAPL_LOG(...)
\ + MOZ_LOG(sRaplEventLog, mozilla::LogLevel::Debug, (__VA_ARGS__)); + +enum class RaplEventType : uint64_t { + RAPL_ENERGY_CORES = 0x01, + RAPL_ENERGY_PKG = 0x02, + RAPL_ENERGY_DRAM = 0x03, + RAPL_ENERGY_GPU = 0x04, + RAPL_ENERGY_PSYS = 0x05, +}; + +struct RaplDomain { + RaplEventType mRaplEventType; + const char* mLabel; + const char* mDescription; +}; + +constexpr RaplDomain kSupportedRaplDomains[] = { + {RaplEventType::RAPL_ENERGY_CORES, "Power: CPU cores", + "Consumption of all physical cores"}, + { + RaplEventType::RAPL_ENERGY_PKG, + "Power: CPU package", + "Consumption of the whole processor package", + }, + { + RaplEventType::RAPL_ENERGY_DRAM, + "Power: DRAM", + "Consumption of the dram domain", + }, + { + RaplEventType::RAPL_ENERGY_GPU, + "Power: iGPU", + "Consumption of the builtin-gpu domain", + }, + { + RaplEventType::RAPL_ENERGY_PSYS, + "Power: System", + "Consumption of the builtin-psys domain", + }}; + +static std::string GetSysfsFileID(RaplEventType aEventType) { + switch (aEventType) { + case RaplEventType::RAPL_ENERGY_CORES: + return "cores"; + case RaplEventType::RAPL_ENERGY_PKG: + return "pkg"; + case RaplEventType::RAPL_ENERGY_DRAM: + return "ram"; + case RaplEventType::RAPL_ENERGY_GPU: + return "gpu"; + case RaplEventType::RAPL_ENERGY_PSYS: + return "psys"; + } + + return ""; +} + +static double GetRaplPerfEventScale(RaplEventType aEventType) { + const std::string sysfsFileName = + "/sys/bus/event_source/devices/power/events/energy-" + + GetSysfsFileID(aEventType) + ".scale"; + std::ifstream sysfsFile(sysfsFileName); + + if (!sysfsFile) { + return PERF_EVENT_SCALE_NANOJOULES; + } + + double scale; + + if (sysfsFile >> scale) { + RAPL_LOG("Read scale from %s: %.22e", sysfsFileName.c_str(), scale); + return scale * 1e9; + } + + return PERF_EVENT_SCALE_NANOJOULES; +} + +static uint64_t GetRaplPerfEventConfig(RaplEventType aEventType) { + const std::string sysfsFileName = + "/sys/bus/event_source/devices/power/events/energy-" + + 
GetSysfsFileID(aEventType); + std::ifstream sysfsFile(sysfsFileName); + + if (!sysfsFile) { + return static_cast<uint64_t>(aEventType); + } + + char buffer[7] = {}; + const std::string key = "event="; + + if (!sysfsFile.get(buffer, static_cast<std::streamsize>(key.length()) + 1) || + key != buffer) { + return static_cast<uint64_t>(aEventType); + } + + uint64_t config; + + if (sysfsFile >> std::hex >> config) { + RAPL_LOG("Read config from %s: 0x%" PRIx64, sysfsFileName.c_str(), config); + return config; + } + + return static_cast<uint64_t>(aEventType); +} + +class RaplProfilerCount final : public BaseProfilerCount { + public: + explicit RaplProfilerCount(int aPerfEventType, + const RaplEventType& aPerfEventConfig, + const char* aLabel, const char* aDescription) + : BaseProfilerCount(aLabel, nullptr, nullptr, "power", aDescription), + mLastResult(0), + mPerfEventFd(-1) { + RAPL_LOG("Creating RAPL Event for type: %s", mLabel); + + // Optimize for ease of use and do not set an excludes value. This + // ensures we do not require PERF_PMU_CAP_NO_EXCLUDE. 
+ struct perf_event_attr attr = {0}; + memset(&attr, 0, sizeof(attr)); + attr.type = aPerfEventType; + attr.size = sizeof(struct perf_event_attr); + attr.config = GetRaplPerfEventConfig(aPerfEventConfig); + attr.sample_period = 0; + attr.sample_type = PERF_SAMPLE_IDENTIFIER; + attr.inherit = 1; + + RAPL_LOG("Config for event %s: 0x%llx", mLabel, attr.config); + + mEventScale = GetRaplPerfEventScale(aPerfEventConfig); + RAPL_LOG("Scale for event %s: %.22e", mLabel, mEventScale); + + long fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); + if (fd < 0) { + RAPL_LOG("Event descriptor creation failed for event: %s", mLabel); + mPerfEventFd = -1; + return; + } + + RAPL_LOG("Created descriptor for event: %s", mLabel) + mPerfEventFd = static_cast<int>(fd); + } + + ~RaplProfilerCount() { + if (ValidPerfEventFd()) { + ioctl(mPerfEventFd, PERF_EVENT_IOC_DISABLE, 0); + close(mPerfEventFd); + } + } + + RaplProfilerCount(const RaplProfilerCount&) = delete; + RaplProfilerCount& operator=(const RaplProfilerCount&) = delete; + + CountSample Sample() override { + CountSample result = { + .count = 0, + .number = 0, + .isSampleNew = false, + }; + mozilla::Maybe<uint64_t> raplEventResult = ReadEventFd(); + + if (raplEventResult.isNothing()) { + return result; + } + + // We need to return picowatthour to be consistent with the Windows + // EMI API. As a result, the scale calculation should: + // + // - Convert the returned value to nanojoules + // - Convert nanojoules to picowatthour + double nanojoules = + static_cast<double>(raplEventResult.value()) * mEventScale; + double picowatthours = nanojoules / SCALE_NANOJOULES_TO_PICOWATTHOUR; + RAPL_LOG("Sample %s { count: %lu, last-result: %lu } = %lfJ", mLabel, + raplEventResult.value(), mLastResult, nanojoules * 1e-9); + + result.count = static_cast<int64_t>(picowatthours); + + // If the tick count is the same as the returned value or if this is the + // first sample, treat this sample as a duplicate. 
+ result.isSampleNew = + (mLastResult != 0 && mLastResult != raplEventResult.value() && + result.count >= 0); + mLastResult = raplEventResult.value(); + + return result; + } + + bool ValidPerfEventFd() { return mPerfEventFd >= 0; } + + private: + mozilla::Maybe<uint64_t> ReadEventFd() { + MOZ_ASSERT(ValidPerfEventFd()); + + uint64_t eventResult; + ssize_t readBytes = read(mPerfEventFd, &eventResult, sizeof(uint64_t)); + if (readBytes != sizeof(uint64_t)) { + RAPL_LOG("Invalid RAPL event read size: %ld", readBytes); + return mozilla::Nothing(); + } + + return mozilla::Some(eventResult); + } + + uint64_t mLastResult; + int mPerfEventFd; + double mEventScale; +}; + +static int GetRaplPerfEventType() { + FILE* fp = fopen(SYSFS_PERF_POWER_TYPE_PATH, "r"); + if (!fp) { + RAPL_LOG("Open of " SYSFS_PERF_POWER_TYPE_PATH " failed"); + return -1; + } + + int readTypeValue = -1; + if (fscanf(fp, "%d", &readTypeValue) != 1) { + RAPL_LOG("Read of " SYSFS_PERF_POWER_TYPE_PATH " failed"); + } + fclose(fp); + + return readTypeValue; +} + +PowerCounters::PowerCounters() { + if (!XRE_IsParentProcess()) { + // Energy meters are global, so only sample them on the parent. + return; + } + + // Get the value perf_event_attr.type should be set to for RAPL + // perf events. 
+ int perfEventType = GetRaplPerfEventType(); + if (perfEventType < 0) { + RAPL_LOG("Failed to find the event type for RAPL perf events."); + return; + } + + for (const auto& raplEventDomain : kSupportedRaplDomains) { + RaplProfilerCount* raplEvent = new RaplProfilerCount( + perfEventType, raplEventDomain.mRaplEventType, raplEventDomain.mLabel, + raplEventDomain.mDescription); + if (!raplEvent->ValidPerfEventFd() || !mCounters.emplaceBack(raplEvent)) { + delete raplEvent; + } + } +} + +PowerCounters::~PowerCounters() { + for (auto* raplEvent : mCounters) { + delete raplEvent; + } + mCounters.clear(); +} + +void PowerCounters::Sample() {} diff --git a/tools/profiler/core/PowerCounters-mac-amd64.cpp b/tools/profiler/core/PowerCounters-mac-amd64.cpp new file mode 100644 index 0000000000..7557be5046 --- /dev/null +++ b/tools/profiler/core/PowerCounters-mac-amd64.cpp @@ -0,0 +1,392 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "PowerCounters.h" +#include "nsDebug.h" +#include "nsPrintfCString.h" +#include "nsXULAppAPI.h" // for XRE_IsParentProcess + +// Because of the pkg_energy_statistics_t::pkes_version check below, the +// earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72). + +#include <sys/types.h> +#include <sys/sysctl.h> + +// OS X has four kinds of system calls: +// +// 1. Mach traps; +// 2. UNIX system calls; +// 3. machine-dependent calls; +// 4. diagnostic calls. +// +// (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.) +// +// The last category has a single call named diagCall() or diagCall64(). Its +// mode is controlled by its first argument, and one of the modes allows access +// to the Intel RAPL MSRs. +// +// The interface to diagCall64() is not exported, so we have to import some +// definitions from the XNU kernel. 
All imported definitions are annotated with +// the XNU source file they come from, and information about what XNU versions +// they were introduced in and (if relevant) modified. + +// The diagCall64() mode. +// From osfmk/i386/Diagnostics.h +// - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value +// 17 was used for dgGzallocTest.) +#define dgPowerStat 17 + +// From osfmk/i386/cpu_data.h +// - In 10.8.5 these values were introduced, along with core_energy_stat_t. +#define CPU_RTIME_BINS (12) +#define CPU_ITIME_BINS (CPU_RTIME_BINS) + +// core_energy_stat_t and pkg_energy_statistics_t are both from +// osfmk/i386/Diagnostics.c. +// - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many +// fewer fields. +// - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with +// numerous new fields. +// - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added. +// diagCall64(dgPowerStat) fills it with '1' in all versions since (up to +// 10.10.2 at time of writing). +// - in 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally +// added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the +// source code, but it could be defined at compile-time via compiler flags.) +// pkg_energy_statistics_t::pkes_version did not change, though. + +typedef struct { + uint64_t caperf; + uint64_t cmperf; + uint64_t ccres[6]; + uint64_t crtimes[CPU_RTIME_BINS]; + uint64_t citimes[CPU_ITIME_BINS]; + uint64_t crtime_total; + uint64_t citime_total; + uint64_t cpu_idle_exits; + uint64_t cpu_insns; + uint64_t cpu_ucc; + uint64_t cpu_urc; +#if DIAG_ALL_PMCS // Added in 10.10.2 (xnu-2782.10.72). + uint64_t gpmcs[4]; // Added in 10.10.2 (xnu-2782.10.72). +#endif /* DIAG_ALL_PMCS */ // Added in 10.10.2 (xnu-2782.10.72). +} core_energy_stat_t; + +typedef struct { + uint64_t pkes_version; // Added in 10.9.0 (xnu-2422.1.72). 
+ uint64_t pkg_cres[2][7]; + + // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT + // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT. + uint64_t pkg_power_unit; + + // These are the four fields for the four RAPL domains. For each field + // we list: + // + // - the corresponding MSR number; + // - Intel's name for that MSR; + // - XNU's name for that MSR; + // - which Intel processors the MSR is supported on. + // + // The last of these is determined from chapter 35 of Volume 3 of the + // "Intel 64 and IA-32 Architecture's Software Developer's Manual", + // Order Number 325384. (Note that chapter 35 contradicts section 14.9 + // to some degree.) + + // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS + // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). + uint64_t pkg_energy; + + // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS + // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57). + uint64_t pp0_energy; + + // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS + // Sandy Bridge, Haswell. + uint64_t pp1_energy; + + // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS + // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model + // 0x57) + uint64_t ddr_energy; + + uint64_t llc_flushed_cycles; + uint64_t ring_ratio_instantaneous; + uint64_t IA_frequency_clipping_cause; + uint64_t GT_frequency_clipping_cause; + uint64_t pkg_idle_exits; + uint64_t pkg_rtimes[CPU_RTIME_BINS]; + uint64_t pkg_itimes[CPU_ITIME_BINS]; + uint64_t mbus_delay_time; + uint64_t mint_delay_time; + uint32_t ncpus; + core_energy_stat_t cest[]; +} pkg_energy_statistics_t; + +static int diagCall64(uint64_t aMode, void* aBuf) { + // We cannot use syscall() here because it doesn't work with diagnostic + // system calls -- it raises SIGSYS if you try. So we have to use asm. + +#ifdef __x86_64__ + // The 0x40000 prefix indicates it's a diagnostic system call. 
The 0x01 + // suffix indicates the syscall number is 1, which also happens to be the + // only diagnostic system call. See osfmk/mach/i386/syscall_sw.h for more + // details. + static const uint64_t diagCallNum = 0x4000001; + uint64_t rv; + + __asm__ __volatile__( + "syscall" + + // Return value goes in "a" (%rax). + : /* outputs */ "=a"(rv) + + // The syscall number goes in "0", a synonym (from outputs) for "a" + // (%rax). The syscall arguments go in "D" (%rdi) and "S" (%rsi). + : /* inputs */ "0"(diagCallNum), "D"(aMode), "S"(aBuf) + + // The |syscall| instruction clobbers %rcx, %r11, and %rflags ("cc"). And + // this particular syscall also writes memory (aBuf). + : /* clobbers */ "rcx", "r11", "cc", "memory"); + return rv; +#else +# error Sorry, only x86-64 is supported +#endif +} + +// This is a counter to collect power utilization during profiling. +// It cannot be a raw `ProfilerCounter` because we need to manually add/remove +// it while the profiler lock is already held. +class RaplDomain final : public BaseProfilerCount { + public: + explicit RaplDomain(const char* aLabel, const char* aDescription) + : BaseProfilerCount(aLabel, nullptr, nullptr, "power", aDescription), + mSample(0), + mIsSampleNew(false) {} + + CountSample Sample() override { + CountSample result; + + // To be consistent with the Windows EMI API, + // return values in picowatt-hour. + constexpr double NANOJOULES_PER_JOULE = 1'000'000'000; + constexpr double NANOJOULES_TO_PICOWATTHOUR = 3.6; + result.count = mSample * NANOJOULES_PER_JOULE / NANOJOULES_TO_PICOWATTHOUR; + + result.number = 0; + result.isSampleNew = mIsSampleNew; + mIsSampleNew = false; + return result; + } + + void AddSample(double aSample) { + if (aSample > mSample) { + mIsSampleNew = true; + mSample = aSample; + } + } + + private: + double mSample; + bool mIsSampleNew; +}; + +class RAPL { + bool mIsGpuSupported; // Is the GPU domain supported by the processor? 
+ bool mIsRamSupported; // Is the RAM domain supported by the processor? + + // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J == + // 15.3 microJoules) which is different to the power unit MSR. (See the + // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of + // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.) + // This field records whether the quirk is present. + bool mHasRamUnitsQuirk; + + // The abovementioned 15.3 microJoules value. + static constexpr double kQuirkyRamJoulesPerTick = (double)1 / 65536; + + // The struct passed to diagCall64(). + pkg_energy_statistics_t* mPkes; + + RaplDomain* mPkg = nullptr; + RaplDomain* mCores = nullptr; + RaplDomain* mGpu = nullptr; + RaplDomain* mRam = nullptr; + + public: + explicit RAPL(PowerCounters::CountVector& aCounters) + : mHasRamUnitsQuirk(false) { + // Work out which RAPL MSRs this CPU model supports. + int cpuModel; + size_t size = sizeof(cpuModel); + if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL, 0) != 0) { + NS_WARNING("sysctlbyname(\"machdep.cpu.model\") failed"); + return; + } + + // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in + // linux-4.1.5/. + // + // By linux-5.6.14/, this stuff had moved into + // arch/x86/events/intel/rapl.c, which references processor families in + // arch/x86/include/asm/intel-family.h. + switch (cpuModel) { + case 0x2a: // Sandy Bridge + case 0x3a: // Ivy Bridge + // Supports package, cores, GPU. + mIsGpuSupported = true; + mIsRamSupported = false; + break; + + case 0x3f: // Haswell X + case 0x4f: // Broadwell X + case 0x55: // Skylake X + case 0x56: // Broadwell D + // Supports package, cores, RAM. Has the units quirk. + mIsGpuSupported = false; + mIsRamSupported = true; + mHasRamUnitsQuirk = true; + break; + + case 0x2d: // Sandy Bridge X + case 0x3e: // Ivy Bridge X + // Supports package, cores, RAM. 
+ mIsGpuSupported = false; + mIsRamSupported = true; + break; + + case 0x3c: // Haswell + case 0x3d: // Broadwell + case 0x45: // Haswell L + case 0x46: // Haswell G + case 0x47: // Broadwell G + // Supports package, cores, GPU, RAM. + mIsGpuSupported = true; + mIsRamSupported = true; + break; + + case 0x4e: // Skylake L + case 0x5e: // Skylake + case 0x8e: // Kaby Lake L + case 0x9e: // Kaby Lake + case 0x66: // Cannon Lake L + case 0x7d: // Ice Lake + case 0x7e: // Ice Lake L + case 0xa5: // Comet Lake + case 0xa6: // Comet Lake L + // Supports package, cores, GPU, RAM, PSYS. + // XXX: this tool currently doesn't measure PSYS. + mIsGpuSupported = true; + mIsRamSupported = true; + break; + + default: + NS_WARNING(nsPrintfCString("unknown CPU model: %d", cpuModel).get()); + return; + } + + // Get the maximum number of logical CPUs so that we know how big to make + // |mPkes|. + int logicalcpu_max; + size = sizeof(logicalcpu_max); + if (sysctlbyname("hw.logicalcpu_max", &logicalcpu_max, &size, NULL, 0) != + 0) { + NS_WARNING("sysctlbyname(\"hw.logicalcpu_max\") failed"); + return; + } + + // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around + // core_energy_stat_t::gpmcs and for any other future extensions to that + // struct. (The fields we read all come before the core_energy_stat_t + // array, so it won't matter to us whether gpmcs is present or not.) 
+ size_t pkesSize = sizeof(pkg_energy_statistics_t) + + logicalcpu_max * sizeof(core_energy_stat_t) + + logicalcpu_max * 1024; + mPkes = (pkg_energy_statistics_t*)malloc(pkesSize); + if (mPkes && aCounters.reserve(4)) { + mPkg = new RaplDomain("Power: CPU package", "RAPL PKG"); + aCounters.infallibleAppend(mPkg); + + mCores = new RaplDomain("Power: CPU cores", "RAPL PP0"); + aCounters.infallibleAppend(mCores); + + if (mIsGpuSupported) { + mGpu = new RaplDomain("Power: iGPU", "RAPL PP1"); + aCounters.infallibleAppend(mGpu); + } + + if (mIsRamSupported) { + mRam = new RaplDomain("Power: DRAM", "RAPL DRAM"); + aCounters.infallibleAppend(mRam); + } + } + } + + ~RAPL() { + free(mPkes); + delete mPkg; + delete mCores; + delete mGpu; + delete mRam; + } + + static double Joules(uint64_t aTicks, double aJoulesPerTick) { + return double(aTicks) * aJoulesPerTick; + } + + void Sample() { + constexpr uint64_t kSupportedVersion = 1; + + // Write an unsupported version number into pkes_version so that the check + // below cannot succeed by dumb luck. + mPkes->pkes_version = kSupportedVersion - 1; + + // diagCall64() returns 1 on success, and 0 on failure (which can only + // happen if the mode is unrecognized, e.g. in 10.7.x or earlier versions). + if (diagCall64(dgPowerStat, mPkes) != 1) { + NS_WARNING("diagCall64() failed"); + return; + } + + if (mPkes->pkes_version != kSupportedVersion) { + NS_WARNING( + nsPrintfCString("unexpected pkes_version: %llu", mPkes->pkes_version) + .get()); + return; + } + + // Bits 12:8 are the ESU. + // Energy measurements come in multiples of 1/(2^ESU). 
+ uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f; + double joulesPerTick = ((double)1 / (1 << energyStatusUnits)); + + mPkg->AddSample(Joules(mPkes->pkg_energy, joulesPerTick)); + mCores->AddSample(Joules(mPkes->pp0_energy, joulesPerTick)); + if (mIsGpuSupported) { + mGpu->AddSample(Joules(mPkes->pp1_energy, joulesPerTick)); + } + if (mIsRamSupported) { + mRam->AddSample(Joules(mPkes->ddr_energy, mHasRamUnitsQuirk + ? kQuirkyRamJoulesPerTick + : joulesPerTick)); + } + } +}; + +PowerCounters::PowerCounters() { + // RAPL values are global, so only sample them on the parent. + mRapl = XRE_IsParentProcess() ? new RAPL(mCounters) : nullptr; +} + +PowerCounters::~PowerCounters() { + mCounters.clear(); + delete mRapl; + mRapl = nullptr; +} + +void PowerCounters::Sample() { + if (mRapl) { + mRapl->Sample(); + } +} diff --git a/tools/profiler/core/PowerCounters-mac-arm64.cpp b/tools/profiler/core/PowerCounters-mac-arm64.cpp new file mode 100644 index 0000000000..3a84a479ef --- /dev/null +++ b/tools/profiler/core/PowerCounters-mac-arm64.cpp @@ -0,0 +1,47 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "PowerCounters.h" + +#include <mach/mach.h> + +class ProcessPower final : public BaseProfilerCount { + public: + ProcessPower() + : BaseProfilerCount("Process Power", nullptr, nullptr, "power", + "Power utilization") {} + + CountSample Sample() override { + CountSample result; + result.count = GetTaskEnergy(); + result.number = 0; + result.isSampleNew = true; + return result; + } + + private: + int64_t GetTaskEnergy() { + task_power_info_v2_data_t task_power_info; + mach_msg_type_number_t count = TASK_POWER_INFO_V2_COUNT; + kern_return_t kr = task_info(mach_task_self(), TASK_POWER_INFO_V2, + (task_info_t)&task_power_info, &count); + if (kr != KERN_SUCCESS) { + return 0; + } + + // task_energy is in nanojoules. To be consistent with the Windows EMI + // API, return values in picowatt-hour. + return task_power_info.task_energy / 3.6; + } +}; + +PowerCounters::PowerCounters() : mProcessPower(new ProcessPower()) { + if (mProcessPower) { + (void)mCounters.append(mProcessPower.get()); + } +} + +PowerCounters::~PowerCounters() { mCounters.clear(); } + +void PowerCounters::Sample() {} diff --git a/tools/profiler/core/PowerCounters-win.cpp b/tools/profiler/core/PowerCounters-win.cpp new file mode 100644 index 0000000000..f1d05389b6 --- /dev/null +++ b/tools/profiler/core/PowerCounters-win.cpp @@ -0,0 +1,342 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "PowerCounters.h" +#include "nsXULAppAPI.h" // for XRE_IsParentProcess +#include "nsString.h" + +#include <windows.h> +#include <devioctl.h> +#include <setupapi.h> // for SetupDi* +// LogSeverity, defined by setupapi.h to DWORD, messes with other code. 
+#undef LogSeverity + +#undef NTDDI_VERSION +#define NTDDI_VERSION NTDDI_WINBLUE +#include <emi.h> + +#ifndef NTDDI_WIN10_RS5 +// EMI v2 API exists in SDK 10.0.17763 (Windows 10 1809 / Redstone 5) and later. +// Our build machines are still on SDK 10.0.17134. +// Remove this block when updating the SDK (bug 1774628). +typedef EMI_METADATA EMI_METADATA_V1; +typedef EMI_MEASUREMENT_DATA EMI_CHANNEL_MEASUREMENT_DATA; +# define EMI_VERSION_V2 2 + +typedef struct { + EMI_MEASUREMENT_UNIT MeasurementUnit; + USHORT ChannelNameSize; + WCHAR ChannelName[ANYSIZE_ARRAY]; +} EMI_CHANNEL_V2; + +typedef struct { + WCHAR HardwareOEM[EMI_NAME_MAX]; + WCHAR HardwareModel[EMI_NAME_MAX]; + USHORT HardwareRevision; + USHORT ChannelCount; + EMI_CHANNEL_V2 Channels[ANYSIZE_ARRAY]; +} EMI_METADATA_V2; + +# define EMI_CHANNEL_V2_LENGTH(_ChannelNameSize) \ + (FIELD_OFFSET(EMI_CHANNEL_V2, ChannelName) + (_ChannelNameSize)) + +# define EMI_CHANNEL_V2_NEXT_CHANNEL(_Channel) \ + ((EMI_CHANNEL_V2*)((PUCHAR)(_Channel) + \ + EMI_CHANNEL_V2_LENGTH((_Channel)->ChannelNameSize))) +#endif + +using namespace mozilla; + +// This is a counter to collect power utilization during profiling. +// It cannot be a raw `ProfilerCounter` because we need to manually add/remove +// it while the profiler lock is already held. 
+class PowerMeterChannel final : public BaseProfilerCount { + public: + explicit PowerMeterChannel(const WCHAR* aChannelName, ULONGLONG aInitialValue, + ULONGLONG aInitialTime) + : BaseProfilerCount(nullptr, nullptr, nullptr, "power", + "Power utilization"), + mChannelName(NS_ConvertUTF16toUTF8(aChannelName)), + mPreviousValue(aInitialValue), + mPreviousTime(aInitialTime), + mIsSampleNew(true) { + if (mChannelName.Equals("RAPL_Package0_PKG")) { + mLabel = "Power: CPU package"; + mDescription = mChannelName.get(); + } else if (mChannelName.Equals("RAPL_Package0_PP0")) { + mLabel = "Power: CPU cores"; + mDescription = mChannelName.get(); + } else if (mChannelName.Equals("RAPL_Package0_PP1")) { + mLabel = "Power: iGPU"; + mDescription = mChannelName.get(); + } else if (mChannelName.Equals("RAPL_Package0_DRAM")) { + mLabel = "Power: DRAM"; + mDescription = mChannelName.get(); + } else { + unsigned int coreId; + if (sscanf(mChannelName.get(), "RAPL_Package0_Core%u_CORE", &coreId) == + 1) { + mLabelString = "Power: CPU core "; + mLabelString.AppendInt(coreId); + mLabel = mLabelString.get(); + mDescription = mChannelName.get(); + } else { + mLabel = mChannelName.get(); + } + } + } + + CountSample Sample() override { + CountSample result; + result.count = mCounter; + result.number = 0; + result.isSampleNew = mIsSampleNew; + mIsSampleNew = false; + return result; + } + + void AddSample(ULONGLONG aAbsoluteEnergy, ULONGLONG aAbsoluteTime) { + // aAbsoluteTime is the time since the system start in 100ns increments. + if (aAbsoluteTime == mPreviousTime) { + return; + } + + if (aAbsoluteEnergy > mPreviousValue) { + int64_t increment = aAbsoluteEnergy - mPreviousValue; + mCounter += increment; + mPreviousValue += increment; + mPreviousTime = aAbsoluteTime; + } + + mIsSampleNew = true; + } + + private: + int64_t mCounter; + nsCString mChannelName; + + // Used as a storage when the label can not be a literal string. 
+ nsCString mLabelString; + + ULONGLONG mPreviousValue; + ULONGLONG mPreviousTime; + bool mIsSampleNew; +}; + +/* One energy meter device opened by path, plus the PowerMeterChannel objects built from the metadata it reports (one per channel for EMI v2, a single one for EMI v1). If any setup step fails the device ends up with no channels. */ +class PowerMeterDevice { + public: + /* Not copyable: the destructor closes mHandle, so a copy would close the same handle twice. */ + PowerMeterDevice(const PowerMeterDevice&) = delete; + PowerMeterDevice& operator=(const PowerMeterDevice&) = delete; + + /* Opens aDevicePath for reading, checks the EMI version, reads the metadata and takes a first measurement to seed each channel. Every failure returns early; callers detect it through HasChannels(). */ + explicit PowerMeterDevice(LPCTSTR aDevicePath) { + mHandle = ::CreateFile(aDevicePath, GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (mHandle == INVALID_HANDLE_VALUE) { + return; + } + + EMI_VERSION version = {0}; + DWORD dwOut; + + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_VERSION, nullptr, 0, &version, + sizeof(version), &dwOut, nullptr) || + (version.EmiVersion != EMI_VERSION_V1 && + version.EmiVersion != EMI_VERSION_V2)) { + return; + } + + EMI_METADATA_SIZE size = {0}; + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA_SIZE, nullptr, 0, + &size, sizeof(size), &dwOut, nullptr) || + !size.MetadataSize) { + return; + } + + UniquePtr<uint8_t[]> metadata(new (std::nothrow) + uint8_t[size.MetadataSize]); + if (!metadata) { + return; + } + + if (version.EmiVersion == EMI_VERSION_V2) { + EMI_METADATA_V2* metadata2 = + reinterpret_cast<EMI_METADATA_V2*>(metadata.get()); + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA, nullptr, 0, + metadata2, size.MetadataSize, &dwOut, nullptr)) { + return; + } + + if (!mChannels.reserve(metadata2->ChannelCount)) { + return; + } + + mDataBuffer = + MakeUnique<EMI_CHANNEL_MEASUREMENT_DATA[]>(metadata2->ChannelCount); + if (!mDataBuffer) { + return; + } + + /* The byte size is computed by multiplication: sizeof applied to an array type with a runtime bound is a variable-length-array extension, not standard C++. */ + if (!::DeviceIoControl( + mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(), + sizeof(EMI_CHANNEL_MEASUREMENT_DATA) * metadata2->ChannelCount, + &dwOut, nullptr)) { + return; + } + + EMI_CHANNEL_V2* channel = &metadata2->Channels[0]; + for (int i = 0; i < metadata2->ChannelCount; ++i) { + EMI_CHANNEL_MEASUREMENT_DATA* channel_data = &mDataBuffer[i]; + mChannels.infallibleAppend(new PowerMeterChannel( + channel->ChannelName, channel_data->AbsoluteEnergy, + channel_data->AbsoluteTime)); + channel = EMI_CHANNEL_V2_NEXT_CHANNEL(channel); + } 
+ } else if (version.EmiVersion == EMI_VERSION_V1) { + EMI_METADATA_V1* metadata1 = + reinterpret_cast<EMI_METADATA_V1*>(metadata.get()); + if (!::DeviceIoControl(mHandle, IOCTL_EMI_GET_METADATA, nullptr, 0, + metadata1, size.MetadataSize, &dwOut, nullptr)) { + return; + } + + mDataBuffer = MakeUnique<EMI_CHANNEL_MEASUREMENT_DATA[]>(1); + if (!mDataBuffer) { + return; + } + + if (!::DeviceIoControl( + mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(), + sizeof(EMI_CHANNEL_MEASUREMENT_DATA), &dwOut, nullptr)) { + return; + } + + (void)mChannels.append(new PowerMeterChannel( + metadata1->MeteredHardwareName, mDataBuffer[0].AbsoluteEnergy, + mDataBuffer[0].AbsoluteTime)); + } + } + + /* Closes the device handle if the constructor managed to open it. */ + ~PowerMeterDevice() { + if (mHandle != INVALID_HANDLE_VALUE) { + ::CloseHandle(mHandle); + } + } + + /* Reads one measurement per channel into mDataBuffer and hands each to the matching channel. A failed ioctl skips this sample. */ + void Sample() { + MOZ_ASSERT(HasChannels()); + MOZ_ASSERT(mDataBuffer); + + DWORD dwOut; + if (!::DeviceIoControl( + mHandle, IOCTL_EMI_GET_MEASUREMENT, nullptr, 0, mDataBuffer.get(), + sizeof(EMI_CHANNEL_MEASUREMENT_DATA) * mChannels.length(), &dwOut, + nullptr)) { + return; + } + + for (size_t i = 0; i < mChannels.length(); ++i) { + EMI_CHANNEL_MEASUREMENT_DATA* channel_data = &mDataBuffer[i]; + mChannels[i]->AddSample(channel_data->AbsoluteEnergy, + channel_data->AbsoluteTime); + } + } + + bool HasChannels() { return mChannels.length() != 0; } + /* Appends a raw pointer to every channel to aCounters, or nothing at all if the reservation fails. The channels stay owned by mChannels. */ + void AppendCountersTo(PowerCounters::CountVector& aCounters) { + if (aCounters.reserve(aCounters.length() + mChannels.length())) { + for (auto& channel : mChannels) { + aCounters.infallibleAppend(channel.get()); + } + } + } + + private: + Vector<UniquePtr<PowerMeterChannel>, 4> mChannels; + HANDLE mHandle = INVALID_HANDLE_VALUE; + UniquePtr<EMI_CHANNEL_MEASUREMENT_DATA[]> mDataBuffer; +}; + +/* Enumerates the energy meter device interfaces, keeps one PowerMeterDevice for each that exposes at least one channel, then collects all their channels into mCounters. Does nothing outside the parent process. */ +PowerCounters::PowerCounters() { + class MOZ_STACK_CLASS HDevInfoHolder final { + public: + explicit HDevInfoHolder(HDEVINFO aHandle) : mHandle(aHandle) {} + + ~HDevInfoHolder() { ::SetupDiDestroyDeviceInfoList(mHandle); } + + private: + 
HDEVINFO mHandle; + }; + + if (!XRE_IsParentProcess()) { + // Energy meters are global, so only sample them on the parent. + return; + } + + // Energy Metering Device Interface + // {45BD8344-7ED6-49cf-A440-C276C933B053} + // + // Using GUID_DEVICE_ENERGY_METER does not compile as the symbol does not + // exist before Windows 10. + GUID my_GUID_DEVICE_ENERGY_METER = { + 0x45bd8344, + 0x7ed6, + 0x49cf, + {0xa4, 0x40, 0xc2, 0x76, 0xc9, 0x33, 0xb0, 0x53}}; + + HDEVINFO hdev = + ::SetupDiGetClassDevs(&my_GUID_DEVICE_ENERGY_METER, nullptr, nullptr, + DIGCF_PRESENT | DIGCF_DEVICEINTERFACE); + if (hdev == INVALID_HANDLE_VALUE) { + return; + } + + HDevInfoHolder hdevHolder(hdev); + + DWORD i = 0; + SP_DEVICE_INTERFACE_DATA did = {0}; + did.cbSize = sizeof(did); + + while (::SetupDiEnumDeviceInterfaces( + hdev, nullptr, &my_GUID_DEVICE_ENERGY_METER, i++, &did)) { + /* First call only asks for the required buffer size; it is expected to fail with ERROR_INSUFFICIENT_BUFFER. */ + DWORD bufferSize = 0; + ::SetupDiGetDeviceInterfaceDetail(hdev, &did, nullptr, 0, &bufferSize, + nullptr); + if (::GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + continue; + } + + UniquePtr<uint8_t[]> buffer(new (std::nothrow) uint8_t[bufferSize]); + if (!buffer) { + continue; + } + + PSP_DEVICE_INTERFACE_DETAIL_DATA pdidd = + reinterpret_cast<PSP_DEVICE_INTERFACE_DETAIL_DATA>(buffer.get()); + /* Check the alignment of the structure itself, not of the pointer typedef. */ + MOZ_ASSERT(uintptr_t(buffer.get()) % + alignof(SP_DEVICE_INTERFACE_DETAIL_DATA) == + 0); + pdidd->cbSize = sizeof(*pdidd); + if (!::SetupDiGetDeviceInterfaceDetail(hdev, &did, pdidd, bufferSize, + &bufferSize, nullptr)) { + continue; + } + + UniquePtr<PowerMeterDevice> pmd = + MakeUnique<PowerMeterDevice>(pdidd->DevicePath); + if (!pmd->HasChannels() || + !mPowerMeterDevices.emplaceBack(std::move(pmd))) { + NS_WARNING("PowerMeterDevice without measurement channel (or OOM)"); + } + } + + for (auto& device : mPowerMeterDevices) { + device->AppendCountersTo(mCounters); + } +} + +PowerCounters::~PowerCounters() { mCounters.clear(); } + +/* Takes a new sample from every device kept by the constructor. */ +void PowerCounters::Sample() { + for (auto& device : mPowerMeterDevices) { + 
device->Sample(); + } +} diff --git a/tools/profiler/core/PowerCounters.h b/tools/profiler/core/PowerCounters.h new file mode 100644 index 0000000000..2fd8d5892c --- /dev/null +++ b/tools/profiler/core/PowerCounters.h @@ -0,0 +1,52 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef TOOLS_POWERCOUNTERS_H_ +#define TOOLS_POWERCOUNTERS_H_ + +#include "PlatformMacros.h" +#include "mozilla/ProfilerCounts.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" + +#if defined(_MSC_VER) +class PowerMeterDevice; +#endif +#if defined(GP_PLAT_arm64_darwin) +class ProcessPower; +#endif +#if defined(GP_PLAT_amd64_darwin) +class RAPL; +#endif + +/* Holds the platform-specific power sampling state and exposes the resulting counters through GetCounters(). On platforms not matched by the condition below, the constructor, destructor and Sample() are empty. */ +class PowerCounters { + public: +#if defined(_MSC_VER) || defined(GP_OS_darwin) || defined(GP_PLAT_amd64_linux) + explicit PowerCounters(); + ~PowerCounters(); + void Sample(); +#else + explicit PowerCounters(){}; + ~PowerCounters(){}; + void Sample(){}; +#endif + + using CountVector = mozilla::Vector<BaseProfilerCount*, 4>; + const CountVector& GetCounters() { return mCounters; } + + private: + CountVector mCounters; + +#if defined(_MSC_VER) + mozilla::Vector<mozilla::UniquePtr<PowerMeterDevice>> mPowerMeterDevices; +#endif +#if defined(GP_PLAT_arm64_darwin) + mozilla::UniquePtr<ProcessPower> mProcessPower; +#endif +#if defined(GP_PLAT_amd64_darwin) + RAPL* mRapl; +#endif +}; + +#endif /* ndef TOOLS_POWERCOUNTERS_H_ */ diff --git a/tools/profiler/core/ProfileBuffer.cpp b/tools/profiler/core/ProfileBuffer.cpp new file mode 100644 index 0000000000..170a4f14b4 --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.cpp @@ -0,0 +1,243 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfileBuffer.h" + +#include "BaseProfiler.h" +#include "js/GCAPI.h" +#include "jsfriendapi.h" +#include "mozilla/MathAlgorithms.h" +#include "nsJSPrincipals.h" +#include "nsScriptSecurityManager.h" + +using namespace mozilla; + +/* Wraps aBuffer; the buffer is expected to be in-session already. */ +ProfileBuffer::ProfileBuffer(ProfileChunkedBuffer& aBuffer) + : mEntries(aBuffer) { + // Assume the given buffer is in-session. + MOZ_ASSERT(mEntries.IsInSession()); +} + +/* Copies the first 1 + SIZE bytes of aEntry into aProfileChunkedBuffer, SIZE being the size listed for the entry's kind. An unlisted kind asserts and returns a default-constructed block index. */ +/* static */ +ProfileBufferBlockIndex ProfileBuffer::AddEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, + const ProfileBufferEntry& aEntry) { + switch (aEntry.GetKind()) { +#define SWITCH_KIND(KIND, TYPE, SIZE) \ + case ProfileBufferEntry::Kind::KIND: { \ + return aProfileChunkedBuffer.PutFrom(&aEntry, 1 + (SIZE)); \ + } + + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(SWITCH_KIND) + +#undef SWITCH_KIND + default: + MOZ_ASSERT(false, "Unhandled ProfilerBuffer entry KIND"); + return ProfileBufferBlockIndex{}; + } +} + +// Called from signal, call only reentrant functions +uint64_t ProfileBuffer::AddEntry(const ProfileBufferEntry& aEntry) { + return AddEntry(mEntries, aEntry).ConvertToProfileBufferIndex(); +} + +/* static */ +ProfileBufferBlockIndex ProfileBuffer::AddThreadIdEntry( + ProfileChunkedBuffer& aProfileChunkedBuffer, ProfilerThreadId aThreadId) { + return AddEntry(aProfileChunkedBuffer, + ProfileBufferEntry::ThreadId(aThreadId)); +} + +uint64_t ProfileBuffer::AddThreadIdEntry(ProfilerThreadId aThreadId) { + return AddThreadIdEntry(mEntries, aThreadId).ConvertToProfileBufferIndex(); +} + +/* Adds the entries describing one frame: Label and FrameFlags always, then (when provided) the string as DynamicStringFragment entries, the inner window id if non-zero, the line, the column and the category pair. */ +void ProfileBuffer::CollectCodeLocation( + const char* aLabel, const char* aStr, uint32_t aFrameFlags, + uint64_t aInnerWindowID, const Maybe<uint32_t>& aLineNumber, + const Maybe<uint32_t>& aColumnNumber, + const Maybe<JS::ProfilingCategoryPair>& aCategoryPair) { + AddEntry(ProfileBufferEntry::Label(aLabel)); + 
AddEntry(ProfileBufferEntry::FrameFlags(uint64_t(aFrameFlags))); + + if (aStr) { + // Store the string using one or more DynamicStringFragment entries. + size_t strLen = strlen(aStr) + 1; // +1 for the null terminator + // If larger than the prescribed limit, we will cut the string and end it + // with an ellipsis. + const bool tooBig = strLen > kMaxFrameKeyLength; + if (tooBig) { + strLen = kMaxFrameKeyLength; + } + char chars[ProfileBufferEntry::kNumChars]; + for (size_t j = 0;; j += ProfileBufferEntry::kNumChars) { + // Store up to kNumChars characters in the entry. + size_t len = ProfileBufferEntry::kNumChars; + const bool last = j + len >= strLen; + if (last) { + // Only the last entry may be smaller than kNumChars. + len = strLen - j; + if (tooBig) { + // That last entry is part of a too-big string, replace the end + // characters with an ellipsis "...". + len = std::max(len, size_t(4)); + chars[len - 4] = '.'; + chars[len - 3] = '.'; + chars[len - 2] = '.'; + chars[len - 1] = '\0'; + // Make sure the memcpy will not overwrite our ellipsis! 
+ len -= 4; + } + } + memcpy(chars, &aStr[j], len); + AddEntry(ProfileBufferEntry::DynamicStringFragment(chars)); + if (last) { + break; + } + } + } + + if (aInnerWindowID) { + AddEntry(ProfileBufferEntry::InnerWindowID(aInnerWindowID)); + } + + if (aLineNumber) { + AddEntry(ProfileBufferEntry::LineNumber(*aLineNumber)); + } + + if (aColumnNumber) { + AddEntry(ProfileBufferEntry::ColumnNumber(*aColumnNumber)); + } + + if (aCategoryPair.isSome()) { + AddEntry(ProfileBufferEntry::CategoryPair(int(*aCategoryPair))); + } +} + +size_t ProfileBuffer::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const { + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - memory pointed to by the elements within mEntries + return mEntries.SizeOfExcludingThis(aMallocSizeOf); +} + +size_t ProfileBuffer::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); +} + +/* Feeds one sampling pass into the overhead statistics, in microseconds. If the MOZ_PROFILER_RECORD_OVERHEADS environment variable is non-empty, the sampling time and the four durations are also added to the buffer. */ +void ProfileBuffer::CollectOverheadStats(double aSamplingTimeMs, + TimeDuration aLocking, + TimeDuration aCleaning, + TimeDuration aCounters, + TimeDuration aThreads) { + double timeUs = aSamplingTimeMs * 1000.0; + if (mFirstSamplingTimeUs == 0.0) { + mFirstSamplingTimeUs = timeUs; + } else { + // Note that we'll have 1 fewer interval than other numbers (because + // we need both ends of an interval to know its duration). The final + // difference should be insignificant over the expected many thousands + // of iterations. 
+ mIntervalsUs.Count(timeUs - mLastSamplingTimeUs); + } + mLastSamplingTimeUs = timeUs; + double locking = aLocking.ToMilliseconds() * 1000.0; + double cleaning = aCleaning.ToMilliseconds() * 1000.0; + double counters = aCounters.ToMilliseconds() * 1000.0; + double threads = aThreads.ToMilliseconds() * 1000.0; + + mOverheadsUs.Count(locking + cleaning + counters + threads); + mLockingsUs.Count(locking); + mCleaningsUs.Count(cleaning); + mCountersUs.Count(counters); + mThreadsUs.Count(threads); + + static const bool sRecordSamplingOverhead = []() { + const char* recordOverheads = getenv("MOZ_PROFILER_RECORD_OVERHEADS"); + return recordOverheads && recordOverheads[0] != '\0'; + }(); + if (sRecordSamplingOverhead) { + AddEntry(ProfileBufferEntry::ProfilerOverheadTime(aSamplingTimeMs)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(locking)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(cleaning)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(counters)); + AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(threads)); + } +} + +/* Snapshot of the buffer range, the buffer length expressed in 8-byte entries, and the accumulated statistics. */ +ProfilerBufferInfo ProfileBuffer::GetProfilerBufferInfo() const { + return {BufferRangeStart(), + BufferRangeEnd(), + static_cast<uint32_t>(*mEntries.BufferLength() / + 8), // 8 bytes per entry. 
+ mIntervalsUs, + mOverheadsUs, + mLockingsUs, + mCleaningsUs, + mCountersUs, + mThreadsUs}; +} + +/* ProfileBufferCollector */ + +void ProfileBufferCollector::CollectNativeLeafAddr(void* aAddr) { + mBuf.AddEntry(ProfileBufferEntry::NativeLeafAddr(aAddr)); +} + +void ProfileBufferCollector::CollectJitReturnAddr(void* aAddr) { + mBuf.AddEntry(ProfileBufferEntry::JitReturnAddr(aAddr)); +} + +void ProfileBufferCollector::CollectWasmFrame(const char* aLabel) { + mBuf.CollectCodeLocation("", aLabel, 0, 0, Nothing(), Nothing(), + Some(JS::ProfilingCategoryPair::JS_Wasm)); +} + +/* Turns one profiling stack frame into a CollectCodeLocation() call, resolving line and column for JS frames that have an empty label, a script and a pc. */ +void ProfileBufferCollector::CollectProfilingStackFrame( + const js::ProfilingStackFrame& aFrame) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_ASSERT(aFrame.isLabelFrame() || + (aFrame.isJsFrame() && !aFrame.isOSRFrame())); + + const char* label = aFrame.label(); + const char* dynamicString = aFrame.dynamicString(); + Maybe<uint32_t> line; + Maybe<uint32_t> column; + + if (aFrame.isJsFrame()) { + // There are two kinds of JS frames that get pushed onto the ProfilingStack. + // + // - label = "", dynamic string = <something> + // - label = "js::RunScript", dynamic string = nullptr + // + // The line number is only interesting in the first case. + + if (label[0] == '\0') { + MOZ_ASSERT(dynamicString); + + // We call aFrame.script() repeatedly -- rather than storing the result in + // a local variable -- in order to avoid rooting hazards. 
+ if (aFrame.script()) { + if (aFrame.pc()) { + unsigned col = 0; + line = Some(JS_PCToLineNumber(aFrame.script(), aFrame.pc(), &col)); + column = Some(col); + } + } + + } else { + MOZ_ASSERT(strcmp(label, "js::RunScript") == 0 && !dynamicString); + } + } else { + MOZ_ASSERT(aFrame.isLabelFrame()); + } + + mBuf.CollectCodeLocation(label, dynamicString, aFrame.flags(), + aFrame.realmID(), line, column, + Some(aFrame.categoryPair())); +} diff --git a/tools/profiler/core/ProfileBuffer.h b/tools/profiler/core/ProfileBuffer.h new file mode 100644 index 0000000000..5da34909cc --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.h @@ -0,0 +1,260 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PROFILE_BUFFER_H +#define MOZ_PROFILE_BUFFER_H + +#include "GeckoProfiler.h" +#include "ProfileBufferEntry.h" + +#include "mozilla/Maybe.h" +#include "mozilla/PowerOfTwo.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileChunkedBuffer.h" + +class ProcessStreamingContext; +class RunningTimes; + +// Class storing most profiling data in a ProfileChunkedBuffer. +// +// This class is used as a queue of entries which, after construction, never +// allocates. This makes it safe to use in the profiler's "critical section". +class ProfileBuffer final { + public: + // ProfileBuffer constructor + // @param aBuffer The in-session ProfileChunkedBuffer to use as buffer + // manager. + explicit ProfileBuffer(mozilla::ProfileChunkedBuffer& aBuffer); + + mozilla::ProfileChunkedBuffer& UnderlyingChunkedBuffer() const { + return mEntries; + } + + bool IsThreadSafe() const { return mEntries.IsThreadSafe(); } + + // Add |aEntry| to the buffer, ignoring what kind of entry it is. 
+ uint64_t AddEntry(const ProfileBufferEntry& aEntry); + + // Add to the buffer a sample start (ThreadId) entry for aThreadId. + // Returns the position of the entry. + uint64_t AddThreadIdEntry(ProfilerThreadId aThreadId); + + void CollectCodeLocation( + const char* aLabel, const char* aStr, uint32_t aFrameFlags, + uint64_t aInnerWindowID, const mozilla::Maybe<uint32_t>& aLineNumber, + const mozilla::Maybe<uint32_t>& aColumnNumber, + const mozilla::Maybe<JS::ProfilingCategoryPair>& aCategoryPair); + + // Maximum size of a frameKey string that we'll handle. + static const size_t kMaxFrameKeyLength = 512; + + // Add JIT frame information to aJITFrameInfo for any JitReturnAddr entries + // that are currently in the buffer at or after aRangeStart, in samples + // for the given thread. + void AddJITInfoForRange(uint64_t aRangeStart, ProfilerThreadId aThreadId, + JSContext* aContext, JITFrameInfo& aJITFrameInfo, + mozilla::ProgressLogger aProgressLogger) const; + + // Stream JSON for samples in the buffer to aWriter, using the supplied + // UniqueStacks object. + // Only streams samples for the given thread ID and which were taken at or + // after aSinceTime. If ID is 0, ignore the stored thread ID; this should only + // be used when the buffer contains only one sample. + // aUniqueStacks needs to contain information about any JIT frames that we + // might encounter in the buffer, before this method is called. In other + // words, you need to have called AddJITInfoForRange for every range that + // might contain JIT frame information before calling this method. + // Return the thread ID of the streamed sample(s), or 0. 
+ ProfilerThreadId StreamSamplesToJSON( + SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId, + double aSinceTime, UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const; + + void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, + ProfilerThreadId aThreadId, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime, UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const; + + // Stream samples and markers from all threads that `aProcessStreamingContext` + // accepts. + void StreamSamplesAndMarkersToJSON( + ProcessStreamingContext& aProcessStreamingContext, + mozilla::ProgressLogger aProgressLogger) const; + + void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter, + double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const; + void StreamProfilerOverheadToJSON( + SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const; + void StreamCountersToJSON(SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const; + + // Find (via |aLastSample|) the most recent sample for the thread denoted by + // |aThreadId| and clone it, patching in the current time as appropriate. + // Mutate |aLastSample| to point to the newly inserted sample. + // Returns whether duplication was successful. + bool DuplicateLastSample(ProfilerThreadId aThreadId, double aSampleTimeMs, + mozilla::Maybe<uint64_t>& aLastSample, + const RunningTimes& aRunningTimes); + + void DiscardSamplesBeforeTime(double aTime); + + // Read an entry in the buffer. 
+ ProfileBufferEntry GetEntry(uint64_t aPosition) const { + return mEntries.ReadAt( + mozilla::ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + aPosition), + [&](mozilla::Maybe<mozilla::ProfileBufferEntryReader>&& aMER) { + ProfileBufferEntry entry; + if (aMER.isSome()) { + if (aMER->CurrentBlockIndex().ConvertToProfileBufferIndex() == + aPosition) { + // If we're here, it means `aPosition` pointed at a valid block. + MOZ_RELEASE_ASSERT(aMER->RemainingBytes() <= sizeof(entry)); + aMER->ReadBytes(&entry, aMER->RemainingBytes()); + } else { + // EntryReader at the wrong position, pretend to have read + // everything. + aMER->SetRemainingBytes(0); + } + } + return entry; + }); + } + + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + void CollectOverheadStats(double aSamplingTimeMs, + mozilla::TimeDuration aLocking, + mozilla::TimeDuration aCleaning, + mozilla::TimeDuration aCounters, + mozilla::TimeDuration aThreads); + + ProfilerBufferInfo GetProfilerBufferInfo() const; + + private: + // Add |aEntry| to the provided ProfileChunkedBuffer. + // `static` because it may be used to add an entry to a `ProfileChunkedBuffer` + // that is not attached to a `ProfileBuffer`. + static mozilla::ProfileBufferBlockIndex AddEntry( + mozilla::ProfileChunkedBuffer& aProfileChunkedBuffer, + const ProfileBufferEntry& aEntry); + + // Add a sample start (ThreadId) entry for aThreadId to the provided + // ProfileChunkedBuffer. Returns the position of the entry. + // `static` because it may be used to add an entry to a `ProfileChunkedBuffer` + // that is not attached to a `ProfileBuffer`. + static mozilla::ProfileBufferBlockIndex AddThreadIdEntry( + mozilla::ProfileChunkedBuffer& aProfileChunkedBuffer, + ProfilerThreadId aThreadId); + + // The storage in which this ProfileBuffer stores its entries. 
+ mozilla::ProfileChunkedBuffer& mEntries; + + public: + // `BufferRangeStart()` and `BufferRangeEnd()` return `uint64_t` values + // corresponding to the first entry and past the last entry stored in + // `mEntries`. + // + // The returned values are not guaranteed to be stable, because other threads + // may also be accessing the buffer concurrently. But they will always + // increase, and can therefore give an indication of how far these values have + // *at least* reached. In particular: + // - Entries whose index is strictly less than `BufferRangeStart()` have been + // discarded by now, so any related data may also be safely discarded. + // - It is safe to try and read entries at any index strictly less than + // `BufferRangeEnd()` -- but note that these reads may fail by the time you + // request them, as old entries get overwritten by new ones. + uint64_t BufferRangeStart() const { return mEntries.GetState().mRangeStart; } + uint64_t BufferRangeEnd() const { return mEntries.GetState().mRangeEnd; } + + private: + // Single pre-allocated chunk (to avoid spurious mallocs), used when: + // - Duplicating sleeping stacks (hence scExpectedMaximumStackSize). + // - Adding JIT info. + // - Streaming stacks to JSON. + // Mutable because it's accessed from non-multithreaded const methods. + mutable mozilla::Maybe<mozilla::ProfileBufferChunkManagerSingle> + mMaybeWorkerChunkManager; + mozilla::ProfileBufferChunkManagerSingle& WorkerChunkManager() const { + if (mMaybeWorkerChunkManager.isNothing()) { + // Only actually allocate it on first use. (Some ProfileBuffers are + // temporary and don't actually need this.) 
+ mMaybeWorkerChunkManager.emplace( + mozilla::ProfileBufferChunk::SizeofChunkMetadata() + + mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize); + } + return *mMaybeWorkerChunkManager; + } + + // GetStreamingParametersForThreadCallback: + // (ProfilerThreadId) -> Maybe<StreamingParametersForThread> + template <typename GetStreamingParametersForThreadCallback> + ProfilerThreadId DoStreamSamplesAndMarkersToJSON( + mozilla::FailureLatch& aFailureLatch, + GetStreamingParametersForThreadCallback&& + aGetStreamingParametersForThreadCallback, + double aSinceTime, ProcessStreamingContext* aStreamingContextForMarkers, + mozilla::ProgressLogger aProgressLogger) const; + + double mFirstSamplingTimeUs = 0.0; + double mLastSamplingTimeUs = 0.0; + ProfilerStats mIntervalsUs; + ProfilerStats mOverheadsUs; + ProfilerStats mLockingsUs; + ProfilerStats mCleaningsUs; + ProfilerStats mCountersUs; + ProfilerStats mThreadsUs; +}; + +/** + * Helper type used to implement ProfilerStackCollector. This type is used as + * the collector for MergeStacks by ProfileBuffer. It holds a reference to the + * buffer, as well as additional feature flags which are needed to control the + * data collection strategy + */ +class ProfileBufferCollector final : public ProfilerStackCollector { + public: + ProfileBufferCollector(ProfileBuffer& aBuf, uint64_t aSamplePos, + uint64_t aBufferRangeStart) + : mBuf(aBuf), + mSamplePositionInBuffer(aSamplePos), + mBufferRangeStart(aBufferRangeStart) { + MOZ_ASSERT( + mSamplePositionInBuffer >= mBufferRangeStart, + "The sample position should always be after the buffer range start"); + } + + // Position at which the sample starts in the profiler buffer (which may be + // different from the buffer in which the sample data is collected here). 
+ mozilla::Maybe<uint64_t> SamplePositionInBuffer() override { + return mozilla::Some(mSamplePositionInBuffer); + } + + // Profiler buffer's range start (which may be different from the buffer in + // which the sample data is collected here). + mozilla::Maybe<uint64_t> BufferRangeStart() override { + return mozilla::Some(mBufferRangeStart); + } + + virtual void CollectNativeLeafAddr(void* aAddr) override; + virtual void CollectJitReturnAddr(void* aAddr) override; + virtual void CollectWasmFrame(const char* aLabel) override; + virtual void CollectProfilingStackFrame( + const js::ProfilingStackFrame& aFrame) override; + + private: + ProfileBuffer& mBuf; + uint64_t mSamplePositionInBuffer; + uint64_t mBufferRangeStart; +}; + +#endif diff --git a/tools/profiler/core/ProfileBufferEntry.cpp b/tools/profiler/core/ProfileBufferEntry.cpp new file mode 100644 index 0000000000..c2273d2ec6 --- /dev/null +++ b/tools/profiler/core/ProfileBufferEntry.cpp @@ -0,0 +1,2310 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfileBufferEntry.h" + +#include "mozilla/ProfilerMarkers.h" +#include "platform.h" +#include "ProfileBuffer.h" +#include "ProfiledThreadData.h" +#include "ProfilerBacktrace.h" +#include "ProfilerRustBindings.h" + +#include "js/ProfilingFrameIterator.h" +#include "jsapi.h" +#include "jsfriendapi.h" +#include "mozilla/Logging.h" +#include "mozilla/JSONStringWriteFuncs.h" +#include "mozilla/ScopeExit.h" +#include "mozilla/Sprintf.h" +#include "mozilla/StackWalk.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "ProfilerCodeAddressService.h" + +#include <ostream> +#include <type_traits> + +using namespace mozilla; +using namespace mozilla::literals::ProportionValue_literals; + +//////////////////////////////////////////////////////////////////////// +// BEGIN ProfileBufferEntry + +ProfileBufferEntry::ProfileBufferEntry() + : mKind(Kind::INVALID), mStorage{0, 0, 0, 0, 0, 0, 0, 0} {} + +// aString must be a static string. +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, const char* aString) + : mKind(aKind) { + MOZ_ASSERT(aKind == Kind::Label); + memcpy(mStorage, &aString, sizeof(aString)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, char aChars[kNumChars]) + : mKind(aKind) { + MOZ_ASSERT(aKind == Kind::DynamicStringFragment); + memcpy(mStorage, aChars, kNumChars); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, void* aPtr) : mKind(aKind) { + memcpy(mStorage, &aPtr, sizeof(aPtr)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, double aDouble) + : mKind(aKind) { + memcpy(mStorage, &aDouble, sizeof(aDouble)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt) : mKind(aKind) { + memcpy(mStorage, &aInt, sizeof(aInt)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64) + : mKind(aKind) { + memcpy(mStorage, &aInt64, sizeof(aInt64)); +} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64) + : mKind(aKind) { + memcpy(mStorage, &aUint64, sizeof(aUint64)); 
+} + +ProfileBufferEntry::ProfileBufferEntry(Kind aKind, ProfilerThreadId aThreadId) + : mKind(aKind) { + static_assert(std::is_trivially_copyable_v<ProfilerThreadId>); + static_assert(sizeof(aThreadId) <= sizeof(mStorage)); + memcpy(mStorage, &aThreadId, sizeof(aThreadId)); +} + +const char* ProfileBufferEntry::GetString() const { + const char* result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +void* ProfileBufferEntry::GetPtr() const { + void* result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +double ProfileBufferEntry::GetDouble() const { + double result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +int ProfileBufferEntry::GetInt() const { + int result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +int64_t ProfileBufferEntry::GetInt64() const { + int64_t result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +uint64_t ProfileBufferEntry::GetUint64() const { + uint64_t result; + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +ProfilerThreadId ProfileBufferEntry::GetThreadId() const { + ProfilerThreadId result; + static_assert(std::is_trivially_copyable_v<ProfilerThreadId>); + memcpy(&result, mStorage, sizeof(result)); + return result; +} + +void ProfileBufferEntry::CopyCharsInto(char (&aOutArray)[kNumChars]) const { + memcpy(aOutArray, mStorage, kNumChars); +} + +// END ProfileBufferEntry +//////////////////////////////////////////////////////////////////////// + +struct TypeInfo { + Maybe<nsCString> mKeyedBy; + Maybe<nsCString> mName; + Maybe<nsCString> mLocation; + Maybe<unsigned> mLineNumber; +}; + +// As mentioned in ProfileBufferEntry.h, the JSON format contains many +// arrays whose elements are laid out according to various schemas to help +// de-duplication. 
This RAII class helps write these arrays by keeping track of +// the last non-null element written and adding the appropriate number of null +// elements when writing new non-null elements. It also automatically opens and +// closes an array element on the given JSON writer. +// +// You grant the AutoArraySchemaWriter exclusive access to the JSONWriter and +// the UniqueJSONStrings objects for the lifetime of AutoArraySchemaWriter. Do +// not access them independently while the AutoArraySchemaWriter is alive. +// If you need to add complex objects, call FreeFormElement(), which will give +// you temporary access to the writer. +// +// Example usage: +// +// // Define the schema of elements in this type of array: [FOO, BAR, BAZ] +// enum Schema : uint32_t { +// FOO = 0, +// BAR = 1, +// BAZ = 2 +// }; +// +// AutoArraySchemaWithStringsWriter writer(someJsonWriter, someUniqueStrings); +// if (shouldWriteFoo) { +// writer.IntElement(FOO, getFoo()); +// } +// ... etc ... +// +// The elements need to be added in-order. 
+class MOZ_RAII AutoArraySchemaWriter { + public: + explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter) + : mJSONWriter(aWriter), mNextFreeIndex(0) { + mJSONWriter.StartArrayElement(); + } + + ~AutoArraySchemaWriter() { mJSONWriter.EndArray(); } + + template <typename T> + void IntElement(uint32_t aIndex, T aValue) { + static_assert(!std::is_same_v<T, uint64_t>, + "Narrowing uint64 -> int64 conversion not allowed"); + FillUpTo(aIndex); + mJSONWriter.IntElement(static_cast<int64_t>(aValue)); + } + + void DoubleElement(uint32_t aIndex, double aValue) { + FillUpTo(aIndex); + mJSONWriter.DoubleElement(aValue); + } + + void TimeMsElement(uint32_t aIndex, double aTime_ms) { + FillUpTo(aIndex); + mJSONWriter.TimeDoubleMsElement(aTime_ms); + } + + void BoolElement(uint32_t aIndex, bool aValue) { + FillUpTo(aIndex); + mJSONWriter.BoolElement(aValue); + } + + protected: + SpliceableJSONWriter& Writer() { return mJSONWriter; } + + void FillUpTo(uint32_t aIndex) { + MOZ_ASSERT(aIndex >= mNextFreeIndex); + mJSONWriter.NullElements(aIndex - mNextFreeIndex); + mNextFreeIndex = aIndex + 1; + } + + private: + SpliceableJSONWriter& mJSONWriter; + uint32_t mNextFreeIndex; +}; + +// Same as AutoArraySchemaWriter, but this can also write strings (output as +// indexes into the table of unique strings). 
+class MOZ_RAII AutoArraySchemaWithStringsWriter : public AutoArraySchemaWriter { + public: + AutoArraySchemaWithStringsWriter(SpliceableJSONWriter& aWriter, + UniqueJSONStrings& aStrings) + : AutoArraySchemaWriter(aWriter), mStrings(aStrings) {} + + /* Writes aValue in slot aIndex through the unique-strings table. */ + void StringElement(uint32_t aIndex, const Span<const char>& aValue) { + FillUpTo(aIndex); + mStrings.WriteElement(Writer(), aValue); + } + + private: + UniqueJSONStrings& mStrings; +}; + +/* Starts a stack made of the single frame aFrame; Nothing if no frame index could be obtained. */ +Maybe<UniqueStacks::StackKey> UniqueStacks::BeginStack(const FrameKey& aFrame) { + if (Maybe<uint32_t> frameIndex = GetOrAddFrameIndex(aFrame); frameIndex) { + return Some(StackKey(*frameIndex)); + } + return Nothing{}; +} + +/* Passes the local failure latch source to aFailureLatch.SetFailureFrom(), then returns mRanges as an rvalue for the caller to move from. */ +Vector<JITFrameInfoForBufferRange>&& +JITFrameInfo::MoveRangesWithNewFailureLatch(FailureLatch& aFailureLatch) && { + aFailureLatch.SetFailureFrom(mLocalFailureLatchSource); + return std::move(mRanges); +} + +/* Returns mUniqueStrings as an rvalue after re-pointing it at aFailureLatch; when there is no string table, only the local failure latch source is passed on. */ +UniquePtr<UniqueJSONStrings>&& +JITFrameInfo::MoveUniqueStringsWithNewFailureLatch( + FailureLatch& aFailureLatch) && { + if (mUniqueStrings) { + mUniqueStrings->ChangeFailureLatchAndForwardState(aFailureLatch); + } else { + aFailureLatch.SetFailureFrom(mLocalFailureLatchSource); + } + return std::move(mUniqueStrings); +} + +/* Extends aStack with aFrame; Nothing if either index lookup fails. */ +Maybe<UniqueStacks::StackKey> UniqueStacks::AppendFrame( + const StackKey& aStack, const FrameKey& aFrame) { + if (Maybe<uint32_t> stackIndex = GetOrAddStackIndex(aStack); stackIndex) { + if (Maybe<uint32_t> frameIndex = GetOrAddFrameIndex(aFrame); frameIndex) { + return Some(StackKey(aStack, *stackIndex, *frameIndex)); + } + } + return Nothing{}; +} + +/* Copies both maps entry by entry and returns a new range with the same bounds. Allocation failures trip release assertions. */ +JITFrameInfoForBufferRange JITFrameInfoForBufferRange::Clone() const { + JITFrameInfoForBufferRange::JITAddressToJITFramesMap jitAddressToJITFramesMap; + MOZ_RELEASE_ASSERT( + jitAddressToJITFramesMap.reserve(mJITAddressToJITFramesMap.count())); + for (auto iter = mJITAddressToJITFramesMap.iter(); !iter.done(); + iter.next()) { + const mozilla::Vector<JITFrameKey>& srcKeys = iter.get().value(); + mozilla::Vector<JITFrameKey> destKeys; + 
MOZ_RELEASE_ASSERT(destKeys.appendAll(srcKeys)); + jitAddressToJITFramesMap.putNewInfallible(iter.get().key(), + std::move(destKeys)); + } + + JITFrameInfoForBufferRange::JITFrameToFrameJSONMap jitFrameToFrameJSONMap; + MOZ_RELEASE_ASSERT( + jitFrameToFrameJSONMap.reserve(mJITFrameToFrameJSONMap.count())); + for (auto iter = mJITFrameToFrameJSONMap.iter(); !iter.done(); iter.next()) { + jitFrameToFrameJSONMap.putNewInfallible(iter.get().key(), + iter.get().value()); + } + + return JITFrameInfoForBufferRange{mRangeStart, mRangeEnd, + std::move(jitAddressToJITFramesMap), + std::move(jitFrameToFrameJSONMap)}; +} + +/* Copies aOther's string table and ranges while reporting progress; an allocation failure is recorded on the local failure latch source. NOTE(review): aOther.mUniqueStrings is dereferenced without a null check -- confirm callers never copy a JITFrameInfo whose string table is missing. */ +JITFrameInfo::JITFrameInfo(const JITFrameInfo& aOther, + mozilla::ProgressLogger aProgressLogger) + : mUniqueStrings(MakeUniqueFallible<UniqueJSONStrings>( + mLocalFailureLatchSource, *aOther.mUniqueStrings, + aProgressLogger.CreateSubLoggerFromTo( + 0_pc, "Creating JIT frame info unique strings...", 49_pc, + "Created JIT frame info unique strings"))) { + if (!mUniqueStrings) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo allocating mUniqueStrings"); + return; + } + + if (mRanges.reserve(aOther.mRanges.length())) { + for (auto&& [i, progressLogger] : + aProgressLogger.CreateLoopSubLoggersFromTo(50_pc, 100_pc, + aOther.mRanges.length(), + "Copying JIT frame info")) { + mRanges.infallibleAppend(aOther.mRanges[i].Clone()); + } + } else { + mLocalFailureLatchSource.SetFailure("OOM in JITFrameInfo resizing mRanges"); + } +} + +/* Member-wise equality over every field of a normal frame. */ +bool UniqueStacks::FrameKey::NormalFrameData::operator==( + const NormalFrameData& aOther) const { + return mLocation == aOther.mLocation && + mRelevantForJS == aOther.mRelevantForJS && + mBaselineInterp == aOther.mBaselineInterp && + mInnerWindowID == aOther.mInnerWindowID && mLine == aOther.mLine && + mColumn == aOther.mColumn && mCategoryPair == aOther.mCategoryPair; +} + +/* Member-wise equality over address, depth and range index. */ +bool UniqueStacks::FrameKey::JITFrameData::operator==( + const JITFrameData& aOther) const { + return mCanonicalAddress == aOther.mCanonicalAddress 
&& + mDepth == aOther.mDepth && mRangeIndex == aOther.mRangeIndex; +} + +// Consume aJITFrameInfo by stealing its string table and its JIT frame info +// ranges. The JIT frame info contains JSON which refers to strings from the +// JIT frame info's string table, so our string table needs to have the same +// strings at the same indices. +UniqueStacks::UniqueStacks( + FailureLatch& aFailureLatch, JITFrameInfo&& aJITFrameInfo, + ProfilerCodeAddressService* aCodeAddressService /* = nullptr */) + : mUniqueStrings(std::move(aJITFrameInfo) + .MoveUniqueStringsWithNewFailureLatch(aFailureLatch)), + mCodeAddressService(aCodeAddressService), + mFrameTableWriter(aFailureLatch), + mStackTableWriter(aFailureLatch), + mJITInfoRanges(std::move(aJITFrameInfo) + .MoveRangesWithNewFailureLatch(aFailureLatch)) { + if (!mUniqueStrings) { + SetFailure("Did not get mUniqueStrings from JITFrameInfo"); + return; + } + + mFrameTableWriter.StartBareList(); + mStackTableWriter.StartBareList(); +} + +/* Returns the index of aStack, adding it to the map and streaming it the first time it is seen; Nothing once a failure has been recorded or if the insertion fails. */ +Maybe<uint32_t> UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) { + if (Failed()) { + return Nothing{}; + } + + uint32_t count = mStackToIndexMap.count(); + auto entry = mStackToIndexMap.lookupForAdd(aStack); + if (entry) { + MOZ_ASSERT(entry->value() < count); + return Some(entry->value()); + } + + if (!mStackToIndexMap.add(entry, aStack, count)) { + SetFailure("OOM in UniqueStacks::GetOrAddStackIndex"); + return Nothing{}; + } + StreamStack(aStack); + return Some(count); +} + +Maybe<Vector<UniqueStacks::FrameKey>> +UniqueStacks::LookupFramesForJITAddressFromBufferPos(void* aJITAddress, + uint64_t aBufferPos) { + JITFrameInfoForBufferRange* rangeIter = + std::lower_bound(mJITInfoRanges.begin(), mJITInfoRanges.end(), aBufferPos, + [](const JITFrameInfoForBufferRange& aRange, + uint64_t aPos) { return aRange.mRangeEnd < aPos; }); + MOZ_RELEASE_ASSERT( + rangeIter != mJITInfoRanges.end() && + rangeIter->mRangeStart <= aBufferPos && + aBufferPos < rangeIter->mRangeEnd, + "Buffer position 
of jit address needs to be in one of the ranges"); + + using JITFrameKey = JITFrameInfoForBufferRange::JITFrameKey; + + const JITFrameInfoForBufferRange& jitFrameInfoRange = *rangeIter; + auto jitFrameKeys = + jitFrameInfoRange.mJITAddressToJITFramesMap.lookup(aJITAddress); + if (!jitFrameKeys) { + return Nothing(); + } + + // Map the array of JITFrameKeys to an array of FrameKeys, and ensure that + // each of the FrameKeys exists in mFrameToIndexMap. + Vector<FrameKey> frameKeys; + MOZ_RELEASE_ASSERT(frameKeys.initCapacity(jitFrameKeys->value().length())); + for (const JITFrameKey& jitFrameKey : jitFrameKeys->value()) { + FrameKey frameKey(jitFrameKey.mCanonicalAddress, jitFrameKey.mDepth, + rangeIter - mJITInfoRanges.begin()); + uint32_t index = mFrameToIndexMap.count(); + auto entry = mFrameToIndexMap.lookupForAdd(frameKey); + if (!entry) { + // We need to add this frame to our frame table. The JSON for this frame + // already exists in jitFrameInfoRange, we just need to splice it into + // the frame table and give it an index. 
+ auto frameJSON = + jitFrameInfoRange.mJITFrameToFrameJSONMap.lookup(jitFrameKey); + MOZ_RELEASE_ASSERT(frameJSON, "Should have cached JSON for this frame"); + mFrameTableWriter.Splice(frameJSON->value()); + MOZ_RELEASE_ASSERT(mFrameToIndexMap.add(entry, frameKey, index)); + } + MOZ_RELEASE_ASSERT(frameKeys.append(std::move(frameKey))); + } + return Some(std::move(frameKeys)); +} + +Maybe<uint32_t> UniqueStacks::GetOrAddFrameIndex(const FrameKey& aFrame) { + if (Failed()) { + return Nothing{}; + } + + uint32_t count = mFrameToIndexMap.count(); + auto entry = mFrameToIndexMap.lookupForAdd(aFrame); + if (entry) { + MOZ_ASSERT(entry->value() < count); + return Some(entry->value()); + } + + if (!mFrameToIndexMap.add(entry, aFrame, count)) { + SetFailure("OOM in UniqueStacks::GetOrAddFrameIndex"); + return Nothing{}; + } + StreamNonJITFrame(aFrame); + return Some(count); +} + +void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) { + mFrameTableWriter.EndBareList(); + aWriter.TakeAndSplice(mFrameTableWriter.TakeChunkedWriteFunc()); +} + +void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) { + mStackTableWriter.EndBareList(); + aWriter.TakeAndSplice(mStackTableWriter.TakeChunkedWriteFunc()); +} + +[[nodiscard]] nsAutoCString UniqueStacks::FunctionNameOrAddress(void* aPC) { + nsAutoCString nameOrAddress; + + if (!mCodeAddressService || + !mCodeAddressService->GetFunction(aPC, nameOrAddress) || + nameOrAddress.IsEmpty()) { + nameOrAddress.AppendASCII("0x"); + // `AppendInt` only knows `uint32_t` or `uint64_t`, but because these are + // just aliases for *two* of (`unsigned`, `unsigned long`, and `unsigned + // long long`), a call with `uintptr_t` could use the third type and + // therefore would be ambiguous. + // So we want to force using exactly `uint32_t` or `uint64_t`, whichever + // matches the size of `uintptr_t`. + // (The outer cast to `uint` should then be a no-op.) 
+ using uint = std::conditional_t<sizeof(uintptr_t) <= sizeof(uint32_t), + uint32_t, uint64_t>; + nameOrAddress.AppendInt(static_cast<uint>(reinterpret_cast<uintptr_t>(aPC)), + 16); + } + + return nameOrAddress; +} + +void UniqueStacks::StreamStack(const StackKey& aStack) { + enum Schema : uint32_t { PREFIX = 0, FRAME = 1 }; + + AutoArraySchemaWriter writer(mStackTableWriter); + if (aStack.mPrefixStackIndex.isSome()) { + writer.IntElement(PREFIX, *aStack.mPrefixStackIndex); + } + writer.IntElement(FRAME, aStack.mFrameIndex); +} + +void UniqueStacks::StreamNonJITFrame(const FrameKey& aFrame) { + if (Failed()) { + return; + } + + using NormalFrameData = FrameKey::NormalFrameData; + + enum Schema : uint32_t { + LOCATION = 0, + RELEVANT_FOR_JS = 1, + INNER_WINDOW_ID = 2, + IMPLEMENTATION = 3, + OPTIMIZATIONS = 4, + LINE = 5, + COLUMN = 6, + CATEGORY = 7, + SUBCATEGORY = 8 + }; + + AutoArraySchemaWithStringsWriter writer(mFrameTableWriter, *mUniqueStrings); + + const NormalFrameData& data = aFrame.mData.as<NormalFrameData>(); + writer.StringElement(LOCATION, data.mLocation); + writer.BoolElement(RELEVANT_FOR_JS, data.mRelevantForJS); + + // It's okay to convert uint64_t to double here because DOM always creates IDs + // that are convertible to double. + writer.DoubleElement(INNER_WINDOW_ID, data.mInnerWindowID); + + // The C++ interpreter is the default implementation so we only emit element + // for Baseline Interpreter frames. 
+ if (data.mBaselineInterp) { + writer.StringElement(IMPLEMENTATION, MakeStringSpan("blinterp")); + } + + if (data.mLine.isSome()) { + writer.IntElement(LINE, *data.mLine); + } + if (data.mColumn.isSome()) { + writer.IntElement(COLUMN, *data.mColumn); + } + if (data.mCategoryPair.isSome()) { + const JS::ProfilingCategoryPairInfo& info = + JS::GetProfilingCategoryPairInfo(*data.mCategoryPair); + writer.IntElement(CATEGORY, uint32_t(info.mCategory)); + writer.IntElement(SUBCATEGORY, info.mSubcategoryIndex); + } +} + +static void StreamJITFrame(JSContext* aContext, SpliceableJSONWriter& aWriter, + UniqueJSONStrings& aUniqueStrings, + const JS::ProfiledFrameHandle& aJITFrame) { + enum Schema : uint32_t { + LOCATION = 0, + RELEVANT_FOR_JS = 1, + INNER_WINDOW_ID = 2, + IMPLEMENTATION = 3, + OPTIMIZATIONS = 4, + LINE = 5, + COLUMN = 6, + CATEGORY = 7, + SUBCATEGORY = 8 + }; + + AutoArraySchemaWithStringsWriter writer(aWriter, aUniqueStrings); + + writer.StringElement(LOCATION, MakeStringSpan(aJITFrame.label())); + writer.BoolElement(RELEVANT_FOR_JS, false); + + // It's okay to convert uint64_t to double here because DOM always creates IDs + // that are convertible to double. + // Realm ID is the name of innerWindowID inside JS code. + writer.DoubleElement(INNER_WINDOW_ID, aJITFrame.realmID()); + + JS::ProfilingFrameIterator::FrameKind frameKind = aJITFrame.frameKind(); + MOZ_ASSERT(frameKind == JS::ProfilingFrameIterator::Frame_Ion || + frameKind == JS::ProfilingFrameIterator::Frame_Baseline); + writer.StringElement(IMPLEMENTATION, + frameKind == JS::ProfilingFrameIterator::Frame_Ion + ? MakeStringSpan("ion") + : MakeStringSpan("baseline")); + + const JS::ProfilingCategoryPairInfo& info = JS::GetProfilingCategoryPairInfo( + frameKind == JS::ProfilingFrameIterator::Frame_Ion + ? 
JS::ProfilingCategoryPair::JS_IonMonkey + : JS::ProfilingCategoryPair::JS_Baseline); + writer.IntElement(CATEGORY, uint32_t(info.mCategory)); + writer.IntElement(SUBCATEGORY, info.mSubcategoryIndex); +} + +static nsCString JSONForJITFrame(JSContext* aContext, + const JS::ProfiledFrameHandle& aJITFrame, + UniqueJSONStrings& aUniqueStrings) { + nsCString json; + JSONStringRefWriteFunc jw(json); + SpliceableJSONWriter writer(jw, aUniqueStrings.SourceFailureLatch()); + StreamJITFrame(aContext, writer, aUniqueStrings, aJITFrame); + return json; +} + +void JITFrameInfo::AddInfoForRange( + uint64_t aRangeStart, uint64_t aRangeEnd, JSContext* aCx, + const std::function<void(const std::function<void(void*)>&)>& + aJITAddressProvider) { + if (mLocalFailureLatchSource.Failed()) { + return; + } + + if (aRangeStart == aRangeEnd) { + return; + } + + MOZ_RELEASE_ASSERT(aRangeStart < aRangeEnd); + + if (!mRanges.empty()) { + const JITFrameInfoForBufferRange& prevRange = mRanges.back(); + MOZ_RELEASE_ASSERT(prevRange.mRangeEnd <= aRangeStart, + "Ranges must be non-overlapping and added in-order."); + } + + using JITFrameKey = JITFrameInfoForBufferRange::JITFrameKey; + + JITFrameInfoForBufferRange::JITAddressToJITFramesMap jitAddressToJITFrameMap; + JITFrameInfoForBufferRange::JITFrameToFrameJSONMap jitFrameToFrameJSONMap; + + aJITAddressProvider([&](void* aJITAddress) { + // Make sure that we have cached data for aJITAddress. 
+ auto addressEntry = jitAddressToJITFrameMap.lookupForAdd(aJITAddress); + if (!addressEntry) { + Vector<JITFrameKey> jitFrameKeys; + for (JS::ProfiledFrameHandle handle : + JS::GetProfiledFrames(aCx, aJITAddress)) { + uint32_t depth = jitFrameKeys.length(); + JITFrameKey jitFrameKey{handle.canonicalAddress(), depth}; + auto frameEntry = jitFrameToFrameJSONMap.lookupForAdd(jitFrameKey); + if (!frameEntry) { + if (!jitFrameToFrameJSONMap.add( + frameEntry, jitFrameKey, + JSONForJITFrame(aCx, handle, *mUniqueStrings))) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding jit->frame map"); + return; + } + } + if (!jitFrameKeys.append(jitFrameKey)) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding jit frame key"); + return; + } + } + if (!jitAddressToJITFrameMap.add(addressEntry, aJITAddress, + std::move(jitFrameKeys))) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding addr->jit map"); + return; + } + } + }); + + if (!mRanges.append(JITFrameInfoForBufferRange{ + aRangeStart, aRangeEnd, std::move(jitAddressToJITFrameMap), + std::move(jitFrameToFrameJSONMap)})) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo::AddInfoForRange adding range"); + return; + } +} + +struct ProfileSample { + uint32_t mStack = 0; + double mTime = 0.0; + Maybe<double> mResponsiveness; + RunningTimes mRunningTimes; +}; + +// Write CPU measurements with "Delta" unit, which is some amount of work that +// happened since the previous sample. 
+static void WriteDelta(AutoArraySchemaWriter& aSchemaWriter, uint32_t aProperty, + uint64_t aDelta) { + aSchemaWriter.IntElement(aProperty, int64_t(aDelta)); +} + +static void WriteSample(SpliceableJSONWriter& aWriter, + const ProfileSample& aSample) { + enum Schema : uint32_t { + STACK = 0, + TIME = 1, + EVENT_DELAY = 2 +#define RUNNING_TIME_SCHEMA(index, name, unit, jsonProperty) , name + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SCHEMA) +#undef RUNNING_TIME_SCHEMA + }; + + AutoArraySchemaWriter writer(aWriter); + + writer.IntElement(STACK, aSample.mStack); + + writer.TimeMsElement(TIME, aSample.mTime); + + if (aSample.mResponsiveness.isSome()) { + writer.DoubleElement(EVENT_DELAY, *aSample.mResponsiveness); + } + +#define RUNNING_TIME_STREAM(index, name, unit, jsonProperty) \ + aSample.mRunningTimes.GetJson##name##unit().apply( \ + [&writer](const uint64_t& aValue) { \ + Write##unit(writer, name, aValue); \ + }); + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_STREAM) + +#undef RUNNING_TIME_STREAM +} + +static void StreamMarkerAfterKind( + ProfileBufferEntryReader& aER, + ProcessStreamingContext& aProcessStreamingContext) { + ThreadStreamingContext* threadData = nullptr; + mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream( + aER, + [&](ProfilerThreadId aThreadId) -> baseprofiler::SpliceableJSONWriter* { + threadData = + aProcessStreamingContext.GetThreadStreamingContext(aThreadId); + return threadData ? 
&threadData->mMarkersDataWriter : nullptr; + }, + [&](ProfileChunkedBuffer& aChunkedBuffer) { + ProfilerBacktrace backtrace("", &aChunkedBuffer); + MOZ_ASSERT(threadData, + "threadData should have been set before calling here"); + backtrace.StreamJSON(threadData->mMarkersDataWriter, + aProcessStreamingContext.ProcessStartTime(), + *threadData->mUniqueStacks); + }, + [&](mozilla::base_profiler_markers_detail::Streaming::DeserializerTag + aTag) { + MOZ_ASSERT(threadData, + "threadData should have been set before calling here"); + + size_t payloadSize = aER.RemainingBytes(); + + ProfileBufferEntryReader::DoubleSpanOfConstBytes spans = + aER.ReadSpans(payloadSize); + if (MOZ_LIKELY(spans.IsSingleSpan())) { + // Only a single span, we can just refer to it directly + // instead of copying it. + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, spans.mFirstOrOnly.Elements(), payloadSize, + &threadData->mMarkersDataWriter); + } else { + // Two spans, we need to concatenate them by copying. 
+ uint8_t* payloadBuffer = new uint8_t[payloadSize]; + spans.CopyBytesTo(payloadBuffer); + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, payloadBuffer, payloadSize, + &threadData->mMarkersDataWriter); + delete[] payloadBuffer; + } + }); +} + +class EntryGetter { + public: + explicit EntryGetter( + ProfileChunkedBuffer::Reader& aReader, + mozilla::FailureLatch& aFailureLatch, + mozilla::ProgressLogger aProgressLogger = {}, + uint64_t aInitialReadPos = 0, + ProcessStreamingContext* aStreamingContextForMarkers = nullptr) + : mFailureLatch(aFailureLatch), + mStreamingContextForMarkers(aStreamingContextForMarkers), + mBlockIt( + aReader.At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + aInitialReadPos))), + mBlockItEnd(aReader.end()), + mRangeStart(mBlockIt.BufferRangeStart().ConvertToProfileBufferIndex()), + mRangeSize( + double(mBlockIt.BufferRangeEnd().ConvertToProfileBufferIndex() - + mRangeStart)), + mProgressLogger(std::move(aProgressLogger)) { + SetLocalProgress(ProgressLogger::NO_LOCATION_UPDATE); + if (!ReadLegacyOrEnd()) { + // Find and read the next non-legacy entry. + Next(); + } + } + + bool Has() const { + return (!mFailureLatch.Failed()) && (mBlockIt != mBlockItEnd); + } + + const ProfileBufferEntry& Get() const { + MOZ_ASSERT(Has() || mFailureLatch.Failed(), + "Caller should have checked `Has()` before `Get()`"); + return mEntry; + } + + void Next() { + MOZ_ASSERT(Has() || mFailureLatch.Failed(), + "Caller should have checked `Has()` before `Next()`"); + ++mBlockIt; + ReadUntilLegacyOrEnd(); + } + + // Hand off the current iterator to the caller, which may be used to read + // any kind of entries (legacy or modern). + ProfileChunkedBuffer::BlockIterator Iterator() const { return mBlockIt; } + + // After `Iterator()` was used, we can restart from *after* its updated + // position. 
+ void RestartAfter(const ProfileChunkedBuffer::BlockIterator& it) { + mBlockIt = it; + if (!Has()) { + return; + } + Next(); + } + + ProfileBufferBlockIndex CurBlockIndex() const { + return mBlockIt.CurrentBlockIndex(); + } + + uint64_t CurPos() const { + return CurBlockIndex().ConvertToProfileBufferIndex(); + } + + void SetLocalProgress(const char* aLocation) { + mProgressLogger.SetLocalProgress( + ProportionValue{double(CurBlockIndex().ConvertToProfileBufferIndex() - + mRangeStart) / + mRangeSize}, + aLocation); + } + + private: + // Try to read the entry at the current `mBlockIt` position. + // * If we're at the end of the buffer, just return `true`. + // * If there is a "legacy" entry (containing a real `ProfileBufferEntry`), + // read it into `mEntry`, and return `true` as well. + // * Otherwise the entry contains a "modern" type that cannot be read into + // `mEntry`, return `false` (so `EntryGetter` can skip to another entry). + bool ReadLegacyOrEnd() { + if (!Has()) { + return true; + } + // Read the entry "kind", which is always at the start of all entries. + ProfileBufferEntryReader er = *mBlockIt; + auto type = static_cast<ProfileBufferEntry::Kind>( + er.ReadObject<ProfileBufferEntry::KindUnderlyingType>()); + MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) < + static_cast<ProfileBufferEntry::KindUnderlyingType>( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + if (type >= ProfileBufferEntry::Kind::LEGACY_LIMIT) { + if (type == ProfileBufferEntry::Kind::Marker && + mStreamingContextForMarkers) { + StreamMarkerAfterKind(er, *mStreamingContextForMarkers); + if (!Has()) { + return true; + } + SetLocalProgress("Processed marker"); + } + er.SetRemainingBytes(0); + return false; + } + // Here, we have a legacy item, we need to read it from the start. + // Because the above `ReadObject` moved the reader, we ned to reset it to + // the start of the entry before reading the whole entry. 
+ er = *mBlockIt; + er.ReadBytes(&mEntry, er.RemainingBytes()); + return true; + } + + void ReadUntilLegacyOrEnd() { + for (;;) { + if (ReadLegacyOrEnd()) { + // Either we're at the end, or we could read a legacy entry -> Done. + break; + } + // Otherwise loop around until we hit a legacy entry or the end. + ++mBlockIt; + } + SetLocalProgress(ProgressLogger::NO_LOCATION_UPDATE); + } + + mozilla::FailureLatch& mFailureLatch; + + ProcessStreamingContext* const mStreamingContextForMarkers; + + ProfileBufferEntry mEntry; + ProfileChunkedBuffer::BlockIterator mBlockIt; + const ProfileChunkedBuffer::BlockIterator mBlockItEnd; + + // Progress logger, and the data needed to compute the current relative + // position in the buffer. + const mozilla::ProfileBufferIndex mRangeStart; + const double mRangeSize; + mozilla::ProgressLogger mProgressLogger; +}; + +// The following grammar shows legal sequences of profile buffer entries. +// The sequences beginning with a ThreadId entry are known as "samples". +// +// ( +// ( /* Samples */ +// ThreadId +// TimeBeforeCompactStack +// RunningTimes? +// UnresponsivenessDurationMs? +// CompactStack +// /* internally including: +// ( NativeLeafAddr +// | Label FrameFlags? DynamicStringFragment* +// LineNumber? CategoryPair? +// | JitReturnAddr +// )+ +// */ +// ) +// | ( /* Reference to a previous identical sample */ +// ThreadId +// TimeBeforeSameSample +// RunningTimes? +// SameSample +// ) +// | Marker +// | ( /* Counters */ +// CounterId +// Time +// ( +// CounterKey +// Count +// Number? +// )* +// ) +// | CollectionStart +// | CollectionEnd +// | Pause +// | Resume +// | ( ProfilerOverheadTime /* Sampling start timestamp */ +// ProfilerOverheadDuration /* Lock acquisition */ +// ProfilerOverheadDuration /* Expired markers cleaning */ +// ProfilerOverheadDuration /* Counters */ +// ProfilerOverheadDuration /* Threads */ +// ) +// )* +// +// The most complicated part is the stack entry sequence that begins with +// Label. 
Here are some examples. +// +// - ProfilingStack frames without a dynamic string: +// +// Label("js::RunScript") +// CategoryPair(JS::ProfilingCategoryPair::JS) +// +// Label("XREMain::XRE_main") +// LineNumber(4660) +// CategoryPair(JS::ProfilingCategoryPair::OTHER) +// +// Label("ElementRestyler::ComputeStyleChangeFor") +// LineNumber(3003) +// CategoryPair(JS::ProfilingCategoryPair::CSS) +// +// - ProfilingStack frames with a dynamic string: +// +// Label("nsObserverService::NotifyObservers") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("domwindo") +// DynamicStringFragment("wopened") +// LineNumber(291) +// CategoryPair(JS::ProfilingCategoryPair::OTHER) +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME)) +// DynamicStringFragment("closeWin") +// DynamicStringFragment("dow (chr") +// DynamicStringFragment("ome://gl") +// DynamicStringFragment("obal/con") +// DynamicStringFragment("tent/glo") +// DynamicStringFragment("balOverl") +// DynamicStringFragment("ay.js:5)") +// DynamicStringFragment("") # this string holds the closing '\0' +// LineNumber(25) +// CategoryPair(JS::ProfilingCategoryPair::JS) +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME)) +// DynamicStringFragment("bound (s") +// DynamicStringFragment("elf-host") +// DynamicStringFragment("ed:914)") +// LineNumber(945) +// CategoryPair(JS::ProfilingCategoryPair::JS) +// +// - A profiling stack frame with an overly long dynamic string: +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("(too lon") +// DynamicStringFragment("g)") +// LineNumber(100) +// CategoryPair(JS::ProfilingCategoryPair::NETWORK) +// +// - A wasm JIT frame: +// +// Label("") +// FrameFlags(uint64_t(0)) +// DynamicStringFragment("wasm-fun") +// DynamicStringFragment("ction[87") +// DynamicStringFragment("36] (blo") +// DynamicStringFragment("b:http:/") +// 
DynamicStringFragment("/webasse") +// DynamicStringFragment("mbly.org") +// DynamicStringFragment("/3dc5759") +// DynamicStringFragment("4-ce58-4") +// DynamicStringFragment("626-975b") +// DynamicStringFragment("-08ad116") +// DynamicStringFragment("30bc1:38") +// DynamicStringFragment("29856)") +// +// - A JS frame in a synchronous sample: +// +// Label("") +// FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME)) +// DynamicStringFragment("u (https") +// DynamicStringFragment("://perf-") +// DynamicStringFragment("html.io/") +// DynamicStringFragment("ac0da204") +// DynamicStringFragment("aaa44d75") +// DynamicStringFragment("a800.bun") +// DynamicStringFragment("dle.js:2") +// DynamicStringFragment("5)") + +// Because this is a format entirely internal to the Profiler, any parsing +// error indicates a bug in the ProfileBuffer writing or the parser itself, +// or possibly flaky hardware. +#define ERROR_AND_CONTINUE(msg) \ + { \ + fprintf(stderr, "ProfileBuffer parse error: %s", msg); \ + MOZ_ASSERT(false, msg); \ + continue; \ + } + +struct StreamingParametersForThread { + SpliceableJSONWriter& mWriter; + UniqueStacks& mUniqueStacks; + ThreadStreamingContext::PreviousStackState& mPreviousStackState; + uint32_t& mPreviousStack; + + StreamingParametersForThread( + SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks, + ThreadStreamingContext::PreviousStackState& aPreviousStackState, + uint32_t& aPreviousStack) + : mWriter(aWriter), + mUniqueStacks(aUniqueStacks), + mPreviousStackState(aPreviousStackState), + mPreviousStack(aPreviousStack) {} +}; + +// GetStreamingParametersForThreadCallback: +// (ProfilerThreadId) -> Maybe<StreamingParametersForThread> +template <typename GetStreamingParametersForThreadCallback> +ProfilerThreadId ProfileBuffer::DoStreamSamplesAndMarkersToJSON( + mozilla::FailureLatch& aFailureLatch, + GetStreamingParametersForThreadCallback&& + aGetStreamingParametersForThreadCallback, + double aSinceTime, 
ProcessStreamingContext* aStreamingContextForMarkers, + mozilla::ProgressLogger aProgressLogger) const { + UniquePtr<char[]> dynStrBuf = MakeUnique<char[]>(kMaxFrameKeyLength); + + return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + ProfilerThreadId processedThreadId; + + EntryGetter e(*aReader, aFailureLatch, std::move(aProgressLogger), + /* aInitialReadPos */ 0, aStreamingContextForMarkers); + + for (;;) { + // This block skips entries until we find the start of the next sample. + // This is useful in three situations. + // + // - The circular buffer overwrites old entries, so when we start parsing + // we might be in the middle of a sample, and we must skip forward to + // the start of the next sample. + // + // - We skip samples that don't have an appropriate ThreadId or Time. + // + // - We skip range Pause, Resume, CollectionStart, Marker, Counter + // and CollectionEnd entries between samples. + while (e.Has()) { + if (e.Get().IsThreadId()) { + break; + } + e.Next(); + } + + if (!e.Has()) { + break; + } + + // Due to the skip_to_next_sample block above, if we have an entry here it + // must be a ThreadId entry. + MOZ_ASSERT(e.Get().IsThreadId()); + + ProfilerThreadId threadId = e.Get().GetThreadId(); + e.Next(); + + Maybe<StreamingParametersForThread> streamingParameters = + std::forward<GetStreamingParametersForThreadCallback>( + aGetStreamingParametersForThreadCallback)(threadId); + + // Ignore samples that are for the wrong thread. 
+ if (!streamingParameters) { + continue; + } + + SpliceableJSONWriter& writer = streamingParameters->mWriter; + UniqueStacks& uniqueStacks = streamingParameters->mUniqueStacks; + ThreadStreamingContext::PreviousStackState& previousStackState = + streamingParameters->mPreviousStackState; + uint32_t& previousStack = streamingParameters->mPreviousStack; + + auto ReadStack = [&](EntryGetter& e, double time, uint64_t entryPosition, + const Maybe<double>& unresponsiveDuration, + const RunningTimes& runningTimes) { + if (writer.Failed()) { + return; + } + + Maybe<UniqueStacks::StackKey> maybeStack = + uniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)")); + if (!maybeStack) { + writer.SetFailure("BeginStack failure"); + return; + } + + UniqueStacks::StackKey stack = *maybeStack; + + int numFrames = 0; + while (e.Has()) { + if (e.Get().IsNativeLeafAddr()) { + numFrames++; + + void* pc = e.Get().GetPtr(); + e.Next(); + + nsAutoCString functionNameOrAddress = + uniqueStacks.FunctionNameOrAddress(pc); + + maybeStack = uniqueStacks.AppendFrame( + stack, UniqueStacks::FrameKey(functionNameOrAddress.get())); + if (!maybeStack) { + writer.SetFailure("AppendFrame failure"); + return; + } + stack = *maybeStack; + + } else if (e.Get().IsLabel()) { + numFrames++; + + const char* label = e.Get().GetString(); + e.Next(); + + using FrameFlags = js::ProfilingStackFrame::Flags; + uint32_t frameFlags = 0; + if (e.Has() && e.Get().IsFrameFlags()) { + frameFlags = uint32_t(e.Get().GetUint64()); + e.Next(); + } + + bool relevantForJS = + frameFlags & uint32_t(FrameFlags::RELEVANT_FOR_JS); + + bool isBaselineInterp = + frameFlags & uint32_t(FrameFlags::IS_BLINTERP_FRAME); + + // Copy potential dynamic string fragments into dynStrBuf, so that + // dynStrBuf will then contain the entire dynamic string. 
+ size_t i = 0; + dynStrBuf[0] = '\0'; + while (e.Has()) { + if (e.Get().IsDynamicStringFragment()) { + char chars[ProfileBufferEntry::kNumChars]; + e.Get().CopyCharsInto(chars); + for (char c : chars) { + if (i < kMaxFrameKeyLength) { + dynStrBuf[i] = c; + i++; + } + } + e.Next(); + } else { + break; + } + } + dynStrBuf[kMaxFrameKeyLength - 1] = '\0'; + bool hasDynamicString = (i != 0); + + nsAutoCStringN<1024> frameLabel; + if (label[0] != '\0' && hasDynamicString) { + if (frameFlags & uint32_t(FrameFlags::STRING_TEMPLATE_METHOD)) { + frameLabel.AppendPrintf("%s.%s", label, dynStrBuf.get()); + } else if (frameFlags & + uint32_t(FrameFlags::STRING_TEMPLATE_GETTER)) { + frameLabel.AppendPrintf("get %s.%s", label, dynStrBuf.get()); + } else if (frameFlags & + uint32_t(FrameFlags::STRING_TEMPLATE_SETTER)) { + frameLabel.AppendPrintf("set %s.%s", label, dynStrBuf.get()); + } else { + frameLabel.AppendPrintf("%s %s", label, dynStrBuf.get()); + } + } else if (hasDynamicString) { + frameLabel.Append(dynStrBuf.get()); + } else { + frameLabel.Append(label); + } + + uint64_t innerWindowID = 0; + if (e.Has() && e.Get().IsInnerWindowID()) { + innerWindowID = uint64_t(e.Get().GetUint64()); + e.Next(); + } + + Maybe<unsigned> line; + if (e.Has() && e.Get().IsLineNumber()) { + line = Some(unsigned(e.Get().GetInt())); + e.Next(); + } + + Maybe<unsigned> column; + if (e.Has() && e.Get().IsColumnNumber()) { + column = Some(unsigned(e.Get().GetInt())); + e.Next(); + } + + Maybe<JS::ProfilingCategoryPair> categoryPair; + if (e.Has() && e.Get().IsCategoryPair()) { + categoryPair = + Some(JS::ProfilingCategoryPair(uint32_t(e.Get().GetInt()))); + e.Next(); + } + + maybeStack = uniqueStacks.AppendFrame( + stack, + UniqueStacks::FrameKey(std::move(frameLabel), relevantForJS, + isBaselineInterp, innerWindowID, line, + column, categoryPair)); + if (!maybeStack) { + writer.SetFailure("AppendFrame failure"); + return; + } + stack = *maybeStack; + + } else if (e.Get().IsJitReturnAddr()) { + 
numFrames++; + + // A JIT frame may expand to multiple frames due to inlining. + void* pc = e.Get().GetPtr(); + const Maybe<Vector<UniqueStacks::FrameKey>>& frameKeys = + uniqueStacks.LookupFramesForJITAddressFromBufferPos( + pc, entryPosition ? entryPosition : e.CurPos()); + MOZ_RELEASE_ASSERT( + frameKeys, + "Attempting to stream samples for a buffer range " + "for which we don't have JITFrameInfo?"); + for (const UniqueStacks::FrameKey& frameKey : *frameKeys) { + maybeStack = uniqueStacks.AppendFrame(stack, frameKey); + if (!maybeStack) { + writer.SetFailure("AppendFrame failure"); + return; + } + stack = *maybeStack; + } + + e.Next(); + + } else { + break; + } + } + + // Even if this stack is considered empty, it contains the root frame, + // which needs to be in the JSON output because following "same samples" + // may refer to it when reusing this sample.mStack. + const Maybe<uint32_t> stackIndex = + uniqueStacks.GetOrAddStackIndex(stack); + if (!stackIndex) { + writer.SetFailure("Can't add unique string for stack"); + return; + } + + // And store that possibly-empty stack in case it's followed by "same + // sample" entries. + previousStack = *stackIndex; + previousStackState = (numFrames == 0) + ? ThreadStreamingContext::eStackWasEmpty + : ThreadStreamingContext::eStackWasNotEmpty; + + // Even if too old or empty, we did process a sample for this thread id. + processedThreadId = threadId; + + // Discard samples that are too old. + if (time < aSinceTime) { + return; + } + + if (numFrames == 0 && runningTimes.IsEmpty()) { + // It is possible to have empty stacks if native stackwalking is + // disabled. Skip samples with empty stacks, unless we have useful + // running times. + return; + } + + WriteSample(writer, ProfileSample{*stackIndex, time, + unresponsiveDuration, runningTimes}); + }; // End of `ReadStack(EntryGetter&)` lambda. 
+ + if (e.Has() && e.Get().IsTime()) { + double time = e.Get().GetDouble(); + e.Next(); + // Note: Even if this sample is too old (before aSinceTime), we still + // need to read it, so that its frames are in the tables, in case there + // is a same-sample following it that would be after aSinceTime, which + // would need these frames to be present. + + ReadStack(e, time, 0, Nothing{}, RunningTimes{}); + + e.SetLocalProgress("Processed sample"); + } else if (e.Has() && e.Get().IsTimeBeforeCompactStack()) { + double time = e.Get().GetDouble(); + // Note: Even if this sample is too old (before aSinceTime), we still + // need to read it, so that its frames are in the tables, in case there + // is a same-sample following it that would be after aSinceTime, which + // would need these frames to be present. + + RunningTimes runningTimes; + Maybe<double> unresponsiveDuration; + + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + ProfileBufferEntry::Kind kind = + er.ReadObject<ProfileBufferEntry::Kind>(); + + // There may be running times before the CompactStack. + if (kind == ProfileBufferEntry::Kind::RunningTimes) { + er.ReadIntoObject(runningTimes); + continue; + } + + // There may be an UnresponsiveDurationMs before the CompactStack. + if (kind == ProfileBufferEntry::Kind::UnresponsiveDurationMs) { + unresponsiveDuration = Some(er.ReadObject<double>()); + continue; + } + + if (kind == ProfileBufferEntry::Kind::CompactStack) { + ProfileChunkedBuffer tempBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + WorkerChunkManager()); + er.ReadIntoObject(tempBuffer); + tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "Local ProfileChunkedBuffer cannot be out-of-session"); + // This is a compact stack, it should only contain one sample. 
+ EntryGetter stackEntryGetter(*aReader, aFailureLatch); + ReadStack(stackEntryGetter, time, + it.CurrentBlockIndex().ConvertToProfileBufferIndex(), + unresponsiveDuration, runningTimes); + }); + WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); + break; + } + + if (kind == ProfileBufferEntry::Kind::Marker && + aStreamingContextForMarkers) { + StreamMarkerAfterKind(er, *aStreamingContextForMarkers); + continue; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeCompactStack and CompactStack"); + er.SetRemainingBytes(0); + } + + e.RestartAfter(it); + + e.SetLocalProgress("Processed compact sample"); + } else if (e.Has() && e.Get().IsTimeBeforeSameSample()) { + if (previousStackState == ThreadStreamingContext::eNoStackYet) { + // We don't have any full sample yet, we cannot duplicate a "previous" + // one. This should only happen at most once per thread, for the very + // first sample. + continue; + } + + ProfileSample sample; + + // Keep the same `mStack` as previously output. + // Note that it may be empty, this is checked below before writing it. + sample.mStack = previousStack; + + sample.mTime = e.Get().GetDouble(); + + // Ignore samples that are too old. + if (sample.mTime < aSinceTime) { + e.Next(); + continue; + } + + sample.mResponsiveness = Nothing{}; + + sample.mRunningTimes.Clear(); + + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + ProfileBufferEntry::Kind kind = + er.ReadObject<ProfileBufferEntry::Kind>(); + + // There may be running times before the SameSample. 
+ if (kind == ProfileBufferEntry::Kind::RunningTimes) { + er.ReadIntoObject(sample.mRunningTimes); + continue; + } + + if (kind == ProfileBufferEntry::Kind::SameSample) { + if (previousStackState == ThreadStreamingContext::eStackWasEmpty && + sample.mRunningTimes.IsEmpty()) { + // Skip samples with empty stacks, unless we have useful running + // times. + break; + } + WriteSample(writer, sample); + break; + } + + if (kind == ProfileBufferEntry::Kind::Marker && + aStreamingContextForMarkers) { + StreamMarkerAfterKind(er, *aStreamingContextForMarkers); + continue; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeSameSample and SameSample"); + er.SetRemainingBytes(0); + } + + e.RestartAfter(it); + + e.SetLocalProgress("Processed repeated sample"); + } else { + ERROR_AND_CONTINUE("expected a Time entry"); + } + } + + return processedThreadId; + }); +} + +ProfilerThreadId ProfileBuffer::StreamSamplesToJSON( + SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId, + double aSinceTime, UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const { + ThreadStreamingContext::PreviousStackState previousStackState = + ThreadStreamingContext::eNoStackYet; + uint32_t stack = 0u; +#ifdef DEBUG + int processedCount = 0; +#endif // DEBUG + return DoStreamSamplesAndMarkersToJSON( + aWriter.SourceFailureLatch(), + [&](ProfilerThreadId aReadThreadId) { + Maybe<StreamingParametersForThread> streamingParameters; +#ifdef DEBUG + ++processedCount; + MOZ_ASSERT( + aThreadId.IsSpecified() || + (processedCount == 1 && aReadThreadId.IsSpecified()), + "Unspecified aThreadId should only be used with 1-sample buffer"); +#endif // DEBUG + if (!aThreadId.IsSpecified() || aThreadId == aReadThreadId) { + streamingParameters.emplace(aWriter, aUniqueStacks, + previousStackState, stack); + } + return streamingParameters; + }, + aSinceTime, /* aStreamingContextForMarkers */ nullptr, + 
std::move(aProgressLogger)); +} + +void ProfileBuffer::StreamSamplesAndMarkersToJSON( + ProcessStreamingContext& aProcessStreamingContext, + mozilla::ProgressLogger aProgressLogger) const { + (void)DoStreamSamplesAndMarkersToJSON( + aProcessStreamingContext.SourceFailureLatch(), + [&](ProfilerThreadId aReadThreadId) { + Maybe<StreamingParametersForThread> streamingParameters; + ThreadStreamingContext* threadData = + aProcessStreamingContext.GetThreadStreamingContext(aReadThreadId); + if (threadData) { + streamingParameters.emplace( + threadData->mSamplesDataWriter, *threadData->mUniqueStacks, + threadData->mPreviousStackState, threadData->mPreviousStack); + } + return streamingParameters; + }, + aProcessStreamingContext.GetSinceTime(), &aProcessStreamingContext, + std::move(aProgressLogger)); +} + +void ProfileBuffer::AddJITInfoForRange( + uint64_t aRangeStart, ProfilerThreadId aThreadId, JSContext* aContext, + JITFrameInfo& aJITFrameInfo, + mozilla::ProgressLogger aProgressLogger) const { + // We can only process JitReturnAddr entries if we have a JSContext. + MOZ_RELEASE_ASSERT(aContext); + + aRangeStart = std::max(aRangeStart, BufferRangeStart()); + aJITFrameInfo.AddInfoForRange( + aRangeStart, BufferRangeEnd(), aContext, + [&](const std::function<void(void*)>& aJITAddressConsumer) { + // Find all JitReturnAddr entries in the given range for the given + // thread, and call aJITAddressConsumer with those addresses. + + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when " + "sampler is running"); + + EntryGetter e(*aReader, aJITFrameInfo.LocalFailureLatchSource(), + std::move(aProgressLogger), aRangeStart); + + while (true) { + // Advance to the next ThreadId entry. 
+ while (e.Has() && !e.Get().IsThreadId()) { + e.Next(); + } + if (!e.Has()) { + break; + } + + MOZ_ASSERT(e.Get().IsThreadId()); + ProfilerThreadId threadId = e.Get().GetThreadId(); + e.Next(); + + // Ignore samples that are for a different thread. + if (threadId != aThreadId) { + continue; + } + + if (e.Has() && e.Get().IsTime()) { + // Legacy stack. + e.Next(); + while (e.Has() && !e.Get().IsThreadId()) { + if (e.Get().IsJitReturnAddr()) { + aJITAddressConsumer(e.Get().GetPtr()); + } + e.Next(); + } + } else if (e.Has() && e.Get().IsTimeBeforeCompactStack()) { + // Compact stack. + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + ProfileBufferEntry::Kind kind = + er.ReadObject<ProfileBufferEntry::Kind>(); + if (kind == ProfileBufferEntry::Kind::CompactStack) { + ProfileChunkedBuffer tempBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + WorkerChunkManager()); + er.ReadIntoObject(tempBuffer); + tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT( + aReader, + "Local ProfileChunkedBuffer cannot be out-of-session"); + EntryGetter stackEntryGetter( + *aReader, aJITFrameInfo.LocalFailureLatchSource()); + while (stackEntryGetter.Has()) { + if (stackEntryGetter.Get().IsJitReturnAddr()) { + aJITAddressConsumer(stackEntryGetter.Get().GetPtr()); + } + stackEntryGetter.Next(); + } + }); + WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); + break; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeCompactStack and CompactStack"); + er.SetRemainingBytes(0); + } + + e.Next(); + } else if (e.Has() && e.Get().IsTimeBeforeSameSample()) { + // Sample index, nothing to do. 
+ + } else { + ERROR_AND_CONTINUE("expected a Time entry"); + } + } + }); + }); +} + +void ProfileBuffer::StreamMarkersToJSON( + SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId, + const TimeStamp& aProcessStartTime, double aSinceTime, + UniqueStacks& aUniqueStacks, + mozilla::ProgressLogger aProgressLogger) const { + mEntries.ReadEach([&](ProfileBufferEntryReader& aER) { + auto type = static_cast<ProfileBufferEntry::Kind>( + aER.ReadObject<ProfileBufferEntry::KindUnderlyingType>()); + MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) < + static_cast<ProfileBufferEntry::KindUnderlyingType>( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + if (type == ProfileBufferEntry::Kind::Marker) { + mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream( + aER, + [&](const ProfilerThreadId& aMarkerThreadId) { + return (!aThreadId.IsSpecified() || aMarkerThreadId == aThreadId) + ? &aWriter + : nullptr; + }, + [&](ProfileChunkedBuffer& aChunkedBuffer) { + ProfilerBacktrace backtrace("", &aChunkedBuffer); + backtrace.StreamJSON(aWriter, aProcessStartTime, aUniqueStacks); + }, + [&](mozilla::base_profiler_markers_detail::Streaming::DeserializerTag + aTag) { + size_t payloadSize = aER.RemainingBytes(); + + ProfileBufferEntryReader::DoubleSpanOfConstBytes spans = + aER.ReadSpans(payloadSize); + if (MOZ_LIKELY(spans.IsSingleSpan())) { + // Only a single span, we can just refer to it directly + // instead of copying it. + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, spans.mFirstOrOnly.Elements(), payloadSize, &aWriter); + } else { + // Two spans, we need to concatenate them by copying. + uint8_t* payloadBuffer = new uint8_t[payloadSize]; + spans.CopyBytesTo(payloadBuffer); + profiler::ffi::gecko_profiler_serialize_marker_for_tag( + aTag, payloadBuffer, payloadSize, &aWriter); + delete[] payloadBuffer; + } + }); + } else { + // The entry was not a marker, we need to skip to the end. 
+ aER.SetRemainingBytes(0); + } + }); +} + +void ProfileBuffer::StreamProfilerOverheadToJSON( + SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime, + double aSinceTime, mozilla::ProgressLogger aProgressLogger) const { + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader, aWriter.SourceFailureLatch(), + std::move(aProgressLogger)); + + enum Schema : uint32_t { + TIME = 0, + LOCKING = 1, + MARKER_CLEANING = 2, + COUNTERS = 3, + THREADS = 4 + }; + + aWriter.StartObjectProperty("profilerOverhead"); + aWriter.StartObjectProperty("samples"); + // Stream all sampling overhead data. We skip other entries, because we + // process them in StreamSamplesToJSON()/etc. + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("time"); + schema.WriteField("locking"); + schema.WriteField("expiredMarkerCleaning"); + schema.WriteField("counters"); + schema.WriteField("threads"); + } + + aWriter.StartArrayProperty("data"); + double firstTime = 0.0; + double lastTime = 0.0; + ProfilerStats intervals, overheads, lockings, cleanings, counters, threads; + while (e.Has()) { + // valid sequence: ProfilerOverheadTime, ProfilerOverheadDuration * 4 + if (e.Get().IsProfilerOverheadTime()) { + double time = e.Get().GetDouble(); + if (time >= aSinceTime) { + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime"); + } + double locking = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime,ProfilerOverheadDuration"); + } + double cleaning = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry 
after " + "ProfilerOverheadTime,ProfilerOverheadDuration*2"); + } + double counter = e.Get().GetDouble(); + e.Next(); + if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) { + ERROR_AND_CONTINUE( + "expected a ProfilerOverheadDuration entry after " + "ProfilerOverheadTime,ProfilerOverheadDuration*3"); + } + double thread = e.Get().GetDouble(); + + if (firstTime == 0.0) { + firstTime = time; + } else { + // Note that we'll have 1 fewer interval than other numbers (because + // we need both ends of an interval to know its duration). The final + // difference should be insignificant over the expected many + // thousands of iterations. + intervals.Count(time - lastTime); + } + lastTime = time; + overheads.Count(locking + cleaning + counter + thread); + lockings.Count(locking); + cleanings.Count(cleaning); + counters.Count(counter); + threads.Count(thread); + + AutoArraySchemaWriter writer(aWriter); + writer.TimeMsElement(TIME, time); + writer.DoubleElement(LOCKING, locking); + writer.DoubleElement(MARKER_CLEANING, cleaning); + writer.DoubleElement(COUNTERS, counter); + writer.DoubleElement(THREADS, thread); + } + } + e.Next(); + } + aWriter.EndArray(); // data + aWriter.EndObject(); // samples + + // Only output statistics if there is at least one full interval (and + // therefore at least two samplings.) 
+ if (intervals.n > 0) { + aWriter.StartObjectProperty("statistics"); + aWriter.DoubleProperty("profiledDuration", lastTime - firstTime); + aWriter.IntProperty("samplingCount", overheads.n); + aWriter.DoubleProperty("overheadDurations", overheads.sum); + aWriter.DoubleProperty("overheadPercentage", + overheads.sum / (lastTime - firstTime)); +#define PROFILER_STATS(name, var) \ + aWriter.DoubleProperty("mean" name, (var).sum / (var).n); \ + aWriter.DoubleProperty("min" name, (var).min); \ + aWriter.DoubleProperty("max" name, (var).max); + PROFILER_STATS("Interval", intervals); + PROFILER_STATS("Overhead", overheads); + PROFILER_STATS("Lockings", lockings); + PROFILER_STATS("Cleaning", cleanings); + PROFILER_STATS("Counter", counters); + PROFILER_STATS("Thread", threads); +#undef PROFILER_STATS + aWriter.EndObject(); // statistics + } + aWriter.EndObject(); // profilerOverhead + }); +} + +struct CounterKeyedSample { + double mTime; + uint64_t mNumber; + int64_t mCount; +}; + +using CounterKeyedSamples = Vector<CounterKeyedSample>; + +static LazyLogModule sFuzzyfoxLog("Fuzzyfox"); + +using CounterMap = HashMap<uint64_t, CounterKeyedSamples>; + +// HashMap lookup, if not found, a default value is inserted. +// Returns reference to (existing or new) value inside the HashMap. +template <typename HashM, typename Key> +static auto& LookupOrAdd(HashM& aMap, Key&& aKey) { + auto addPtr = aMap.lookupForAdd(aKey); + if (!addPtr) { + MOZ_RELEASE_ASSERT(aMap.add(addPtr, std::forward<Key>(aKey), + typename HashM::Entry::ValueType{})); + MOZ_ASSERT(!!addPtr); + } + return addPtr->value(); +} + +void ProfileBuffer::StreamCountersToJSON( + SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime, + double aSinceTime, mozilla::ProgressLogger aProgressLogger) const { + // Because this is a format entirely internal to the Profiler, any parsing + // error indicates a bug in the ProfileBuffer writing or the parser itself, + // or possibly flaky hardware. 
+ + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader, aWriter.SourceFailureLatch(), + std::move(aProgressLogger)); + + enum Schema : uint32_t { TIME = 0, NUMBER = 1, COUNT = 2 }; + + // Stream all counters. We skip other entries, because we process them in + // StreamSamplesToJSON()/etc. + // + // Valid sequence in the buffer: + // CounterID + // Time + // ( CounterKey Count Number? )* + // + // And the JSON (example): + // "counters": { + // "name": "malloc", + // "category": "Memory", + // "description": "Amount of allocated memory", + // "sample_groups": { + // "id": 0, + // "samples": { + // "schema": {"time": 0, "number": 1, "count": 2}, + // "data": [ + // [ + // 16117.033968000002, + // 2446216, + // 6801320 + // ], + // [ + // 16118.037638, + // 2446216, + // 6801320 + // ], + // ], + // } + // } + // }, + + // Build the map of counters and populate it + HashMap<void*, CounterMap> counters; + + while (e.Has()) { + // skip all non-Counters, including if we start in the middle of a counter + if (e.Get().IsCounterId()) { + void* id = e.Get().GetPtr(); + CounterMap& counter = LookupOrAdd(counters, id); + e.Next(); + if (!e.Has() || !e.Get().IsTime()) { + ERROR_AND_CONTINUE("expected a Time entry"); + } + double time = e.Get().GetDouble(); + e.Next(); + if (time >= aSinceTime) { + while (e.Has() && e.Get().IsCounterKey()) { + uint64_t key = e.Get().GetUint64(); + CounterKeyedSamples& data = LookupOrAdd(counter, key); + e.Next(); + if (!e.Has() || !e.Get().IsCount()) { + ERROR_AND_CONTINUE("expected a Count entry"); + } + int64_t count = e.Get().GetUint64(); + e.Next(); + uint64_t number; + if (!e.Has() || !e.Get().IsNumber()) { + number = 0; + } else { + number = e.Get().GetInt64(); + e.Next(); + } + CounterKeyedSample sample = {time, number, count}; + MOZ_RELEASE_ASSERT(data.append(sample)); + } + } else { + // skip counter 
sample - only need to skip the initial counter + // id, then let the loop at the top skip the rest + } + } else { + e.Next(); + } + } + // we have a map of a map of counter entries; dump them to JSON + if (counters.count() == 0) { + return; + } + + aWriter.StartArrayProperty("counters"); + for (auto iter = counters.iter(); !iter.done(); iter.next()) { + CounterMap& counter = iter.get().value(); + const BaseProfilerCount* base_counter = + static_cast<const BaseProfilerCount*>(iter.get().key()); + + aWriter.Start(); + aWriter.StringProperty("name", MakeStringSpan(base_counter->mLabel)); + aWriter.StringProperty("category", + MakeStringSpan(base_counter->mCategory)); + aWriter.StringProperty("description", + MakeStringSpan(base_counter->mDescription)); + + aWriter.StartArrayProperty("sample_groups"); + for (auto counter_iter = counter.iter(); !counter_iter.done(); + counter_iter.next()) { + CounterKeyedSamples& samples = counter_iter.get().value(); + uint64_t key = counter_iter.get().key(); + + size_t size = samples.length(); + if (size == 0) { + continue; + } + + aWriter.StartObjectElement(); + { + aWriter.IntProperty("id", static_cast<int64_t>(key)); + aWriter.StartObjectProperty("samples"); + { + // XXX Can we assume a missing count means 0? + JSONSchemaWriter schema(aWriter); + schema.WriteField("time"); + schema.WriteField("number"); + schema.WriteField("count"); + } + + aWriter.StartArrayProperty("data"); + double previousSkippedTime = 0.0; + uint64_t previousNumber = 0; + int64_t previousCount = 0; + for (size_t i = 0; i < size; i++) { + // Encode as deltas, and only encode if different than the previous + // or next sample; Always write the first and last samples. + if (i == 0 || i == size - 1 || + samples[i].mNumber != previousNumber || + samples[i].mCount != previousCount || + // Ensure we ouput the first 0 before skipping samples. 
+ (i >= 2 && (samples[i - 2].mNumber != previousNumber || + samples[i - 2].mCount != previousCount))) { + if (i != 0 && samples[i].mTime >= samples[i - 1].mTime) { + MOZ_LOG(sFuzzyfoxLog, mozilla::LogLevel::Error, + ("Fuzzyfox Profiler Assertion: %f >= %f", + samples[i].mTime, samples[i - 1].mTime)); + } + MOZ_ASSERT(i == 0 || samples[i].mTime >= samples[i - 1].mTime); + MOZ_ASSERT(samples[i].mNumber >= previousNumber); + MOZ_ASSERT(samples[i].mNumber - previousNumber <= + uint64_t(std::numeric_limits<int64_t>::max())); + + int64_t numberDelta = + static_cast<int64_t>(samples[i].mNumber - previousNumber); + int64_t countDelta = samples[i].mCount - previousCount; + + if (previousSkippedTime != 0.0 && + (numberDelta != 0 || countDelta != 0)) { + // Write the last skipped sample, unless the new one is all + // zeroes (that'd be redundant) This is useful to know when a + // certain value was last sampled, so that the front-end graph + // will be more correct. + AutoArraySchemaWriter writer(aWriter); + writer.TimeMsElement(TIME, previousSkippedTime); + // The deltas are effectively zeroes, since no change happened + // between the last actually-written sample and the last skipped + // one. 
+ writer.IntElement(NUMBER, 0); + writer.IntElement(COUNT, 0); + } + + AutoArraySchemaWriter writer(aWriter); + writer.TimeMsElement(TIME, samples[i].mTime); + writer.IntElement(NUMBER, numberDelta); + writer.IntElement(COUNT, countDelta); + + previousSkippedTime = 0.0; + previousNumber = samples[i].mNumber; + previousCount = samples[i].mCount; + } else { + previousSkippedTime = samples[i].mTime; + } + } + aWriter.EndArray(); // data + aWriter.EndObject(); // samples + } + aWriter.EndObject(); // sample_groups item + } + aWriter.EndArray(); // sample groups + aWriter.End(); // for each counter + } + aWriter.EndArray(); // counters + }); +} + +#undef ERROR_AND_CONTINUE + +static void AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason, + const Maybe<double>& aStartTime, + const Maybe<double>& aEndTime) { + aWriter.Start(); + if (aStartTime) { + aWriter.TimeDoubleMsProperty("startTime", *aStartTime); + } else { + aWriter.NullProperty("startTime"); + } + if (aEndTime) { + aWriter.TimeDoubleMsProperty("endTime", *aEndTime); + } else { + aWriter.NullProperty("endTime"); + } + aWriter.StringProperty("reason", MakeStringSpan(aReason)); + aWriter.End(); +} + +void ProfileBuffer::StreamPausedRangesToJSON( + SpliceableJSONWriter& aWriter, double aSinceTime, + mozilla::ProgressLogger aProgressLogger) const { + mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + EntryGetter e(*aReader, aWriter.SourceFailureLatch(), + aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "Streaming pauses...", 99_pc, "Streamed pauses")); + + Maybe<double> currentPauseStartTime; + Maybe<double> currentCollectionStartTime; + + while (e.Has()) { + if (e.Get().IsPause()) { + currentPauseStartTime = Some(e.Get().GetDouble()); + } else if (e.Get().IsResume()) { + AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime, + Some(e.Get().GetDouble())); + currentPauseStartTime 
= Nothing(); + } else if (e.Get().IsCollectionStart()) { + currentCollectionStartTime = Some(e.Get().GetDouble()); + } else if (e.Get().IsCollectionEnd()) { + AddPausedRange(aWriter, "collecting", currentCollectionStartTime, + Some(e.Get().GetDouble())); + currentCollectionStartTime = Nothing(); + } + e.Next(); + } + + if (currentPauseStartTime) { + AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime, + Nothing()); + } + if (currentCollectionStartTime) { + AddPausedRange(aWriter, "collecting", currentCollectionStartTime, + Nothing()); + } + }); +} + +bool ProfileBuffer::DuplicateLastSample(ProfilerThreadId aThreadId, + double aSampleTimeMs, + Maybe<uint64_t>& aLastSample, + const RunningTimes& aRunningTimes) { + if (!aLastSample) { + return false; + } + + if (mEntries.IsIndexInCurrentChunk(ProfileBufferIndex{*aLastSample})) { + // The last (fully-written) sample is in this chunk, we can refer to it. + + // Note that between now and when we write the SameSample below, another + // chunk could have been started, so the SameSample will in fact refer to a + // block in a previous chunk. This is okay, because: + // - When serializing to JSON, if that chunk is still there, we'll still be + // able to find that old stack, so nothing will be lost. + // - If unfortunately that chunk has been destroyed, we will lose this + // sample. But this will only happen to the first sample (per thread) in + // in the whole JSON output, because the next time we're here to duplicate + // the same sample again, IsIndexInCurrentChunk will say `false` and we + // will fall back to the normal copy or even re-sample. Losing the first + // sample out of many in a whole recording is acceptable. + // + // |---| = chunk, S = Sample, D = Duplicate, s = same sample + // |---S-s-s--| |s-D--s--s-| |s-D--s---s| + // Later, the first chunk is destroyed/recycled: + // |s-D--s--s-| |s-D--s---s| |-... + // Output: ^ ^ ^ ^ + // `-|--|-------|--- Same but no previous -> lost. 
+ // `--|-------|--- Full duplicate sample. + // `-------|--- Same with previous -> okay. + // `--- Same but now we have a previous -> okay! + + AUTO_PROFILER_STATS(DuplicateLastSample_SameSample); + + // Add the thread id first. We don't update `aLastSample` because we are not + // writing a full sample. + (void)AddThreadIdEntry(aThreadId); + + // Copy the new time, to be followed by a SameSample. + AddEntry(ProfileBufferEntry::TimeBeforeSameSample(aSampleTimeMs)); + + // Add running times if they have data. + if (!aRunningTimes.IsEmpty()) { + mEntries.PutObjects(ProfileBufferEntry::Kind::RunningTimes, + aRunningTimes); + } + + // Finish with a SameSample entry. + mEntries.PutObjects(ProfileBufferEntry::Kind::SameSample); + + return true; + } + + AUTO_PROFILER_STATS(DuplicateLastSample_copy); + + ProfileChunkedBuffer tempBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, WorkerChunkManager()); + + auto retrieveWorkerChunk = MakeScopeExit( + [&]() { WorkerChunkManager().Reset(tempBuffer.GetAllChunks()); }); + + const bool ok = mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) { + MOZ_ASSERT(aReader, + "ProfileChunkedBuffer cannot be out-of-session when sampler is " + "running"); + + // DuplicateLastSample is only called during profiling, so we don't need a + // progress logger (only useful when capturing the final profile). + EntryGetter e(*aReader, mozilla::FailureLatchInfallibleSource::Singleton(), + ProgressLogger{}, *aLastSample); + + if (e.CurPos() != *aLastSample) { + // The last sample is no longer within the buffer range, so we cannot + // use it. Reset the stored buffer position to Nothing(). + aLastSample.reset(); + return false; + } + + MOZ_RELEASE_ASSERT(e.Has() && e.Get().IsThreadId() && + e.Get().GetThreadId() == aThreadId); + + e.Next(); + + // Go through the whole entry and duplicate it, until we find the next + // one. 
+ while (e.Has()) { + switch (e.Get().GetKind()) { + case ProfileBufferEntry::Kind::Pause: + case ProfileBufferEntry::Kind::Resume: + case ProfileBufferEntry::Kind::PauseSampling: + case ProfileBufferEntry::Kind::ResumeSampling: + case ProfileBufferEntry::Kind::CollectionStart: + case ProfileBufferEntry::Kind::CollectionEnd: + case ProfileBufferEntry::Kind::ThreadId: + case ProfileBufferEntry::Kind::TimeBeforeSameSample: + // We're done. + return true; + case ProfileBufferEntry::Kind::Time: + // Copy with new time + AddEntry(tempBuffer, ProfileBufferEntry::Time(aSampleTimeMs)); + break; + case ProfileBufferEntry::Kind::TimeBeforeCompactStack: { + // Copy with new time, followed by a compact stack. + AddEntry(tempBuffer, + ProfileBufferEntry::TimeBeforeCompactStack(aSampleTimeMs)); + + // Add running times if they have data. + if (!aRunningTimes.IsEmpty()) { + tempBuffer.PutObjects(ProfileBufferEntry::Kind::RunningTimes, + aRunningTimes); + } + + // The `CompactStack` *must* be present afterwards, but may not + // immediately follow `TimeBeforeCompactStack` (e.g., some markers + // could be written in-between), so we need to look for it in the + // following entries. + ProfileChunkedBuffer::BlockIterator it = e.Iterator(); + for (;;) { + ++it; + if (it.IsAtEnd()) { + break; + } + ProfileBufferEntryReader er = *it; + auto kind = static_cast<ProfileBufferEntry::Kind>( + er.ReadObject<ProfileBufferEntry::KindUnderlyingType>()); + MOZ_ASSERT( + static_cast<ProfileBufferEntry::KindUnderlyingType>(kind) < + static_cast<ProfileBufferEntry::KindUnderlyingType>( + ProfileBufferEntry::Kind::MODERN_LIMIT)); + if (kind == ProfileBufferEntry::Kind::CompactStack) { + // Found our CompactStack, just make a copy of the whole entry. 
+ er = *it; + auto bytes = er.RemainingBytes(); + MOZ_ASSERT(bytes < + ProfileBufferChunkManager::scExpectedMaximumStackSize); + tempBuffer.Put(bytes, [&](Maybe<ProfileBufferEntryWriter>& aEW) { + MOZ_ASSERT(aEW.isSome(), "tempBuffer cannot be out-of-session"); + aEW->WriteFromReader(er, bytes); + }); + // CompactStack marks the end, we're done. + break; + } + + MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT, + "There should be no legacy entries between " + "TimeBeforeCompactStack and CompactStack"); + er.SetRemainingBytes(0); + // Here, we have encountered a non-legacy entry that was not the + // CompactStack we're looking for; just continue the search... + } + // We're done. + return true; + } + case ProfileBufferEntry::Kind::CounterKey: + case ProfileBufferEntry::Kind::Number: + case ProfileBufferEntry::Kind::Count: + // Don't copy anything not part of a thread's stack sample + break; + case ProfileBufferEntry::Kind::CounterId: + // CounterId is normally followed by Time - if so, we'd like + // to skip it. If we duplicate Time, it won't hurt anything, just + // waste buffer space (and this can happen if the CounterId has + // fallen off the end of the buffer, but Time (and Number/Count) + // are still in the buffer). + e.Next(); + if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) { + // this would only happen if there was an invalid sequence + // in the buffer. Don't skip it. + continue; + } + // we've skipped Time + break; + case ProfileBufferEntry::Kind::ProfilerOverheadTime: + // ProfilerOverheadTime is normally followed by + // ProfilerOverheadDuration*4 - if so, we'd like to skip it. Don't + // duplicate, as we are in the middle of a sampling and will soon + // capture its own overhead. + e.Next(); + // A missing Time would only happen if there was an invalid + // sequence in the buffer. Don't skip unexpected entry. 
+ if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + e.Next(); + if (e.Has() && + e.Get().GetKind() != + ProfileBufferEntry::Kind::ProfilerOverheadDuration) { + continue; + } + // we've skipped ProfilerOverheadTime and + // ProfilerOverheadDuration*4. + break; + default: { + // Copy anything else we don't know about. + AddEntry(tempBuffer, e.Get()); + break; + } + } + e.Next(); + } + return true; + }); + + if (!ok) { + return false; + } + + // If the buffer was big enough, there won't be any cleared blocks. + if (tempBuffer.GetState().mClearedBlockCount != 0) { + // No need to try to read stack again as it won't fit. Reset the stored + // buffer position to Nothing(). + aLastSample.reset(); + return false; + } + + aLastSample = Some(AddThreadIdEntry(aThreadId)); + + mEntries.AppendContents(tempBuffer); + + return true; +} + +void ProfileBuffer::DiscardSamplesBeforeTime(double aTime) { + // This function does nothing! + // The duration limit will be removed from Firefox, see bug 1632365. + Unused << aTime; +} + +// END ProfileBuffer +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/ProfileBufferEntry.h b/tools/profiler/core/ProfileBufferEntry.h new file mode 100644 index 0000000000..c7088ec60b --- /dev/null +++ b/tools/profiler/core/ProfileBufferEntry.h @@ -0,0 +1,531 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfileBufferEntry_h +#define ProfileBufferEntry_h + +#include <cstdint> +#include <cstdlib> +#include <functional> +#include <utility> +#include "gtest/MozGtestFriend.h" +#include "js/ProfilingCategory.h" +#include "mozilla/Attributes.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/Maybe.h" +#include "mozilla/ProfileBufferEntryKinds.h" +#include "mozilla/ProfileJSONWriter.h" +#include "mozilla/ProfilerUtils.h" +#include "mozilla/UniquePtrExtensions.h" +#include "mozilla/Variant.h" +#include "mozilla/Vector.h" +#include "nsString.h" + +class ProfilerCodeAddressService; +struct JSContext; + +class ProfileBufferEntry { + public: + using KindUnderlyingType = mozilla::ProfileBufferEntryKindUnderlyingType; + using Kind = mozilla::ProfileBufferEntryKind; + + ProfileBufferEntry(); + + static constexpr size_t kNumChars = mozilla::ProfileBufferEntryNumChars; + + private: + // aString must be a static string. + ProfileBufferEntry(Kind aKind, const char* aString); + ProfileBufferEntry(Kind aKind, char aChars[kNumChars]); + ProfileBufferEntry(Kind aKind, void* aPtr); + ProfileBufferEntry(Kind aKind, double aDouble); + ProfileBufferEntry(Kind aKind, int64_t aInt64); + ProfileBufferEntry(Kind aKind, uint64_t aUint64); + ProfileBufferEntry(Kind aKind, int aInt); + ProfileBufferEntry(Kind aKind, ProfilerThreadId aThreadId); + + public: +#define CTOR(KIND, TYPE, SIZE) \ + static ProfileBufferEntry KIND(TYPE aVal) { \ + return ProfileBufferEntry(Kind::KIND, aVal); \ + } + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR) +#undef CTOR + + Kind GetKind() const { return mKind; } + +#define IS_KIND(KIND, TYPE, SIZE) \ + bool Is##KIND() const { return mKind == Kind::KIND; } + FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(IS_KIND) +#undef IS_KIND + + private: + FRIEND_TEST(ThreadProfile, InsertOneEntry); + FRIEND_TEST(ThreadProfile, InsertOneEntryWithTinyBuffer); + FRIEND_TEST(ThreadProfile, InsertEntriesNoWrap); + FRIEND_TEST(ThreadProfile, 
InsertEntriesWrap); + FRIEND_TEST(ThreadProfile, MemoryMeasure); + friend class ProfileBuffer; + + Kind mKind; + uint8_t mStorage[kNumChars]; + + const char* GetString() const; + void* GetPtr() const; + double GetDouble() const; + int GetInt() const; + int64_t GetInt64() const; + uint64_t GetUint64() const; + ProfilerThreadId GetThreadId() const; + void CopyCharsInto(char (&aOutArray)[kNumChars]) const; +}; + +// Packed layout: 1 byte for the tag + 8 bytes for the value. +static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size"); + +// Contains all the information about JIT frames that is needed to stream stack +// frames for JitReturnAddr entries in the profiler buffer. +// Every return address (void*) is mapped to one or more JITFrameKeys, and +// every JITFrameKey is mapped to a JSON string for that frame. +// mRangeStart and mRangeEnd describe the range in the buffer for which this +// mapping is valid. Only JitReturnAddr entries within that buffer range can be +// processed using this JITFrameInfoForBufferRange object. +struct JITFrameInfoForBufferRange final { + JITFrameInfoForBufferRange Clone() const; + + uint64_t mRangeStart; + uint64_t mRangeEnd; // mRangeEnd marks the first invalid index. 
+ + struct JITFrameKey { + bool operator==(const JITFrameKey& aOther) const { + return mCanonicalAddress == aOther.mCanonicalAddress && + mDepth == aOther.mDepth; + } + bool operator!=(const JITFrameKey& aOther) const { + return !(*this == aOther); + } + + void* mCanonicalAddress; + uint32_t mDepth; + }; + struct JITFrameKeyHasher { + using Lookup = JITFrameKey; + + static mozilla::HashNumber hash(const JITFrameKey& aLookup) { + mozilla::HashNumber hash = 0; + hash = mozilla::AddToHash(hash, aLookup.mCanonicalAddress); + hash = mozilla::AddToHash(hash, aLookup.mDepth); + return hash; + } + + static bool match(const JITFrameKey& aKey, const JITFrameKey& aLookup) { + return aKey == aLookup; + } + + static void rekey(JITFrameKey& aKey, const JITFrameKey& aNewKey) { + aKey = aNewKey; + } + }; + + using JITAddressToJITFramesMap = + mozilla::HashMap<void*, mozilla::Vector<JITFrameKey>>; + JITAddressToJITFramesMap mJITAddressToJITFramesMap; + using JITFrameToFrameJSONMap = + mozilla::HashMap<JITFrameKey, nsCString, JITFrameKeyHasher>; + JITFrameToFrameJSONMap mJITFrameToFrameJSONMap; +}; + +// Contains JITFrameInfoForBufferRange objects for multiple profiler buffer +// ranges. +class JITFrameInfo final { + public: + JITFrameInfo() + : mUniqueStrings(mozilla::MakeUniqueFallible<UniqueJSONStrings>( + mLocalFailureLatchSource)) { + if (!mUniqueStrings) { + mLocalFailureLatchSource.SetFailure( + "OOM in JITFrameInfo allocating mUniqueStrings"); + } + } + + MOZ_IMPLICIT JITFrameInfo(const JITFrameInfo& aOther, + mozilla::ProgressLogger aProgressLogger); + + // Creates a new JITFrameInfoForBufferRange object in mRanges by looking up + // information about the provided JIT return addresses using aCx. + // Addresses are provided like this: + // The caller of AddInfoForRange supplies a function in aJITAddressProvider. + // This function will be called once, synchronously, with an + // aJITAddressConsumer argument, which is a function that needs to be called + // for every address. 
That function can be called multiple times for the same + // address. + void AddInfoForRange( + uint64_t aRangeStart, uint64_t aRangeEnd, JSContext* aCx, + const std::function<void(const std::function<void(void*)>&)>& + aJITAddressProvider); + + // Returns whether the information stored in this object is still relevant + // for any entries in the buffer. + bool HasExpired(uint64_t aCurrentBufferRangeStart) const { + if (mRanges.empty()) { + // No information means no relevant information. Allow this object to be + // discarded. + return true; + } + return mRanges.back().mRangeEnd <= aCurrentBufferRangeStart; + } + + // Gives access to the failure latch owned by this object. + mozilla::FailureLatch& LocalFailureLatchSource() { + return mLocalFailureLatchSource; + } + + // The encapsulated data points at the local FailureLatch, so on the way out + // they must be given a new external FailureLatch to start using instead. + mozilla::Vector<JITFrameInfoForBufferRange>&& MoveRangesWithNewFailureLatch( + mozilla::FailureLatch& aFailureLatch) &&; + mozilla::UniquePtr<UniqueJSONStrings>&& MoveUniqueStringsWithNewFailureLatch( + mozilla::FailureLatch& aFailureLatch) &&; + + private: + // JITFrameInfo objects may exist during profiling, so each carries its own + // fallible FailureLatch. If and when the data below is finally extracted, any + // error is forwarded to the caller. + mozilla::FailureLatchSource mLocalFailureLatchSource; + + // The array of ranges of JIT frame information, sorted by buffer position. + // Ranges are non-overlapping. + // The JSON of the cached frames can contain string indexes, which refer + // to strings in mUniqueStrings. + mozilla::Vector<JITFrameInfoForBufferRange> mRanges; + + // The string table which contains strings used in the frame JSON that's + // cached in mRanges. 
+ mozilla::UniquePtr<UniqueJSONStrings> mUniqueStrings; +}; + +class UniqueStacks final : public mozilla::FailureLatch { + public: + struct FrameKey { + // Key for a non-JIT frame identified only by its location string; the flags + // are false, the inner window ID is 0, and line, column and category are + // left empty. + explicit FrameKey(const char* aLocation) + : mData(NormalFrameData{nsCString(aLocation), false, false, 0, + mozilla::Nothing(), mozilla::Nothing()}) {} + + // Key for a fully-described non-JIT frame. aLocation is an rvalue, so its + // contents are moved into the key instead of being copied. + FrameKey(nsCString&& aLocation, bool aRelevantForJS, bool aBaselineInterp, + uint64_t aInnerWindowID, const mozilla::Maybe<unsigned>& aLine, + const mozilla::Maybe<unsigned>& aColumn, + const mozilla::Maybe<JS::ProfilingCategoryPair>& aCategoryPair) + : mData(NormalFrameData{std::move(aLocation), aRelevantForJS, + aBaselineInterp, aInnerWindowID, aLine, aColumn, + aCategoryPair}) {} + + // Key for a JIT frame, identified by address, depth and range index. + FrameKey(void* aJITAddress, uint32_t aJITDepth, uint32_t aRangeIndex) + : mData(JITFrameData{aJITAddress, aJITDepth, aRangeIndex}) {} + + FrameKey(const FrameKey& aToCopy) = default; + + uint32_t Hash() const; + bool operator==(const FrameKey& aOther) const { + return mData == aOther.mData; + } + + struct NormalFrameData { + bool operator==(const NormalFrameData& aOther) const; + + nsCString mLocation; + bool mRelevantForJS; + bool mBaselineInterp; + uint64_t mInnerWindowID; + mozilla::Maybe<unsigned> mLine; + mozilla::Maybe<unsigned> mColumn; + mozilla::Maybe<JS::ProfilingCategoryPair> mCategoryPair; + }; + struct JITFrameData { + bool operator==(const JITFrameData& aOther) const; + + void* mCanonicalAddress; + uint32_t mDepth; + uint32_t mRangeIndex; + }; + mozilla::Variant<NormalFrameData, JITFrameData> mData; + }; + + struct FrameKeyHasher { + using Lookup = FrameKey; + + static mozilla::HashNumber hash(const FrameKey& aLookup) { + mozilla::HashNumber hash = 0; + if (aLookup.mData.is<FrameKey::NormalFrameData>()) { + const FrameKey::NormalFrameData& data = + aLookup.mData.as<FrameKey::NormalFrameData>(); + if (!data.mLocation.IsEmpty()) { + hash = mozilla::AddToHash(hash, + mozilla::HashString(data.mLocation.get())); + } + hash = mozilla::AddToHash(hash, data.mRelevantForJS); + hash = 
mozilla::AddToHash(hash, data.mBaselineInterp); + hash = mozilla::AddToHash(hash, data.mInnerWindowID); + if (data.mLine.isSome()) { + hash = mozilla::AddToHash(hash, *data.mLine); + } + if (data.mColumn.isSome()) { + hash = mozilla::AddToHash(hash, *data.mColumn); + } + if (data.mCategoryPair.isSome()) { + hash = mozilla::AddToHash(hash, + static_cast<uint32_t>(*data.mCategoryPair)); + } + } else { + const FrameKey::JITFrameData& data = + aLookup.mData.as<FrameKey::JITFrameData>(); + hash = mozilla::AddToHash(hash, data.mCanonicalAddress); + hash = mozilla::AddToHash(hash, data.mDepth); + hash = mozilla::AddToHash(hash, data.mRangeIndex); + } + return hash; + } + + static bool match(const FrameKey& aKey, const FrameKey& aLookup) { + return aKey == aLookup; + } + + static void rekey(FrameKey& aKey, const FrameKey& aNewKey) { + aKey = aNewKey; + } + }; + + struct StackKey { + mozilla::Maybe<uint32_t> mPrefixStackIndex; + uint32_t mFrameIndex; + + explicit StackKey(uint32_t aFrame) + : mFrameIndex(aFrame), mHash(mozilla::HashGeneric(aFrame)) {} + + StackKey(const StackKey& aPrefix, uint32_t aPrefixStackIndex, + uint32_t aFrame) + : mPrefixStackIndex(mozilla::Some(aPrefixStackIndex)), + mFrameIndex(aFrame), + mHash(mozilla::AddToHash(aPrefix.mHash, aFrame)) {} + + mozilla::HashNumber Hash() const { return mHash; } + + bool operator==(const StackKey& aOther) const { + return mPrefixStackIndex == aOther.mPrefixStackIndex && + mFrameIndex == aOther.mFrameIndex; + } + + private: + mozilla::HashNumber mHash; + }; + + struct StackKeyHasher { + using Lookup = StackKey; + + static mozilla::HashNumber hash(const StackKey& aLookup) { + return aLookup.Hash(); + } + + static bool match(const StackKey& aKey, const StackKey& aLookup) { + return aKey == aLookup; + } + + static void rekey(StackKey& aKey, const StackKey& aNewKey) { + aKey = aNewKey; + } + }; + + UniqueStacks(mozilla::FailureLatch& aFailureLatch, + JITFrameInfo&& aJITFrameInfo, + ProfilerCodeAddressService* 
aCodeAddressService = nullptr); + + // Return a StackKey for aFrame as the stack's root frame (no prefix). + [[nodiscard]] mozilla::Maybe<StackKey> BeginStack(const FrameKey& aFrame); + + // Return a new StackKey that is obtained by appending aFrame to aStack. + [[nodiscard]] mozilla::Maybe<StackKey> AppendFrame(const StackKey& aStack, + const FrameKey& aFrame); + + // Look up frame keys for the given JIT address, and ensure that our frame + // table has entries for the returned frame keys. The JSON for these frames + // is taken from mJITInfoRanges. + // aBufferPosition is needed in order to look up the correct JIT frame info + // object in mJITInfoRanges. + [[nodiscard]] mozilla::Maybe<mozilla::Vector<UniqueStacks::FrameKey>> + LookupFramesForJITAddressFromBufferPos(void* aJITAddress, + uint64_t aBufferPosition); + + [[nodiscard]] mozilla::Maybe<uint32_t> GetOrAddFrameIndex( + const FrameKey& aFrame); + [[nodiscard]] mozilla::Maybe<uint32_t> GetOrAddStackIndex( + const StackKey& aStack); + + void SpliceFrameTableElements(SpliceableJSONWriter& aWriter); + void SpliceStackTableElements(SpliceableJSONWriter& aWriter); + + [[nodiscard]] UniqueJSONStrings& UniqueStrings() { + MOZ_RELEASE_ASSERT(mUniqueStrings.get()); + return *mUniqueStrings; + } + + // Find the function name at the given PC (if a ProfilerCodeAddressService was + // provided), otherwise just stringify that PC. 
+ [[nodiscard]] nsAutoCString FunctionNameOrAddress(void* aPC); + + FAILURELATCH_IMPL_PROXY(mFrameTableWriter) + + private: + void StreamNonJITFrame(const FrameKey& aFrame); + void StreamStack(const StackKey& aStack); + + mozilla::UniquePtr<UniqueJSONStrings> mUniqueStrings; + + ProfilerCodeAddressService* mCodeAddressService = nullptr; + + SpliceableChunkedJSONWriter mFrameTableWriter; + mozilla::HashMap<FrameKey, uint32_t, FrameKeyHasher> mFrameToIndexMap; + + SpliceableChunkedJSONWriter mStackTableWriter; + mozilla::HashMap<StackKey, uint32_t, StackKeyHasher> mStackToIndexMap; + + mozilla::Vector<JITFrameInfoForBufferRange> mJITInfoRanges; +}; + +// +// Thread profile JSON Format +// -------------------------- +// +// The profile contains much duplicate information. The output JSON of the +// profile attempts to deduplicate strings, frames, and stack prefixes, to cut +// down on size and to increase JSON streaming speed. Deduplicated values are +// streamed as indices into their respective tables. +// +// Further, arrays of objects with the same set of properties (e.g., samples, +// frames) are output as arrays according to a schema instead of an object +// with property names. A property that is not present is represented in the +// array as null or undefined. 
+// +// The format of the thread profile JSON is shown by the following example +// with 1 sample and 1 marker: +// +// { +// "name": "Foo", +// "tid": 42, +// "samples": +// { +// "schema": +// { +// "stack": 0, /* index into stackTable */ +// "time": 1, /* number */ +// "eventDelay": 2, /* number */ +// "ThreadCPUDelta": 3, /* optional number */ +// }, +// "data": +// [ +// [ 1, 0.0, 0.0 ] /* { stack: 1, time: 0.0, eventDelay: 0.0 } */ +// ] +// }, +// +// "markers": +// { +// "schema": +// { +// "name": 0, /* index into stringTable */ +// "startTime": 1, /* number */ +// "endTime": 2, /* number */ +// "phase": 3, +// "category": 4, +// "data": 5 /* arbitrary JSON */ +// }, +// "data": +// [ +// [ 3, 0.1 ] /* { name: 'example marker', startTime: 0.1 } */ +// ] +// }, +// +// "stackTable": +// { +// "schema": +// { +// "prefix": 0, /* index into stackTable */ +// "frame": 1 /* index into frameTable */ +// }, +// "data": +// [ +// [ null, 0 ], /* (root) */ +// [ 0, 1 ] /* (root) > foo.js */ +// ] +// }, +// +// "frameTable": +// { +// "schema": +// { +// "location": 0, /* index into stringTable */ +// "relevantForJS": 1, /* bool */ +// "innerWindowID": 2, /* inner window ID of global JS `window` object */ +// "implementation": 3, /* index into stringTable */ +// "optimizations": 4, /* arbitrary JSON */ +// "line": 5, /* number */ +// "column": 6, /* number */ +// "category": 7, /* index into profile.meta.categories */ +// "subcategory": 8 /* index into +// profile.meta.categories[category].subcategories */ +// }, +// "data": +// [ +// [ 0 ], /* { location: '(root)' } */ +// [ 1, null, null, 2 ] /* { location: 'foo.js', +// implementation: 'baseline' } */ +// ] +// }, +// +// "stringTable": +// [ +// "(root)", +// "foo.js", +// "baseline", +// "example marker" +// ] +// } +// +// Process: +// { +// "name": "Bar", +// "pid": 24, +// "threads": +// [ +// <0-N threads from above> +// ], +// "counters": /* includes the memory counter */ +// [ +// { +// "name": "qwerty", +// "category": "uiop", +// "description": "this is qwerty uiop", +// 
"sample_groups": +// [ +// { +// "id": 42, /* number (thread id, or object identifier (tab), etc) */ +// "samples": +// { +// "schema": +// { +// "time": 1, /* number */ +// "number": 2, /* number (of times the counter was touched) */ +// "count": 3 /* number (total for the counter) */ +// }, +// "data": +// [ +// [ 0.1, 1824, +// 454622 ] /* { time: 0.1, number: 1824, count: 454622 } */ +// ] +// }, +// }, +// /* more sample-group objects with different id's */ +// ] +// }, +// /* more counters */ +// ], +// } +// +#endif /* ndef ProfileBufferEntry_h */ diff --git a/tools/profiler/core/ProfiledThreadData.cpp b/tools/profiler/core/ProfiledThreadData.cpp new file mode 100644 index 0000000000..515b4a7f72 --- /dev/null +++ b/tools/profiler/core/ProfiledThreadData.cpp @@ -0,0 +1,456 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfiledThreadData.h" + +#include "platform.h" +#include "ProfileBuffer.h" + +#include "mozilla/OriginAttributes.h" +#include "mozilla/Span.h" +#include "nsXULAppAPI.h" + +#if defined(GP_OS_darwin) +# include <pthread.h> +#endif + +using namespace mozilla::literals::ProportionValue_literals; + +// Builds mThreadInfo from the name, thread id, main-thread flag and register +// time reported by aThreadInfo. +ProfiledThreadData::ProfiledThreadData( + const mozilla::profiler::ThreadRegistrationInfo& aThreadInfo) + : mThreadInfo(aThreadInfo.Name(), aThreadInfo.ThreadId(), + aThreadInfo.IsMainThread(), aThreadInfo.RegisterTime()) { + MOZ_COUNT_CTOR(ProfiledThreadData); +} + +// Moves aThreadInfo into mThreadInfo. +ProfiledThreadData::ProfiledThreadData( + mozilla::profiler::ThreadRegistrationInfo&& aThreadInfo) + : mThreadInfo(std::move(aThreadInfo)) { + MOZ_COUNT_CTOR(ProfiledThreadData); +} + +ProfiledThreadData::~ProfiledThreadData() { + MOZ_COUNT_DTOR(ProfiledThreadData); +} + +// Writes the "stackTable", "frameTable" and "stringTable" properties to +// aWriter, with their contents spliced out of aUniqueStacks. aCx and +// aProcessStartTime are not used by the body. +static void StreamTables(UniqueStacks&& aUniqueStacks, JSContext* aCx, + SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, + mozilla::ProgressLogger aProgressLogger) { + aWriter.StartObjectProperty("stackTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("prefix"); + schema.WriteField("frame"); + } + + aWriter.StartArrayProperty("data"); + { + aProgressLogger.SetLocalProgress(1_pc, "Splicing stack table..."); + aUniqueStacks.SpliceStackTableElements(aWriter); + aProgressLogger.SetLocalProgress(30_pc, "Spliced stack table"); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("frameTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("location"); + schema.WriteField("relevantForJS"); + schema.WriteField("innerWindowID"); + schema.WriteField("implementation"); + schema.WriteField("optimizations"); + schema.WriteField("line"); + schema.WriteField("column"); + schema.WriteField("category"); + schema.WriteField("subcategory"); + } + + aWriter.StartArrayProperty("data"); + { + aProgressLogger.SetLocalProgress(30_pc, "Splicing frame 
table..."); + aUniqueStacks.SpliceFrameTableElements(aWriter); + aProgressLogger.SetLocalProgress(60_pc, "Spliced frame table"); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartArrayProperty("stringTable"); + { + aProgressLogger.SetLocalProgress(60_pc, "Splicing string table..."); + std::move(aUniqueStacks.UniqueStrings()).SpliceStringTableElements(aWriter); + aProgressLogger.SetLocalProgress(90_pc, "Spliced string table"); + } + aWriter.EndArray(); +} + +mozilla::NotNull<mozilla::UniquePtr<UniqueStacks>> +ProfiledThreadData::PrepareUniqueStacks( + const ProfileBuffer& aBuffer, JSContext* aCx, + mozilla::FailureLatch& aFailureLatch, ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + if (mJITFrameInfoForPreviousJSContexts && + mJITFrameInfoForPreviousJSContexts->HasExpired( + aBuffer.BufferRangeStart())) { + mJITFrameInfoForPreviousJSContexts = nullptr; + } + aProgressLogger.SetLocalProgress(1_pc, "Checked JIT frame info presence"); + + // If we have an existing JITFrameInfo in mJITFrameInfoForPreviousJSContexts, + // copy the data from it. + JITFrameInfo jitFrameInfo = + mJITFrameInfoForPreviousJSContexts + ? 
JITFrameInfo(*mJITFrameInfoForPreviousJSContexts, + aProgressLogger.CreateSubLoggerTo( + "Retrieving JIT frame info...", 10_pc, + "Retrieved JIT frame info")) + : JITFrameInfo(); + + if (aCx && mBufferPositionWhenReceivedJSContext) { + aBuffer.AddJITInfoForRange( + *mBufferPositionWhenReceivedJSContext, mThreadInfo.ThreadId(), aCx, + jitFrameInfo, + aProgressLogger.CreateSubLoggerTo("Adding JIT info...", 90_pc, + "Added JIT info")); + } else { + aProgressLogger.SetLocalProgress(90_pc, "No JIT info"); + } + + return mozilla::MakeNotNull<mozilla::UniquePtr<UniqueStacks>>( + aFailureLatch, std::move(jitFrameInfo), aService); +} + +void ProfiledThreadData::StreamJSON( + const ProfileBuffer& aBuffer, JSContext* aCx, SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, double aSinceTime, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + mozilla::NotNull<mozilla::UniquePtr<UniqueStacks>> uniqueStacks = + PrepareUniqueStacks(aBuffer, aCx, aWriter.SourceFailureLatch(), aService, + aProgressLogger.CreateSubLoggerFromTo( + 0_pc, "Preparing unique stacks...", 10_pc, + "Prepared Unique stacks")); + + aWriter.SetUniqueStrings(uniqueStacks->UniqueStrings()); + + aWriter.Start(); + { + StreamSamplesAndMarkers( + mThreadInfo.Name(), mThreadInfo.ThreadId(), aBuffer, aWriter, + aProcessName, aETLDplus1, aProcessStartTime, mThreadInfo.RegisterTime(), + mUnregisterTime, aSinceTime, *uniqueStacks, + aProgressLogger.CreateSubLoggerTo( + 90_pc, + "ProfiledThreadData::StreamJSON: Streamed samples and markers")); + + StreamTables(std::move(*uniqueStacks), aCx, aWriter, aProcessStartTime, + aProgressLogger.CreateSubLoggerTo( + 99_pc, "Streamed tables and trace logger")); + } + aWriter.End(); + + aWriter.ResetUniqueStrings(); +} + +void ProfiledThreadData::StreamJSON( + ThreadStreamingContext&& aThreadStreamingContext, + SpliceableJSONWriter& aWriter, const nsACString& 
aProcessName, + const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + aWriter.Start(); + { + StreamSamplesAndMarkers( + mThreadInfo.Name(), aThreadStreamingContext, aWriter, aProcessName, + aETLDplus1, aProcessStartTime, mThreadInfo.RegisterTime(), + mUnregisterTime, + aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "ProfiledThreadData::StreamJSON(context): Streaming...", + 90_pc, + "ProfiledThreadData::StreamJSON(context): Streamed samples and " + "markers")); + + StreamTables( + std::move(*aThreadStreamingContext.mUniqueStacks), + aThreadStreamingContext.mJSContext, aWriter, aProcessStartTime, + aProgressLogger.CreateSubLoggerTo( + "ProfiledThreadData::StreamJSON(context): Streaming tables...", + 99_pc, "ProfiledThreadData::StreamJSON(context): Streamed tables")); + } + aWriter.End(); +} + +// StreamSamplesDataCallback: (ProgressLogger) -> ProfilerThreadId +// StreamMarkersDataCallback: (ProgressLogger) -> void +// Returns the ProfilerThreadId returned by StreamSamplesDataCallback, which +// should be the thread id of the last sample that was processed (if any; +// otherwise it is left unspecified). This is mostly useful when the caller +// doesn't know where the sample comes from, e.g., when it's a backtrace in a +// marker. 
+template <typename StreamSamplesDataCallback, + typename StreamMarkersDataCallback> +ProfilerThreadId DoStreamSamplesAndMarkers( + const char* aName, SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, + mozilla::ProgressLogger aProgressLogger, + StreamSamplesDataCallback&& aStreamSamplesDataCallback, + StreamMarkersDataCallback&& aStreamMarkersDataCallback) { + ProfilerThreadId processedThreadId; + + aWriter.StringProperty("processType", + mozilla::MakeStringSpan(XRE_GetProcessTypeString())); + + aWriter.StringProperty("name", mozilla::MakeStringSpan(aName)); + + // Use given process name (if any), unless we're the parent process. + if (XRE_IsParentProcess()) { + aWriter.StringProperty("processName", "Parent Process"); + } else if (!aProcessName.IsEmpty()) { + aWriter.StringProperty("processName", aProcessName); + } + if (!aETLDplus1.IsEmpty()) { + nsAutoCString originNoSuffix; + mozilla::OriginAttributes attrs; + if (!attrs.PopulateFromOrigin(aETLDplus1, originNoSuffix)) { + aWriter.StringProperty("eTLD+1", aETLDplus1); + } else { + aWriter.StringProperty("eTLD+1", originNoSuffix); + aWriter.BoolProperty("isPrivateBrowsing", attrs.mPrivateBrowsingId > 0); + aWriter.IntProperty("userContextId", attrs.mUserContextId); + } + } + + if (aRegisterTime) { + aWriter.DoubleProperty( + "registerTime", (aRegisterTime - aProcessStartTime).ToMilliseconds()); + } else { + aWriter.NullProperty("registerTime"); + } + + if (aUnregisterTime) { + aWriter.DoubleProperty( + "unregisterTime", + (aUnregisterTime - aProcessStartTime).ToMilliseconds()); + } else { + aWriter.NullProperty("unregisterTime"); + } + + aWriter.StartObjectProperty("samples"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("stack"); + schema.WriteField("time"); + schema.WriteField("eventDelay"); +#define 
RUNNING_TIME_FIELD(index, name, unit, jsonProperty) \ + schema.WriteField(#jsonProperty); + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_FIELD) +#undef RUNNING_TIME_FIELD + } + + aWriter.StartArrayProperty("data"); + { + processedThreadId = std::forward<StreamSamplesDataCallback>( + aStreamSamplesDataCallback)(aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "Streaming samples...", 49_pc, "Streamed samples")); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("markers"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("name"); + schema.WriteField("startTime"); + schema.WriteField("endTime"); + schema.WriteField("phase"); + schema.WriteField("category"); + schema.WriteField("data"); + } + + aWriter.StartArrayProperty("data"); + { + std::forward<StreamMarkersDataCallback>(aStreamMarkersDataCallback)( + aProgressLogger.CreateSubLoggerFromTo(50_pc, "Streaming markers...", + 99_pc, "Streamed markers")); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + // Tech note: If `ToNumber()` returns a uint64_t, the conversion to int64_t is + // "implementation-defined" before C++20. This is acceptable here, because + // this is a one-way conversion to a unique identifier that's used to visually + // separate data by thread on the front-end. 
+ aWriter.IntProperty( + "pid", static_cast<int64_t>(profiler_current_process_id().ToNumber())); + aWriter.IntProperty("tid", + static_cast<int64_t>(processedThreadId.ToNumber())); + + return processedThreadId; +} + +ProfilerThreadId StreamSamplesAndMarkers( + const char* aName, ProfilerThreadId aThreadId, const ProfileBuffer& aBuffer, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, double aSinceTime, + UniqueStacks& aUniqueStacks, mozilla::ProgressLogger aProgressLogger) { + return DoStreamSamplesAndMarkers( + aName, aWriter, aProcessName, aETLDplus1, aProcessStartTime, + aRegisterTime, aUnregisterTime, std::move(aProgressLogger), + [&](mozilla::ProgressLogger aSubProgressLogger) { + ProfilerThreadId processedThreadId = aBuffer.StreamSamplesToJSON( + aWriter, aThreadId, aSinceTime, aUniqueStacks, + std::move(aSubProgressLogger)); + return aThreadId.IsSpecified() ? 
aThreadId : processedThreadId; + }, + [&](mozilla::ProgressLogger aSubProgressLogger) { + aBuffer.StreamMarkersToJSON(aWriter, aThreadId, aProcessStartTime, + aSinceTime, aUniqueStacks, + std::move(aSubProgressLogger)); + }); +} + +void StreamSamplesAndMarkers(const char* aName, + ThreadStreamingContext& aThreadData, + SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, + mozilla::ProgressLogger aProgressLogger) { + (void)DoStreamSamplesAndMarkers( + aName, aWriter, aProcessName, aETLDplus1, aProcessStartTime, + aRegisterTime, aUnregisterTime, std::move(aProgressLogger), + [&](mozilla::ProgressLogger aSubProgressLogger) { + aWriter.TakeAndSplice( + aThreadData.mSamplesDataWriter.TakeChunkedWriteFunc()); + return aThreadData.mProfiledThreadData.Info().ThreadId(); + }, + [&](mozilla::ProgressLogger aSubProgressLogger) { + aWriter.TakeAndSplice( + aThreadData.mMarkersDataWriter.TakeChunkedWriteFunc()); + }); +} + +void ProfiledThreadData::NotifyAboutToLoseJSContext( + JSContext* aContext, const mozilla::TimeStamp& aProcessStartTime, + ProfileBuffer& aBuffer) { + if (!mBufferPositionWhenReceivedJSContext) { + return; + } + + MOZ_RELEASE_ASSERT(aContext); + + if (mJITFrameInfoForPreviousJSContexts && + mJITFrameInfoForPreviousJSContexts->HasExpired( + aBuffer.BufferRangeStart())) { + mJITFrameInfoForPreviousJSContexts = nullptr; + } + + mozilla::UniquePtr<JITFrameInfo> jitFrameInfo = + mJITFrameInfoForPreviousJSContexts + ? 
std::move(mJITFrameInfoForPreviousJSContexts) + : mozilla::MakeUnique<JITFrameInfo>(); + + aBuffer.AddJITInfoForRange(*mBufferPositionWhenReceivedJSContext, + mThreadInfo.ThreadId(), aContext, *jitFrameInfo, + mozilla::ProgressLogger{}); + + mJITFrameInfoForPreviousJSContexts = std::move(jitFrameInfo); + mBufferPositionWhenReceivedJSContext = mozilla::Nothing(); +} + +// Prepares the unique stacks for aProfiledThreadData and, unless aFailureLatch +// has already failed, attaches their unique strings to the samples and markers +// writers and starts a bare list on each. +ThreadStreamingContext::ThreadStreamingContext( + ProfiledThreadData& aProfiledThreadData, const ProfileBuffer& aBuffer, + JSContext* aCx, mozilla::FailureLatch& aFailureLatch, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) + : mProfiledThreadData(aProfiledThreadData), + mJSContext(aCx), + mSamplesDataWriter(aFailureLatch), + mMarkersDataWriter(aFailureLatch), + mUniqueStacks(mProfiledThreadData.PrepareUniqueStacks( + aBuffer, aCx, aFailureLatch, aService, + aProgressLogger.CreateSubLoggerFromTo( + 0_pc, "Preparing thread streaming context unique stacks...", + 99_pc, "Prepared thread streaming context Unique stacks"))) { + // Leave both writers untouched once a failure has been recorded. + if (aFailureLatch.Failed()) { + return; + } + mSamplesDataWriter.SetUniqueStrings(mUniqueStacks->UniqueStrings()); + mSamplesDataWriter.StartBareList(); + mMarkersDataWriter.SetUniqueStrings(mUniqueStacks->UniqueStrings()); + mMarkersDataWriter.StartBareList(); +} + +// Ends the bare lists of the samples and markers writers. +void ThreadStreamingContext::FinalizeWriter() { + mSamplesDataWriter.EndBareList(); + mMarkersDataWriter.EndBareList(); +} + +// Reserves capacity for aThreadCount entries in mTIDList and in +// mThreadStreamingContextList, recording a failure in mFailureLatch if either +// reservation fails. Does nothing if mFailureLatch has already failed. +ProcessStreamingContext::ProcessStreamingContext( + size_t aThreadCount, mozilla::FailureLatch& aFailureLatch, + const mozilla::TimeStamp& aProcessStartTime, double aSinceTime) + : mFailureLatch(aFailureLatch), + mProcessStartTime(aProcessStartTime), + mSinceTime(aSinceTime) { + if (mFailureLatch.Failed()) { + return; + } + if (!mTIDList.initCapacity(aThreadCount)) { + mFailureLatch.SetFailure( + 
"OOM in ProcessStreamingContext allocating context list"); + mTIDList.clear(); + return; + } +} + +ProcessStreamingContext::~ProcessStreamingContext() { + if (mFailureLatch.Failed()) { + return; + } + MOZ_ASSERT(mTIDList.length() == mThreadStreamingContextList.length()); + MOZ_ASSERT(mTIDList.length() == mTIDList.capacity(), + "Didn't pre-allocate exactly right"); +} + +void ProcessStreamingContext::AddThreadStreamingContext( + ProfiledThreadData& aProfiledThreadData, const ProfileBuffer& aBuffer, + JSContext* aCx, ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + if (mFailureLatch.Failed()) { + return; + } + MOZ_ASSERT(mTIDList.length() == mThreadStreamingContextList.length()); + MOZ_ASSERT(mTIDList.length() < mTIDList.capacity(), + "Didn't pre-allocate enough"); + mTIDList.infallibleAppend(aProfiledThreadData.Info().ThreadId()); + mThreadStreamingContextList.infallibleEmplaceBack( + aProfiledThreadData, aBuffer, aCx, mFailureLatch, aService, + aProgressLogger.CreateSubLoggerFromTo( + 1_pc, "Prepared streaming thread id", 100_pc, + "Added thread streaming context")); +} diff --git a/tools/profiler/core/ProfiledThreadData.h b/tools/profiler/core/ProfiledThreadData.h new file mode 100644 index 0000000000..47ae0c579c --- /dev/null +++ b/tools/profiler/core/ProfiledThreadData.h @@ -0,0 +1,250 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef ProfiledThreadData_h +#define ProfiledThreadData_h + +#include "platform.h" +#include "ProfileBuffer.h" +#include "ProfileBufferEntry.h" + +#include "mozilla/FailureLatch.h" +#include "mozilla/Maybe.h" +#include "mozilla/NotNull.h" +#include "mozilla/ProfileJSONWriter.h" +#include "mozilla/ProfilerThreadRegistrationInfo.h" +#include "mozilla/RefPtr.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "nsStringFwd.h" + +class nsIEventTarget; +class ProfilerCodeAddressService; +struct JSContext; +struct ThreadStreamingContext; + +// This class contains information about a thread that is only relevant while +// the profiler is running, for any threads (both alive and dead) whose thread +// name matches the "thread filter" in the current profiler run. +// ProfiledThreadData objects may be kept alive even after the thread is +// unregistered, as long as there is still data for that thread in the profiler +// buffer. +// +// Accesses to this class are protected by the profiler state lock. +// +// Created as soon as the following are true for the thread: +// - The profiler is running, and +// - the thread matches the profiler's thread filter, and +// - the thread is registered with the profiler. +// So it gets created in response to either (1) the profiler being started (for +// an existing registered thread) or (2) the thread being registered (if the +// profiler is already running). +// +// The thread may be unregistered during the lifetime of ProfiledThreadData. +// If that happens, NotifyUnregistered() is called. +// +// This class is the right place to store buffer positions. Profiler buffer +// positions become invalid if the profiler buffer is destroyed, which happens +// when the profiler is stopped. 
+class ProfiledThreadData final { + public: + explicit ProfiledThreadData( + const mozilla::profiler::ThreadRegistrationInfo& aThreadInfo); + explicit ProfiledThreadData( + mozilla::profiler::ThreadRegistrationInfo&& aThreadInfo); + ~ProfiledThreadData(); + + void NotifyUnregistered(uint64_t aBufferPosition) { + mLastSample = mozilla::Nothing(); + MOZ_ASSERT(!mBufferPositionWhenReceivedJSContext, + "JSContext should have been cleared before the thread was " + "unregistered"); + mUnregisterTime = mozilla::TimeStamp::Now(); + mBufferPositionWhenUnregistered = mozilla::Some(aBufferPosition); + mPreviousThreadRunningTimes.Clear(); + } + mozilla::Maybe<uint64_t> BufferPositionWhenUnregistered() { + return mBufferPositionWhenUnregistered; + } + + mozilla::Maybe<uint64_t>& LastSample() { return mLastSample; } + + mozilla::NotNull<mozilla::UniquePtr<UniqueStacks>> PrepareUniqueStacks( + const ProfileBuffer& aBuffer, JSContext* aCx, + mozilla::FailureLatch& aFailureLatch, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + void StreamJSON(const ProfileBuffer& aBuffer, JSContext* aCx, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime, ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + void StreamJSON(ThreadStreamingContext&& aThreadStreamingContext, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + const mozilla::profiler::ThreadRegistrationInfo& Info() const { + return mThreadInfo; + } + + void NotifyReceivedJSContext(uint64_t aCurrentBufferPosition) { + mBufferPositionWhenReceivedJSContext = + mozilla::Some(aCurrentBufferPosition); + } + + // Call this method when the JS entries inside the buffer are about to + // 
become invalid, i.e., just before JS shutdown. + void NotifyAboutToLoseJSContext(JSContext* aCx, + const mozilla::TimeStamp& aProcessStartTime, + ProfileBuffer& aBuffer); + + RunningTimes& PreviousThreadRunningTimesRef() { + return mPreviousThreadRunningTimes; + } + + private: + // Group A: + // The following fields are interesting for the entire lifetime of a + // ProfiledThreadData object. + + // This thread's thread info. Local copy because the one in ThreadRegistration + // may be destroyed while ProfiledThreadData stays alive. + const mozilla::profiler::ThreadRegistrationInfo mThreadInfo; + + // Contains JSON for JIT frames from any JSContexts that were used for this + // thread in the past. + // Null if this thread has never lost a JSContext or if all samples from + // previous JSContexts have been evicted from the profiler buffer. + mozilla::UniquePtr<JITFrameInfo> mJITFrameInfoForPreviousJSContexts; + + // Group B: + // The following fields are only used while this thread is alive and + // registered. They become Nothing() or empty once the thread is unregistered. + + // When sampling, this holds the position in ActivePS::mBuffer of the most + // recent sample for this thread, or Nothing() if there is no sample for this + // thread in the buffer. + mozilla::Maybe<uint64_t> mLastSample; + + // Only non-Nothing() if the thread currently has a JSContext. + mozilla::Maybe<uint64_t> mBufferPositionWhenReceivedJSContext; + + // RunningTimes at the previous sample if any, or empty. + RunningTimes mPreviousThreadRunningTimes; + + // Group C: + // The following fields are only used once this thread has been unregistered. + + mozilla::Maybe<uint64_t> mBufferPositionWhenUnregistered; + mozilla::TimeStamp mUnregisterTime; +}; + +// This class will be used when outputting the profile data for one thread. 
+struct ThreadStreamingContext { + ProfiledThreadData& mProfiledThreadData; + JSContext* mJSContext; + SpliceableChunkedJSONWriter mSamplesDataWriter; + SpliceableChunkedJSONWriter mMarkersDataWriter; + mozilla::NotNull<mozilla::UniquePtr<UniqueStacks>> mUniqueStacks; + + // These are updated when writing samples, and reused for "same-sample"s. + enum PreviousStackState { eNoStackYet, eStackWasNotEmpty, eStackWasEmpty }; + PreviousStackState mPreviousStackState = eNoStackYet; + uint32_t mPreviousStack = 0; + + ThreadStreamingContext(ProfiledThreadData& aProfiledThreadData, + const ProfileBuffer& aBuffer, JSContext* aCx, + mozilla::FailureLatch& aFailureLatch, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + void FinalizeWriter(); +}; + +// This class will be used when outputting the profile data for all threads. +class ProcessStreamingContext final : public mozilla::FailureLatch { + public: + // Pre-allocate space for `aThreadCount` threads. + ProcessStreamingContext(size_t aThreadCount, + mozilla::FailureLatch& aFailureLatch, + const mozilla::TimeStamp& aProcessStartTime, + double aSinceTime); + + ~ProcessStreamingContext(); + + // Add the streaming context corresponding to each profiled thread. This + // should be called exactly the number of times specified in the constructor. + void AddThreadStreamingContext(ProfiledThreadData& aProfiledThreadData, + const ProfileBuffer& aBuffer, JSContext* aCx, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger); + + // Retrieve the ThreadStreamingContext for a given thread id. + // Returns null if that thread id doesn't correspond to any profiled thread. 
+ ThreadStreamingContext* GetThreadStreamingContext( + const ProfilerThreadId& aThreadId) { + for (size_t i = 0; i < mTIDList.length(); ++i) { + if (mTIDList[i] == aThreadId) { + return &mThreadStreamingContextList[i]; + } + } + return nullptr; + } + + const mozilla::TimeStamp& ProcessStartTime() const { + return mProcessStartTime; + } + + double GetSinceTime() const { return mSinceTime; } + + ThreadStreamingContext* begin() { + return mThreadStreamingContextList.begin(); + }; + ThreadStreamingContext* end() { return mThreadStreamingContextList.end(); }; + + FAILURELATCH_IMPL_PROXY(mFailureLatch) + + private: + // Separate list of thread ids, it's much faster to do a linear search + // here than a vector of bigger items like mThreadStreamingContextList. + mozilla::Vector<ProfilerThreadId> mTIDList; + // Contexts corresponding to the thread id at the same indexes. + mozilla::Vector<ThreadStreamingContext> mThreadStreamingContextList; + + mozilla::FailureLatch& mFailureLatch; + + const mozilla::TimeStamp mProcessStartTime; + + const double mSinceTime; +}; + +// Stream all samples and markers from aBuffer with the given aThreadId (or 0 +// for everything, which is assumed to be a single backtrace sample.) +// Returns the thread id of the output sample(s), or 0 if none was present. 
+ProfilerThreadId StreamSamplesAndMarkers( + const char* aName, ProfilerThreadId aThreadId, const ProfileBuffer& aBuffer, + SpliceableJSONWriter& aWriter, const nsACString& aProcessName, + const nsACString& aETLDplus1, const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, double aSinceTime, + UniqueStacks& aUniqueStacks, mozilla::ProgressLogger aProgressLogger); +void StreamSamplesAndMarkers(const char* aName, + ThreadStreamingContext& aThreadData, + SpliceableJSONWriter& aWriter, + const nsACString& aProcessName, + const nsACString& aETLDplus1, + const mozilla::TimeStamp& aProcessStartTime, + const mozilla::TimeStamp& aRegisterTime, + const mozilla::TimeStamp& aUnregisterTime, + mozilla::ProgressLogger aProgressLogger); + +#endif // ProfiledThreadData_h diff --git a/tools/profiler/core/ProfilerBacktrace.cpp b/tools/profiler/core/ProfilerBacktrace.cpp new file mode 100644 index 0000000000..a264d85d64 --- /dev/null +++ b/tools/profiler/core/ProfilerBacktrace.cpp @@ -0,0 +1,101 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ProfilerBacktrace.h" + +#include "ProfileBuffer.h" +#include "ProfiledThreadData.h" + +#include "mozilla/ProfileJSONWriter.h" + +ProfilerBacktrace::ProfilerBacktrace( + const char* aName, + mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> + aProfileChunkedBufferStorage, + mozilla::UniquePtr<ProfileBuffer> + aProfileBufferStorageOrNull /* = nullptr */) + : mName(aName), + mOptionalProfileChunkedBufferStorage( + std::move(aProfileChunkedBufferStorage)), + mProfileChunkedBuffer(mOptionalProfileChunkedBufferStorage.get()), + mOptionalProfileBufferStorage(std::move(aProfileBufferStorageOrNull)), + mProfileBuffer(mOptionalProfileBufferStorage.get()) { + MOZ_COUNT_CTOR(ProfilerBacktrace); + if (mProfileBuffer) { + MOZ_RELEASE_ASSERT(mProfileChunkedBuffer, + "If we take ownership of a ProfileBuffer, we must also " + "receive ownership of a ProfileChunkedBuffer"); + MOZ_RELEASE_ASSERT( + mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(), + "If we take ownership of a ProfileBuffer, we must also receive " + "ownership of its ProfileChunkedBuffer"); + } + MOZ_ASSERT( + !mProfileChunkedBuffer || !mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe ProfileChunkedBuffer"); +} + +ProfilerBacktrace::ProfilerBacktrace( + const char* aName, + mozilla::ProfileChunkedBuffer* aExternalProfileChunkedBuffer, + ProfileBuffer* aExternalProfileBuffer) + : mName(aName), + mProfileChunkedBuffer(aExternalProfileChunkedBuffer), + mProfileBuffer(aExternalProfileBuffer) { + MOZ_COUNT_CTOR(ProfilerBacktrace); + if (!mProfileChunkedBuffer) { + if (mProfileBuffer) { + // We don't have a ProfileChunkedBuffer but we have a ProfileBuffer, use + // the latter's ProfileChunkedBuffer. 
+ mProfileChunkedBuffer = &mProfileBuffer->UnderlyingChunkedBuffer(); + MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe " + "ProfileChunkedBuffer"); + } + } else { + if (mProfileBuffer) { + MOZ_RELEASE_ASSERT( + mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(), + "If we reference both ProfileChunkedBuffer and ProfileBuffer, they " + "must already be connected"); + } + MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only takes a non-thread-safe " + "ProfileChunkedBuffer"); + } +} + +ProfilerBacktrace::~ProfilerBacktrace() { MOZ_COUNT_DTOR(ProfilerBacktrace); } + +ProfilerThreadId ProfilerBacktrace::StreamJSON( + SpliceableJSONWriter& aWriter, const mozilla::TimeStamp& aProcessStartTime, + UniqueStacks& aUniqueStacks) { + ProfilerThreadId processedThreadId; + + // Unlike ProfiledThreadData::StreamJSON, we don't need to call + // ProfileBuffer::AddJITInfoForRange because ProfileBuffer does not contain + // any JitReturnAddr entries. For synchronous samples, JIT frames get expanded + // at sample time. + if (mProfileBuffer) { + processedThreadId = StreamSamplesAndMarkers( + mName.c_str(), ProfilerThreadId{}, *mProfileBuffer, aWriter, ""_ns, + ""_ns, aProcessStartTime, + /* aRegisterTime */ mozilla::TimeStamp(), + /* aUnregisterTime */ mozilla::TimeStamp(), + /* aSinceTime */ 0, aUniqueStacks, mozilla::ProgressLogger{}); + } else if (mProfileChunkedBuffer) { + ProfileBuffer profileBuffer(*mProfileChunkedBuffer); + processedThreadId = StreamSamplesAndMarkers( + mName.c_str(), ProfilerThreadId{}, profileBuffer, aWriter, ""_ns, ""_ns, + aProcessStartTime, + /* aRegisterTime */ mozilla::TimeStamp(), + /* aUnregisterTime */ mozilla::TimeStamp(), + /* aSinceTime */ 0, aUniqueStacks, mozilla::ProgressLogger{}); + } + // If there are no buffers, the backtrace is empty and nothing is streamed. 
+ + return processedThreadId; +} diff --git a/tools/profiler/core/ProfilerBacktrace.h b/tools/profiler/core/ProfilerBacktrace.h new file mode 100644 index 0000000000..55811f4422 --- /dev/null +++ b/tools/profiler/core/ProfilerBacktrace.h @@ -0,0 +1,184 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __PROFILER_BACKTRACE_H +#define __PROFILER_BACKTRACE_H + +#include "ProfileBuffer.h" + +#include "mozilla/ProfileBufferEntrySerialization.h" +#include "mozilla/UniquePtrExtensions.h" + +#include <string> + +class ProfileBuffer; +class ProfilerCodeAddressService; +class ThreadInfo; +class UniqueStacks; + +namespace mozilla { +class ProfileChunkedBuffer; +class TimeStamp; +namespace baseprofiler { +class SpliceableJSONWriter; +} // namespace baseprofiler +} // namespace mozilla + +// ProfilerBacktrace encapsulates a synchronous sample. +// It can work with a ProfileBuffer and/or a ProfileChunkedBuffer (if both, they +// must already be linked together). The ProfileChunkedBuffer contains all the +// data; the ProfileBuffer is not strictly needed, only provide it if it is +// already available at the call site. +// And these buffers can either be: +// - owned here, so that the ProfilerBacktrace object can be kept for later +// use), OR +// - referenced through pointers (in cases where the backtrace is immediately +// streamed out, so we only need temporary references to external buffers); +// these pointers may be null for empty backtraces. +class ProfilerBacktrace { + public: + // Take ownership of external buffers and use them to keep, and to stream a + // backtrace. If a ProfileBuffer is given, its underlying chunked buffer must + // be provided as well. 
+ explicit ProfilerBacktrace( + const char* aName, + mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> + aProfileChunkedBufferStorage, + mozilla::UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull = nullptr); + + // Take pointers to external buffers and use them to stream a backtrace. + // If null, the backtrace is effectively empty. + // If both are provided, they must already be connected. + explicit ProfilerBacktrace( + const char* aName, + mozilla::ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull = + nullptr, + ProfileBuffer* aExternalProfileBufferOrNull = nullptr); + + ~ProfilerBacktrace(); + + [[nodiscard]] bool IsEmpty() const { + return !mProfileChunkedBuffer || + mozilla::ProfileBufferEntryWriter::Serializer< + mozilla::ProfileChunkedBuffer>::Bytes(*mProfileChunkedBuffer) <= + mozilla::ULEB128Size(0u); + } + + // ProfilerBacktraces' stacks are deduplicated in the context of the + // profile that contains the backtrace as a marker payload. + // + // That is, markers that contain backtraces should not need their own stack, + // frame, and string tables. They should instead reuse their parent + // profile's tables. + ProfilerThreadId StreamJSON( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter, + const mozilla::TimeStamp& aProcessStartTime, UniqueStacks& aUniqueStacks); + + private: + // Used to serialize a ProfilerBacktrace. + friend struct mozilla::ProfileBufferEntryWriter::Serializer< + ProfilerBacktrace>; + friend struct mozilla::ProfileBufferEntryReader::Deserializer< + ProfilerBacktrace>; + + std::string mName; + + // `ProfileChunkedBuffer` in which `mProfileBuffer` stores its data; must be + // located before `mProfileBuffer` so that it's destroyed after. + mozilla::UniquePtr<mozilla::ProfileChunkedBuffer> + mOptionalProfileChunkedBufferStorage; + // If null, there is no need to check mProfileBuffer's (if present) underlying + // buffer because this is done when constructed. 
+ mozilla::ProfileChunkedBuffer* mProfileChunkedBuffer; + + mozilla::UniquePtr<ProfileBuffer> mOptionalProfileBufferStorage; + ProfileBuffer* mProfileBuffer; +}; + +namespace mozilla { + +// Format: [ UniquePtr<BlockRingsBuffer> | name ] +// Initial len==0 marks a nullptr or empty backtrace. +template <> +struct mozilla::ProfileBufferEntryWriter::Serializer<ProfilerBacktrace> { + static Length Bytes(const ProfilerBacktrace& aBacktrace) { + if (!aBacktrace.mProfileChunkedBuffer) { + // No buffer. + return ULEB128Size(0u); + } + auto bufferBytes = SumBytes(*aBacktrace.mProfileChunkedBuffer); + if (bufferBytes <= ULEB128Size(0u)) { + // Empty buffer. + return ULEB128Size(0u); + } + return bufferBytes + SumBytes(aBacktrace.mName); + } + + static void Write(mozilla::ProfileBufferEntryWriter& aEW, + const ProfilerBacktrace& aBacktrace) { + if (!aBacktrace.mProfileChunkedBuffer || + SumBytes(*aBacktrace.mProfileChunkedBuffer) <= ULEB128Size(0u)) { + // No buffer, or empty buffer. + aEW.WriteULEB128(0u); + return; + } + aEW.WriteObject(*aBacktrace.mProfileChunkedBuffer); + aEW.WriteObject(aBacktrace.mName); + } +}; + +template <typename Destructor> +struct mozilla::ProfileBufferEntryWriter::Serializer< + mozilla::UniquePtr<ProfilerBacktrace, Destructor>> { + static Length Bytes( + const mozilla::UniquePtr<ProfilerBacktrace, Destructor>& aBacktrace) { + if (!aBacktrace) { + // Null backtrace pointer (treated like an empty backtrace). + return ULEB128Size(0u); + } + return SumBytes(*aBacktrace); + } + + static void Write( + mozilla::ProfileBufferEntryWriter& aEW, + const mozilla::UniquePtr<ProfilerBacktrace, Destructor>& aBacktrace) { + if (!aBacktrace) { + // Null backtrace pointer (treated like an empty backtrace). 
+ aEW.WriteULEB128(0u); + return; + } + aEW.WriteObject(*aBacktrace); + } +}; + +template <typename Destructor> +struct mozilla::ProfileBufferEntryReader::Deserializer< + mozilla::UniquePtr<ProfilerBacktrace, Destructor>> { + static void ReadInto( + mozilla::ProfileBufferEntryReader& aER, + mozilla::UniquePtr<ProfilerBacktrace, Destructor>& aBacktrace) { + aBacktrace = Read(aER); + } + + static mozilla::UniquePtr<ProfilerBacktrace, Destructor> Read( + mozilla::ProfileBufferEntryReader& aER) { + auto profileChunkedBuffer = + aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>(); + if (!profileChunkedBuffer) { + return nullptr; + } + MOZ_ASSERT( + !profileChunkedBuffer->IsThreadSafe(), + "ProfilerBacktrace only stores non-thread-safe ProfileChunkedBuffers"); + std::string name = aER.ReadObject<std::string>(); + return UniquePtr<ProfilerBacktrace, Destructor>{ + new ProfilerBacktrace(name.c_str(), std::move(profileChunkedBuffer))}; + } +}; + +} // namespace mozilla + +#endif // __PROFILER_BACKTRACE_H diff --git a/tools/profiler/core/ProfilerBindings.cpp b/tools/profiler/core/ProfilerBindings.cpp new file mode 100644 index 0000000000..280580e80b --- /dev/null +++ b/tools/profiler/core/ProfilerBindings.cpp @@ -0,0 +1,384 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* FFI functions for Profiler Rust API to call into profiler */ + +#include "ProfilerBindings.h" + +#include "GeckoProfiler.h" + +#include <set> +#include <type_traits> + +void gecko_profiler_register_thread(const char* aName) { + PROFILER_REGISTER_THREAD(aName); +} + +void gecko_profiler_unregister_thread() { PROFILER_UNREGISTER_THREAD(); } + +void gecko_profiler_construct_label(mozilla::AutoProfilerLabel* aAutoLabel, + JS::ProfilingCategoryPair aCategoryPair) { +#ifdef MOZ_GECKO_PROFILER + new (aAutoLabel) mozilla::AutoProfilerLabel( + "", nullptr, aCategoryPair, + uint32_t( + js::ProfilingStackFrame::Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)); +#endif +} + +void gecko_profiler_destruct_label(mozilla::AutoProfilerLabel* aAutoLabel) { +#ifdef MOZ_GECKO_PROFILER + aAutoLabel->~AutoProfilerLabel(); +#endif +} + +void gecko_profiler_construct_timestamp_now(mozilla::TimeStamp* aTimeStamp) { + new (aTimeStamp) mozilla::TimeStamp(mozilla::TimeStamp::Now()); +} + +void gecko_profiler_clone_timestamp(const mozilla::TimeStamp* aSrcTimeStamp, + mozilla::TimeStamp* aDestTimeStamp) { + new (aDestTimeStamp) mozilla::TimeStamp(*aSrcTimeStamp); +} + +void gecko_profiler_destruct_timestamp(mozilla::TimeStamp* aTimeStamp) { + aTimeStamp->~TimeStamp(); +} + +void gecko_profiler_add_timestamp(const mozilla::TimeStamp* aTimeStamp, + mozilla::TimeStamp* aDestTimeStamp, + double aMicroseconds) { + new (aDestTimeStamp) mozilla::TimeStamp( + *aTimeStamp + mozilla::TimeDuration::FromMicroseconds(aMicroseconds)); +} + +void gecko_profiler_subtract_timestamp(const mozilla::TimeStamp* aTimeStamp, + mozilla::TimeStamp* aDestTimeStamp, + double aMicroseconds) { + new (aDestTimeStamp) mozilla::TimeStamp( + *aTimeStamp - mozilla::TimeDuration::FromMicroseconds(aMicroseconds)); +} + +void gecko_profiler_construct_marker_timing_instant_at( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) { +#ifdef MOZ_GECKO_PROFILER + 
static_assert(std::is_trivially_copyable_v<mozilla::MarkerTiming>); + mozilla::MarkerTiming::UnsafeConstruct(aMarkerTiming, *aTime, + mozilla::TimeStamp{}, + mozilla::MarkerTiming::Phase::Instant); +#endif +} + +void gecko_profiler_construct_marker_timing_instant_now( + mozilla::MarkerTiming* aMarkerTiming) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v<mozilla::MarkerTiming>); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, mozilla::TimeStamp::Now(), mozilla::TimeStamp{}, + mozilla::MarkerTiming::Phase::Instant); +#endif +} + +void gecko_profiler_construct_marker_timing_interval( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aStartTime, + const mozilla::TimeStamp* aEndTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v<mozilla::MarkerTiming>); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, *aStartTime, *aEndTime, + mozilla::MarkerTiming::Phase::Interval); +#endif +} + +void gecko_profiler_construct_marker_timing_interval_until_now_from( + mozilla::MarkerTiming* aMarkerTiming, + const mozilla::TimeStamp* aStartTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v<mozilla::MarkerTiming>); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, *aStartTime, mozilla::TimeStamp::Now(), + mozilla::MarkerTiming::Phase::Interval); +#endif +} + +void gecko_profiler_construct_marker_timing_interval_start( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v<mozilla::MarkerTiming>); + mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, *aTime, mozilla::TimeStamp{}, + mozilla::MarkerTiming::Phase::IntervalStart); +#endif +} + +void gecko_profiler_construct_marker_timing_interval_end( + mozilla::MarkerTiming* aMarkerTiming, const mozilla::TimeStamp* aTime) { +#ifdef MOZ_GECKO_PROFILER + static_assert(std::is_trivially_copyable_v<mozilla::MarkerTiming>); + 
mozilla::MarkerTiming::UnsafeConstruct( + aMarkerTiming, mozilla::TimeStamp{}, *aTime, + mozilla::MarkerTiming::Phase::IntervalEnd); +#endif +} + +void gecko_profiler_destruct_marker_timing( + mozilla::MarkerTiming* aMarkerTiming) { +#ifdef MOZ_GECKO_PROFILER + aMarkerTiming->~MarkerTiming(); +#endif +} + +void gecko_profiler_construct_marker_schema( + mozilla::MarkerSchema* aMarkerSchema, + const mozilla::MarkerSchema::Location* aLocations, size_t aLength) { +#ifdef MOZ_GECKO_PROFILER + new (aMarkerSchema) mozilla::MarkerSchema(aLocations, aLength); +#endif +} + +void gecko_profiler_construct_marker_schema_with_special_front_end_location( + mozilla::MarkerSchema* aMarkerSchema) { +#ifdef MOZ_GECKO_PROFILER + new (aMarkerSchema) + mozilla::MarkerSchema(mozilla::MarkerSchema::SpecialFrontendLocation{}); +#endif +} + +void gecko_profiler_destruct_marker_schema( + mozilla::MarkerSchema* aMarkerSchema) { +#ifdef MOZ_GECKO_PROFILER + aMarkerSchema->~MarkerSchema(); +#endif +} + +void gecko_profiler_marker_schema_set_chart_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetChartLabel(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_set_tooltip_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetTooltipLabel(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_set_table_label( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetTableLabel(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_set_all_labels(mozilla::MarkerSchema* aSchema, + const char* aLabel, + size_t aLabelLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->SetAllLabels(std::string(aLabel, aLabelLength)); +#endif +} + +void gecko_profiler_marker_schema_add_key_format( + mozilla::MarkerSchema* aSchema, 
const char* aKey, size_t aKeyLength, + mozilla::MarkerSchema::Format aFormat) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyFormat(std::string(aKey, aKeyLength), aFormat); +#endif +} + +void gecko_profiler_marker_schema_add_key_label_format( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + const char* aLabel, size_t aLabelLength, + mozilla::MarkerSchema::Format aFormat) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyLabelFormat(std::string(aKey, aKeyLength), + std::string(aLabel, aLabelLength), aFormat); +#endif +} + +void gecko_profiler_marker_schema_add_key_format_searchable( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + mozilla::MarkerSchema::Format aFormat, + mozilla::MarkerSchema::Searchable aSearchable) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyFormatSearchable(std::string(aKey, aKeyLength), aFormat, + aSearchable); +#endif +} + +void gecko_profiler_marker_schema_add_key_label_format_searchable( + mozilla::MarkerSchema* aSchema, const char* aKey, size_t aKeyLength, + const char* aLabel, size_t aLabelLength, + mozilla::MarkerSchema::Format aFormat, + mozilla::MarkerSchema::Searchable aSearchable) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddKeyLabelFormatSearchable(std::string(aKey, aKeyLength), + std::string(aLabel, aLabelLength), + aFormat, aSearchable); +#endif +} + +void gecko_profiler_marker_schema_add_static_label_value( + mozilla::MarkerSchema* aSchema, const char* aLabel, size_t aLabelLength, + const char* aValue, size_t aValueLength) { +#ifdef MOZ_GECKO_PROFILER + aSchema->AddStaticLabelValue(std::string(aLabel, aLabelLength), + std::string(aValue, aValueLength)); +#endif +} + +void gecko_profiler_marker_schema_stream( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, mozilla::MarkerSchema* aMarkerSchema, + void* aStreamedNamesSet) { +#ifdef MOZ_GECKO_PROFILER + auto* streamedNames = static_cast<std::set<std::string>*>(aStreamedNamesSet); + // 
std::set.insert(T&&) returns a pair, its `second` is true if the element + // was actually inserted (i.e., it was not there yet.). + const bool didInsert = + streamedNames->insert(std::string(aName, aNameLength)).second; + if (didInsert) { + std::move(*aMarkerSchema) + .Stream(*aWriter, mozilla::Span(aName, aNameLength)); + } +#endif +} + +void gecko_profiler_json_writer_int_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, int64_t aValue) { +#ifdef MOZ_GECKO_PROFILER + aWriter->IntProperty(mozilla::Span(aName, aNameLength), aValue); +#endif +} + +void gecko_profiler_json_writer_float_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, double aValue) { +#ifdef MOZ_GECKO_PROFILER + aWriter->DoubleProperty(mozilla::Span(aName, aNameLength), aValue); +#endif +} + +void gecko_profiler_json_writer_bool_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, bool aValue) { +#ifdef MOZ_GECKO_PROFILER + aWriter->BoolProperty(mozilla::Span(aName, aNameLength), aValue); +#endif +} +void gecko_profiler_json_writer_string_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength, const char* aValue, size_t aValueLength) { +#ifdef MOZ_GECKO_PROFILER + aWriter->StringProperty(mozilla::Span(aName, aNameLength), + mozilla::Span(aValue, aValueLength)); +#endif +} + +void gecko_profiler_json_writer_null_property( + mozilla::baseprofiler::SpliceableJSONWriter* aWriter, const char* aName, + size_t aNameLength) { +#ifdef MOZ_GECKO_PROFILER + aWriter->NullProperty(mozilla::Span(aName, aNameLength)); +#endif +} + +void gecko_profiler_add_marker_untyped( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions) { +#ifdef MOZ_GECKO_PROFILER + 
profiler_add_marker( + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + mozilla::MarkerOptions( + std::move(*aMarkerTiming), + mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions))); +#endif +} + +void gecko_profiler_add_marker_text( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions, const char* aText, + size_t aTextLength) { +#ifdef MOZ_GECKO_PROFILER + profiler_add_marker( + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + mozilla::MarkerOptions( + std::move(*aMarkerTiming), + mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions)), + geckoprofiler::markers::TextMarker{}, + mozilla::ProfilerString8View(aText, aTextLength)); +#endif +} + +void gecko_profiler_add_marker( + const char* aName, size_t aNameLength, + mozilla::baseprofiler::ProfilingCategoryPair aCategoryPair, + mozilla::MarkerTiming* aMarkerTiming, + mozilla::StackCaptureOptions aStackCaptureOptions, uint8_t aMarkerTag, + const uint8_t* aPayload, size_t aPayloadSize) { +#ifdef MOZ_GECKO_PROFILER + // Copy the marker timing and create the marker option. + mozilla::MarkerOptions markerOptions( + std::move(*aMarkerTiming), + mozilla::MarkerStack::WithCaptureOptions(aStackCaptureOptions)); + + // Currently it's not possible to add a threadId option, but we will + // have it soon. + if (markerOptions.ThreadId().IsUnspecified()) { + // If yet unspecified, set thread to this thread where the marker is added. 
+ markerOptions.Set(mozilla::MarkerThreadId::CurrentThread()); + } + + auto& buffer = profiler_get_core_buffer(); + mozilla::Span payload(aPayload, aPayloadSize); + + mozilla::StackCaptureOptions captureOptions = + markerOptions.Stack().CaptureOptions(); + if (captureOptions != mozilla::StackCaptureOptions::NoStack) { + // A capture was requested, let's attempt to do it here&now. This avoids a + // lot of allocations that would be necessary if capturing a backtrace + // separately. + // TODO use a local on-stack byte buffer to remove last allocation. + // TODO reduce internal profiler stack levels, see bug 1659872. + mozilla::ProfileBufferChunkManagerSingle chunkManager( + mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize); + mozilla::ProfileChunkedBuffer chunkedBuffer( + mozilla::ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + chunkManager); + markerOptions.StackRef().UseRequestedBacktrace( + profiler_capture_backtrace_into(chunkedBuffer, captureOptions) + ? &chunkedBuffer + : nullptr); + + // This call must be made from here, while chunkedBuffer is in scope. 
+ buffer.PutObjects( + mozilla::ProfileBufferEntryKind::Marker, markerOptions, + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + mozilla::base_profiler_markers_detail::Streaming::DeserializerTag( + aMarkerTag), + mozilla::MarkerPayloadType::Rust, payload); + return; + } + + buffer.PutObjects( + mozilla::ProfileBufferEntryKind::Marker, markerOptions, + mozilla::ProfilerString8View(aName, aNameLength), + mozilla::MarkerCategory{aCategoryPair}, + mozilla::base_profiler_markers_detail::Streaming::DeserializerTag( + aMarkerTag), + mozilla::MarkerPayloadType::Rust, payload); +#endif +} diff --git a/tools/profiler/core/ProfilerCodeAddressService.cpp b/tools/profiler/core/ProfilerCodeAddressService.cpp new file mode 100644 index 0000000000..5a65e06379 --- /dev/null +++ b/tools/profiler/core/ProfilerCodeAddressService.cpp @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfilerCodeAddressService.h" + +#include "platform.h" +#include "mozilla/StackWalk.h" + +using namespace mozilla; + +#if defined(XP_LINUX) || defined(XP_FREEBSD) +static char* SearchSymbolTable(SymbolTable& aTable, uint32_t aOffset) { + size_t index; + bool exact = + BinarySearch(aTable.mAddrs, 0, aTable.mAddrs.Length(), aOffset, &index); + + if (index == 0 && !exact) { + // Our offset is before the first symbol in the table; no result. + return nullptr; + } + + // Extract the (mangled) symbol name out of the string table. 
+ auto strings = reinterpret_cast<char*>(aTable.mBuffer.Elements()); + nsCString symbol; + symbol.Append(strings + aTable.mIndex[index - 1], + aTable.mIndex[index] - aTable.mIndex[index - 1]); + + // First try demangling as a Rust identifier. + char demangled[1024]; + if (!profiler_demangle_rust(symbol.get(), demangled, + ArrayLength(demangled))) { + // Then as a C++ identifier. + DemangleSymbol(symbol.get(), demangled, ArrayLength(demangled)); + } + demangled[ArrayLength(demangled) - 1] = '\0'; + + // Use the mangled name if we didn't successfully demangle. + return strdup(demangled[0] != '\0' ? demangled : symbol.get()); +} +#endif + +bool ProfilerCodeAddressService::GetFunction(const void* aPc, + nsACString& aResult) { + Entry& entry = GetEntry(aPc); + +#if defined(XP_LINUX) || defined(XP_FREEBSD) + // On Linux, most symbols will not be found by the MozDescribeCodeAddress call + // that GetEntry does. So we read the symbol table directly from the ELF + // image. + + // SymbolTable currently assumes library offsets will not be larger than + // 4 GiB. 
+ if (entry.mLOffset <= 0xFFFFFFFF && !entry.mFunction) { + auto p = mSymbolTables.lookupForAdd(entry.mLibrary); + if (!p) { + if (!mSymbolTables.add(p, entry.mLibrary, SymbolTable())) { + MOZ_CRASH("ProfilerCodeAddressService OOM"); + } + profiler_get_symbol_table(entry.mLibrary, nullptr, &p->value()); + } + entry.mFunction = + SearchSymbolTable(p->value(), static_cast<uint32_t>(entry.mLOffset)); + } +#endif + + if (!entry.mFunction || entry.mFunction[0] == '\0') { + return false; + } + + aResult = nsDependentCString(entry.mFunction); + return true; +} diff --git a/tools/profiler/core/ProfilerMarkers.cpp b/tools/profiler/core/ProfilerMarkers.cpp new file mode 100644 index 0000000000..7c299678d1 --- /dev/null +++ b/tools/profiler/core/ProfilerMarkers.cpp @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/ProfilerMarkers.h" + +template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&, + const mozilla::MarkerCategory&, mozilla::MarkerOptions&&, + mozilla::baseprofiler::markers::NoPayload); + +template mozilla::ProfileBufferBlockIndex AddMarkerToBuffer( + mozilla::ProfileChunkedBuffer&, const mozilla::ProfilerString8View&, + const mozilla::MarkerCategory&, mozilla::MarkerOptions&&, + mozilla::baseprofiler::markers::TextMarker, const std::string&); + +template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker, + const std::string&); + +template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::TextMarker, + const nsCString&); + +template mozilla::ProfileBufferBlockIndex profiler_add_marker( + const mozilla::ProfilerString8View&, const mozilla::MarkerCategory&, + mozilla::MarkerOptions&&, mozilla::baseprofiler::markers::Tracing, + const mozilla::ProfilerString8View&); diff --git a/tools/profiler/core/ProfilerThreadRegistration.cpp b/tools/profiler/core/ProfilerThreadRegistration.cpp new file mode 100644 index 0000000000..c81d00573d --- /dev/null +++ b/tools/profiler/core/ProfilerThreadRegistration.cpp @@ -0,0 +1,198 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/ProfilerThreadRegistration.h" + +#include "mozilla/ProfilerMarkers.h" +#include "mozilla/ProfilerThreadRegistry.h" +#include "nsString.h" +#ifdef MOZ_GECKO_PROFILER +# include "platform.h" +#else +# define profiler_mark_thread_awake() +# define profiler_mark_thread_asleep() +#endif + +namespace mozilla::profiler { + +/* static */ +MOZ_THREAD_LOCAL(ThreadRegistration*) ThreadRegistration::tlsThreadRegistration; + +ThreadRegistration::ThreadRegistration(const char* aName, const void* aStackTop) + : mData(aName, aStackTop) { + auto* tls = GetTLS(); + if (MOZ_UNLIKELY(!tls)) { + // No TLS, nothing can be done without it. + return; + } + + if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) { + // This is a nested ThreadRegistration object, so the thread is already + // registered in the TLS and ThreadRegistry and we don't need to register + // again. + MOZ_ASSERT( + mData.Info().ThreadId() == rootRegistration->mData.Info().ThreadId(), + "Thread being re-registered has changed its TID"); + // TODO: Use new name. This is currently not possible because the + // TLS-stored RegisteredThread's ThreadInfo cannot be changed. + // In the meantime, we record a marker that could be used in the frontend. + PROFILER_MARKER_TEXT("Nested ThreadRegistration()", OTHER_Profiling, + MarkerOptions{}, + ProfilerString8View::WrapNullTerminatedString(aName)); + return; + } + + tls->set(this); + ThreadRegistry::Register(OnThreadRef{*this}); + profiler_mark_thread_awake(); +} + +ThreadRegistration::~ThreadRegistration() { + MOZ_ASSERT(profiler_current_thread_id() == mData.mInfo.ThreadId(), + "ThreadRegistration must be destroyed on its thread"); + MOZ_ASSERT(!mDataMutex.IsLockedOnCurrentThread(), + "Mutex shouldn't be locked here, as it's about to be destroyed " + "in ~ThreadRegistration()"); + auto* tls = GetTLS(); + if (MOZ_UNLIKELY(!tls)) { + // No TLS, nothing can be done without it. 
+ return; + } + + if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) { + if (rootRegistration != this) { + // `this` is not in the TLS, so it was a nested registration, there is + // nothing to unregister yet. + PROFILER_MARKER_TEXT( + "Nested ~ThreadRegistration()", OTHER_Profiling, MarkerOptions{}, + ProfilerString8View::WrapNullTerminatedString(mData.Info().Name())); + return; + } + + profiler_mark_thread_asleep(); +#ifdef NIGHTLY_BUILD + mData.RecordWakeCount(); +#endif + ThreadRegistry::Unregister(OnThreadRef{*this}); +#ifdef DEBUG + // After ThreadRegistry::Unregister, other threads should not be able to + // find this ThreadRegistration, and shouldn't have kept any reference to + // it across the ThreadRegistry mutex. + MOZ_ASSERT(mDataMutex.TryLock(), + "Mutex shouldn't be locked in any thread, as it's about to be " + "destroyed in ~ThreadRegistration()"); + // Undo the above successful TryLock. + mDataMutex.Unlock(); +#endif // DEBUG + + tls->set(nullptr); + return; + } + + // Already removed from the TLS!? This could happen with improperly-nested + // register/unregister calls, and the first ThreadRegistration has already + // been unregistered. + // We cannot record a marker on this thread because it was already + // unregistered. Send it to the main thread (unless this *is* already the + // main thread, which has been unregistered); this may be useful to catch + // mismatched register/unregister pairs in Firefox. 
+ if (!profiler_is_main_thread()) { + nsAutoCString threadId("thread id: "); + threadId.AppendInt(profiler_current_thread_id().ToNumber()); + threadId.AppendLiteral(", name: \""); + threadId.AppendASCII(mData.Info().Name()); + threadId.AppendLiteral("\""); + PROFILER_MARKER_TEXT( + "~ThreadRegistration() but TLS is empty", OTHER_Profiling, + MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()), + threadId); + } +} + +/* static */ +ProfilingStack* ThreadRegistration::RegisterThread(const char* aName, + const void* aStackTop) { + auto* tls = GetTLS(); + if (MOZ_UNLIKELY(!tls)) { + // No TLS, nothing can be done without it. + return nullptr; + } + + if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) { + // Already registered, record the extra depth to ignore the matching + // UnregisterThread. + ++rootRegistration->mOtherRegistrations; + // TODO: Use new name. This is currently not possible because the + // TLS-stored RegisteredThread's ThreadInfo cannot be changed. + // In the meantime, we record a marker that could be used in the frontend. + PROFILER_MARKER_TEXT("Nested ThreadRegistration::RegisterThread()", + OTHER_Profiling, MarkerOptions{}, + ProfilerString8View::WrapNullTerminatedString(aName)); + return &rootRegistration->mData.mProfilingStack; + } + + // Create on heap, it self-registers with the TLS (its effective owner, so + // we can forget the pointer after this), and with the Profiler. + ThreadRegistration* tr = new ThreadRegistration(aName, aStackTop); + tr->mIsOnHeap = true; + return &tr->mData.mProfilingStack; +} + +/* static */ +void ThreadRegistration::UnregisterThread() { + auto* tls = GetTLS(); + if (MOZ_UNLIKELY(!tls)) { + // No TLS, nothing can be done without it. + return; + } + + if (ThreadRegistration* rootRegistration = tls->get(); rootRegistration) { + if (rootRegistration->mOtherRegistrations != 0) { + // This is assumed to be a matching UnregisterThread() for a nested + // RegisterThread(). 
Decrease depth and we're done. + --rootRegistration->mOtherRegistrations; + // We don't know what name was used in the related RegisterThread(). + PROFILER_MARKER_UNTYPED("Nested ThreadRegistration::UnregisterThread()", + OTHER_Profiling); + return; + } + + if (!rootRegistration->mIsOnHeap) { + // The root registration was not added by `RegisterThread()`, so it + // shouldn't be deleted! + // This could happen if there are un-paired `UnregisterThread` calls when + // the initial registration (still alive) was done on the stack. We don't + // know what name was used in the related RegisterThread(). + PROFILER_MARKER_UNTYPED("Excess ThreadRegistration::UnregisterThread()", + OTHER_Profiling, MarkerStack::Capture()); + return; + } + + // This is the last `UnregisterThread()` that should match the first + // `RegisterThread()` that created this ThreadRegistration on the heap. + // Just delete this root registration, it will de-register itself from the + // TLS (and from the Profiler). + delete rootRegistration; + return; + } + + // There is no known ThreadRegistration for this thread, ignore this + // request. We cannot record a marker on this thread because it was already + // unregistered. Send it to the main thread (unless this *is* already the + // main thread, which has been unregistered); this may be useful to catch + // mismatched register/unregister pairs in Firefox. 
+ if (!profiler_is_main_thread()) { + nsAutoCString threadId("thread id: "); + threadId.AppendInt(profiler_current_thread_id().ToNumber()); + PROFILER_MARKER_TEXT( + "ThreadRegistration::UnregisterThread() but TLS is empty", + OTHER_Profiling, + MarkerOptions(MarkerThreadId::MainThread(), MarkerStack::Capture()), + threadId); + } +} + +} // namespace mozilla::profiler diff --git a/tools/profiler/core/ProfilerThreadRegistrationData.cpp b/tools/profiler/core/ProfilerThreadRegistrationData.cpp new file mode 100644 index 0000000000..e70f9e749a --- /dev/null +++ b/tools/profiler/core/ProfilerThreadRegistrationData.cpp @@ -0,0 +1,303 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ProfilerThreadRegistrationData.h" + +#include "mozilla/FOGIPC.h" +#include "mozilla/glean/GleanMetrics.h" +#include "mozilla/ProfilerMarkers.h" +#include "js/AllocationRecording.h" +#include "js/ProfilingStack.h" + +#if defined(XP_WIN) +# include <windows.h> +#elif defined(XP_DARWIN) +# include <pthread.h> +#endif + +#ifdef NIGHTLY_BUILD +namespace geckoprofiler::markers { + +using namespace mozilla; + +struct ThreadCpuUseMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("ThreadCpuUse"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + ProfilerThreadId aThreadId, + int64_t aCpuTimeMs, int64_t aWakeUps, + const ProfilerString8View& aThreadName) { + aWriter.IntProperty("threadId", static_cast<int64_t>(aThreadId.ToNumber())); + aWriter.IntProperty("time", aCpuTimeMs); + aWriter.IntProperty("wakeups", aWakeUps); + aWriter.StringProperty("label", aThreadName); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = 
MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyLabelFormat("time", "CPU Time", MS::Format::Milliseconds); + schema.AddKeyLabelFormat("wakeups", "Wake ups", MS::Format::Integer); + schema.SetTooltipLabel("{marker.name} - {marker.data.label}"); + schema.SetTableLabel( + "{marker.name} - {marker.data.label}: {marker.data.time} of CPU time, " + "{marker.data.wakeups} wake ups"); + return schema; + } +}; + +} // namespace geckoprofiler::markers +#endif + +namespace mozilla::profiler { + +ThreadRegistrationData::ThreadRegistrationData(const char* aName, + const void* aStackTop) + : mInfo(aName), + mPlatformData(mInfo.ThreadId()), + mStackTop( +#if defined(XP_WIN) + // We don't have to guess on Windows. + reinterpret_cast<const void*>( + reinterpret_cast<PNT_TIB>(NtCurrentTeb())->StackBase) +#elif defined(XP_DARWIN) + // We don't have to guess on Mac/Darwin. + reinterpret_cast<const void*>( + pthread_get_stackaddr_np(pthread_self())) +#else + // Otherwise use the given guess. + aStackTop +#endif + ) { +} + +// This is a simplified version of profiler_add_marker that can be easily passed +// into the JS engine. 
+static void profiler_add_js_marker(const char* aMarkerName, + const char* aMarkerText) { + PROFILER_MARKER_TEXT( + mozilla::ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, + {}, mozilla::ProfilerString8View::WrapNullTerminatedString(aMarkerText)); +} + +static void profiler_add_js_allocation_marker(JS::RecordAllocationInfo&& info) { + if (!profiler_thread_is_being_profiled_for_markers()) { + return; + } + + struct JsAllocationMarker { + static constexpr mozilla::Span<const char> MarkerTypeName() { + return mozilla::MakeStringSpan("JS allocation"); + } + static void StreamJSONMarkerData( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter, + const mozilla::ProfilerString16View& aTypeName, + const mozilla::ProfilerString8View& aClassName, + const mozilla::ProfilerString16View& aDescriptiveTypeName, + const mozilla::ProfilerString8View& aCoarseType, uint64_t aSize, + bool aInNursery) { + if (aClassName.Length() != 0) { + aWriter.StringProperty("className", aClassName); + } + if (aTypeName.Length() != 0) { + aWriter.StringProperty("typeName", NS_ConvertUTF16toUTF8(aTypeName)); + } + if (aDescriptiveTypeName.Length() != 0) { + aWriter.StringProperty("descriptiveTypeName", + NS_ConvertUTF16toUTF8(aDescriptiveTypeName)); + } + aWriter.StringProperty("coarseType", aCoarseType); + aWriter.IntProperty("size", aSize); + aWriter.BoolProperty("inNursery", aInNursery); + } + static mozilla::MarkerSchema MarkerTypeDisplay() { + return mozilla::MarkerSchema::SpecialFrontendLocation{}; + } + }; + + profiler_add_marker( + "JS allocation", geckoprofiler::category::JS, + mozilla::MarkerStack::Capture(), JsAllocationMarker{}, + mozilla::ProfilerString16View::WrapNullTerminatedString(info.typeName), + mozilla::ProfilerString8View::WrapNullTerminatedString(info.className), + mozilla::ProfilerString16View::WrapNullTerminatedString( + info.descriptiveTypeName), + mozilla::ProfilerString8View::WrapNullTerminatedString(info.coarseType), + info.size, info.inNursery); +} + 
+void ThreadRegistrationLockedRWFromAnyThread::SetProfilingFeaturesAndData( + ThreadProfilingFeatures aProfilingFeatures, + ProfiledThreadData* aProfiledThreadData, const PSAutoLock&) { + MOZ_ASSERT(mProfilingFeatures == ThreadProfilingFeatures::NotProfiled); + mProfilingFeatures = aProfilingFeatures; + + MOZ_ASSERT(!mProfiledThreadData); + MOZ_ASSERT(aProfiledThreadData); + mProfiledThreadData = aProfiledThreadData; + + if (mJSContext) { + // The thread is now being profiled, and we already have a JSContext, + // allocate a JsFramesBuffer to allow profiler-unlocked on-thread sampling. + MOZ_ASSERT(!mJsFrameBuffer); + mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES]; + } + + // Check invariants. + MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) == + !!mProfiledThreadData); + MOZ_ASSERT((mJSContext && + (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) == + !!mJsFrameBuffer); +} + +void ThreadRegistrationLockedRWFromAnyThread::ClearProfilingFeaturesAndData( + const PSAutoLock&) { + mProfilingFeatures = ThreadProfilingFeatures::NotProfiled; + mProfiledThreadData = nullptr; + + if (mJsFrameBuffer) { + delete[] mJsFrameBuffer; + mJsFrameBuffer = nullptr; + } + + // Check invariants. + MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) == + !!mProfiledThreadData); + MOZ_ASSERT((mJSContext && + (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) == + !!mJsFrameBuffer); +} + +void ThreadRegistrationLockedRWOnThread::SetJSContext(JSContext* aJSContext) { + MOZ_ASSERT(aJSContext && !mJSContext); + + mJSContext = aJSContext; + + if (mProfiledThreadData) { + MOZ_ASSERT((mProfilingFeatures != ThreadProfilingFeatures::NotProfiled) == + !!mProfiledThreadData); + // We now have a JSContext, and the thread is already being profiled, + // allocate a JsFramesBuffer to allow profiler-unlocked on-thread sampling. 
+ MOZ_ASSERT(!mJsFrameBuffer); + mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES]; + } + + // We give the JS engine a non-owning reference to the ProfilingStack. It's + // important that the JS engine doesn't touch this once the thread dies. + js::SetContextProfilingStack(aJSContext, &ProfilingStackRef()); + + // Check invariants. + MOZ_ASSERT((mJSContext && + (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) == + !!mJsFrameBuffer); +} + +void ThreadRegistrationLockedRWOnThread::ClearJSContext() { + mJSContext = nullptr; + + if (mJsFrameBuffer) { + delete[] mJsFrameBuffer; + mJsFrameBuffer = nullptr; + } + + // Check invariants. + MOZ_ASSERT((mJSContext && + (mProfilingFeatures != ThreadProfilingFeatures::NotProfiled)) == + !!mJsFrameBuffer); +} + +void ThreadRegistrationLockedRWOnThread::PollJSSampling() { + // We can't start/stop profiling until we have the thread's JSContext. + if (mJSContext) { + // It is possible for mJSSampling to go through the following sequences. + // + // - INACTIVE, ACTIVE_REQUESTED, INACTIVE_REQUESTED, INACTIVE + // + // - ACTIVE, INACTIVE_REQUESTED, ACTIVE_REQUESTED, ACTIVE + // + // Therefore, the if and else branches here aren't always interleaved. + // This is ok because the JS engine can handle that. + // + if (mJSSampling == ACTIVE_REQUESTED) { + mJSSampling = ACTIVE; + js::EnableContextProfilingStack(mJSContext, true); + + if (JSAllocationsEnabled()) { + // TODO - This probability should not be hardcoded. See Bug 1547284. 
+ JS::EnableRecordingAllocations(mJSContext, + profiler_add_js_allocation_marker, 0.01); + } + js::RegisterContextProfilingEventMarker(mJSContext, + profiler_add_js_marker); + + } else if (mJSSampling == INACTIVE_REQUESTED) { + mJSSampling = INACTIVE; + js::EnableContextProfilingStack(mJSContext, false); + + if (JSAllocationsEnabled()) { + JS::DisableRecordingAllocations(mJSContext); + } + } + } +} + +#ifdef NIGHTLY_BUILD +void ThreadRegistrationUnlockedConstReaderAndAtomicRW::RecordWakeCount() const { + baseprofiler::detail::BaseProfilerAutoLock lock(mRecordWakeCountMutex); + + uint64_t newWakeCount = mWakeCount - mAlreadyRecordedWakeCount; + if (newWakeCount == 0 && mSleep != AWAKE) { + // If no new wake-up was counted, and the thread is not marked awake, + // we can be pretty sure there is no CPU activity to record. + // Threads that are never annotated as asleep/awake (typically rust threads) + // start as awake. + return; + } + + uint64_t cpuTimeNs; + if (!GetCpuTimeSinceThreadStartInNs(&cpuTimeNs, PlatformDataCRef())) { + cpuTimeNs = 0; + } + + constexpr uint64_t NS_PER_MS = 1'000'000; + uint64_t cpuTimeMs = cpuTimeNs / NS_PER_MS; + + uint64_t newCpuTimeMs = MOZ_LIKELY(cpuTimeMs > mAlreadyRecordedCpuTimeInMs) + ? cpuTimeMs - mAlreadyRecordedCpuTimeInMs + : 0; + + if (!newWakeCount && !newCpuTimeMs) { + // Nothing to report, avoid computing the Glean friendly thread name. + return; + } + + nsAutoCString threadName(mInfo.Name()); + // Trim the trailing number of threads that are part of a thread pool. 
+ for (size_t length = threadName.Length(); length > 0; --length) { + const char c = threadName.CharAt(length - 1); + if ((c < '0' || c > '9') && c != '#' && c != ' ') { + if (length != threadName.Length()) { + threadName.SetLength(length); + } + break; + } + } + + mozilla::glean::RecordThreadCpuUse(threadName, newCpuTimeMs, newWakeCount); + + // The thread id is provided as part of the payload because this call is + // inside a ThreadRegistration data function, which could be invoked with + // the ThreadRegistry locked. We cannot call any function/option that could + // attempt to lock the ThreadRegistry again, like MarkerThreadId. + PROFILER_MARKER("Thread CPU use", OTHER, {}, ThreadCpuUseMarker, + mInfo.ThreadId(), newCpuTimeMs, newWakeCount, threadName); + mAlreadyRecordedCpuTimeInMs = cpuTimeMs; + mAlreadyRecordedWakeCount += newWakeCount; +} +#endif + +} // namespace mozilla::profiler diff --git a/tools/profiler/core/ProfilerThreadRegistry.cpp b/tools/profiler/core/ProfilerThreadRegistry.cpp new file mode 100644 index 0000000000..cb456471d9 --- /dev/null +++ b/tools/profiler/core/ProfilerThreadRegistry.cpp @@ -0,0 +1,40 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ProfilerThreadRegistry.h" + +namespace mozilla::profiler { + +/* static */ +ThreadRegistry::RegistryContainer ThreadRegistry::sRegistryContainer; + +/* static */ +ThreadRegistry::RegistryMutex ThreadRegistry::sRegistryMutex; + +#if !defined(MOZ_GECKO_PROFILER) +// When MOZ_GECKO_PROFILER is not defined, the function definitions in +// platform.cpp are not built, causing link errors. So we keep these simple +// definitions here. 
+ +/* static */ +void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) { + LockedRegistry lock; + MOZ_RELEASE_ASSERT(sRegistryContainer.append(OffThreadRef{aOnThreadRef})); +} + +/* static */ +void ThreadRegistry::Unregister(ThreadRegistration::OnThreadRef aOnThreadRef) { + LockedRegistry lock; + for (OffThreadRef& thread : sRegistryContainer) { + if (thread.IsPointingAt(*aOnThreadRef.mThreadRegistration)) { + sRegistryContainer.erase(&thread); + break; + } + } +} +#endif // !defined(MOZ_GECKO_PROFILER) + +} // namespace mozilla::profiler diff --git a/tools/profiler/core/ProfilerUtils.cpp b/tools/profiler/core/ProfilerUtils.cpp new file mode 100644 index 0000000000..6a46878ad7 --- /dev/null +++ b/tools/profiler/core/ProfilerUtils.cpp @@ -0,0 +1,118 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This file implements functions from ProfilerUtils.h on all platforms. +// Functions with platform-specific implementations are separated in #if blocks +// below, with each block being self-contained with all the #includes and +// definitions it needs, to keep platform code easier to maintain in isolation. 
+ +#include "mozilla/ProfilerUtils.h" + +// --------------------------------------------- Windows process & thread ids +#if defined(XP_WIN) + +# include <process.h> +# include <processthreadsapi.h> + +ProfilerProcessId profiler_current_process_id() { + return ProfilerProcessId::FromNativeId(_getpid()); +} + +ProfilerThreadId profiler_current_thread_id() { + static_assert(std::is_same_v<ProfilerThreadId::NativeType, + decltype(GetCurrentThreadId())>, + "ProfilerThreadId::NativeType must be exactly the type " + "returned by GetCurrentThreadId()"); + return ProfilerThreadId::FromNativeId(GetCurrentThreadId()); +} + +// --------------------------------------------- Non-Windows process id +#else +// All non-Windows platforms are assumed to be POSIX, which has getpid(). + +# include <unistd.h> + +ProfilerProcessId profiler_current_process_id() { + return ProfilerProcessId::FromNativeId(getpid()); +} + +// --------------------------------------------- Non-Windows thread id +// ------------------------------------------------------- macOS +# if defined(XP_MACOSX) + +# include <pthread.h> + +ProfilerThreadId profiler_current_thread_id() { + uint64_t tid; + if (pthread_threadid_np(nullptr, &tid) != 0) { + return ProfilerThreadId{}; + } + return ProfilerThreadId::FromNativeId(tid); +} + +// ------------------------------------------------------- Android +// Test Android before Linux, because Linux includes Android. 
+# elif defined(__ANDROID__) || defined(ANDROID) + +ProfilerThreadId profiler_current_thread_id() { + return ProfilerThreadId::FromNativeId(gettid()); +} + +// ------------------------------------------------------- Linux +# elif defined(XP_LINUX) + +# include <sys/syscall.h> + +ProfilerThreadId profiler_current_thread_id() { + // glibc doesn't provide a wrapper for gettid() until 2.30 + return ProfilerThreadId::FromNativeId(syscall(SYS_gettid)); +} + +// ------------------------------------------------------- FreeBSD +# elif defined(XP_FREEBSD) + +# include <sys/thr.h> + +ProfilerThreadId profiler_current_thread_id() { + long id; + if (thr_self(&id) != 0) { + return ProfilerThreadId{}; + } + return ProfilerThreadId::FromNativeId(id); +} + +// ------------------------------------------------------- Others +# else + +ProfilerThreadId profiler_current_thread_id() { + return ProfilerThreadId::FromNativeId(std::this_thread::get_id()); +} + +# endif +#endif // End of non-XP_WIN. + +// --------------------------------------------- Platform-agnostic definitions + +#include "MainThreadUtils.h" +#include "mozilla/Assertions.h" + +static ProfilerThreadId scProfilerMainThreadId; + +void profiler_init_main_thread_id() { + MOZ_ASSERT(NS_IsMainThread()); + mozilla::baseprofiler::profiler_init_main_thread_id(); + if (!scProfilerMainThreadId.IsSpecified()) { + scProfilerMainThreadId = profiler_current_thread_id(); + } +} + +[[nodiscard]] ProfilerThreadId profiler_main_thread_id() { + return scProfilerMainThreadId; +} + +[[nodiscard]] bool profiler_is_main_thread() { + return profiler_current_thread_id() == scProfilerMainThreadId; +} diff --git a/tools/profiler/core/VTuneProfiler.cpp b/tools/profiler/core/VTuneProfiler.cpp new file mode 100644 index 0000000000..58a39c51ee --- /dev/null +++ b/tools/profiler/core/VTuneProfiler.cpp @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source 
Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef XP_WIN +# undef UNICODE +# undef _UNICODE +#endif + +#include "VTuneProfiler.h" +#include "mozilla/Bootstrap.h" +#include <memory> + +VTuneProfiler* VTuneProfiler::mInstance = nullptr; + +void VTuneProfiler::Initialize() { + // This is just a 'dirty trick' to find out if the ittnotify DLL was found. + // If it wasn't this function always returns 0, otherwise it returns + // incrementing numbers, if the library was found this wastes 2 events but + // that should be okay. + __itt_event testEvent = + __itt_event_create("Test event", strlen("Test event")); + testEvent = __itt_event_create("Test event 2", strlen("Test event 2")); + + if (testEvent) { + mInstance = new VTuneProfiler(); + } +} + +void VTuneProfiler::Shutdown() {} + +void VTuneProfiler::TraceInternal(const char* aName, TracingKind aKind) { + std::string str(aName); + + auto iter = mStrings.find(str); + + __itt_event event; + if (iter != mStrings.end()) { + event = iter->second; + } else { + event = __itt_event_create(aName, str.length()); + mStrings.insert({str, event}); + } + + if (aKind == TRACING_INTERVAL_START || aKind == TRACING_EVENT) { + // VTune will consider starts not matched with an end to be single point in + // time events. + __itt_event_start(event); + } else { + __itt_event_end(event); + } +} + +void VTuneProfiler::RegisterThreadInternal(const char* aName) { + std::string str(aName); + + if (!str.compare("GeckoMain")) { + // Process main thread. 
+ switch (XRE_GetProcessType()) { + case GeckoProcessType::GeckoProcessType_Default: + __itt_thread_set_name("Main Process"); + break; + case GeckoProcessType::GeckoProcessType_Content: + __itt_thread_set_name("Content Process"); + break; + case GeckoProcessType::GeckoProcessType_GMPlugin: + __itt_thread_set_name("Plugin Process"); + break; + case GeckoProcessType::GeckoProcessType_GPU: + __itt_thread_set_name("GPU Process"); + break; + default: + __itt_thread_set_name("Unknown Process"); + } + return; + } + __itt_thread_set_name(aName); +} diff --git a/tools/profiler/core/VTuneProfiler.h b/tools/profiler/core/VTuneProfiler.h new file mode 100644 index 0000000000..e3abe6b90d --- /dev/null +++ b/tools/profiler/core/VTuneProfiler.h @@ -0,0 +1,78 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef VTuneProfiler_h +#define VTuneProfiler_h + +// The intent here is to add 0 overhead for regular users. In order to build +// the VTune profiler code at all --enable-vtune-instrumentation needs to be +// set as a build option. Even then, when none of the environment variables +// is specified that allow us to find the ittnotify DLL, these functions +// should be minimal overhead. When starting Firefox under VTune, these +// env vars will be automatically defined, otherwise INTEL_LIBITTNOTIFY32/64 +// should be set to point at the ittnotify DLL. 
+#ifndef MOZ_VTUNE_INSTRUMENTATION + +# define VTUNE_INIT() +# define VTUNE_SHUTDOWN() + +# define VTUNE_TRACING(name, kind) +# define VTUNE_REGISTER_THREAD(name) + +#else + +# include "GeckoProfiler.h" + +// This is the regular Intel header, these functions are actually defined for +// us inside js/src/vtune by an intel C file which actually dynamically resolves +// them to the correct DLL. Through libxul these will 'magically' resolve. +# include "vtune/ittnotify.h" + +# include <stddef.h> +# include <unordered_map> +# include <string> + +class VTuneProfiler { + public: + static void Initialize(); + static void Shutdown(); + + enum TracingKind { + TRACING_EVENT, + TRACING_INTERVAL_START, + TRACING_INTERVAL_END, + }; + + static void Trace(const char* aName, TracingKind aKind) { + if (mInstance) { + mInstance->TraceInternal(aName, aKind); + } + } + static void RegisterThread(const char* aName) { + if (mInstance) { + mInstance->RegisterThreadInternal(aName); + } + } + + private: + void TraceInternal(const char* aName, TracingKind aKind); + void RegisterThreadInternal(const char* aName); + + // This is null when the ittnotify DLL could not be found. + static VTuneProfiler* mInstance; + + std::unordered_map<std::string, __itt_event> mStrings; +}; + +# define VTUNE_INIT() VTuneProfiler::Initialize() +# define VTUNE_SHUTDOWN() VTuneProfiler::Shutdown() + +# define VTUNE_TRACING(name, kind) VTuneProfiler::Trace(name, kind) +# define VTUNE_REGISTER_THREAD(name) VTuneProfiler::RegisterThread(name) + +#endif + +#endif /* VTuneProfiler_h */ diff --git a/tools/profiler/core/memory_hooks.cpp b/tools/profiler/core/memory_hooks.cpp new file mode 100644 index 0000000000..be83c6bf82 --- /dev/null +++ b/tools/profiler/core/memory_hooks.cpp @@ -0,0 +1,628 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "memory_hooks.h" + +#include "nscore.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Atomics.h" +#include "mozilla/FastBernoulliTrial.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/PlatformMutex.h" +#include "mozilla/ProfilerCounts.h" +#include "mozilla/ThreadLocal.h" + +#include "GeckoProfiler.h" +#include "prenv.h" +#include "replace_malloc.h" + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef XP_WIN +# include <windows.h> +# include <process.h> +#else +# include <pthread.h> +# include <sys/types.h> +# include <unistd.h> +#endif + +#ifdef ANDROID +# include <android/log.h> +#endif + +// The counters start out as a nullptr, and then get initialized only once. They +// are never destroyed, as it would cause race conditions for the memory hooks +// that use the counters. This helps guard against potentially expensive +// operations like using a mutex. +// +// In addition, this is a raw pointer and not a UniquePtr, as the counter +// machinery will try and de-register itself from the profiler. This could +// happen after the profiler and its PSMutex was already destroyed, resulting in +// a crash. +static ProfilerCounterTotal* sCounter; + +// The gBernoulli value starts out as a nullptr, and only gets initialized once. +// It then lives for the entire lifetime of the process. It cannot be deleted +// without additional multi-threaded protections, since if we deleted it during +// profiler_stop then there could be a race between threads already in a +// memory hook that might try to access the value after or during deletion. 
+static mozilla::FastBernoulliTrial* gBernoulli; + +namespace mozilla::profiler { + +//--------------------------------------------------------------------------- +// Utilities +//--------------------------------------------------------------------------- + +// Returns true or or false depending on whether the marker was actually added +// or not. +static bool profiler_add_native_allocation_marker(int64_t aSize, + uintptr_t aMemoryAddress) { + if (!profiler_thread_is_being_profiled_for_markers( + profiler_main_thread_id())) { + return false; + } + + // Because native allocations may be intercepted anywhere, blocking while + // locking the profiler mutex here could end up causing a deadlock if another + // mutex is taken, which the profiler may indirectly need elsewhere. + // See bug 1642726 for such a scenario. + // So instead we bail out if the mutex is already locked. Native allocations + // are statistically sampled anyway, so missing a few because of this is + // acceptable. + if (profiler_is_locked_on_current_thread()) { + return false; + } + + struct NativeAllocationMarker { + static constexpr mozilla::Span<const char> MarkerTypeName() { + return mozilla::MakeStringSpan("Native allocation"); + } + static void StreamJSONMarkerData( + mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int64_t aSize, + uintptr_t aMemoryAddress, ProfilerThreadId aThreadId) { + aWriter.IntProperty("size", aSize); + aWriter.IntProperty("memoryAddress", + static_cast<int64_t>(aMemoryAddress)); + // Tech note: If `ToNumber()` returns a uint64_t, the conversion to + // int64_t is "implementation-defined" before C++20. This is acceptable + // here, because this is a one-way conversion to a unique identifier + // that's used to visually separate data by thread on the front-end. 
+ aWriter.IntProperty("threadId", + static_cast<int64_t>(aThreadId.ToNumber())); + } + static mozilla::MarkerSchema MarkerTypeDisplay() { + return mozilla::MarkerSchema::SpecialFrontendLocation{}; + } + }; + + profiler_add_marker("Native allocation", geckoprofiler::category::OTHER, + {MarkerThreadId::MainThread(), MarkerStack::Capture()}, + NativeAllocationMarker{}, aSize, aMemoryAddress, + profiler_current_thread_id()); + return true; +} + +static malloc_table_t gMallocTable; + +// This is only needed because of the |const void*| vs |void*| arg mismatch. +static size_t MallocSizeOf(const void* aPtr) { + return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr)); +} + +// The values for the Bernoulli trial are taken from DMD. According to DMD: +// +// In testing, a probability of 0.003 resulted in ~25% of heap blocks getting +// a stack trace and ~80% of heap bytes getting a stack trace. (This is +// possible because big heap blocks are more likely to get a stack trace.) +// +// The random number seeds are arbitrary and were obtained from random.org. +// +// However this value resulted in a lot of slowdown since the profiler stacks +// are pretty heavy to collect. The value was lowered to 10% of the original to +// 0.0003. +static void EnsureBernoulliIsInstalled() { + if (!gBernoulli) { + // This is only installed once. See the gBernoulli definition for more + // information. + gBernoulli = + new FastBernoulliTrial(0.0003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0); + } +} + +// This class provides infallible allocations (they abort on OOM) like +// mozalloc's InfallibleAllocPolicy, except that memory hooks are bypassed. This +// policy is used by the HashSet. 
+class InfallibleAllocWithoutHooksPolicy { + static void ExitOnFailure(const void* aP) { + if (!aP) { + MOZ_CRASH("Profiler memory hooks out of memory; aborting"); + } + } + + public: + template <typename T> + static T* maybe_pod_malloc(size_t aNumElems) { + if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) { + return nullptr; + } + return (T*)gMallocTable.malloc(aNumElems * sizeof(T)); + } + + template <typename T> + static T* maybe_pod_calloc(size_t aNumElems) { + return (T*)gMallocTable.calloc(aNumElems, sizeof(T)); + } + + template <typename T> + static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) { + return nullptr; + } + return (T*)gMallocTable.realloc(aPtr, aNewSize * sizeof(T)); + } + + template <typename T> + static T* pod_malloc(size_t aNumElems) { + T* p = maybe_pod_malloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_calloc(size_t aNumElems) { + T* p = maybe_pod_calloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static void free_(T* aPtr, size_t aSize = 0) { + gMallocTable.free(aPtr); + } + + static void reportAllocOverflow() { ExitOnFailure(nullptr); } + bool checkSimulatedOOM() const { return true; } +}; + +// We can't use mozilla::Mutex because it causes re-entry into the memory hooks. +// Define a custom implementation here. 
+class Mutex : private ::mozilla::detail::MutexImpl { + public: + Mutex() : ::mozilla::detail::MutexImpl() {} + + void Lock() { ::mozilla::detail::MutexImpl::lock(); } + void Unlock() { ::mozilla::detail::MutexImpl::unlock(); } +}; + +class MutexAutoLock { + MutexAutoLock(const MutexAutoLock&) = delete; + void operator=(const MutexAutoLock&) = delete; + + Mutex& mMutex; + + public: + explicit MutexAutoLock(Mutex& aMutex) : mMutex(aMutex) { mMutex.Lock(); } + ~MutexAutoLock() { mMutex.Unlock(); } +}; + +//--------------------------------------------------------------------------- +// Tracked allocations +//--------------------------------------------------------------------------- + +// The allocation tracker is shared between multiple threads, and is the +// coordinator for knowing when allocations have been tracked. The mutable +// internal state is protected by a mutex, and managed by the methods. +// +// The tracker knows about all the allocations that we have added to the +// profiler. This way, whenever any given piece of memory is freed, we can see +// if it was previously tracked, and we can track its deallocation. + +class AllocationTracker { + // This type tracks all of the allocations that we have captured. This way, we + // can see if a deallocation is inside of this set. We want to provide a + // balanced view into the allocations and deallocations. 
+ typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>, + InfallibleAllocWithoutHooksPolicy> + AllocationSet; + + public: + AllocationTracker() : mAllocations(), mMutex() {} + + void AddMemoryAddress(const void* memoryAddress) { + MutexAutoLock lock(mMutex); + if (!mAllocations.put(memoryAddress)) { + MOZ_CRASH("Out of memory while tracking native allocations."); + }; + } + + void Reset() { + MutexAutoLock lock(mMutex); + mAllocations.clearAndCompact(); + } + + // Returns true when the memory address is found and removed, otherwise that + // memory address is not being tracked and it returns false. + bool RemoveMemoryAddressIfFound(const void* memoryAddress) { + MutexAutoLock lock(mMutex); + + auto ptr = mAllocations.lookup(memoryAddress); + if (ptr) { + // The memory was present. It no longer needs to be tracked. + mAllocations.remove(ptr); + return true; + } + + return false; + } + + private: + AllocationSet mAllocations; + Mutex mMutex MOZ_UNANNOTATED; +}; + +static AllocationTracker* gAllocationTracker; + +static void EnsureAllocationTrackerIsInstalled() { + if (!gAllocationTracker) { + // This is only installed once. + gAllocationTracker = new AllocationTracker(); + } +} + +//--------------------------------------------------------------------------- +// Per-thread blocking of intercepts +//--------------------------------------------------------------------------- + +// On MacOS, and Linux the first __thread/thread_local access calls malloc, +// which leads to an infinite loop. So we use pthread-based TLS instead, which +// somehow doesn't have this problem. +#if !defined(XP_DARWIN) && !defined(XP_LINUX) +# define PROFILER_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T) +#else +# define PROFILER_THREAD_LOCAL(T) \ + ::mozilla::detail::ThreadLocal<T, ::mozilla::detail::ThreadLocalKeyStorage> +#endif + +// This class is used to determine if allocations on this thread should be +// intercepted or not. 
+// Creating a ThreadIntercept object on the stack will implicitly block nested +// ones. There are other reasons to block: The feature is off, or we're inside a +// profiler function that is locking a mutex. +class MOZ_RAII ThreadIntercept { + // When set to true, malloc does not intercept additional allocations. This is + // needed because collecting stacks creates new allocations. When blocked, + // these allocations are then ignored by the memory hook. + static PROFILER_THREAD_LOCAL(bool) tlsIsBlocked; + + // This is a quick flag to check and see if the allocations feature is enabled + // or disabled. + static mozilla::Atomic<bool, mozilla::Relaxed> sAllocationsFeatureEnabled; + + // True if this ThreadIntercept has set tlsIsBlocked. + bool mIsBlockingTLS; + + // True if interception is blocked for any reason. + bool mIsBlocked; + + public: + static void Init() { + tlsIsBlocked.infallibleInit(); + // infallibleInit should zero-initialize, which corresponds to `false`. + MOZ_ASSERT(!tlsIsBlocked.get()); + } + + ThreadIntercept() { + // If the allocation interception feature is enabled, and the TLS is not + // blocked yet, we will block the TLS now, and unblock on destruction. + mIsBlockingTLS = sAllocationsFeatureEnabled && !tlsIsBlocked.get(); + if (mIsBlockingTLS) { + MOZ_ASSERT(!tlsIsBlocked.get()); + tlsIsBlocked.set(true); + // Since this is the top-level ThreadIntercept, interceptions are not + // blocked unless the profiler itself holds a locked mutex, in which case + // we don't want to intercept allocations that originate from such a + // profiler call. + mIsBlocked = profiler_is_locked_on_current_thread(); + } else { + // The feature is off, or the TLS was already blocked, then we block this + // interception. + mIsBlocked = true; + } + } + + ~ThreadIntercept() { + if (mIsBlockingTLS) { + MOZ_ASSERT(tlsIsBlocked.get()); + tlsIsBlocked.set(false); + } + } + + // Is this ThreadIntercept effectively blocked? 
(Feature is off, or this + // ThreadIntercept is nested, or we're inside a locked-Profiler function.) + bool IsBlocked() const { return mIsBlocked; } + + static void EnableAllocationFeature() { sAllocationsFeatureEnabled = true; } + + static void DisableAllocationFeature() { sAllocationsFeatureEnabled = false; } +}; + +PROFILER_THREAD_LOCAL(bool) ThreadIntercept::tlsIsBlocked; + +mozilla::Atomic<bool, mozilla::Relaxed> + ThreadIntercept::sAllocationsFeatureEnabled(false); + +//--------------------------------------------------------------------------- +// malloc/free callbacks +//--------------------------------------------------------------------------- + +static void AllocCallback(void* aPtr, size_t aReqSize) { + if (!aPtr) { + return; + } + + // The first part of this function does not allocate. + size_t actualSize = gMallocTable.malloc_usable_size(aPtr); + if (actualSize > 0) { + sCounter->Add(actualSize); + } + + ThreadIntercept threadIntercept; + if (threadIntercept.IsBlocked()) { + // Either the native allocations feature is not turned on, or we may be + // recursing into a memory hook, return. We'll still collect counter + // information about this allocation, but no stack. + return; + } + + AUTO_PROFILER_LABEL("AllocCallback", PROFILER); + + // Perform a bernoulli trial, which will return true or false based on its + // configured probability. It takes into account the byte size so that + // larger allocations are weighted heavier than smaller allocations. + MOZ_ASSERT(gBernoulli, + "gBernoulli must be properly installed for the memory hooks."); + if ( + // First perform the Bernoulli trial. + gBernoulli->trial(actualSize) && + // Second, attempt to add a marker if the Bernoulli trial passed. 
+ profiler_add_native_allocation_marker( + static_cast<int64_t>(actualSize), + reinterpret_cast<uintptr_t>(aPtr))) { + MOZ_ASSERT(gAllocationTracker, + "gAllocationTracker must be properly installed for the memory " + "hooks."); + // Only track the memory if the allocation marker was actually added to the + // profiler. + gAllocationTracker->AddMemoryAddress(aPtr); + } + + // We're ignoring aReqSize here +} + +static void FreeCallback(void* aPtr) { + if (!aPtr) { + return; + } + + // The first part of this function does not allocate. + size_t unsignedSize = MallocSizeOf(aPtr); + int64_t signedSize = -(static_cast<int64_t>(unsignedSize)); + sCounter->Add(signedSize); + + ThreadIntercept threadIntercept; + if (threadIntercept.IsBlocked()) { + // Either the native allocations feature is not turned on, or we may be + // recursing into a memory hook, return. We'll still collect counter + // information about this allocation, but no stack. + return; + } + + AUTO_PROFILER_LABEL("FreeCallback", PROFILER); + + // Perform a bernoulli trial, which will return true or false based on its + // configured probability. It takes into account the byte size so that + // larger allocations are weighted heavier than smaller allocations. + MOZ_ASSERT( + gAllocationTracker, + "gAllocationTracker must be properly installed for the memory hooks."); + if (gAllocationTracker->RemoveMemoryAddressIfFound(aPtr)) { + // This size here is negative, indicating a deallocation. + profiler_add_native_allocation_marker(signedSize, + reinterpret_cast<uintptr_t>(aPtr)); + } +} + +} // namespace mozilla::profiler + +//--------------------------------------------------------------------------- +// malloc/free interception +//--------------------------------------------------------------------------- + +using namespace mozilla::profiler; + +static void* replace_malloc(size_t aSize) { + // This must be a call to malloc from outside. Intercept it. 
+ void* ptr = gMallocTable.malloc(aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void* replace_calloc(size_t aCount, size_t aSize) { + void* ptr = gMallocTable.calloc(aCount, aSize); + AllocCallback(ptr, aCount * aSize); + return ptr; +} + +static void* replace_realloc(void* aOldPtr, size_t aSize) { + // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. + if (!aOldPtr) { + return replace_malloc(aSize); + } + + FreeCallback(aOldPtr); + void* ptr = gMallocTable.realloc(aOldPtr, aSize); + if (ptr) { + AllocCallback(ptr, aSize); + } else { + // If realloc fails, we undo the prior operations by re-inserting the old + // pointer into the live block table. We don't have to do anything with the + // dead block list because the dead block hasn't yet been inserted. The + // block will end up looking like it was allocated for the first time here, + // which is untrue, and the slop bytes will be zero, which may be untrue. + // But this case is rare and doing better isn't worth the effort. 
+ AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr)); + } + return ptr; +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + void* ptr = gMallocTable.memalign(aAlignment, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void replace_free(void* aPtr) { + FreeCallback(aPtr); + gMallocTable.free(aPtr); +} + +static void* replace_moz_arena_malloc(arena_id_t aArena, size_t aSize) { + void* ptr = gMallocTable.moz_arena_malloc(aArena, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void* replace_moz_arena_calloc(arena_id_t aArena, size_t aCount, + size_t aSize) { + void* ptr = gMallocTable.moz_arena_calloc(aArena, aCount, aSize); + AllocCallback(ptr, aCount * aSize); + return ptr; +} + +static void* replace_moz_arena_realloc(arena_id_t aArena, void* aPtr, + size_t aSize) { + void* ptr = gMallocTable.moz_arena_realloc(aArena, aPtr, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +static void replace_moz_arena_free(arena_id_t aArena, void* aPtr) { + FreeCallback(aPtr); + gMallocTable.moz_arena_free(aArena, aPtr); +} + +static void* replace_moz_arena_memalign(arena_id_t aArena, size_t aAlignment, + size_t aSize) { + void* ptr = gMallocTable.moz_arena_memalign(aArena, aAlignment, aSize); + AllocCallback(ptr, aSize); + return ptr; +} + +// we have to replace these or jemalloc will assume we don't implement any +// of the arena replacements! +static arena_id_t replace_moz_create_arena_with_params( + arena_params_t* aParams) { + return gMallocTable.moz_create_arena_with_params(aParams); +} + +static void replace_moz_dispose_arena(arena_id_t aArenaId) { + return gMallocTable.moz_dispose_arena(aArenaId); +} + +// Must come after all the replace_* funcs +void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) { + gMallocTable = *aMallocTable; +#define MALLOC_FUNCS (MALLOC_FUNCS_MALLOC_BASE | MALLOC_FUNCS_ARENA) +#define MALLOC_DECL(name, ...) 
aMallocTable->name = replace_##name; +#include "malloc_decls.h" +} + +void profiler_replace_remove() {} + +namespace mozilla::profiler { +//--------------------------------------------------------------------------- +// Initialization +//--------------------------------------------------------------------------- + +BaseProfilerCount* install_memory_hooks() { + if (!sCounter) { + sCounter = new ProfilerCounterTotal("malloc", "Memory", + "Amount of allocated memory"); + // Also initialize the ThreadIntercept, even if native allocation tracking + // won't be turned on. This way the TLS will be initialized. + ThreadIntercept::Init(); + } else { + sCounter->Clear(); + } + jemalloc_replace_dynamic(replace_init); + return sCounter; +} + +// Remove the hooks, but leave the sCounter machinery. Deleting the counter +// would race with any existing memory hooks that are currently running. Rather +// than adding overhead here of mutexes it's cheaper for the performance to just +// leak these values. +void remove_memory_hooks() { jemalloc_replace_dynamic(nullptr); } + +void enable_native_allocations() { + // The bloat log tracks allocations and deallocations. This can conflict + // with the memory hook machinery, as the bloat log creates its own + // allocations. This means we can re-enter inside the bloat log machinery. At + // this time, the bloat log does not know about cannot handle the native + // allocation feature. 
+ // + // At the time of this writing, we hit this assertion: + // IsIdle(oldState) || IsRead(oldState) in Checker::StartReadOp() + // + // #01: GetBloatEntry(char const*, unsigned int) + // #02: NS_LogCtor + // #03: profiler_get_backtrace() + // #04: profiler_add_native_allocation_marker(long long) + // #05: mozilla::profiler::AllocCallback(void*, unsigned long) + // #06: replace_calloc(unsigned long, unsigned long) + // #07: PLDHashTable::ChangeTable(int) + // #08: PLDHashTable::Add(void const*, std::nothrow_t const&) + // #09: nsBaseHashtable<nsDepCharHashKey, nsAutoPtr<BloatEntry>, ... + // #10: GetBloatEntry(char const*, unsigned int) + // #11: NS_LogCtor + // #12: profiler_get_backtrace() + // ... + MOZ_ASSERT(!PR_GetEnv("XPCOM_MEM_BLOAT_LOG"), + "The bloat log feature is not compatible with the native " + "allocations instrumentation."); + + EnsureBernoulliIsInstalled(); + EnsureAllocationTrackerIsInstalled(); + ThreadIntercept::EnableAllocationFeature(); +} + +// This is safe to call even if native allocations hasn't been enabled. +void disable_native_allocations() { + ThreadIntercept::DisableAllocationFeature(); + if (gAllocationTracker) { + gAllocationTracker->Reset(); + } +} + +} // namespace mozilla::profiler diff --git a/tools/profiler/core/memory_hooks.h b/tools/profiler/core/memory_hooks.h new file mode 100644 index 0000000000..a6ace771dd --- /dev/null +++ b/tools/profiler/core/memory_hooks.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef memory_hooks_h +#define memory_hooks_h + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) +class BaseProfilerCount; + +namespace mozilla { +namespace profiler { + +BaseProfilerCount* install_memory_hooks(); +void remove_memory_hooks(); +void enable_native_allocations(); +void disable_native_allocations(); + +} // namespace profiler +} // namespace mozilla +#endif + +#endif diff --git a/tools/profiler/core/platform-linux-android.cpp b/tools/profiler/core/platform-linux-android.cpp new file mode 100644 index 0000000000..3bfe36ffc8 --- /dev/null +++ b/tools/profiler/core/platform-linux-android.cpp @@ -0,0 +1,637 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +// This file is used for both Linux and Android as well as FreeBSD. + +#include <stdio.h> +#include <math.h> + +#include <pthread.h> +#if defined(GP_OS_freebsd) +# include <sys/thr.h> +#endif +#include <semaphore.h> +#include <signal.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <stdlib.h> +#include <sched.h> +#include <ucontext.h> +// Ubuntu Dapper requires memory pages to be marked as +// executable. Otherwise, OS raises an exception when executing code +// in that page. +#include <sys/types.h> // mmap & munmap +#include <sys/mman.h> // mmap & munmap +#include <sys/stat.h> // open +#include <fcntl.h> // open +#include <unistd.h> // sysconf +#include <semaphore.h> +#ifdef __GLIBC__ +# include <execinfo.h> // backtrace, backtrace_symbols +#endif // def __GLIBC__ +#include <strings.h> // index +#include <errno.h> +#include <stdarg.h> + +#include "prenv.h" +#include "mozilla/PodOperations.h" +#include "mozilla/DebugOnly.h" +#if defined(GP_OS_linux) || defined(GP_OS_android) +# include "common/linux/breakpad_getcontext.h" +#endif + +#include <string.h> +#include <list> + +using namespace mozilla; + +static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) { + aRegs.mContext = aContext; + mcontext_t& mcontext = aContext->uc_mcontext; + + // Extracting the sample from the context is extremely machine dependent. 
+#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]); + aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]); + aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]); + aRegs.mLR = 0; +#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]); + aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]); + aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]); + aRegs.mLR = 0; +#elif defined(GP_PLAT_amd64_freebsd) + aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip); + aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp); + aRegs.mLR = 0; +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc); + aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp); + aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr); +#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.pc); + aRegs.mSP = reinterpret_cast<Address>(mcontext.sp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]); + aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]); +#elif defined(GP_PLAT_arm64_freebsd) + aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr); + aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp); + aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]); + aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr); +#elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android) + aRegs.mPC = reinterpret_cast<Address>(mcontext.pc); + aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]); + aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]); + +#else +# error "bad platform" +#endif +} + +#if 
defined(GP_OS_android) +# define SYS_tgkill __NR_tgkill +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) +int tgkill(pid_t tgid, pid_t tid, int signalno) { + return syscall(SYS_tgkill, tgid, tid, signalno); +} +#endif + +#if defined(GP_OS_freebsd) +# define tgkill thr_kill2 +#endif + +mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId) { + MOZ_ASSERT(aThreadId == profiler_current_thread_id()); + if (clockid_t clockid; pthread_getcpuclockid(pthread_self(), &clockid) == 0) { + mClockId = Some(clockid); + } +} + +mozilla::profiler::PlatformData::~PlatformData() = default; + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +// The only way to reliably interrupt a Linux thread and inspect its register +// and stack state is by sending a signal to it, and doing the work inside the +// signal handler. But we don't want to run much code inside the signal +// handler, since POSIX severely restricts what we can do in signal handlers. +// So we use a system of semaphores to suspend the thread and allow the +// sampler thread to do all the work of unwinding and copying out whatever +// data it wants. +// +// A four-message protocol is used to reliably suspend and later resume the +// thread to be sampled (the samplee): +// +// Sampler (signal sender) thread Samplee (thread to be sampled) +// +// Prepare the SigHandlerCoordinator +// and point sSigHandlerCoordinator at it +// +// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler) +// wait(mMessage2) Copy register state +// into sSigHandlerCoordinator +// <------ MSG 2 ----- post(mMessage2) +// Samplee is now suspended. wait(mMessage3) +// Examine its stack/register +// state at leisure +// +// Release samplee: +// post(mMessage3) ------- MSG 3 -----> +// wait(mMessage4) Samplee now resumes. Tell +// the sampler that we are done. 
+// <------ MSG 4 ------ post(mMessage4) +// Now we know the samplee's signal (leave signal handler) +// handler has finished using +// sSigHandlerCoordinator. We can +// safely reuse it for some other thread. +// + +// A type used to coordinate between the sampler (signal sending) thread and +// the thread currently being sampled (the samplee, which receives the +// signals). +// +// The first message is sent using a SIGPROF signal delivery. The subsequent +// three are sent using sem_wait/sem_post pairs. They are named accordingly +// in the following struct. +struct SigHandlerCoordinator { + SigHandlerCoordinator() { + PodZero(&mUContext); + int r = sem_init(&mMessage2, /* pshared */ 0, 0); + r |= sem_init(&mMessage3, /* pshared */ 0, 0); + r |= sem_init(&mMessage4, /* pshared */ 0, 0); + MOZ_ASSERT(r == 0); + (void)r; + } + + ~SigHandlerCoordinator() { + int r = sem_destroy(&mMessage2); + r |= sem_destroy(&mMessage3); + r |= sem_destroy(&mMessage4); + MOZ_ASSERT(r == 0); + (void)r; + } + + sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator" + sem_t mMessage3; // To samplee: "resume" + sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator" + ucontext_t mUContext; // Context at signal +}; + +struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr; + +static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) { + // Avoid TSan warning about clobbering errno. + int savedErrno = errno; + + MOZ_ASSERT(aSignal == SIGPROF); + MOZ_ASSERT(Sampler::sSigHandlerCoordinator); + + // By sending us this signal, the sampler thread has sent us message 1 in + // the comment above, with the meaning "|sSigHandlerCoordinator| is ready + // for use, please copy your register context into it." + Sampler::sSigHandlerCoordinator->mUContext = + *static_cast<ucontext_t*>(aContext); + + // Send message 2: tell the sampler thread that the context has been copied + // into |sSigHandlerCoordinator->mUContext|. 
sem_post can never fail by + // being interrupted by a signal, so there's no loop around this call. + int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2); + MOZ_ASSERT(r == 0); + + // At this point, the sampler thread assumes we are suspended, so we must + // not touch any global state here. + + // Wait for message 3: the sampler thread tells us to resume. + while (true) { + r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3); + if (r == -1 && errno == EINTR) { + // Interrupted by a signal. Try again. + continue; + } + // We don't expect any other kind of failure + MOZ_ASSERT(r == 0); + break; + } + + // Send message 4: tell the sampler thread that we are finished accessing + // |sSigHandlerCoordinator|. After this point it is not safe to touch + // |sSigHandlerCoordinator|. + r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4); + MOZ_ASSERT(r == 0); + + errno = savedErrno; +} + +Sampler::Sampler(PSLockRef aLock) + : mMyPid(profiler_current_process_id()), + // We don't know what the sampler thread's ID will be until it runs, so + // set mSamplerTid to a dummy value and fill it in for real in + // SuspendAndSampleAndResumeThread(). + mSamplerTid{} { +#if defined(USE_EHABI_STACKWALK) + mozilla::EHABIStackWalkInit(); +#endif + + // NOTE: We don't initialize LUL here, instead initializing it in + // SamplerThread's constructor. This is because with the + // profiler_suspend_and_sample_thread entry point, we want to be able to + // sample without waiting for LUL to be initialized. + + // Request profiling signals. + struct sigaction sa; + sa.sa_sigaction = SigprofHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) { + MOZ_CRASH("Error installing SIGPROF handler in the profiler"); + } +} + +void Sampler::Disable(PSLockRef aLock) { + // Restore old signal handler. This is global state so it's important that + // we do it now, while gPSMutex is locked. 
+ sigaction(SIGPROF, &mOldSigprofHandler, 0); +} + +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter) { + aWriter.StringProperty("threadCPUDelta", "ns"); +} + +/* static */ +uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) { + return aRawValue; +} + +namespace mozilla::profiler { +bool GetCpuTimeSinceThreadStartInNs( + uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) { + Maybe<clockid_t> maybeCid = aPlatformData.GetClockId(); + if (MOZ_UNLIKELY(!maybeCid)) { + return false; + } + + timespec t; + if (clock_gettime(*maybeCid, &t) != 0) { + return false; + } + + *aResult = uint64_t(t.tv_sec) * 1'000'000'000u + uint64_t(t.tv_nsec); + return true; +} +} // namespace mozilla::profiler + +static RunningTimes GetProcessRunningTimesDiff( + PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) { + AUTO_PROFILER_STATS(GetProcessRunningTimes); + + RunningTimes newRunningTimes; + { + AUTO_PROFILER_STATS(GetProcessRunningTimes_clock_gettime); + if (timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0) { + newRunningTimes.SetThreadCPUDelta(uint64_t(ts.tv_sec) * 1'000'000'000u + + uint64_t(ts.tv_nsec)); + } + newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + }; + + const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated; + aPreviousRunningTimesToBeUpdated = newRunningTimes; + return diff; +} + +static RunningTimes GetThreadRunningTimesDiff( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(GetRunningTimes_clock_gettime_thread); + + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + Maybe<clockid_t> maybeCid = platformData.GetClockId(); + + if (MOZ_UNLIKELY(!maybeCid)) { + // No clock id -> Nothing to measure apart from the timestamp. 
+ RunningTimes emptyRunningTimes; + emptyRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + return emptyRunningTimes; + } + + const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp( + [cid = *maybeCid](RunningTimes& aRunningTimes) { + AUTO_PROFILER_STATS(GetRunningTimes_clock_gettime); + if (timespec ts; clock_gettime(cid, &ts) == 0) { + aRunningTimes.ResetThreadCPUDelta( + uint64_t(ts.tv_sec) * 1'000'000'000u + uint64_t(ts.tv_nsec)); + } else { + aRunningTimes.ClearThreadCPUDelta(); + } + }); + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + const RunningTimes diff = newRunningTimes - previousRunningTimes; + previousRunningTimes = newRunningTimes; + return diff; +} + +static void DiscardSuspendedThreadRunningTimes( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes); + + // On Linux, suspending a thread uses a signal that makes that thread work + // to handle it. So we want to discard any added running time since the call + // to GetThreadRunningTimesDiff, which is done by overwriting the thread's + // PreviousThreadRunningTimesRef() with the current running time now. + + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + Maybe<clockid_t> maybeCid = platformData.GetClockId(); + + if (MOZ_UNLIKELY(!maybeCid)) { + // No clock id -> Nothing to measure. 
+ return; + } + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + + if (timespec ts; clock_gettime(*maybeCid, &ts) == 0) { + previousRunningTimes.ResetThreadCPUDelta( + uint64_t(ts.tv_sec) * 1'000'000'000u + uint64_t(ts.tv_nsec)); + } else { + previousRunningTimes.ClearThreadCPUDelta(); + } +} + +template <typename Func> +void Sampler::SuspendAndSampleAndResumeThread( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Func& aProcessRegs) { + // Only one sampler thread can be sampling at once. So we expect to have + // complete control over |sSigHandlerCoordinator|. + MOZ_ASSERT(!sSigHandlerCoordinator); + + if (!mSamplerTid.IsSpecified()) { + mSamplerTid = profiler_current_thread_id(); + } + ProfilerThreadId sampleeTid = aThreadData.Info().ThreadId(); + MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid); + + //----------------------------------------------------------------// + // Suspend the samplee thread and get its context. + + SigHandlerCoordinator coord; // on sampler thread's stack + sSigHandlerCoordinator = &coord; + + // Send message 1 to the samplee (the thread to be sampled), by + // signalling at it. + // This could fail if the thread doesn't exist anymore. + int r = tgkill(mMyPid.ToNumber(), sampleeTid.ToNumber(), SIGPROF); + if (r == 0) { + // Wait for message 2 from the samplee, indicating that the context + // is available and that the thread is suspended. + while (true) { + r = sem_wait(&sSigHandlerCoordinator->mMessage2); + if (r == -1 && errno == EINTR) { + // Interrupted by a signal. Try again. + continue; + } + // We don't expect any other kind of failure. + MOZ_ASSERT(r == 0); + break; + } + + //----------------------------------------------------------------// + // Sample the target thread. 
+ + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + // + // The profiler's "critical section" begins here. In the critical section, + // we must not do any dynamic memory allocation, nor try to acquire any lock + // or any other unshareable resource. This is because the thread to be + // sampled has been suspended at some entirely arbitrary point, and we have + // no idea which unsharable resources (locks, essentially) it holds. So any + // attempt to acquire any lock, including the implied locks used by the + // malloc implementation, risks deadlock. This includes TimeStamp::Now(), + // which gets a lock on Windows. + + // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is + // valid. We can poke around in it and unwind its stack as we like. + + // Extract the current register values. + Registers regs; + PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext); + aProcessRegs(regs, aNow); + + //----------------------------------------------------------------// + // Resume the target thread. + + // Send message 3 to the samplee, which tells it to resume. + r = sem_post(&sSigHandlerCoordinator->mMessage3); + MOZ_ASSERT(r == 0); + + // Wait for message 4 from the samplee, which tells us that it has + // finished with |sSigHandlerCoordinator|. + while (true) { + r = sem_wait(&sSigHandlerCoordinator->mMessage4); + if (r == -1 && errno == EINTR) { + continue; + } + MOZ_ASSERT(r == 0); + break; + } + + // The profiler's critical section ends here. After this point, none of the + // critical section limitations documented above apply. + // + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + } + + // This isn't strictly necessary, but doing so does help pick up anomalies + // in which the signal handler is running when it shouldn't be. 
+ sSigHandlerCoordinator = nullptr; +} + +// END Sampler target specifics +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread target specifics + +static void* ThreadEntry(void* aArg) { + auto thread = static_cast<SamplerThread*>(aArg); + thread->Run(); + return nullptr; +} + +SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds, uint32_t aFeatures) + : mSampler(aLock), + mActivityGeneration(aActivityGeneration), + mIntervalMicroseconds( + std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) { +#if defined(USE_LUL_STACKWALK) + lul::LUL* lul = CorePS::Lul(); + if (!lul && ProfilerFeature::HasStackWalk(aFeatures)) { + CorePS::SetLul(MakeUnique<lul::LUL>(logging_sink_for_LUL)); + // Read all the unwind info currently available. + lul = CorePS::Lul(); + read_procmaps(lul); + + // Switch into unwind mode. After this point, we can't add or remove any + // unwind info to/from this LUL instance. The only thing we can do with + // it is Unwind() calls. + lul->EnableUnwinding(); + + // Has a test been requested? + if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) { + int nTests = 0, nTestsPassed = 0; + RunLulUnitTests(&nTests, &nTestsPassed, lul); + } + } +#endif + + // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending + // the signal ourselves instead of relying on itimer provides much better + // accuracy. + // + // At least 350 KiB of stack space are needed when built with TSAN. This + // includes lul::N_STACK_BYTES plus whatever else is needed for the sampler + // thread. Set the stack size to 800 KiB to keep a safe margin above that. 
+ pthread_attr_t attr; + if (pthread_attr_init(&attr) != 0 || + pthread_attr_setstacksize(&attr, 800 * 1024) != 0 || + pthread_create(&mThread, &attr, ThreadEntry, this) != 0) { + MOZ_CRASH("pthread_create failed"); + } + pthread_attr_destroy(&attr); +} + +SamplerThread::~SamplerThread() { + pthread_join(mThread, nullptr); + // Just in the unlikely case some callbacks were added between the end of the + // thread and now. + InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList), + SamplingState::JustStopped); +} + +void SamplerThread::SleepMicro(uint32_t aMicroseconds) { + if (aMicroseconds >= 1000000) { + // Use usleep for larger intervals, because the nanosleep + // code below only supports intervals < 1 second. + MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds)); + return; + } + + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = aMicroseconds * 1000UL; + + int rv = ::nanosleep(&ts, &ts); + + while (rv != 0 && errno == EINTR) { + // Keep waiting in case of interrupt. + // nanosleep puts the remaining time back into ts. + rv = ::nanosleep(&ts, &ts); + } + + MOZ_ASSERT(!rv, "nanosleep call failed"); +} + +void SamplerThread::Stop(PSLockRef aLock) { + // Restore old signal handler. This is global state so it's important that + // we do it now, while gPSMutex is locked. It's safe to do this now even + // though this SamplerThread is still alive, because the next time the main + // loop of Run() iterates it won't get past the mActivityGeneration check, + // and so won't send any signals. + mSampler.Disable(aLock); +} + +// END SamplerThread target specifics +//////////////////////////////////////////////////////////////////////// + +#if defined(GP_OS_linux) || defined(GP_OS_freebsd) + +// We use pthread_atfork() to temporarily disable signal delivery during any +// fork() call. Without that, fork() can be repeatedly interrupted by signal +// delivery, requiring it to be repeatedly restarted, which can lead to *long* +// delays. See bug 837390. 
+// +// We provide no paf_child() function to run in the child after forking. This +// is fine because we always immediately exec() after fork(), and exec() +// clobbers all process state. Also, we don't want the sampler to resume in the +// child process between fork() and exec(), it would be wasteful. +// +// Unfortunately all this is only doable on non-Android because Bionic doesn't +// have pthread_atfork. + +// In the parent, before the fork, increase gSkipSampling to ensure that +// profiler sampling loops will be skipped. There could be one in progress now, +// causing a small delay, but further sampling will be skipped, allowing `fork` +// to complete. +static void paf_prepare() { ++gSkipSampling; } + +// In the parent, after the fork, decrease gSkipSampling to let the sampler +// resume sampling (unless other places have made it non-zero as well). +static void paf_parent() { --gSkipSampling; } + +static void PlatformInit(PSLockRef aLock) { + // Set up the fork handlers. + pthread_atfork(paf_prepare, paf_parent, nullptr); +} + +#else + +static void PlatformInit(PSLockRef aLock) {} + +#endif + +#if defined(HAVE_NATIVE_UNWIND) +void Registers::SyncPopulate() { + if (!getcontext(&mContextSyncStorage)) { + PopulateRegsFromContext(*this, &mContextSyncStorage); + } +} +#endif diff --git a/tools/profiler/core/platform-macos.cpp b/tools/profiler/core/platform-macos.cpp new file mode 100644 index 0000000000..789f61ab4a --- /dev/null +++ b/tools/profiler/core/platform-macos.cpp @@ -0,0 +1,296 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <unistd.h> +#include <sys/mman.h> +#include <mach/mach_init.h> +#include <mach-o/getsect.h> + +#include <AvailabilityMacros.h> + +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <libkern/OSAtomic.h> +#include <libproc.h> +#include <mach/mach.h> +#include <mach/semaphore.h> +#include <mach/task.h> +#include <mach/thread_act.h> +#include <mach/vm_statistics.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/sysctl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <math.h> + +// this port is based off of v8 svn revision 9837 + +mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId) + : mProfiledThread(mach_thread_self()) {} + +mozilla::profiler::PlatformData::~PlatformData() { + // Deallocate Mach port for thread. + mach_port_deallocate(mach_task_self(), mProfiledThread); +} + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +Sampler::Sampler(PSLockRef aLock) {} + +void Sampler::Disable(PSLockRef aLock) {} + +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter) { + // Microseconds. 
+ aWriter.StringProperty("threadCPUDelta", "\u00B5s"); +} + +/* static */ +uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) { + return aRawValue; +} + +namespace mozilla::profiler { +bool GetCpuTimeSinceThreadStartInNs( + uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) { + thread_extended_info_data_t threadInfoData; + mach_msg_type_number_t count = THREAD_EXTENDED_INFO_COUNT; + if (thread_info(aPlatformData.ProfiledThread(), THREAD_EXTENDED_INFO, + (thread_info_t)&threadInfoData, &count) != KERN_SUCCESS) { + return false; + } + + *aResult = threadInfoData.pth_user_time + threadInfoData.pth_system_time; + return true; +} +} // namespace mozilla::profiler + +static RunningTimes GetProcessRunningTimesDiff( + PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) { + AUTO_PROFILER_STATS(GetProcessRunningTimes); + + RunningTimes newRunningTimes; + { + AUTO_PROFILER_STATS(GetProcessRunningTimes_task_info); + + static const auto pid = getpid(); + struct proc_taskinfo pti; + if ((unsigned long)proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &pti, + PROC_PIDTASKINFO_SIZE) >= + PROC_PIDTASKINFO_SIZE) { + newRunningTimes.SetThreadCPUDelta(pti.pti_total_user + + pti.pti_total_system); + } + newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + }; + + const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated; + aPreviousRunningTimesToBeUpdated = newRunningTimes; + return diff; +} + +static RunningTimes GetThreadRunningTimesDiff( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(GetRunningTimes); + + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + + const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp( + [&platformData](RunningTimes& aRunningTimes) { + AUTO_PROFILER_STATS(GetRunningTimes_thread_info); + thread_basic_info_data_t threadBasicInfo; + mach_msg_type_number_t basicCount = 
THREAD_BASIC_INFO_COUNT; + if (thread_info(platformData.ProfiledThread(), THREAD_BASIC_INFO, + reinterpret_cast<thread_info_t>(&threadBasicInfo), + &basicCount) == KERN_SUCCESS && + basicCount == THREAD_BASIC_INFO_COUNT) { + uint64_t userTimeUs = + uint64_t(threadBasicInfo.user_time.seconds) * + uint64_t(USEC_PER_SEC) + + uint64_t(threadBasicInfo.user_time.microseconds); + uint64_t systemTimeUs = + uint64_t(threadBasicInfo.system_time.seconds) * + uint64_t(USEC_PER_SEC) + + uint64_t(threadBasicInfo.system_time.microseconds); + aRunningTimes.ResetThreadCPUDelta(userTimeUs + systemTimeUs); + } else { + aRunningTimes.ClearThreadCPUDelta(); + } + }); + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + const RunningTimes diff = newRunningTimes - previousRunningTimes; + previousRunningTimes = newRunningTimes; + return diff; +} + +static void DiscardSuspendedThreadRunningTimes( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + // Nothing to do! + // On macOS, suspending a thread doesn't make that thread work. +} + +template <typename Func> +void Sampler::SuspendAndSampleAndResumeThread( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Func& aProcessRegs) { + thread_act_t samplee_thread = aThreadData.PlatformDataCRef().ProfiledThread(); + + //----------------------------------------------------------------// + // Suspend the samplee thread and get its context. + + // We're using thread_suspend on OS X because pthread_kill (which is what we + // at one time used on Linux) has less consistent performance and causes + // strange crashes, see bug 1166778 and bug 1166808. thread_suspend + // is also just a lot simpler to use. 
+ + if (KERN_SUCCESS != thread_suspend(samplee_thread)) { + return; + } + + //----------------------------------------------------------------// + // Sample the target thread. + + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + // + // The profiler's "critical section" begins here. We must be very careful + // what we do here, or risk deadlock. See the corresponding comment in + // platform-linux-android.cpp for details. + +#if defined(__x86_64__) + thread_state_flavor_t flavor = x86_THREAD_STATE64; + x86_thread_state64_t state; + mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; +# if __DARWIN_UNIX03 +# define REGISTER_FIELD(name) __r##name +# else +# define REGISTER_FIELD(name) r##name +# endif // __DARWIN_UNIX03 +#elif defined(__aarch64__) + thread_state_flavor_t flavor = ARM_THREAD_STATE64; + arm_thread_state64_t state; + mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT; +# if __DARWIN_UNIX03 +# define REGISTER_FIELD(name) __##name +# else +# define REGISTER_FIELD(name) name +# endif // __DARWIN_UNIX03 +#else +# error "unknown architecture" +#endif + + if (thread_get_state(samplee_thread, flavor, + reinterpret_cast<natural_t*>(&state), + &count) == KERN_SUCCESS) { + Registers regs; +#if defined(__x86_64__) + regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip)); + regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp)); + regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp)); + regs.mLR = 0; +#elif defined(__aarch64__) + regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(pc)); + regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp)); + regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(fp)); + regs.mLR = reinterpret_cast<Address>(state.REGISTER_FIELD(lr)); +#else +# error "unknown architecture" +#endif + + aProcessRegs(regs, aNow); + } + +#undef REGISTER_FIELD + + //----------------------------------------------------------------// + // Resume the target thread. 
+ + thread_resume(samplee_thread); + + // The profiler's critical section ends here. + // + // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +} + +// END Sampler target specifics +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread target specifics + +static void* ThreadEntry(void* aArg) { + auto thread = static_cast<SamplerThread*>(aArg); + thread->Run(); + return nullptr; +} + +SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds, uint32_t aFeatures) + : mSampler(aLock), + mActivityGeneration(aActivityGeneration), + mIntervalMicroseconds( + std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))), + mThread{nullptr} { + pthread_attr_t* attr_ptr = nullptr; + if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) { + MOZ_CRASH("pthread_create failed"); + } +} + +SamplerThread::~SamplerThread() { + pthread_join(mThread, nullptr); + // Just in the unlikely case some callbacks were added between the end of the + // thread and now. + InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList), + SamplingState::JustStopped); +} + +void SamplerThread::SleepMicro(uint32_t aMicroseconds) { + usleep(aMicroseconds); + // FIXME: the OSX 10.12 page for usleep says "The usleep() function is + // obsolescent. Use nanosleep(2) instead." This implementation could be + // merged with the linux-android version. Also, this doesn't handle the + // case where the usleep call is interrupted by a signal. +} + +void SamplerThread::Stop(PSLockRef aLock) { mSampler.Disable(aLock); } + +// END SamplerThread target specifics +//////////////////////////////////////////////////////////////////////// + +static void PlatformInit(PSLockRef aLock) {} + +#if defined(HAVE_NATIVE_UNWIND) +void Registers::SyncPopulate() { + // Derive the stack pointer from the frame pointer. 
The 0x10 offset is + // 8 bytes for the previous frame pointer and 8 bytes for the return + // address both stored on the stack after at the beginning of the current + // frame. + mSP = reinterpret_cast<Address>(__builtin_frame_address(0)) + 0x10; +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wframe-address" + mFP = reinterpret_cast<Address>(__builtin_frame_address(1)); +# pragma GCC diagnostic pop + mPC = reinterpret_cast<Address>( + __builtin_extract_return_addr(__builtin_return_address(0))); + mLR = 0; +} +#endif diff --git a/tools/profiler/core/platform-win32.cpp b/tools/profiler/core/platform-win32.cpp new file mode 100644 index 0000000000..cfe246ea40 --- /dev/null +++ b/tools/profiler/core/platform-win32.cpp @@ -0,0 +1,497 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#include <windows.h> +#include <mmsystem.h> +#include <process.h> + +#include "nsWindowsDllInterceptor.h" +#include "mozilla/StackWalk_windows.h" +#include "mozilla/WindowsVersion.h" + +#include <type_traits> + +static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) { +#if defined(GP_ARCH_amd64) + aRegs.mPC = reinterpret_cast<Address>(aContext->Rip); + aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp); + aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp); + aRegs.mLR = 0; +#elif defined(GP_ARCH_x86) + aRegs.mPC = reinterpret_cast<Address>(aContext->Eip); + aRegs.mSP = reinterpret_cast<Address>(aContext->Esp); + aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp); + aRegs.mLR = 0; +#elif defined(GP_ARCH_arm64) + aRegs.mPC = reinterpret_cast<Address>(aContext->Pc); + aRegs.mSP = reinterpret_cast<Address>(aContext->Sp); + aRegs.mFP = reinterpret_cast<Address>(aContext->Fp); + aRegs.mLR = reinterpret_cast<Address>(aContext->Lr); +#else +# error "bad arch" +#endif +} + +// Gets a real (i.e. not pseudo) handle for the current thread, with the +// permissions needed for profiling. +// @return a real HANDLE for the current thread. 
+static HANDLE GetRealCurrentThreadHandleForProfiling() { + HANDLE realCurrentThreadHandle; + if (!::DuplicateHandle( + ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(), + &realCurrentThreadHandle, + THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, + FALSE, 0)) { + return nullptr; + } + + return realCurrentThreadHandle; +} + +static_assert( + std::is_same_v<mozilla::profiler::PlatformData::WindowsHandle, HANDLE>); + +mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId) + : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) { + MOZ_ASSERT(aThreadId == ProfilerThreadId::FromNumber(::GetCurrentThreadId())); +} + +mozilla::profiler::PlatformData::~PlatformData() { + if (mProfiledThread) { + CloseHandle(mProfiledThread); + mProfiledThread = nullptr; + } +} + +static const HANDLE kNoThread = INVALID_HANDLE_VALUE; + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler target specifics + +Sampler::Sampler(PSLockRef aLock) {} + +void Sampler::Disable(PSLockRef aLock) {} + +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter) { + static const Span<const char> units = + (GetCycleTimeFrequencyMHz() != 0) ? MakeStringSpan("ns") + : MakeStringSpan("variable CPU cycles"); + aWriter.StringProperty("threadCPUDelta", units); +} + +/* static */ +uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) { + static const uint64_t cycleTimeFrequencyMHz = GetCycleTimeFrequencyMHz(); + if (cycleTimeFrequencyMHz == 0u) { + return aRawValue; + } + + constexpr uint64_t GHZ_PER_MHZ = 1'000u; + // To get ns, we need to divide cycles by a frequency in GHz, i.e.: + // cycles / (f_MHz / GHZ_PER_MHZ). To avoid losing the integer precision of + // f_MHz, this is computed as (cycles * GHZ_PER_MHZ) / f_MHz. + // Adding GHZ_PER_MHZ/2 to (cycles * GHZ_PER_MHZ) will round to nearest when + // the result of the division is truncated. 
+ return (aRawValue * GHZ_PER_MHZ + (GHZ_PER_MHZ / 2u)) / cycleTimeFrequencyMHz; +} + +static inline uint64_t ToNanoSeconds(const FILETIME& aFileTime) { + // FILETIME values are 100-nanoseconds units, converting + ULARGE_INTEGER usec = {{aFileTime.dwLowDateTime, aFileTime.dwHighDateTime}}; + return usec.QuadPart * 100; +} + +namespace mozilla::profiler { +bool GetCpuTimeSinceThreadStartInNs( + uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) { + const HANDLE profiledThread = aPlatformData.ProfiledThread(); + int frequencyInMHz = GetCycleTimeFrequencyMHz(); + if (frequencyInMHz) { + uint64_t cpuCycleCount; + if (!QueryThreadCycleTime(profiledThread, &cpuCycleCount)) { + return false; + } + + constexpr uint64_t USEC_PER_NSEC = 1000L; + *aResult = cpuCycleCount * USEC_PER_NSEC / frequencyInMHz; + return true; + } + + FILETIME createTime, exitTime, kernelTime, userTime; + if (!GetThreadTimes(profiledThread, &createTime, &exitTime, &kernelTime, + &userTime)) { + return false; + } + + *aResult = ToNanoSeconds(kernelTime) + ToNanoSeconds(userTime); + return true; +} +} // namespace mozilla::profiler + +static RunningTimes GetProcessRunningTimesDiff( + PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) { + AUTO_PROFILER_STATS(GetProcessRunningTimes); + + static const HANDLE processHandle = GetCurrentProcess(); + + RunningTimes newRunningTimes; + { + AUTO_PROFILER_STATS(GetProcessRunningTimes_QueryProcessCycleTime); + if (ULONG64 cycles; QueryProcessCycleTime(processHandle, &cycles) != 0) { + newRunningTimes.SetThreadCPUDelta(cycles); + } + newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now()); + }; + + const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated; + aPreviousRunningTimesToBeUpdated = newRunningTimes; + return diff; +} + +static RunningTimes GetThreadRunningTimesDiff( + PSLockRef aLock, + ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) { + AUTO_PROFILER_STATS(GetThreadRunningTimes); 
+ + const mozilla::profiler::PlatformData& platformData = + aThreadData.PlatformDataCRef(); + const HANDLE profiledThread = platformData.ProfiledThread(); + + const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp( + [profiledThread](RunningTimes& aRunningTimes) { + AUTO_PROFILER_STATS(GetThreadRunningTimes_QueryThreadCycleTime); + if (ULONG64 cycles; + QueryThreadCycleTime(profiledThread, &cycles) != 0) { + aRunningTimes.ResetThreadCPUDelta(cycles); + } else { + aRunningTimes.ClearThreadCPUDelta(); + } + }); + + ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(aLock); + MOZ_ASSERT(profiledThreadData); + RunningTimes& previousRunningTimes = + profiledThreadData->PreviousThreadRunningTimesRef(); + const RunningTimes diff = newRunningTimes - previousRunningTimes; + previousRunningTimes = newRunningTimes; + return diff; +} +
// NOTE(review): the flattened line above is the tail of a function whose
// signature lies before this section; it is carried over unchanged.

// Calls ResetThreadCPUDelta() on the thread's PreviousThreadRunningTimesRef()
// with the value currently reported by QueryThreadCycleTime(), or
// ClearThreadCPUDelta() if that query fails.
static void DiscardSuspendedThreadRunningTimes(
    PSLockRef aLock,
    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
  AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes);

  // On Windows, suspending a thread makes that thread work a little bit. So we
  // want to discard any added running time since the call to
  // GetThreadRunningTimesDiff, which is done by overwriting the thread's
  // PreviousThreadRunningTimesRef() with the current running time now.

  const mozilla::profiler::PlatformData& platformData =
      aThreadData.PlatformDataCRef();
  const HANDLE profiledThread = platformData.ProfiledThread();

  ProfiledThreadData* profiledThreadData =
      aThreadData.GetProfiledThreadData(aLock);
  MOZ_ASSERT(profiledThreadData);
  RunningTimes& previousRunningTimes =
      profiledThreadData->PreviousThreadRunningTimesRef();

  // QueryThreadCycleTime() returning 0 is treated as failure.
  if (ULONG64 cycles; QueryThreadCycleTime(profiledThread, &cycles) != 0) {
    previousRunningTimes.ResetThreadCPUDelta(cycles);
  } else {
    previousRunningTimes.ClearThreadCPUDelta();
  }
}

// Suspends the profiled thread, captures its register context, calls
// aProcessRegs(regs, aNow) while the thread is still suspended, then resumes
// it.
//
// Returns without calling aProcessRegs if the thread handle is null, if
// SuspendThread() fails, or if GetThreadContext() fails (in that last case the
// thread is resumed before returning).
template <typename Func>
void Sampler::SuspendAndSampleAndResumeThread(
    PSLockRef aLock,
    const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
    const TimeStamp& aNow, const Func& aProcessRegs) {
  HANDLE profiled_thread = aThreadData.PlatformDataCRef().ProfiledThread();
  if (profiled_thread == nullptr) {
    return;
  }

  // Context used for sampling the register state of the profiled thread.
  CONTEXT context;
  memset(&context, 0, sizeof(context));

  //----------------------------------------------------------------//
  // Suspend the samplee thread and get its context.

  static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
  if (SuspendThread(profiled_thread) == kSuspendFailed) {
    return;
  }

  // SuspendThread is asynchronous, so the thread may still be running.
  // Call GetThreadContext first to ensure the thread is really suspended.
  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.

  // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
  // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
#if defined(GP_ARCH_amd64)
  context.ContextFlags = CONTEXT_FULL;
#else
  context.ContextFlags = CONTEXT_CONTROL;
#endif
  if (!GetThreadContext(profiled_thread, &context)) {
    // The thread was suspended above, so it must be resumed on this error
    // path too.
    ResumeThread(profiled_thread);
    return;
  }

  //----------------------------------------------------------------//
  // Sample the target thread.

  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
  //
  // The profiler's "critical section" begins here. We must be very careful
  // what we do here, or risk deadlock. See the corresponding comment in
  // platform-linux-android.cpp for details.

  Registers regs;
  PopulateRegsFromContext(regs, &context);
  aProcessRegs(regs, aNow);

  //----------------------------------------------------------------//
  // Resume the target thread.

  ResumeThread(profiled_thread);

  // The profiler's critical section ends here.
  //
  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
}

// END Sampler target specifics
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN SamplerThread target specifics

// Start routine handed to _beginthreadex() for the sampler thread. aArg is the
// SamplerThread that the constructor passes as `this`.
static unsigned int __stdcall ThreadEntry(void* aArg) {
  auto thread = static_cast<SamplerThread*>(aArg);
  thread->Run();
  return 0;
}

// Start routine handed to _beginthreadex() for the unregistered-thread spy
// thread. aArg is the SamplerThread that the constructor passes as `this`.
static unsigned int __stdcall UnregisteredThreadSpyEntry(void* aArg) {
  auto thread = static_cast<SamplerThread*>(aArg);
  thread->RunUnregisteredThreadSpy();
  return 0;
}

// Records the sampling parameters, optionally raises the system timer
// resolution, optionally starts the spy thread (when the UnregisteredThreads
// feature is set), and finally starts the sampler thread.
//
// aIntervalMilliseconds is converted to whole microseconds (rounded to
// nearest, never less than 1). Crashes with MOZ_CRASH if either thread cannot
// be created.
SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                             double aIntervalMilliseconds, uint32_t aFeatures)
    : mSampler(aLock),
      mActivityGeneration(aActivityGeneration),
      mIntervalMicroseconds(
          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
      mNoTimerResolutionChange(
          ProfilerFeature::HasNoTimerResolutionChange(aFeatures)) {
  // Stop() repeats this exact condition and period for ::timeEndPeriod().
  if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
    // By default the timer resolution (which tends to be 1/64Hz, around 16ms)
    // is not changed. However, if the requested interval is sufficiently low,
    // the resolution will be adjusted to match. Note that this affects all
    // timers in Firefox, and could therefore hide issues while profiling. This
    // change may be prevented with the "notimerresolutionchange" feature.
    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
  }

  if (ProfilerFeature::HasUnregisteredThreads(aFeatures)) {
    // Sampler&spy threads are not running yet, so it's safe to modify
    // mSpyingState without locking the monitor.
    mSpyingState = SpyingState::Spy_Initializing;
    mUnregisteredThreadSpyThread = reinterpret_cast<HANDLE>(
        _beginthreadex(nullptr,
                       /* stack_size */ 0, UnregisteredThreadSpyEntry, this,
                       /* initflag */ 0, nullptr));
    if (mUnregisteredThreadSpyThread == 0) {
      MOZ_CRASH("_beginthreadex failed");
    }
  }

  // Create a new thread. It is important to use _beginthreadex() instead of
  // the Win32 function CreateThread(), because the CreateThread() does not
  // initialize thread-specific structures in the C runtime library.
  mThread = reinterpret_cast<HANDLE>(_beginthreadex(nullptr,
                                                    /* stack_size */ 0,
                                                    ThreadEntry, this,
                                                    /* initflag */ 0, nullptr));
  if (mThread == 0) {
    MOZ_CRASH("_beginthreadex failed");
  }
}

// Shuts down and joins the spy thread (if one was started), joins the sampler
// thread, closes both thread handles, and finally invokes any remaining
// post-sampling callbacks with SamplingState::JustStopped.
SamplerThread::~SamplerThread() {
  if (mUnregisteredThreadSpyThread) {
    {
      // Make sure the spying thread is not actively working, because the win32
      // function it's using could deadlock with WaitForSingleObject below.
      MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
      while (mSpyingState != SpyingState::Spy_Waiting &&
             mSpyingState != SpyingState::SamplerToSpy_Start) {
        spyingStateLock.Wait();
      }

      // Request shutdown, then wait for the spy thread's acknowledgement
      // (set in RunUnregisteredThreadSpy()).
      mSpyingState = SpyingState::MainToSpy_Shutdown;
      spyingStateLock.NotifyAll();

      do {
        spyingStateLock.Wait();
      } while (mSpyingState != SpyingState::SpyToMain_ShuttingDown);
    }

    WaitForSingleObject(mUnregisteredThreadSpyThread, INFINITE);

    // Close our own handle for the thread.
    if (mUnregisteredThreadSpyThread != kNoThread) {
      CloseHandle(mUnregisteredThreadSpyThread);
    }
  }

  WaitForSingleObject(mThread, INFINITE);

  // Close our own handle for the thread.
  if (mThread != kNoThread) {
    CloseHandle(mThread);
  }

  // Just in the unlikely case some callbacks were added between the end of the
  // thread and now.
  InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList),
                              SamplingState::JustStopped);
}

// Body of the spy thread. Each iteration publishes Spy_Waiting, waits until
// another thread changes mSpyingState, and then either acknowledges a
// shutdown request and exits, or runs SpyOnUnregisteredThreads() with the
// monitor released.
void SamplerThread::RunUnregisteredThreadSpy() {
  // TODO: Consider registering this thread.
  // Pros: Remove from list of unregistered threads; Not useful to profiling
  //       Firefox itself.
  // Cons: Doesn't appear in the profile, so users may miss the expensive CPU
  //       cost of this work on Windows.
  PR_SetCurrentThreadName("UnregisteredThreadSpy");

  while (true) {
    {
      MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
      // Either this is the first loop, or we're looping after working.
      MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing ||
                 mSpyingState == SpyingState::Spy_Working);

      // Let everyone know we're waiting, and then wait.
      mSpyingState = SpyingState::Spy_Waiting;
      mSpyingStateMonitor.NotifyAll();
      do {
        spyingStateLock.Wait();
      } while (mSpyingState == SpyingState::Spy_Waiting);

      if (mSpyingState == SpyingState::MainToSpy_Shutdown) {
        // Acknowledge the request; ~SamplerThread() waits for this state.
        mSpyingState = SpyingState::SpyToMain_ShuttingDown;
        mSpyingStateMonitor.NotifyAll();
        break;
      }

      MOZ_ASSERT(mSpyingState == SpyingState::SamplerToSpy_Start);
      mSpyingState = SpyingState::Spy_Working;
    }

    // Do the work without lock, so other threads can read the current state.
    SpyOnUnregisteredThreads();
  }
}

// Sleeps for roughly aMicroseconds. With a profiler interval of 1ms or more
// this is a single ::Sleep() of at least 1ms; with a sub-millisecond interval
// it sleeps for the whole milliseconds (if any) and then spins on
// TimeStamp::Now() until the deadline.
void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
  // For now, keep the old behaviour of minimum Sleep(1), even for
  // smaller-than-usual sleeps after an overshoot, unless the user has
  // explicitly opted into a sub-millisecond profiler interval.
  if (mIntervalMicroseconds >= 1000) {
    ::Sleep(std::max(1u, aMicroseconds / 1000));
  } else {
    TimeStamp start = TimeStamp::Now();
    TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);

    // First, sleep for as many whole milliseconds as possible.
    if (aMicroseconds >= 1000) {
      ::Sleep(aMicroseconds / 1000);
    }

    // Then, spin until enough time has passed.
    while (TimeStamp::Now() < end) {
      YieldProcessor();
    }
  }
}

// Reverts the timer-resolution change made by the constructor (same condition
// and same period argument) and disables the sampler.
void SamplerThread::Stop(PSLockRef aLock) {
  if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
    // Disable any timer resolution changes we've made. Do it now while
    // gPSMutex is locked, i.e. before any other SamplerThread can be created
    // and call ::timeBeginPeriod().
    //
    // It's safe to do this now even though this SamplerThread is still alive,
    // because the next time the main loop of Run() iterates it won't get past
    // the mActivityGeneration check, and so it won't make any more ::Sleep()
    // calls.
    ::timeEndPeriod(mIntervalMicroseconds / 1000);
  }

  mSampler.Disable(aLock);
}

// END SamplerThread target specifics
////////////////////////////////////////////////////////////////////////

// Intentionally empty: this function performs no platform-specific
// initialization.
static void PlatformInit(PSLockRef aLock) {}

#if defined(HAVE_NATIVE_UNWIND)
// Fills this Registers object from the context captured on the calling thread
// by RtlCaptureContext().
void Registers::SyncPopulate() {
  CONTEXT context;
  RtlCaptureContext(&context);
  PopulateRegsFromContext(*this, &context);
}
#endif

#if defined(GP_PLAT_amd64_windows)

// Use InitializeWin64ProfilerHooks from the base profiler.

namespace mozilla {
namespace baseprofiler {
MFBT_API void InitializeWin64ProfilerHooks();
}  // namespace baseprofiler
}  // namespace mozilla

using mozilla::baseprofiler::InitializeWin64ProfilerHooks;

#endif  // defined(GP_PLAT_amd64_windows)
diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp new file mode 100644 index 0000000000..b080a4b2df --- /dev/null +++ b/tools/profiler/core/platform.cpp @@ -0,0 +1,7035 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// There are three kinds of samples done by the profiler. +// +// - A "periodic" sample is the most complex kind. It is done in response to a +// timer while the profiler is active. It involves writing a stack trace plus +// a variety of other values (memory measurements, responsiveness +// measurements, markers, etc.) into the main ProfileBuffer. The sampling is +// done from off-thread, and so SuspendAndSampleAndResumeThread() is used to +// get the register values. +// +// - A "synchronous" sample is a simpler kind. It is done in response to an API +// call (profiler_get_backtrace()). 
It involves writing a stack trace and +// little else into a temporary ProfileBuffer, and wrapping that up in a +// ProfilerBacktrace that can be subsequently used in a marker. The sampling +// is done on-thread, and so Registers::SyncPopulate() is used to get the +// register values. +// +// - A "backtrace" sample is the simplest kind. It is done in response to an +// API call (profiler_suspend_and_sample_thread()). It involves getting a +// stack trace via a ProfilerStackCollector; it does not write to a +// ProfileBuffer. The sampling is done from off-thread, and so uses +// SuspendAndSampleAndResumeThread() to get the register values. + +#include "platform.h" + +#include "GeckoProfiler.h" +#include "GeckoProfilerReporter.h" +#include "PageInformation.h" +#include "PowerCounters.h" +#include "ProfileBuffer.h" +#include "ProfiledThreadData.h" +#include "ProfilerBacktrace.h" +#include "ProfilerChild.h" +#include "ProfilerCodeAddressService.h" +#include "ProfilerControl.h" +#include "ProfilerIOInterposeObserver.h" +#include "ProfilerParent.h" +#include "ProfilerRustBindings.h" +#include "mozilla/MozPromise.h" +#include "shared-libraries.h" +#include "VTuneProfiler.h" + +#include "js/ProfilingFrameIterator.h" +#include "memory_hooks.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/AutoProfilerLabel.h" +#include "mozilla/BaseAndGeckoProfilerDetail.h" +#include "mozilla/ExtensionPolicyService.h" +#include "mozilla/extensions/WebExtensionPolicy.h" +#include "mozilla/glean/GleanMetrics.h" +#include "mozilla/Monitor.h" +#include "mozilla/Preferences.h" +#include "mozilla/Printf.h" +#include "mozilla/ProcInfo.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h" +#include "mozilla/ProfileChunkedBuffer.h" +#include "mozilla/SchedulerGroup.h" +#include "mozilla/Services.h" +#include "mozilla/StackWalk.h" +#ifdef XP_WIN +# include "mozilla/StackWalkThread.h" +#endif +#include "mozilla/StaticPtr.h" 
+#include "mozilla/ThreadLocal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "BaseProfiler.h" +#include "nsDirectoryServiceDefs.h" +#include "nsDirectoryServiceUtils.h" +#include "nsIDocShell.h" +#include "nsIHttpProtocolHandler.h" +#include "nsIObserverService.h" +#include "nsIPropertyBag2.h" +#include "nsIXULAppInfo.h" +#include "nsIXULRuntime.h" +#include "nsJSPrincipals.h" +#include "nsMemoryReporterManager.h" +#include "nsPIDOMWindow.h" +#include "nsProfilerStartParams.h" +#include "nsScriptSecurityManager.h" +#include "nsSystemInfo.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "Tracing.h" +#include "prdtoa.h" +#include "prtime.h" + +#include <algorithm> +#include <errno.h> +#include <fstream> +#include <ostream> +#include <set> +#include <sstream> +#include <string_view> +#include <type_traits> + +#if defined(GP_OS_android) +# include "JavaExceptions.h" +# include "mozilla/java/GeckoJavaSamplerNatives.h" +# include "mozilla/jni/Refs.h" +#endif + +#if defined(GP_OS_darwin) +# include "nsCocoaFeatures.h" +#endif + +#if defined(GP_PLAT_amd64_darwin) +# include <cpuid.h> +#endif + +#if defined(GP_OS_windows) +# include <processthreadsapi.h> + +// GetThreadInformation is not available on Windows 7. +WINBASEAPI +BOOL WINAPI GetThreadInformation( + _In_ HANDLE hThread, _In_ THREAD_INFORMATION_CLASS ThreadInformationClass, + _Out_writes_bytes_(ThreadInformationSize) LPVOID ThreadInformation, + _In_ DWORD ThreadInformationSize); + +#endif + +// Win32 builds always have frame pointers, so FramePointerStackWalk() always +// works. +#if defined(GP_PLAT_x86_windows) +# define HAVE_NATIVE_UNWIND +# define USE_FRAME_POINTER_STACK_WALK +#endif + +// Win64 builds always omit frame pointers, so we use the slower +// MozStackWalk(), which works in that case. 
+#if defined(GP_PLAT_amd64_windows) +# define HAVE_NATIVE_UNWIND +# define USE_MOZ_STACK_WALK +#endif + +// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower +// MozStackWalk(). +#if defined(GP_PLAT_arm64_windows) +# define HAVE_NATIVE_UNWIND +# define USE_MOZ_STACK_WALK +#endif + +// Mac builds use FramePointerStackWalk(). Even if we build without +// frame pointers, we'll still get useful stacks in system libraries +// because those always have frame pointers. +// We don't use MozStackWalk() on Mac. +#if defined(GP_OS_darwin) +# define HAVE_NATIVE_UNWIND +# define USE_FRAME_POINTER_STACK_WALK +#endif + +// Android builds use the ARM Exception Handling ABI to unwind. +#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) +# define HAVE_NATIVE_UNWIND +# define USE_EHABI_STACKWALK +# include "EHABIStackWalk.h" +#endif + +// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks. +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ + defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ + defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \ + defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \ + defined(GP_PLAT_arm64_freebsd) +# define HAVE_NATIVE_UNWIND +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" + +// On linux we use LUL for periodic samples and synchronous samples, but we use +// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled. +// (See the comment at the top of the file for a definition of +// periodic/synchronous/backtrace.). +// +// FramePointerStackWalk can produce incomplete stacks when the current entry is +// in a shared library without framepointers, however LUL can take a long time +// to initialize, which is undesirable for consumers of +// profiler_suspend_and_sample_thread like the Background Hang Reporter. 
+# if defined(MOZ_PROFILING) +# define USE_FRAME_POINTER_STACK_WALK +# endif +#endif + +// We can only stackwalk without expensive initialization on platforms which +// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires +// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of +// which can be expensive. +#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) +# define HAVE_FASTINIT_NATIVE_UNWIND +#endif + +#ifdef MOZ_VALGRIND +# include <valgrind/memcheck.h> +#else +# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0) +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include <ucontext.h> +#endif + +using namespace mozilla; +using namespace mozilla::literals::ProportionValue_literals; + +using mozilla::profiler::detail::RacyFeatures; +using ThreadRegistration = mozilla::profiler::ThreadRegistration; +using ThreadRegistrationInfo = mozilla::profiler::ThreadRegistrationInfo; +using ThreadRegistry = mozilla::profiler::ThreadRegistry; + +LazyLogModule gProfilerLog("prof"); + +ProfileChunkedBuffer& profiler_get_core_buffer() { + // Defer to the Base Profiler in mozglue to create the core buffer if needed, + // and keep a reference here, for quick access in xul. 
+ static ProfileChunkedBuffer& sProfileChunkedBuffer = + baseprofiler::profiler_get_core_buffer(); + return sProfileChunkedBuffer; +} + +mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed> gSkipSampling; + +#if defined(GP_OS_android) +class GeckoJavaSampler + : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> { + private: + GeckoJavaSampler(); + + public: + static double GetProfilerTime() { + if (!profiler_is_active()) { + return 0.0; + } + return profiler_time(); + }; + + static void JavaStringArrayToCharArray(jni::ObjectArray::Param& aJavaArray, + Vector<const char*>& aCharArray, + JNIEnv* aJni) { + int arraySize = aJavaArray->Length(); + for (int i = 0; i < arraySize; i++) { + jstring javaString = + (jstring)(aJni->GetObjectArrayElement(aJavaArray.Get(), i)); + const char* filterString = aJni->GetStringUTFChars(javaString, 0); + // FIXME. These strings are leaked. + MOZ_RELEASE_ASSERT(aCharArray.append(filterString)); + } + } + + static void StartProfiler(jni::ObjectArray::Param aFiltersArray, + jni::ObjectArray::Param aFeaturesArray) { + JNIEnv* jni = jni::GetEnvForThread(); + Vector<const char*> filtersTemp; + Vector<const char*> featureStringArray; + + JavaStringArrayToCharArray(aFiltersArray, filtersTemp, jni); + JavaStringArrayToCharArray(aFeaturesArray, featureStringArray, jni); + + uint32_t features = 0; + features = ParseFeaturesFromStringArray(featureStringArray.begin(), + featureStringArray.length()); + + // 128 * 1024 * 1024 is the entries preset that is given in + // devtools/client/performance-new/popup/background.jsm.js + profiler_start(PowerOfTwo32(128 * 1024 * 1024), 5.0, features, + filtersTemp.begin(), filtersTemp.length(), 0, Nothing()); + } + + static void StopProfiler(jni::Object::Param aGeckoResult) { + auto result = java::GeckoResult::LocalRef(aGeckoResult); + profiler_pause(); + nsCOMPtr<nsIProfiler> nsProfiler( + do_GetService("@mozilla.org/tools/profiler;1")); + nsProfiler->GetProfileDataAsGzippedArrayBufferAndroid(0)->Then( 
+ GetMainThreadSerialEventTarget(), __func__, + [result](FallibleTArray<uint8_t> compressedProfile) { + result->Complete(jni::ByteArray::New( + reinterpret_cast<const int8_t*>(compressedProfile.Elements()), + compressedProfile.Length())); + }, + [result](nsresult aRv) { + char errorString[9]; + sprintf(errorString, "%08x", aRv); + result->CompleteExceptionally( + mozilla::java::sdk::IllegalStateException::New(errorString) + .Cast<jni::Throwable>()); + }); + } +}; +#endif + +constexpr static bool ValidateFeatures() { + int expectedFeatureNumber = 0; + + // Feature numbers should start at 0 and increase by 1 each. +#define CHECK_FEATURE(n_, str_, Name_, desc_) \ + if ((n_) != expectedFeatureNumber) { \ + return false; \ + } \ + ++expectedFeatureNumber; + + PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE) + +#undef CHECK_FEATURE + + return true; +} + +static_assert(ValidateFeatures(), "Feature list is invalid"); + +// Return all features that are available on this platform. +static uint32_t AvailableFeatures() { + uint32_t features = 0; + +#define ADD_FEATURE(n_, str_, Name_, desc_) \ + ProfilerFeature::Set##Name_(features); + + // Add all the possible features. + PROFILER_FOR_EACH_FEATURE(ADD_FEATURE) + +#undef ADD_FEATURE + + // Now remove features not supported on this platform/configuration. +#if !defined(GP_OS_android) + ProfilerFeature::ClearJava(features); +#endif +#if !defined(HAVE_NATIVE_UNWIND) + ProfilerFeature::ClearStackWalk(features); +#endif +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (getenv("XPCOM_MEM_BLOAT_LOG")) { + NS_WARNING("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations."); + // The memory hooks are available, but the bloat log is enabled, which is + // not compatible with the native allocations tracking. See the comment in + // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for + // more information. 
+ ProfilerFeature::ClearNativeAllocations(features); + } +#else + // The memory hooks are not available. + ProfilerFeature::ClearNativeAllocations(features); +#endif + +#if !defined(GP_OS_windows) + ProfilerFeature::ClearNoTimerResolutionChange(features); +#endif + + return features; +} + +// Default features common to all contexts (even if not available). +static constexpr uint32_t DefaultFeatures() { + return ProfilerFeature::Java | ProfilerFeature::JS | + ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization | + ProfilerFeature::Screenshots | ProfilerFeature::ProcessCPU; +} + +// Extra default features when MOZ_PROFILER_STARTUP is set (even if not +// available). +static constexpr uint32_t StartupExtraDefaultFeatures() { + // Enable file I/Os by default for startup profiles as startup is heavy on + // I/O operations. + return ProfilerFeature::FileIOAll | ProfilerFeature::IPCMessages; +} + +Json::String ToCompactString(const Json::Value& aJsonValue) { + Json::StreamWriterBuilder builder; + // No indentations, and no newlines. + builder["indentation"] = ""; + // This removes spaces after colons. + builder["enableYAMLCompatibility"] = false; + // Only 6 digits after the decimal point; timestamps in ms have ns precision. 
+ builder["precision"] = 6; + builder["precisionType"] = "decimal"; + + return Json::writeString(builder, aJsonValue); +} + +/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex + ProfilingLog::gMutex; +/* static */ mozilla::UniquePtr<Json::Value> ProfilingLog::gLog; + +/* static */ void ProfilingLog::Init() { + mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex}; + MOZ_ASSERT(!gLog); + gLog = mozilla::MakeUniqueFallible<Json::Value>(Json::objectValue); + if (gLog) { + (*gLog)[Json::StaticString{"profilingLogBegin" TIMESTAMP_JSON_SUFFIX}] = + ProfilingLog::Timestamp(); + } +} + +/* static */ void ProfilingLog::Destroy() { + mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex}; + MOZ_ASSERT(gLog); + gLog = nullptr; +} + +/* static */ bool ProfilingLog::IsLockedOnCurrentThread() { + return gMutex.IsLockedOnCurrentThread(); +} + +// RAII class to lock the profiler mutex. +// It provides a mechanism to determine if it is locked or not in order for +// memory hooks to avoid re-entering the profiler locked state. +// Locking order: Profiler, ThreadRegistry, ThreadRegistration. +class MOZ_RAII PSAutoLock { + public: + PSAutoLock() + : mLock([]() -> mozilla::baseprofiler::detail::BaseProfilerMutex& { + // In DEBUG builds, *before* we attempt to lock gPSMutex, we want to + // check that the ThreadRegistry, ThreadRegistration, and ProfilingLog + // mutexes are *not* locked on this thread, to avoid inversion + // deadlocks. 
+ MOZ_ASSERT(!ThreadRegistry::IsRegistryMutexLockedOnCurrentThread()); + MOZ_ASSERT(!ThreadRegistration::IsDataMutexLockedOnCurrentThread()); + MOZ_ASSERT(!ProfilingLog::IsLockedOnCurrentThread()); + return gPSMutex; + }()) {} + + PSAutoLock(const PSAutoLock&) = delete; + void operator=(const PSAutoLock&) = delete; + + static bool IsLockedOnCurrentThread() { + return gPSMutex.IsLockedOnCurrentThread(); + } + + private: + static mozilla::baseprofiler::detail::BaseProfilerMutex gPSMutex; + mozilla::baseprofiler::detail::BaseProfilerAutoLock mLock; +}; + +/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex + PSAutoLock::gPSMutex{"Gecko Profiler mutex"}; + +// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's +// fields. +typedef const PSAutoLock& PSLockRef; + +#define PS_GET(type_, name_) \ + static type_ name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_LOCKLESS(type_, name_) \ + static type_ name_() { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_AND_SET(type_, name_) \ + PS_GET(type_, name_) \ + static void Set##name_(PSLockRef, type_ a##name_) { \ + MOZ_ASSERT(sInstance); \ + sInstance->m##name_ = a##name_; \ + } + +static constexpr size_t MAX_JS_FRAMES = + mozilla::profiler::ThreadRegistrationData::MAX_JS_FRAMES; +using JsFrame = mozilla::profiler::ThreadRegistrationData::JsFrame; +using JsFrameBuffer = mozilla::profiler::ThreadRegistrationData::JsFrameBuffer; + +// All functions in this file can run on multiple threads unless they have an +// NS_IsMainThread() assertion. + +// This class contains the profiler's core global state, i.e. that which is +// valid even when the profiler is not active. Most profile operations can't do +// anything useful when this class is not instantiated, so we release-assert +// its non-nullness in all such operations. +// +// Accesses to CorePS are guarded by gPSMutex. 
Getters and setters take a +// PSAutoLock reference as an argument as proof that the gPSMutex is currently +// locked. This makes it clear when gPSMutex is locked and helps avoid +// accidental unlocked accesses to global state. There are ways to circumvent +// this mechanism, but please don't do so without *very* good reason and a +// detailed explanation. +// +// The exceptions to this rule: +// +// - mProcessStartTime, because it's immutable; +class CorePS { + private: + CorePS() + : mProcessStartTime(TimeStamp::ProcessCreation()) +#ifdef USE_LUL_STACKWALK + , + mLul(nullptr) +#endif + { + MOZ_ASSERT(NS_IsMainThread(), + "CorePS must be created from the main thread"); + } + + ~CorePS() { +#ifdef USE_LUL_STACKWALK + delete sInstance->mLul; +#endif + } + + public: + static void Create(PSLockRef aLock) { + MOZ_ASSERT(!sInstance); + sInstance = new CorePS(); + } + + static void Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + delete sInstance; + sInstance = nullptr; + } + + // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex + // being locked. This is because CorePS is instantiated so early on the main + // thread that we don't have to worry about it being racy. 
+ static bool Exists() { return !!sInstance; } + + static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf, + size_t& aProfSize, size_t& aLulSize) { + MOZ_ASSERT(sInstance); + + aProfSize += aMallocSizeOf(sInstance); + + aProfSize += ThreadRegistry::SizeOfIncludingThis(aMallocSizeOf); + + for (auto& registeredPage : sInstance->mRegisteredPages) { + aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf); + } + + // Measurement of the following things may be added later if DMD finds it + // is worthwhile: + // - CorePS::mRegisteredPages itself (its elements' children are + // measured above) + +#if defined(USE_LUL_STACKWALK) + if (lul::LUL* lulPtr = sInstance->mLul; lulPtr) { + aLulSize += lulPtr->SizeOfIncludingThis(aMallocSizeOf); + } +#endif + } + + // No PSLockRef is needed for this field because it's immutable. + PS_GET_LOCKLESS(TimeStamp, ProcessStartTime) + + PS_GET(JsFrameBuffer&, JsFrames) + + PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages) + + static void AppendRegisteredPage(PSLockRef, + RefPtr<PageInformation>&& aRegisteredPage) { + MOZ_ASSERT(sInstance); + struct RegisteredPageComparator { + PageInformation* aA; + bool operator()(PageInformation* aB) const { return aA->Equals(aB); } + }; + + auto foundPageIter = std::find_if( + sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(), + RegisteredPageComparator{aRegisteredPage.get()}); + + if (foundPageIter != sInstance->mRegisteredPages.end()) { + if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) { + // When a BrowsingContext is loaded, the first url loaded in it will be + // about:blank, and if the principal matches, the first document loaded + // in it will share an inner window. That's why we should delete the + // intermittent about:blank if they share the inner window. + sInstance->mRegisteredPages.erase(foundPageIter); + } else { + // Do not register the same page again. 
+ return; + } + } + + MOZ_RELEASE_ASSERT( + sInstance->mRegisteredPages.append(std::move(aRegisteredPage))); + } + + static void RemoveRegisteredPage(PSLockRef, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + // Remove RegisteredPage from mRegisteredPages by given inner window ID. + sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) { + return rd->InnerWindowID() == aRegisteredInnerWindowID; + }); + } + + static void ClearRegisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mRegisteredPages.clear(); + } + + PS_GET(const Vector<BaseProfilerCount*>&, Counters) + + static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) { + MOZ_ASSERT(sInstance); + // we don't own the counter; they may be stored in static objects + MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter)); + } + + static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) { + // we may be called to remove a counter after the profiler is stopped or + // late in shutdown. + if (sInstance) { + auto* counter = std::find(sInstance->mCounters.begin(), + sInstance->mCounters.end(), aCounter); + MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end()); + sInstance->mCounters.erase(counter); + } + } + +#ifdef USE_LUL_STACKWALK + static lul::LUL* Lul() { + MOZ_RELEASE_ASSERT(sInstance); + return sInstance->mLul; + } + static void SetLul(UniquePtr<lul::LUL> aLul) { + MOZ_RELEASE_ASSERT(sInstance); + MOZ_RELEASE_ASSERT( + sInstance->mLul.compareExchange(nullptr, aLul.release())); + } +#endif + + PS_GET_AND_SET(const nsACString&, ProcessName) + PS_GET_AND_SET(const nsACString&, ETLDplus1) + + private: + // The singleton instance + static CorePS* sInstance; + + // The time that the process started. + const TimeStamp mProcessStartTime; + + // Info on all the registered pages. + // InnerWindowIDs in mRegisteredPages are unique. 
+ Vector<RefPtr<PageInformation>> mRegisteredPages; + + // Non-owning pointers to all active counters + Vector<BaseProfilerCount*> mCounters; + +#ifdef USE_LUL_STACKWALK + // LUL's state. Null prior to the first activation, non-null thereafter. + // Owned by this CorePS. + mozilla::Atomic<lul::LUL*> mLul; +#endif + + // Process name, provided by child process initialization code. + nsAutoCString mProcessName; + // Private name, provided by child process initialization code (eTLD+1 in + // fission) + nsAutoCString mETLDplus1; + + // This memory buffer is used by the MergeStacks mechanism. Previously it was + // stack allocated, but this led to a stack overflow, as it was too much + // memory. Here the buffer can be pre-allocated, and shared with the + // MergeStacks feature as needed. MergeStacks is only run while holding the + // lock, so it is safe to have only one instance allocated for all of the + // threads. + JsFrameBuffer mJsFrames; +}; + +CorePS* CorePS::sInstance = nullptr; + +void locked_profiler_add_sampled_counter(PSLockRef aLock, + BaseProfilerCount* aCounter) { + CorePS::AppendCounter(aLock, aCounter); +} + +void locked_profiler_remove_sampled_counter(PSLockRef aLock, + BaseProfilerCount* aCounter) { + // Note: we don't enforce a final sample, though we could do so if the + // profiler was active + CorePS::RemoveCounter(aLock, aCounter); +} + +class SamplerThread; + +static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, + double aInterval, uint32_t aFeatures); + +struct LiveProfiledThreadData { + UniquePtr<ProfiledThreadData> mProfiledThreadData; +}; + +// The buffer size is provided as a number of "entries", this is their size in +// bytes. +constexpr static uint32_t scBytesPerEntry = 8; + +// This class contains the profiler's global state that is valid only when the +// profiler is active. When not instantiated, the profiler is inactive. 
+// +// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as +// CorePS. +// +class ActivePS { + private: + // We need to decide how many chunks of what size we want to fit in the given + // total maximum capacity for this process, in the (likely) context of + // multiple processes doing the same choice and having an inter-process + // mechanism to control the overall memory limit. + + // Minimum chunk size allowed, enough for at least one stack. + constexpr static uint32_t scMinimumChunkSize = + 2 * ProfileBufferChunkManager::scExpectedMaximumStackSize; + + // Ideally we want at least 2 unreleased chunks to work with (1 current and 1 + // next), and 2 released chunks (so that one can be recycled when old, leaving + // one with some data). + constexpr static uint32_t scMinimumNumberOfChunks = 4; + + // And we want to limit chunks to a maximum size, which is a compromise + // between: + // - A big size, which helps with reducing the rate of allocations and IPCs. + // - A small size, which helps with equalizing the duration of recorded data + // (as the inter-process controller will discard the oldest chunks in all + // Firefox processes). + constexpr static uint32_t scMaximumChunkSize = 1024 * 1024; + + public: + // We should be able to store at least the minimum number of the smallest- + // possible chunks. + constexpr static uint32_t scMinimumBufferSize = + scMinimumNumberOfChunks * scMinimumChunkSize; + // Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler: + // https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java + constexpr static uint32_t scMinimumBufferEntries = + scMinimumBufferSize / scBytesPerEntry; + + // Limit to 2GiB. 
+ constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u; + constexpr static uint32_t scMaximumBufferEntries = + scMaximumBufferSize / scBytesPerEntry; + + constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) { + if (aEntries <= scMinimumBufferEntries) { + return scMinimumBufferEntries; + } + if (aEntries >= scMaximumBufferEntries) { + return scMaximumBufferEntries; + } + return aEntries; + } + + private: + constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) { + return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) * + scBytesPerEntry / scMinimumNumberOfChunks, + size_t(scMaximumChunkSize))); + } + + static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) { + // Filter out any features unavailable in this platform/configuration. + aFeatures &= AvailableFeatures(); + + // Some features imply others. + if (aFeatures & ProfilerFeature::FileIOAll) { + aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO; + } else if (aFeatures & ProfilerFeature::FileIO) { + aFeatures |= ProfilerFeature::MainThreadIO; + } + + if (aFeatures & ProfilerFeature::CPUAllThreads) { + aFeatures |= ProfilerFeature::CPUUtilization; + } + + return aFeatures; + } + + bool ShouldInterposeIOs() { + return ProfilerFeature::HasMainThreadIO(mFeatures) || + ProfilerFeature::HasFileIO(mFeatures) || + ProfilerFeature::HasFileIOAll(mFeatures); + } + + ActivePS( + PSLockRef aLock, const TimeStamp& aProfilingStartTime, + PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, + const Maybe<double>& aDuration, + UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull) + : mProfilingStartTime(aProfilingStartTime), + mGeneration(sNextGeneration++), + mCapacity(aCapacity), + mDuration(aDuration), + mInterval(aInterval), + mFeatures(AdjustFeatures(aFeatures, aFilterCount)), + mActiveTabID(aActiveTabID), + mProfileBufferChunkManager( 
+ aChunkManagerOrNull + ? std::move(aChunkManagerOrNull) + : MakeUnique<ProfileBufferChunkManagerWithLocalLimit>( + size_t(ClampToAllowedEntries(aCapacity.Value())) * + scBytesPerEntry, + ChunkSizeForEntries(aCapacity.Value()))), + mProfileBuffer([this]() -> ProfileChunkedBuffer& { + ProfileChunkedBuffer& coreBuffer = profiler_get_core_buffer(); + coreBuffer.SetChunkManagerIfDifferent(*mProfileBufferChunkManager); + return coreBuffer; + }()), + mMaybeProcessCPUCounter(ProfilerFeature::HasProcessCPU(aFeatures) + ? new ProcessCPUCounter(aLock) + : nullptr), + mMaybePowerCounters(nullptr), + // The new sampler thread doesn't start sampling immediately because the + // main loop within Run() is blocked until this function's caller + // unlocks gPSMutex. + mSamplerThread( + NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)), + mIsPaused(false), + mIsSamplingPaused(false) { + ProfilingLog::Init(); + + // Deep copy and lower-case aFilters. + MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount)); + MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount)); + for (uint32_t i = 0; i < aFilterCount; ++i) { + mFilters[i] = aFilters[i]; + mFiltersLowered[i].reserve(mFilters[i].size()); + std::transform(mFilters[i].cbegin(), mFilters[i].cend(), + std::back_inserter(mFiltersLowered[i]), ::tolower); + } + +#if !defined(RELEASE_OR_BETA) + if (ShouldInterposeIOs()) { + // We need to register the observer on the main thread, because we want + // to observe IO that happens on the main thread. + // IOInterposer needs to be initialized before calling + // IOInterposer::Register or our observer will be silently dropped. 
+ if (NS_IsMainThread()) { + IOInterposer::Init(); + IOInterposer::Register(IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("ActivePS::ActivePS", []() { + // Note: This could theoretically happen after ActivePS gets + // destroyed, but it's ok: + // - The Observer always checks that the profiler is (still) + // active before doing its work. + // - The destruction should happen on the same thread as this + // construction, so the un-registration will also be dispatched + // and queued on the main thread, and run after this. + IOInterposer::Init(); + IOInterposer::Register( + IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + })); + } + } +#endif + + if (ProfilerFeature::HasPower(aFeatures)) { + mMaybePowerCounters = new PowerCounters(); + for (const auto& powerCounter : mMaybePowerCounters->GetCounters()) { + locked_profiler_add_sampled_counter(aLock, powerCounter); + } + } + } + + ~ActivePS() { + MOZ_ASSERT( + !mMaybeProcessCPUCounter, + "mMaybeProcessCPUCounter should have been deleted before ~ActivePS()"); + MOZ_ASSERT( + !mMaybePowerCounters, + "mMaybePowerCounters should have been deleted before ~ActivePS()"); + +#if !defined(RELEASE_OR_BETA) + if (ShouldInterposeIOs()) { + // We need to unregister the observer on the main thread, because that's + // where we've registered it. + if (NS_IsMainThread()) { + IOInterposer::Unregister(IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("ActivePS::~ActivePS", []() { + IOInterposer::Unregister( + IOInterposeObserver::OpAll, + &ProfilerIOInterposeObserver::GetInstance()); + })); + } + } +#endif + if (mProfileBufferChunkManager) { + // We still control the chunk manager, remove it from the core buffer. 
+ profiler_get_core_buffer().ResetChunkManager(); + } + + ProfilingLog::Destroy(); + } + + bool ThreadSelected(const char* aThreadName) { + if (mFiltersLowered.empty()) { + return true; + } + + std::string name = aThreadName; + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + + for (const auto& filter : mFiltersLowered) { + if (filter == "*") { + return true; + } + + // Crude, non UTF-8 compatible, case insensitive substring search + if (name.find(filter) != std::string::npos) { + return true; + } + + // If the filter is "pid:<my pid>", profile all threads. + if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) { + return true; + } + } + + return false; + } + + public: + static void Create( + PSLockRef aLock, const TimeStamp& aProfilingStartTime, + PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, + const Maybe<double>& aDuration, + UniquePtr<ProfileBufferChunkManagerWithLocalLimit> aChunkManagerOrNull) { + MOZ_ASSERT(!sInstance); + sInstance = new ActivePS(aLock, aProfilingStartTime, aCapacity, aInterval, + aFeatures, aFilters, aFilterCount, aActiveTabID, + aDuration, std::move(aChunkManagerOrNull)); + } + + [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + if (sInstance->mMaybeProcessCPUCounter) { + locked_profiler_remove_sampled_counter( + aLock, sInstance->mMaybeProcessCPUCounter); + delete sInstance->mMaybeProcessCPUCounter; + sInstance->mMaybeProcessCPUCounter = nullptr; + } + + if (sInstance->mMaybePowerCounters) { + for (const auto& powerCounter : + sInstance->mMaybePowerCounters->GetCounters()) { + locked_profiler_remove_sampled_counter(aLock, powerCounter); + } + delete sInstance->mMaybePowerCounters; + sInstance->mMaybePowerCounters = nullptr; + } + + auto samplerThread = sInstance->mSamplerThread; + delete sInstance; + sInstance = nullptr; + + return samplerThread; + } + + static bool Exists(PSLockRef) { 
return !!sInstance; } + + static bool Equals(PSLockRef, PowerOfTwo32 aCapacity, + const Maybe<double>& aDuration, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, uint64_t aActiveTabID) { + MOZ_ASSERT(sInstance); + if (sInstance->mCapacity != aCapacity || + sInstance->mDuration != aDuration || + sInstance->mInterval != aInterval || + sInstance->mFeatures != aFeatures || + sInstance->mFilters.length() != aFilterCount || + sInstance->mActiveTabID != aActiveTabID) { + return false; + } + + for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) { + if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) { + return false; + } + } + return true; + } + + static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(sInstance); + + size_t n = aMallocSizeOf(sInstance); + + n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mLiveProfiledThreads (both the array itself, and the contents) + // - mDeadProfiledThreads (both the array itself, and the contents) + // + + return n; + } + + static ThreadProfilingFeatures ProfilingFeaturesForThread( + PSLockRef aLock, const ThreadRegistrationInfo& aInfo) { + MOZ_ASSERT(sInstance); + if (sInstance->ThreadSelected(aInfo.Name())) { + // This thread was selected by the user, record everything. 
+ return ThreadProfilingFeatures::Any; + } + ThreadProfilingFeatures features = ThreadProfilingFeatures::NotProfiled; + if (ActivePS::FeatureCPUAllThreads(aLock)) { + features = Combine(features, ThreadProfilingFeatures::CPUUtilization); + } + if (ActivePS::FeatureSamplingAllThreads(aLock)) { + features = Combine(features, ThreadProfilingFeatures::Sampling); + } + if (ActivePS::FeatureMarkersAllThreads(aLock)) { + features = Combine(features, ThreadProfilingFeatures::Markers); + } + return features; + } + + [[nodiscard]] static bool AppendPostSamplingCallback( + PSLockRef, PostSamplingCallback&& aCallback); + + // Writes out the current active configuration of the profile. + static void WriteActiveConfiguration( + PSLockRef aLock, JSONWriter& aWriter, + const Span<const char>& aPropertyName = MakeStringSpan("")) { + if (!sInstance) { + if (!aPropertyName.empty()) { + aWriter.NullProperty(aPropertyName); + } else { + aWriter.NullElement(); + } + return; + }; + + if (!aPropertyName.empty()) { + aWriter.StartObjectProperty(aPropertyName); + } else { + aWriter.StartObjectElement(); + } + + { + aWriter.StartArrayProperty("features"); +#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_) \ + if (profiler_feature_active(ProfilerFeature::Name_)) { \ + aWriter.StringElement(str_); \ + } + + PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES) +#undef WRITE_ACTIVE_FEATURES + aWriter.EndArray(); + } + { + aWriter.StartArrayProperty("threads"); + for (const auto& filter : sInstance->mFilters) { + aWriter.StringElement(filter); + } + aWriter.EndArray(); + } + { + // Now write all the simple values. + + // The interval is also available on profile.meta.interval + aWriter.DoubleProperty("interval", sInstance->mInterval); + aWriter.IntProperty("capacity", sInstance->mCapacity.Value()); + if (sInstance->mDuration) { + aWriter.DoubleProperty("duration", sInstance->mDuration.value()); + } + // Here, we are converting uint64_t to double. 
Tab IDs are + // being created using `nsContentUtils::GenerateProcessSpecificId`, which + // is specifically designed to only use 53 of the 64 bits to be lossless + // when passed into and out of JS as a double. + aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID); + } + aWriter.EndObject(); + } + + PS_GET_LOCKLESS(TimeStamp, ProfilingStartTime) + + PS_GET(uint32_t, Generation) + + PS_GET(PowerOfTwo32, Capacity) + + PS_GET(Maybe<double>, Duration) + + PS_GET(double, Interval) + + PS_GET(uint32_t, Features) + + PS_GET(uint64_t, ActiveTabID) + +#define PS_GET_FEATURE(n_, str_, Name_, desc_) \ + static bool Feature##Name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return ProfilerFeature::Has##Name_(sInstance->mFeatures); \ + } + + PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE) + +#undef PS_GET_FEATURE + + static uint32_t JSFlags(PSLockRef aLock) { + uint32_t Flags = 0; + Flags |= + FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0; + + Flags |= FeatureJSAllocations(aLock) + ? uint32_t(JSInstrumentationFlags::Allocations) + : 0; + return Flags; + } + + PS_GET(const Vector<std::string>&, Filters) + PS_GET(const Vector<std::string>&, FiltersLowered) + + // Not using PS_GET, because only the "Controlled" interface of + // `mProfileBufferChunkManager` should be exposed here. 
+ static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager( + PSLockRef) { + MOZ_ASSERT(sInstance); + MOZ_ASSERT(sInstance->mProfileBufferChunkManager); + return *sInstance->mProfileBufferChunkManager; + } + + static void FulfillChunkRequests(PSLockRef) { + MOZ_ASSERT(sInstance); + if (sInstance->mProfileBufferChunkManager) { + sInstance->mProfileBufferChunkManager->FulfillChunkRequests(); + } + } + + static ProfileBuffer& Buffer(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mProfileBuffer; + } + + static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mLiveProfiledThreads; + } + + struct ProfiledThreadListElement { + TimeStamp mRegisterTime; + JSContext* mJSContext; // Null for unregistered threads. + ProfiledThreadData* mProfiledThreadData; + }; + using ProfiledThreadList = Vector<ProfiledThreadListElement>; + + // Returns a ProfiledThreadList with all threads that should be included in a + // profile, both for threads that are still registered, and for threads that + // have been unregistered but still have data in the buffer. + // The returned array is sorted by thread register time. + // Do not hold on to the return value past LockedRegistry. + static ProfiledThreadList ProfiledThreads( + ThreadRegistry::LockedRegistry& aLockedRegistry, PSLockRef aLock) { + MOZ_ASSERT(sInstance); + ProfiledThreadList array; + MOZ_RELEASE_ASSERT( + array.initCapacity(sInstance->mLiveProfiledThreads.length() + + sInstance->mDeadProfiledThreads.length())); + + for (ThreadRegistry::OffThreadRef offThreadRef : aLockedRegistry) { + ProfiledThreadData* profiledThreadData = + offThreadRef.UnlockedRWForLockedProfilerRef().GetProfiledThreadData( + aLock); + if (!profiledThreadData) { + // This thread was not profiled, continue with the next one. 
+ continue; + } + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData = + offThreadRef.LockedRWFromAnyThread(); + MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{ + profiledThreadData->Info().RegisterTime(), + lockedThreadData->GetJSContext(), profiledThreadData})); + } + + for (auto& t : sInstance->mDeadProfiledThreads) { + MOZ_RELEASE_ASSERT(array.append(ProfiledThreadListElement{ + t->Info().RegisterTime(), (JSContext*)nullptr, t.get()})); + } + + std::sort(array.begin(), array.end(), + [](const ProfiledThreadListElement& a, + const ProfiledThreadListElement& b) { + return a.mRegisterTime < b.mRegisterTime; + }); + return array; + } + + static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + Vector<RefPtr<PageInformation>> array; + for (auto& d : CorePS::RegisteredPages(aLock)) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + for (auto& d : sInstance->mDeadProfiledPages) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + // We don't need to sort the pages like threads since we won't show them + // as a list. + return array; + } + + static ProfiledThreadData* AddLiveProfiledThread( + PSLockRef, UniquePtr<ProfiledThreadData>&& aProfiledThreadData) { + MOZ_ASSERT(sInstance); + MOZ_RELEASE_ASSERT(sInstance->mLiveProfiledThreads.append( + LiveProfiledThreadData{std::move(aProfiledThreadData)})); + + // Return a weak pointer to the ProfiledThreadData object. + return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get(); + } + + static void UnregisterThread(PSLockRef aLockRef, + ProfiledThreadData* aProfiledThreadData) { + MOZ_ASSERT(sInstance); + + DiscardExpiredDeadProfiledThreads(aLockRef); + + // Find the right entry in the mLiveProfiledThreads array and remove the + // element, moving the ProfiledThreadData object for the thread into the + // mDeadProfiledThreads array. 
+ for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) { + LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i]; + if (thread.mProfiledThreadData == aProfiledThreadData) { + thread.mProfiledThreadData->NotifyUnregistered( + sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append( + std::move(thread.mProfiledThreadData))); + sInstance->mLiveProfiledThreads.erase( + &sInstance->mLiveProfiledThreads[i]); + return; + } + } + } + + // This is a counter to collect process CPU utilization during profiling. + // It cannot be a raw `ProfilerCounter` because we need to manually add/remove + // it while the profiler lock is already held. + class ProcessCPUCounter final : public BaseProfilerCount { + public: + explicit ProcessCPUCounter(PSLockRef aLock) + : BaseProfilerCount("processCPU", &mCounter, nullptr, "CPU", + "Process CPU utilization") { + // Adding on construction, so it's ready before the sampler starts. + locked_profiler_add_sampled_counter(aLock, this); + // Note: Removed from ActivePS::Destroy, because a lock is needed. + } + + void Add(int64_t aNumber) { mCounter += aNumber; } + + private: + ProfilerAtomicSigned mCounter; + }; + PS_GET(ProcessCPUCounter*, MaybeProcessCPUCounter); + + PS_GET(PowerCounters*, MaybePowerCounters); + + PS_GET_AND_SET(bool, IsPaused) + + // True if sampling is paused (though generic `SetIsPaused()` or specific + // `SetIsSamplingPaused()`). 
+ static bool IsSamplingPaused(PSLockRef lock) { + MOZ_ASSERT(sInstance); + return IsPaused(lock) || sInstance->mIsSamplingPaused; + } + + static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) { + MOZ_ASSERT(sInstance); + sInstance->mIsSamplingPaused = aIsSamplingPaused; + } + + static void DiscardExpiredDeadProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead threads that were unregistered before bufferRangeStart. + sInstance->mDeadProfiledThreads.eraseIf( + [bufferRangeStart]( + const UniquePtr<ProfiledThreadData>& aProfiledThreadData) { + Maybe<uint64_t> bufferPosition = + aProfiledThreadData->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this thread"); + return *bufferPosition < bufferRangeStart; + }); + } + + static void UnregisterPage(PSLockRef aLock, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + auto& registeredPages = CorePS::RegisteredPages(aLock); + for (size_t i = 0; i < registeredPages.length(); i++) { + RefPtr<PageInformation>& page = registeredPages[i]; + if (page->InnerWindowID() == aRegisteredInnerWindowID) { + page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT( + sInstance->mDeadProfiledPages.append(std::move(page))); + registeredPages.erase(®isteredPages[i--]); + } + } + } + + static void DiscardExpiredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead pages that were unregistered before + // bufferRangeStart. 
+ sInstance->mDeadProfiledPages.eraseIf( + [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) { + Maybe<uint64_t> bufferPosition = + aProfiledPage->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this page"); + return *bufferPosition < bufferRangeStart; + }); + } + + static void ClearUnregisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mDeadProfiledPages.clear(); + } + + static void ClearExpiredExitProfiles(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard exit profiles that were gathered before our buffer RangeStart. + // If we have started to overwrite our data from when the Base profile was + // added, we should get rid of that Base profile because it's now older than + // our oldest Gecko profile data. + // + // When adding: (In practice the starting buffer should be empty) + // v Start == End + // | <-- Buffer range, initially empty. + // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it + // + // Later, still in range: + // v Start v End + // |=========| <-- Buffer range growing. + // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it + // + // Even later, now out of range: + // v Start v End + // |============| <-- Buffer range full and sliding. + // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! 
-> Discard it + if (sInstance->mBaseProfileThreads && + sInstance->mGeckoIndexWhenBaseProfileAdded + .ConvertToProfileBufferIndex() < + profiler_get_core_buffer().GetState().mRangeStart) { + DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p", + sInstance->mBaseProfileThreads.get()); + sInstance->mBaseProfileThreads.reset(); + } + sInstance->mExitProfiles.eraseIf( + [bufferRangeStart](const ExitProfile& aExitProfile) { + return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart; + }); + } + + static void AddBaseProfileThreads(PSLockRef aLock, + UniquePtr<char[]> aBaseProfileThreads) { + MOZ_ASSERT(sInstance); + DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get()); + sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads); + sInstance->mGeckoIndexWhenBaseProfileAdded = + ProfileBufferBlockIndex::CreateFromProfileBufferIndex( + profiler_get_core_buffer().GetState().mRangeEnd); + } + + static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p", + sInstance->mBaseProfileThreads.get()); + return std::move(sInstance->mBaseProfileThreads); + } + + static void AddExitProfile(PSLockRef aLock, const nsACString& aExitProfile) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(ExitProfile{ + nsCString(aExitProfile), sInstance->mProfileBuffer.BufferRangeEnd()})); + } + + static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + Vector<nsCString> profiles; + MOZ_RELEASE_ASSERT( + profiles.initCapacity(sInstance->mExitProfiles.length())); + for (auto& profile : sInstance->mExitProfiles) { + MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON))); + } + sInstance->mExitProfiles.clear(); + return profiles; + } + +#if defined(MOZ_REPLACE_MALLOC) && 
defined(MOZ_PROFILER_MEMORY) + static void SetMemoryCounter(const BaseProfilerCount* aMemoryCounter) { + MOZ_ASSERT(sInstance); + + sInstance->mMemoryCounter = aMemoryCounter; + } + + static bool IsMemoryCounter(const BaseProfilerCount* aMemoryCounter) { + MOZ_ASSERT(sInstance); + + return sInstance->mMemoryCounter == aMemoryCounter; + } +#endif + + private: + // The singleton instance. + static ActivePS* sInstance; + + const TimeStamp mProfilingStartTime; + + // We need to track activity generations. If we didn't we could have the + // following scenario. + // + // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks + // gPSMutex, deletes the SamplerThread (which does a join). + // + // - profiler_start() runs on a different thread, locks gPSMutex, + // re-instantiates ActivePS, unlocks gPSMutex -- all before the join + // completes. + // + // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated, + // and continues as if the start/stop pair didn't occur. Also + // profiler_stop() is stuck, unable to finish. + // + // By checking ActivePS *and* the generation, we can avoid this scenario. + // sNextGeneration is used to track the next generation number; it is static + // because it must persist across different ActivePS instantiations. + const uint32_t mGeneration; + static uint32_t sNextGeneration; + + // The maximum number of entries in mProfileBuffer. + const PowerOfTwo32 mCapacity; + + // The maximum duration of entries in mProfileBuffer, in seconds. + const Maybe<double> mDuration; + + // The interval between samples, measured in milliseconds. + const double mInterval; + + // The profile features that are enabled. + const uint32_t mFeatures; + + // Substrings of names of threads we want to profile. + Vector<std::string> mFilters; + Vector<std::string> mFiltersLowered; + + // ID of the active browser screen's active tab. + // It's being used to determine the profiled tab. It's "0" if we failed to + // get the ID. 
+ const uint64_t mActiveTabID; + + // The chunk manager used by `mProfileBuffer` below. + // May become null if it gets transferred ouf of the Gecko Profiler. + UniquePtr<ProfileBufferChunkManagerWithLocalLimit> mProfileBufferChunkManager; + + // The buffer into which all samples are recorded. + ProfileBuffer mProfileBuffer; + + // ProfiledThreadData objects for any threads that were profiled at any point + // during this run of the profiler: + // - mLiveProfiledThreads contains all threads that are still registered, and + // - mDeadProfiledThreads contains all threads that have already been + // unregistered but for which there is still data in the profile buffer. + Vector<LiveProfiledThreadData> mLiveProfiledThreads; + Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads; + + // Info on all the dead pages. + // Registered pages are being moved to this array after unregistration. + // We are keeping them in case we need them in the profile data. + // We are removing them when we ensure that we won't need them anymore. + Vector<RefPtr<PageInformation>> mDeadProfiledPages; + + // Used to collect process CPU utilization values, if the feature is on. + ProcessCPUCounter* mMaybeProcessCPUCounter; + + // Used to collect power use data, if the power feature is on. + PowerCounters* mMaybePowerCounters; + + // The current sampler thread. This class is not responsible for destroying + // the SamplerThread object; the Destroy() method returns it so the caller + // can destroy it. + SamplerThread* const mSamplerThread; + + // Is the profiler fully paused? + bool mIsPaused; + + // Is the profiler periodic sampling paused? + bool mIsSamplingPaused; + + // Optional startup profile thread array from BaseProfiler. 
+ UniquePtr<char[]> mBaseProfileThreads; + ProfileBufferBlockIndex mGeckoIndexWhenBaseProfileAdded; + + struct ExitProfile { + nsCString mJSON; + uint64_t mBufferPositionAtGatherTime; + }; + Vector<ExitProfile> mExitProfiles; + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + Atomic<const BaseProfilerCount*> mMemoryCounter; +#endif +}; + +ActivePS* ActivePS::sInstance = nullptr; +uint32_t ActivePS::sNextGeneration = 0; + +#undef PS_GET +#undef PS_GET_LOCKLESS +#undef PS_GET_AND_SET + +using ProfilerStateChangeMutex = + mozilla::baseprofiler::detail::BaseProfilerMutex; +using ProfilerStateChangeLock = + mozilla::baseprofiler::detail::BaseProfilerAutoLock; +static ProfilerStateChangeMutex gProfilerStateChangeMutex; + +struct IdentifiedProfilingStateChangeCallback { + ProfilingStateSet mProfilingStateSet; + ProfilingStateChangeCallback mProfilingStateChangeCallback; + uintptr_t mUniqueIdentifier; + + explicit IdentifiedProfilingStateChangeCallback( + ProfilingStateSet aProfilingStateSet, + ProfilingStateChangeCallback&& aProfilingStateChangeCallback, + uintptr_t aUniqueIdentifier) + : mProfilingStateSet(aProfilingStateSet), + mProfilingStateChangeCallback(aProfilingStateChangeCallback), + mUniqueIdentifier(aUniqueIdentifier) {} +}; +using IdentifiedProfilingStateChangeCallbackUPtr = + UniquePtr<IdentifiedProfilingStateChangeCallback>; + +static Vector<IdentifiedProfilingStateChangeCallbackUPtr> + mIdentifiedProfilingStateChangeCallbacks; + +void profiler_add_state_change_callback( + ProfilingStateSet aProfilingStateSet, + ProfilingStateChangeCallback&& aCallback, + uintptr_t aUniqueIdentifier /* = 0 */) { + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + ProfilerStateChangeLock lock(gProfilerStateChangeMutex); + +#ifdef DEBUG + // Check if a non-zero id is not already used. Bug forgive it in non-DEBUG + // builds; in the worst case they may get removed too early. 
+ if (aUniqueIdentifier != 0) { + for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback : + mIdentifiedProfilingStateChangeCallbacks) { + MOZ_ASSERT(idedCallback->mUniqueIdentifier != aUniqueIdentifier); + } + } +#endif // DEBUG + + if (aProfilingStateSet.contains(ProfilingState::AlreadyActive) && + profiler_is_active()) { + aCallback(ProfilingState::AlreadyActive); + } + + (void)mIdentifiedProfilingStateChangeCallbacks.append( + MakeUnique<IdentifiedProfilingStateChangeCallback>( + aProfilingStateSet, std::move(aCallback), aUniqueIdentifier)); +} + +// Remove the callback with the given identifier. +void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) { + MOZ_ASSERT(aUniqueIdentifier != 0); + if (aUniqueIdentifier == 0) { + // Forgive zero in non-DEBUG builds. + return; + } + + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + ProfilerStateChangeLock lock(gProfilerStateChangeMutex); + + mIdentifiedProfilingStateChangeCallbacks.eraseIf( + [aUniqueIdentifier]( + const IdentifiedProfilingStateChangeCallbackUPtr& aIdedCallback) { + if (aIdedCallback->mUniqueIdentifier != aUniqueIdentifier) { + return false; + } + if (aIdedCallback->mProfilingStateSet.contains( + ProfilingState::RemovingCallback)) { + aIdedCallback->mProfilingStateChangeCallback( + ProfilingState::RemovingCallback); + } + return true; + }); +} + +static void invoke_profiler_state_change_callbacks( + ProfilingState aProfilingState) { + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + ProfilerStateChangeLock lock(gProfilerStateChangeMutex); + + for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback : + mIdentifiedProfilingStateChangeCallbacks) { + if (idedCallback->mProfilingStateSet.contains(aProfilingState)) { + idedCallback->mProfilingStateChangeCallback(aProfilingState); + } + } +} + +Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0); + +// The name of the main thread. 
+static const char* const kMainThreadName = "GeckoMain"; + +//////////////////////////////////////////////////////////////////////// +// BEGIN sampling/unwinding code + +// The registers used for stack unwinding and a few other sampling purposes. +// The ctor does nothing; users are responsible for filling in the fields. +class Registers { + public: + Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {} + +#if defined(HAVE_NATIVE_UNWIND) + // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample. + void SyncPopulate(); +#endif + + void Clear() { memset(this, 0, sizeof(*this)); } + + // These fields are filled in by + // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace + // samples, and by SyncPopulate() for synchronous samples. + Address mPC; // Instruction pointer. + Address mSP; // Stack pointer. + Address mFP; // Frame pointer. + Address mLR; // ARM link register. +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // This contains all the registers, which means it duplicates the four fields + // above. This is ok. + ucontext_t* mContext; // The context from the signal handler or below. + ucontext_t mContextSyncStorage; // Storage for sync stack unwinding. +#endif +}; + +// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time +// looping on corrupted stacks. +static const size_t MAX_NATIVE_FRAMES = 1024; + +struct NativeStack { + void* mPCs[MAX_NATIVE_FRAMES]; + void* mSPs[MAX_NATIVE_FRAMES]; + size_t mCount; // Number of frames filled. 
+ + NativeStack() : mPCs(), mSPs(), mCount(0) {} +}; + +Atomic<bool> WALKING_JS_STACK(false); + +struct AutoWalkJSStack { + bool walkAllowed; + + AutoWalkJSStack() : walkAllowed(false) { + walkAllowed = WALKING_JS_STACK.compareExchange(false, true); + } + + ~AutoWalkJSStack() { + if (walkAllowed) { + WALKING_JS_STACK = false; + } + } +}; + +class StackWalkControl { + public: + struct ResumePoint { + // If lost, the stack walker should resume at these values. + void* resumeSp; // If null, stop the walker here, don't resume again. + void* resumeBp; + void* resumePc; + }; + +#if ((defined(USE_MOZ_STACK_WALK) || defined(USE_FRAME_POINTER_STACK_WALK)) && \ + defined(GP_ARCH_amd64)) + public: + static constexpr bool scIsSupported = true; + + void Clear() { mResumePointCount = 0; } + + size_t ResumePointCount() const { return mResumePointCount; } + + static constexpr size_t MaxResumePointCount() { + return scMaxResumePointCount; + } + + // Add a resume point. Note that adding anything past MaxResumePointCount() + // would silently fail. In practice this means that stack walking may still + // lose native frames. + void AddResumePoint(ResumePoint&& aResumePoint) { + // If SP is null, we expect BP and PC to also be null. + MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumeBp); + MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumePc); + + // If BP and/or PC are not null, SP must not be null. (But we allow BP/PC to + // be null even if SP is not null.) + MOZ_ASSERT_IF(aResumePoint.resumeBp, aResumePoint.resumeSp); + MOZ_ASSERT_IF(aResumePoint.resumePc, aResumePoint.resumeSp); + + if (mResumePointCount < scMaxResumePointCount) { + mResumePoint[mResumePointCount] = std::move(aResumePoint); + ++mResumePointCount; + } + } + + // Only allow non-modifying range-for loops. 
+ const ResumePoint* begin() const { return &mResumePoint[0]; } + const ResumePoint* end() const { return &mResumePoint[mResumePointCount]; } + + // Find the next resume point that would be a caller of the function with the + // given SP; i.e., the resume point with the closest resumeSp > aSp. + const ResumePoint* GetResumePointCallingSp(void* aSp) const { + const ResumePoint* callingResumePoint = nullptr; + for (const ResumePoint& resumePoint : *this) { + if (resumePoint.resumeSp && // This is a potential resume point. + resumePoint.resumeSp > aSp && // It is a caller of the given SP. + (!callingResumePoint || // This is the first candidate. + resumePoint.resumeSp < callingResumePoint->resumeSp) // Or better. + ) { + callingResumePoint = &resumePoint; + } + } + return callingResumePoint; + } + + private: + size_t mResumePointCount = 0; + static constexpr size_t scMaxResumePointCount = 32; + ResumePoint mResumePoint[scMaxResumePointCount]; + +#else + public: + static constexpr bool scIsSupported = false; + // Discarded constexpr-if statements are still checked during compilation, + // these declarations are necessary for that, even if not actually used. + void Clear(); + size_t ResumePointCount(); + static constexpr size_t MaxResumePointCount(); + void AddResumePoint(ResumePoint&& aResumePoint); + const ResumePoint* begin() const; + const ResumePoint* end() const; + const ResumePoint* GetResumePointCallingSp(void* aSp) const; +#endif +}; + +// Make a copy of the JS stack into a JSFrame array, and return the number of +// copied frames. +// This copy is necessary since, like the native stack, the JS stack is iterated +// youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks. 
+static uint32_t ExtractJsFrames( + bool aIsSynchronous, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, ProfilerStackCollector& aCollector, + JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) { + MOZ_ASSERT(aJsFrames, + "ExtractJsFrames should only be called if there is a " + "JsFrameBuffer to fill."); + + uint32_t jsFramesCount = 0; + + // Only walk jit stack if profiling frame iterator is turned on. + JSContext* context = aThreadData.GetJSContext(); + if (context && JS::IsProfilingEnabledForContext(context)) { + AutoWalkJSStack autoWalkJSStack; + + if (autoWalkJSStack.walkAllowed) { + JS::ProfilingFrameIterator::RegisterState registerState; + registerState.pc = aRegs.mPC; + registerState.sp = aRegs.mSP; + registerState.lr = aRegs.mLR; + registerState.fp = aRegs.mFP; + + // Non-periodic sampling passes Nothing() as the buffer write position to + // ProfilingFrameIterator to avoid incorrectly resetting the buffer + // position of sampled JIT frames inside the JS engine. + Maybe<uint64_t> samplePosInBuffer; + if (!aIsSynchronous) { + // aCollector.SamplePositionInBuffer() will return Nothing() when + // profiler_suspend_and_sample_thread is called from the background hang + // reporter. 
+ samplePosInBuffer = aCollector.SamplePositionInBuffer(); + } + + for (JS::ProfilingFrameIterator jsIter(context, registerState, + samplePosInBuffer); + !jsIter.done(); ++jsIter) { + if (aIsSynchronous || jsIter.isWasm()) { + jsFramesCount += + jsIter.extractStack(aJsFrames, jsFramesCount, MAX_JS_FRAMES); + if (jsFramesCount == MAX_JS_FRAMES) { + break; + } + } else { + Maybe<JS::ProfilingFrameIterator::Frame> frame = + jsIter.getPhysicalFrameWithoutLabel(); + if (frame.isSome()) { + aJsFrames[jsFramesCount++] = std::move(frame).ref(); + if (jsFramesCount == MAX_JS_FRAMES) { + break; + } + } + } + + if constexpr (StackWalkControl::scIsSupported) { + if (aStackWalkControlIfSupported) { + jsIter.getCppEntryRegisters().apply( + [&](const JS::ProfilingFrameIterator::RegisterState& + aCppEntry) { + StackWalkControl::ResumePoint resumePoint; + resumePoint.resumeSp = aCppEntry.sp; + resumePoint.resumeBp = aCppEntry.fp; + resumePoint.resumePc = aCppEntry.pc; + aStackWalkControlIfSupported->AddResumePoint( + std::move(resumePoint)); + }); + } + } else { + MOZ_ASSERT(!aStackWalkControlIfSupported, + "aStackWalkControlIfSupported should be null when " + "!StackWalkControl::scIsSupported"); + (void)aStackWalkControlIfSupported; + } + } + } + } + + return jsFramesCount; +} + +// Merges the profiling stack, native stack, and JS stack, outputting the +// details to aCollector. +static void MergeStacks( + uint32_t aFeatures, bool aIsSynchronous, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, const NativeStack& aNativeStack, + ProfilerStackCollector& aCollector, JsFrame* aJsFrames, + uint32_t aJsFramesCount) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. 
+ + MOZ_ASSERT_IF(!aJsFrames, aJsFramesCount == 0); + + const ProfilingStack& profilingStack = aThreadData.ProfilingStackCRef(); + const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames; + uint32_t profilingStackFrameCount = profilingStack.stackSize(); + + // While the profiling stack array is ordered oldest-to-youngest, the JS and + // native arrays are ordered youngest-to-oldest. We must add frames to aInfo + // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS + // and native arrays backwards. Note: this means the terminating condition + // jsIndex and nativeIndex is being < 0. + uint32_t profilingStackIndex = 0; + int32_t jsIndex = aJsFramesCount - 1; + int32_t nativeIndex = aNativeStack.mCount - 1; + + uint8_t* lastLabelFrameStackAddr = nullptr; + uint8_t* jitEndStackAddr = nullptr; + + // Iterate as long as there is at least one frame remaining. + while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 || + nativeIndex >= 0) { + // There are 1 to 3 frames available. Find and add the oldest. + uint8_t* profilingStackAddr = nullptr; + uint8_t* jsStackAddr = nullptr; + uint8_t* nativeStackAddr = nullptr; + uint8_t* jsActivationAddr = nullptr; + + if (profilingStackIndex != profilingStackFrameCount) { + const js::ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + if (profilingStackFrame.isLabelFrame() || + profilingStackFrame.isSpMarkerFrame()) { + lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress(); + } + + // Skip any JS_OSR frames. Such frames are used when the JS interpreter + // enters a jit frame on a loop edge (via on-stack-replacement, or OSR). + // To avoid both the profiling stack frame and jit frame being recorded + // (and showing up twice), the interpreter marks the interpreter + // profiling stack frame as JS_OSR to ensure that it doesn't get counted. 
+ if (profilingStackFrame.isOSRFrame()) { + profilingStackIndex++; + continue; + } + + MOZ_ASSERT(lastLabelFrameStackAddr); + profilingStackAddr = lastLabelFrameStackAddr; + } + + if (jsIndex >= 0) { + jsStackAddr = (uint8_t*)aJsFrames[jsIndex].stackAddress; + jsActivationAddr = (uint8_t*)aJsFrames[jsIndex].activation; + } + + if (nativeIndex >= 0) { + nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex]; + } + + // If there's a native stack frame which has the same SP as a profiling + // stack frame, pretend we didn't see the native stack frame. Ditto for a + // native stack frame which has the same SP as a JS stack frame. In effect + // this means profiling stack frames or JS frames trump conflicting native + // frames. + if (nativeStackAddr && (profilingStackAddr == nativeStackAddr || + jsStackAddr == nativeStackAddr)) { + nativeStackAddr = nullptr; + nativeIndex--; + MOZ_ASSERT(profilingStackAddr || jsStackAddr); + } + + // Sanity checks. + MOZ_ASSERT_IF(profilingStackAddr, + profilingStackAddr != jsStackAddr && + profilingStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr && + jsStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr && + nativeStackAddr != jsStackAddr); + + // Check to see if profiling stack frame is top-most. + if (profilingStackAddr > jsStackAddr && + profilingStackAddr > nativeStackAddr) { + MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount); + const js::ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + // Sp marker frames are just annotations and should not be recorded in + // the profile. + if (!profilingStackFrame.isSpMarkerFrame()) { + // The JIT only allows the top-most frame to have a nullptr pc. 
+ MOZ_ASSERT_IF( + profilingStackFrame.isJsFrame() && profilingStackFrame.script() && + !profilingStackFrame.pc(), + &profilingStackFrame == + &profilingStack.frames[profilingStack.stackSize() - 1]); + if (aIsSynchronous && profilingStackFrame.categoryPair() == + JS::ProfilingCategoryPair::PROFILER) { + // For stacks captured synchronously (ie. marker stacks), stop + // walking the stack as soon as we enter the profiler category, + // to avoid showing profiler internal code in marker stacks. + return; + } + aCollector.CollectProfilingStackFrame(profilingStackFrame); + } + profilingStackIndex++; + continue; + } + + // Check to see if JS jit stack frame is top-most + if (jsStackAddr > nativeStackAddr) { + MOZ_ASSERT(jsIndex >= 0); + const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex]; + jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress; + // Stringifying non-wasm JIT frames is delayed until streaming time. To + // re-lookup the entry in the JitcodeGlobalTable, we need to store the + // JIT code address (OptInfoAddr) in the circular buffer. + // + // Note that we cannot do this when we are sychronously sampling the + // current thread; that is, when called from profiler_get_backtrace. The + // captured backtrace is usually externally stored for an indeterminate + // amount of time, such as in nsRefreshDriver. Problematically, the + // stored backtrace may be alive across a GC during which the profiler + // itself is disabled. In that case, the JS engine is free to discard its + // JIT code. This means that if we inserted such OptInfoAddr entries into + // the buffer, nsRefreshDriver would now be holding on to a backtrace + // with stale JIT code return addresses. + if (aIsSynchronous || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) { + aCollector.CollectWasmFrame(jsFrame.label); + } else if (jsFrame.kind == + JS::ProfilingFrameIterator::Frame_BaselineInterpreter) { + // Materialize a ProfilingStackFrame similar to the C++ Interpreter. 
We + // also set the IS_BLINTERP_FRAME flag to differentiate though. + JSScript* script = jsFrame.interpreterScript; + jsbytecode* pc = jsFrame.interpreterPC(); + js::ProfilingStackFrame stackFrame; + constexpr uint32_t ExtraFlags = + uint32_t(js::ProfilingStackFrame::Flags::IS_BLINTERP_FRAME); + stackFrame.initJsFrame<JS::ProfilingCategoryPair::JS_BaselineInterpret, + ExtraFlags>("", jsFrame.label, script, pc, + jsFrame.realmID); + aCollector.CollectProfilingStackFrame(stackFrame); + } else { + MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline); + aCollector.CollectJitReturnAddr(jsFrame.returnAddress()); + } + + jsIndex--; + continue; + } + + // If we reach here, there must be a native stack frame and it must be the + // greatest frame. + if (nativeStackAddr && + // If the latest JS frame was JIT, this could be the native frame that + // corresponds to it. In that case, skip the native frame, because + // there's no need for the same frame to be present twice in the stack. + // The JS frame can be considered the symbolicated version of the native + // frame. + (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) && + // This might still be a JIT operation, check to make sure that is not + // in range of the NEXT JavaScript's stacks' activation address. + (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) { + MOZ_ASSERT(nativeIndex >= 0); + void* addr = (void*)aNativeStack.mPCs[nativeIndex]; + aCollector.CollectNativeLeafAddr(addr); + } + if (nativeIndex >= 0) { + nativeIndex--; + } + } + + // Update the JS context with the current profile sample buffer generation. + // + // Only do this for periodic samples. We don't want to do this for + // synchronous samples, and we also don't want to do it for calls to + // profiler_suspend_and_sample_thread() from the background hang reporter - + // in that case, aCollector.BufferRangeStart() will return Nothing(). 
+ if (!aIsSynchronous) { + aCollector.BufferRangeStart().apply( + [&aThreadData](uint64_t aBufferRangeStart) { + JSContext* context = aThreadData.GetJSContext(); + if (context) { + JS::SetJSContextProfilerSampleBufferRangeStart(context, + aBufferRangeStart); + } + }); + } +} + +#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) +static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, + void* aClosure) { + NativeStack* nativeStack = static_cast<NativeStack*>(aClosure); + MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES); + nativeStack->mSPs[nativeStack->mCount] = aSP; + nativeStack->mPCs[nativeStack->mCount] = aPC; + nativeStack->mCount++; +} +#endif + +#if defined(USE_FRAME_POINTER_STACK_WALK) +static void DoFramePointerBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Make a local copy of the Registers, to allow modifications. + Registers regs = aRegs; + + // Start with the current function. We use 0 as the frame number here because + // the FramePointerStackWalk() call below will use 1..N. This is a bit weird + // but it doesn't matter because StackWalkCallback() doesn't use the frame + // number argument. + StackWalkCallback(/* frameNum */ 0, regs.mPC, regs.mSP, &aNativeStack); + + const void* const stackEnd = aThreadData.StackTop(); + + // This is to check forward-progress after using a resume point. 
+ void* previousResumeSp = nullptr; + + for (;;) { + if (!(regs.mSP && regs.mSP <= regs.mFP && regs.mFP <= stackEnd)) { + break; + } + FramePointerStackWalk(StackWalkCallback, + uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount), + &aNativeStack, reinterpret_cast<void**>(regs.mFP), + const_cast<void*>(stackEnd)); + + if constexpr (!StackWalkControl::scIsSupported) { + break; + } else { + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + // No room to add more frames. + break; + } + if (!aStackWalkControlIfSupported || + aStackWalkControlIfSupported->ResumePointCount() == 0) { + // No resume information. + break; + } + void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1]; + if (previousResumeSp && + ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) { + // No progress after the previous resume point. + break; + } + const StackWalkControl::ResumePoint* resumePoint = + aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP); + if (!resumePoint) { + break; + } + void* sp = resumePoint->resumeSp; + if (!sp) { + // Null SP in a resume point means we stop here. + break; + } + void* pc = resumePoint->resumePc; + StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp, + &aNativeStack); + ++aNativeStack.mCount; + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + break; + } + // Prepare context to resume stack walking. + regs.mPC = (Address)pc; + regs.mSP = (Address)sp; + regs.mFP = (Address)resumePoint->resumeBp; + + previousResumeSp = sp; + } + } +} +#endif + +#if defined(USE_MOZ_STACK_WALK) +static void DoMozStackWalkBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Start with the current function. 
We use 0 as the frame number here because + // the MozStackWalkThread() call below will use 1..N. This is a bit weird but + // it doesn't matter because StackWalkCallback() doesn't use the frame number + // argument. + StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack); + + HANDLE thread = aThreadData.PlatformDataCRef().ProfiledThread(); + MOZ_ASSERT(thread); + + CONTEXT context_buf; + CONTEXT* context = nullptr; + if constexpr (StackWalkControl::scIsSupported) { + context = &context_buf; + memset(&context_buf, 0, sizeof(CONTEXT)); + context_buf.ContextFlags = CONTEXT_FULL; +# if defined(_M_AMD64) + context_buf.Rsp = (DWORD64)aRegs.mSP; + context_buf.Rbp = (DWORD64)aRegs.mFP; + context_buf.Rip = (DWORD64)aRegs.mPC; +# else + static_assert(!StackWalkControl::scIsSupported, + "Mismatched support between StackWalkControl and " + "DoMozStackWalkBacktrace"); +# endif + } else { + context = nullptr; + } + + // This is to check forward-progress after using a resume point. + void* previousResumeSp = nullptr; + + for (;;) { + MozStackWalkThread(StackWalkCallback, + uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount), + &aNativeStack, thread, context); + + if constexpr (!StackWalkControl::scIsSupported) { + break; + } else { + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + // No room to add more frames. + break; + } + if (!aStackWalkControlIfSupported || + aStackWalkControlIfSupported->ResumePointCount() == 0) { + // No resume information. + break; + } + void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1]; + if (previousResumeSp && + ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) { + // No progress after the previous resume point. + break; + } + const StackWalkControl::ResumePoint* resumePoint = + aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP); + if (!resumePoint) { + break; + } + void* sp = resumePoint->resumeSp; + if (!sp) { + // Null SP in a resume point means we stop here. 
+ break; + } + void* pc = resumePoint->resumePc; + StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp, + &aNativeStack); + ++aNativeStack.mCount; + if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) { + break; + } + // Prepare context to resume stack walking. + memset(&context_buf, 0, sizeof(CONTEXT)); + context_buf.ContextFlags = CONTEXT_FULL; +# if defined(_M_AMD64) + context_buf.Rsp = (DWORD64)sp; + context_buf.Rbp = (DWORD64)resumePoint->resumeBp; + context_buf.Rip = (DWORD64)pc; +# else + static_assert(!StackWalkControl::scIsSupported, + "Mismatched support between StackWalkControl and " + "DoMozStackWalkBacktrace"); +# endif + previousResumeSp = sp; + } + } +} +#endif + +#ifdef USE_EHABI_STACKWALK +static void DoEHABIBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + aNativeStack.mCount = EHABIStackWalk( + aRegs.mContext->uc_mcontext, const_cast<void*>(aThreadData.StackTop()), + aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES); + (void)aStackWalkControlIfSupported; // TODO: Implement. +} +#endif + +#ifdef USE_LUL_STACKWALK + +// See the comment at the callsite for why this function is necessary. +# if defined(MOZ_HAVE_ASAN_BLACKLIST) +MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc, + size_t aLen) { + // The obvious thing to do here is call memcpy(). However, although + // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the + // false positive still manifests! So we must implement memcpy() ourselves + // within this function. 
+ char* dst = static_cast<char*>(aDst); + const char* src = static_cast<const char*>(aSrc); + + for (size_t i = 0; i < aLen; i++) { + dst[i] = src[i]; + } +} +# endif + +static void DoLULBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + (void)aStackWalkControlIfSupported; // TODO: Implement. + + const mcontext_t* mc = &aRegs.mContext->uc_mcontext; + + lul::UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); + +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]); +# elif defined(GP_PLAT_amd64_freebsd) + startRegs.xip = lul::TaggedUWord(mc->mc_rip); + startRegs.xsp = lul::TaggedUWord(mc->mc_rsp); + startRegs.xbp = lul::TaggedUWord(mc->mc_rbp); +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + startRegs.r15 = lul::TaggedUWord(mc->arm_pc); + startRegs.r14 = lul::TaggedUWord(mc->arm_lr); + startRegs.r13 = lul::TaggedUWord(mc->arm_sp); + startRegs.r12 = lul::TaggedUWord(mc->arm_ip); + startRegs.r11 = lul::TaggedUWord(mc->arm_fp); + startRegs.r7 = lul::TaggedUWord(mc->arm_r7); +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.x29 = lul::TaggedUWord(mc->regs[29]); + startRegs.x30 = lul::TaggedUWord(mc->regs[30]); + startRegs.sp = lul::TaggedUWord(mc->sp); +# elif defined(GP_PLAT_arm64_freebsd) + startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr); + startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]); + startRegs.x30 = 
lul::TaggedUWord(mc->mc_gpregs.gp_lr); + startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp); +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]); +# elif defined(GP_PLAT_mips64_linux) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.sp = lul::TaggedUWord(mc->gregs[29]); + startRegs.fp = lul::TaggedUWord(mc->gregs[30]); +# else +# error "Unknown plat" +# endif + + // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the + // stack's registered top point. Do some basic validity checks too. This + // assumes that the TaggedUWord holding the stack pointer value is valid, but + // it should be, since it was constructed that way in the code just above. + + // We could construct |stackImg| so that LUL reads directly from the stack in + // question, rather than from a copy of it. That would reduce overhead and + // space use a bit. However, it gives a problem with dynamic analysis tools + // (ASan, TSan, Valgrind) which is that such tools will report invalid or + // racing memory accesses, and such accesses will be reported deep inside LUL. + // By taking a copy here, we can either sanitise the copy (for Valgrind) or + // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have + // to try and suppress errors inside LUL. + // + // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks + // observed in some minutes of testing, whilst keeping the size of this + // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in + // practice are small, 4KB or less, and so the copy costs are insignificant + // compared to other profiler overhead. + // + // |stackImg| is allocated on this (the sampling thread's) stack. That + // implies that the frame for this function is at least N_STACK_BYTES large. 
+ // In general it would be considered unacceptable to have such a large frame + // on a stack, but it only exists for the unwinder thread, and so is not + // expected to be a problem. Allocating it on the heap is troublesome because + // this function runs whilst the sampled thread is suspended, so any heap + // allocation risks deadlock. Allocating it as a global variable is not + // thread safe, which would be a problem if we ever allow multiple sampler + // threads. Hence allocating it on the stack seems to be the least-worst + // option. + + lul::StackImage stackImg; + + { +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ + defined(GP_PLAT_amd64_freebsd) + uintptr_t rEDZONE_SIZE = 128; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ + defined(GP_PLAT_arm64_freebsd) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_mips64_linux) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# else +# error "Unknown plat" +# endif + uintptr_t end = reinterpret_cast<uintptr_t>(aThreadData.StackTop()); + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = 0; + if (start < end) { + nToCopy = end - start; + if (nToCopy >= 1024u * 1024u) { + // start is abnormally far from end, possibly due to some special code + // that uses a separate stack elsewhere (e.g.: rr). In this case we just + // give up on this sample. 
+ nToCopy = 0; + } else if (nToCopy > lul::N_STACK_BYTES) { + nToCopy = lul::N_STACK_BYTES; + } + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + stackImg.mLen = nToCopy; + stackImg.mStartAvma = start; + if (nToCopy > 0) { + // If this is a vanilla memcpy(), ASAN makes the following complaint: + // + // ERROR: AddressSanitizer: stack-buffer-underflow ... + // ... + // HINT: this may be a false positive if your program uses some custom + // stack unwind mechanism or swapcontext + // + // This code is very much a custom stack unwind mechanism! So we use an + // alternative memcpy() implementation that is ignored by ASAN. +# if defined(MOZ_HAVE_ASAN_BLACKLIST) + ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# else + memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# endif + (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy); + } + } + + size_t framePointerFramesAcquired = 0; + lul::LUL* lul = CorePS::Lul(); + MOZ_RELEASE_ASSERT(lul); + lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs), + reinterpret_cast<uintptr_t*>(aNativeStack.mSPs), + &aNativeStack.mCount, &framePointerFramesAcquired, + MAX_NATIVE_FRAMES, &startRegs, &stackImg); + + // Update stats in the LUL stats object. Unfortunately this requires + // three global memory operations. + lul->mStats.mContext += 1; + lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired; + lul->mStats.mFP += framePointerFramesAcquired; +} + +#endif + +#ifdef HAVE_NATIVE_UNWIND +static void DoNativeBacktrace( + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, NativeStack& aNativeStack, + StackWalkControl* aStackWalkControlIfSupported) { + // This method determines which stackwalker is used for periodic and + // synchronous samples. (Backtrace samples are treated differently, see + // profiler_suspend_and_sample_thread() for details). 
The only part of the + // ordering that matters is that LUL must precede FRAME_POINTER, because on + // Linux they can both be present. +# if defined(USE_LUL_STACKWALK) + DoLULBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# elif defined(USE_EHABI_STACKWALK) + DoEHABIBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# elif defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(aThreadData, aRegs, aNativeStack, + aStackWalkControlIfSupported); +# else +# error "Invalid configuration" +# endif +} +#endif + +// Writes some components shared by periodic and synchronous profiles to +// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample() +// and DoPeriodicSample().) +// +// The grammar for entry sequences is in a comment above +// ProfileBuffer::StreamSamplesToJSON. +static inline void DoSharedSample( + bool aIsSynchronous, uint32_t aFeatures, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + JsFrame* aJsFrames, const Registers& aRegs, uint64_t aSamplePos, + uint64_t aBufferRangeStart, ProfileBuffer& aBuffer, + StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) { + // WARNING: this function runs within the profiler's "critical section". 
+ + MOZ_ASSERT(!aBuffer.IsThreadSafe(), + "Mutexes cannot be used inside this critical section"); + + ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart); + StackWalkControl* stackWalkControlIfSupported = nullptr; +#if defined(HAVE_NATIVE_UNWIND) + const bool captureNative = ProfilerFeature::HasStackWalk(aFeatures) && + aCaptureOptions == StackCaptureOptions::Full; + StackWalkControl stackWalkControl; + if constexpr (StackWalkControl::scIsSupported) { + if (captureNative) { + stackWalkControlIfSupported = &stackWalkControl; + } + } +#endif // defined(HAVE_NATIVE_UNWIND) + const uint32_t jsFramesCount = + aJsFrames ? ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector, + aJsFrames, stackWalkControlIfSupported) + : 0; + NativeStack nativeStack; +#if defined(HAVE_NATIVE_UNWIND) + if (captureNative) { + DoNativeBacktrace(aThreadData, aRegs, nativeStack, + stackWalkControlIfSupported); + + MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack, + collector, aJsFrames, jsFramesCount); + } else +#endif + { + MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack, + collector, aJsFrames, jsFramesCount); + + // We can't walk the whole native stack, but we can record the top frame. + if (aCaptureOptions == StackCaptureOptions::Full) { + aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC)); + } + } +} + +// Writes the components of a synchronous sample to the given ProfileBuffer. +static void DoSyncSample( + uint32_t aFeatures, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Registers& aRegs, ProfileBuffer& aBuffer, + StackCaptureOptions aCaptureOptions) { + // WARNING: this function runs within the profiler's "critical section". 
+ + MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack, + "DoSyncSample should not be called when no capture is needed"); + + const uint64_t bufferRangeStart = aBuffer.BufferRangeStart(); + + const uint64_t samplePos = + aBuffer.AddThreadIdEntry(aThreadData.Info().ThreadId()); + + TimeDuration delta = aNow - CorePS::ProcessStartTime(); + aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + + if (!aThreadData.GetJSContext()) { + // No JSContext, there is no JS frame buffer (and no need for it). + DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData, + /* aJsFrames = */ nullptr, aRegs, samplePos, + bufferRangeStart, aBuffer, aCaptureOptions); + } else { + // JSContext is present, we need to lock the thread data to access the JS + // frame buffer. + ThreadRegistration::WithOnThreadRef([&](ThreadRegistration::OnThreadRef + aOnThreadRef) { + aOnThreadRef.WithConstLockedRWOnThread( + [&](const ThreadRegistration::LockedRWOnThread& aLockedThreadData) { + DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData, + aLockedThreadData.GetJsFrameBuffer(), aRegs, + samplePos, bufferRangeStart, aBuffer, + aCaptureOptions); + }); + }); + } +} + +// Writes the components of a periodic sample to ActivePS's ProfileBuffer. +// The ThreadId entry is already written in the main ProfileBuffer, its location +// is `aSamplePos`, we can write the rest to `aBuffer` (which may be different). +static inline void DoPeriodicSample( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart, + ProfileBuffer& aBuffer) { + // WARNING: this function runs within the profiler's "critical section". 
+ + MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock)); + + JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock); + DoSharedSample(/* aIsSynchronous = */ false, ActivePS::Features(aLock), + aThreadData, jsFrames, aRegs, aSamplePos, aBufferRangeStart, + aBuffer); +} + +// END sampling/unwinding code +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN saving/streaming code + +const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL; + +static int64_t SafeJSInteger(uint64_t aValue) { + return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1; +} + +static void AddSharedLibraryInfoToStream(JSONWriter& aWriter, + const SharedLibrary& aLib) { + aWriter.StartObjectElement(); + aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart())); + aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd())); + aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset())); + aWriter.StringProperty("name", NS_ConvertUTF16toUTF8(aLib.GetModuleName())); + aWriter.StringProperty("path", NS_ConvertUTF16toUTF8(aLib.GetModulePath())); + aWriter.StringProperty("debugName", + NS_ConvertUTF16toUTF8(aLib.GetDebugName())); + aWriter.StringProperty("debugPath", + NS_ConvertUTF16toUTF8(aLib.GetDebugPath())); + aWriter.StringProperty("breakpadId", aLib.GetBreakpadId()); + aWriter.StringProperty("arch", aLib.GetArch()); + aWriter.EndObject(); +} + +void AppendSharedLibraries(JSONWriter& aWriter) { + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + info.SortByAddress(); + for (size_t i = 0; i < info.GetSize(); i++) { + AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i)); + } +} + +static void StreamCategories(SpliceableJSONWriter& aWriter) { + // Same order as ProfilingCategory. 
Format: + // [ + // { + // name: "Idle", + // color: "transparent", + // subcategories: ["Other"], + // }, + // { + // name: "Other", + // color: "grey", + // subcategories: [ + // "JSM loading", + // "Subprocess launching", + // "DLL loading" + // ] + // }, + // ... + // ] + +#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \ + aWriter.Start(); \ + aWriter.StringProperty("name", labelAsString); \ + aWriter.StringProperty("color", color); \ + aWriter.StartArrayProperty("subcategories"); +#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \ + aWriter.StringElement(labelAsString); +#define CATEGORY_JSON_END_CATEGORY \ + aWriter.EndArray(); \ + aWriter.EndObject(); + + MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY, + CATEGORY_JSON_SUBCATEGORY, + CATEGORY_JSON_END_CATEGORY) + +#undef CATEGORY_JSON_BEGIN_CATEGORY +#undef CATEGORY_JSON_SUBCATEGORY +#undef CATEGORY_JSON_END_CATEGORY +} + +static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) { + // Get an array view with all registered marker-type-specific functions. + base_profiler_markers_detail::Streaming::LockedMarkerTypeFunctionsList + markerTypeFunctionsArray; + // List of streamed marker names, this is used to spot duplicates. + std::set<std::string> names; + // Stream the display schema for each different one. (Duplications may come + // from the same code potentially living in different libraries.) + for (const auto& markerTypeFunctions : markerTypeFunctionsArray) { + auto name = markerTypeFunctions.mMarkerTypeNameFunction(); + // std::set.insert(T&&) returns a pair, its `second` is true if the element + // was actually inserted (i.e., it was not there yet.) + const bool didInsert = + names.insert(std::string(name.data(), name.size())).second; + if (didInsert) { + markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name); + } + } + + // Now stream the Rust marker schemas. 
Passing the names set as a void pointer + // as well, so we can continue checking if the schemes are added already in + // the Rust side. + profiler::ffi::gecko_profiler_stream_marker_schemas( + &aWriter, static_cast<void*>(&names)); +} + +// Some meta information that is better recorded before streaming the profile. +// This is *not* intended to be cached, as some values could change between +// profiling sessions. +struct PreRecordedMetaInformation { + bool mAsyncStacks; + + // This struct should only live on the stack, so it's fine to use Auto + // strings. + nsAutoCString mHttpPlatform; + nsAutoCString mHttpOscpu; + nsAutoCString mHttpMisc; + + nsAutoCString mRuntimeABI; + nsAutoCString mRuntimeToolkit; + + nsAutoCString mAppInfoProduct; + nsAutoCString mAppInfoAppBuildID; + nsAutoCString mAppInfoSourceURL; + + int32_t mProcessInfoCpuCount; + int32_t mProcessInfoCpuCores; + nsAutoCString mProcessInfoCpuName; +}; + +// This function should be called out of the profiler lock. +// It gathers non-trivial data that doesn't require the profiler to stop, or for +// which the request could theoretically deadlock if the profiler is locked. +static PreRecordedMetaInformation PreRecordMetaInformation() { + MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread()); + + PreRecordedMetaInformation info = {}; // Aggregate-init all fields. + + if (!NS_IsMainThread()) { + // Leave these properties out if we're not on the main thread. + // At the moment, the only case in which this function is called on a + // background thread is if we're in a content process and are going to + // send this profile to the parent process. In that case, the parent + // process profile's "meta" object already has the rest of the properties, + // and the parent process profile is dumped on that process's main thread. 
+ return info; + } + + info.mAsyncStacks = Preferences::GetBool("javascript.options.asyncstack"); + + nsresult res; + + if (nsCOMPtr<nsIHttpProtocolHandler> http = + do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res); + !NS_FAILED(res) && http) { + Unused << http->GetPlatform(info.mHttpPlatform); + +#if defined(GP_OS_darwin) + // On Mac, the http "oscpu" is capped at 10.15, so we need to get the real + // OS version directly. + int major = 0; + int minor = 0; + int bugfix = 0; + nsCocoaFeatures::GetSystemVersion(major, minor, bugfix); + if (major != 0) { + info.mHttpOscpu.AppendLiteral("macOS "); + info.mHttpOscpu.AppendInt(major); + info.mHttpOscpu.AppendLiteral("."); + info.mHttpOscpu.AppendInt(minor); + info.mHttpOscpu.AppendLiteral("."); + info.mHttpOscpu.AppendInt(bugfix); + } else +#endif +#if defined(GP_OS_windows) + // On Windows, the http "oscpu" is capped at Windows 10, so we need to get + // the real OS version directly. + OSVERSIONINFO ovi = {sizeof(OSVERSIONINFO)}; + if (GetVersionEx(&ovi)) { + info.mHttpOscpu.AppendLiteral("Windows "); + // The major version returned for Windows 11 is 10, but we can + // identify it from the build number. + info.mHttpOscpu.AppendInt( + ovi.dwBuildNumber >= 22000 ? 
11 : int32_t(ovi.dwMajorVersion)); + info.mHttpOscpu.AppendLiteral("."); + info.mHttpOscpu.AppendInt(int32_t(ovi.dwMinorVersion)); +# if defined(_ARM64_) + info.mHttpOscpu.AppendLiteral(" Arm64"); +# endif + info.mHttpOscpu.AppendLiteral("; build="); + info.mHttpOscpu.AppendInt(int32_t(ovi.dwBuildNumber)); + } else +#endif + { + Unused << http->GetOscpu(info.mHttpOscpu); + } + + Unused << http->GetMisc(info.mHttpMisc); + } + + if (nsCOMPtr<nsIXULRuntime> runtime = + do_GetService("@mozilla.org/xre/runtime;1"); + runtime) { + Unused << runtime->GetXPCOMABI(info.mRuntimeABI); + Unused << runtime->GetWidgetToolkit(info.mRuntimeToolkit); + } + + if (nsCOMPtr<nsIXULAppInfo> appInfo = + do_GetService("@mozilla.org/xre/app-info;1"); + appInfo) { + Unused << appInfo->GetName(info.mAppInfoProduct); + Unused << appInfo->GetAppBuildID(info.mAppInfoAppBuildID); + Unused << appInfo->GetSourceURL(info.mAppInfoSourceURL); + } + + ProcessInfo processInfo = {}; // Aggregate-init all fields to false/zeroes. + if (NS_SUCCEEDED(CollectProcessInfo(processInfo))) { + info.mProcessInfoCpuCount = processInfo.cpuCount; + info.mProcessInfoCpuCores = processInfo.cpuCores; + info.mProcessInfoCpuName = processInfo.cpuName; + } + + return info; +} + +// Implemented in platform-specific cpps, to add object properties describing +// the units of CPU measurements in samples. +static void StreamMetaPlatformSampleUnits(PSLockRef aLock, + SpliceableJSONWriter& aWriter); + +static void StreamMetaJSCustomObject( + PSLockRef aLock, SpliceableJSONWriter& aWriter, bool aIsShuttingDown, + const PreRecordedMetaInformation& aPreRecordedMetaInformation) { + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + aWriter.IntProperty("version", 26); + + // The "startTime" field holds the number of milliseconds since midnight + // January 1, 1970 GMT. This grotty code computes (Now - (Now - + // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form. 
+ // Note: This is the only absolute time in the profile! All other timestamps + // are relative to this startTime. + TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime(); + aWriter.DoubleProperty( + "startTime", + static_cast<double>(PR_Now() / 1000.0 - delta.ToMilliseconds())); + + aWriter.DoubleProperty("profilingStartTime", (ActivePS::ProfilingStartTime() - + CorePS::ProcessStartTime()) + .ToMilliseconds()); + + if (const TimeStamp contentEarliestTime = + ActivePS::Buffer(aLock) + .UnderlyingChunkedBuffer() + .GetEarliestChunkStartTimeStamp(); + !contentEarliestTime.IsNull()) { + aWriter.DoubleProperty( + "contentEarliestTime", + (contentEarliestTime - CorePS::ProcessStartTime()).ToMilliseconds()); + } else { + aWriter.NullProperty("contentEarliestTime"); + } + + const double profilingEndTime = profiler_time(); + aWriter.DoubleProperty("profilingEndTime", profilingEndTime); + + if (aIsShuttingDown) { + aWriter.DoubleProperty("shutdownTime", profilingEndTime); + } else { + aWriter.NullProperty("shutdownTime"); + } + + aWriter.StartArrayProperty("categories"); + StreamCategories(aWriter); + aWriter.EndArray(); + + aWriter.StartArrayProperty("markerSchema"); + StreamMarkerSchema(aWriter); + aWriter.EndArray(); + + ActivePS::WriteActiveConfiguration(aLock, aWriter, + MakeStringSpan("configuration")); + + if (!NS_IsMainThread()) { + // Leave the rest of the properties out if we're not on the main thread. + // At the moment, the only case in which this function is called on a + // background thread is if we're in a content process and are going to + // send this profile to the parent process. In that case, the parent + // process profile's "meta" object already has the rest of the properties, + // and the parent process profile is dumped on that process's main thread. 
+ return; + } + + aWriter.DoubleProperty("interval", ActivePS::Interval(aLock)); + aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock)); + +#ifdef DEBUG + aWriter.IntProperty("debug", 1); +#else + aWriter.IntProperty("debug", 0); +#endif + + aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 1 : 0); + + aWriter.IntProperty("asyncstack", aPreRecordedMetaInformation.mAsyncStacks); + + aWriter.IntProperty("processType", XRE_GetProcessType()); + + aWriter.StringProperty("updateChannel", MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL)); + + if (!aPreRecordedMetaInformation.mHttpPlatform.IsEmpty()) { + aWriter.StringProperty("platform", + aPreRecordedMetaInformation.mHttpPlatform); + } + if (!aPreRecordedMetaInformation.mHttpOscpu.IsEmpty()) { + aWriter.StringProperty("oscpu", aPreRecordedMetaInformation.mHttpOscpu); + } + if (!aPreRecordedMetaInformation.mHttpMisc.IsEmpty()) { + aWriter.StringProperty("misc", aPreRecordedMetaInformation.mHttpMisc); + } + + if (!aPreRecordedMetaInformation.mRuntimeABI.IsEmpty()) { + aWriter.StringProperty("abi", aPreRecordedMetaInformation.mRuntimeABI); + } + if (!aPreRecordedMetaInformation.mRuntimeToolkit.IsEmpty()) { + aWriter.StringProperty("toolkit", + aPreRecordedMetaInformation.mRuntimeToolkit); + } + + if (!aPreRecordedMetaInformation.mAppInfoProduct.IsEmpty()) { + aWriter.StringProperty("product", + aPreRecordedMetaInformation.mAppInfoProduct); + } + if (!aPreRecordedMetaInformation.mAppInfoAppBuildID.IsEmpty()) { + aWriter.StringProperty("appBuildID", + aPreRecordedMetaInformation.mAppInfoAppBuildID); + } + if (!aPreRecordedMetaInformation.mAppInfoSourceURL.IsEmpty()) { + aWriter.StringProperty("sourceURL", + aPreRecordedMetaInformation.mAppInfoSourceURL); + } + + if (!aPreRecordedMetaInformation.mProcessInfoCpuName.IsEmpty()) { + aWriter.StringProperty("CPUName", + aPreRecordedMetaInformation.mProcessInfoCpuName); + } + if (aPreRecordedMetaInformation.mProcessInfoCpuCores > 0) { + aWriter.IntProperty("physicalCPUs", + 
aPreRecordedMetaInformation.mProcessInfoCpuCores); + } + if (aPreRecordedMetaInformation.mProcessInfoCpuCount > 0) { + aWriter.IntProperty("logicalCPUs", + aPreRecordedMetaInformation.mProcessInfoCpuCount); + } + +#if defined(GP_OS_android) + jni::String::LocalRef deviceInformation = + java::GeckoJavaSampler::GetDeviceInformation(); + aWriter.StringProperty("device", deviceInformation->ToCString()); +#endif + + aWriter.StartObjectProperty("sampleUnits"); + { + aWriter.StringProperty("time", "ms"); + aWriter.StringProperty("eventDelay", "ms"); + StreamMetaPlatformSampleUnits(aLock, aWriter); + } + aWriter.EndObject(); + + // We should avoid collecting extension metadata for profiler when there is no + // observer service, since a ExtensionPolicyService could not be created then. + if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) { + aWriter.StartObjectProperty("extensions"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("id"); + schema.WriteField("name"); + schema.WriteField("baseURL"); + } + + aWriter.StartArrayProperty("data"); + { + nsTArray<RefPtr<WebExtensionPolicy>> exts; + ExtensionPolicyService::GetSingleton().GetAll(exts); + + for (auto& ext : exts) { + aWriter.StartArrayElement(); + + nsAutoString id; + ext->GetId(id); + aWriter.StringElement(NS_ConvertUTF16toUTF8(id)); + + aWriter.StringElement(NS_ConvertUTF16toUTF8(ext->Name())); + + auto url = ext->GetURL(u""_ns); + if (url.isOk()) { + aWriter.StringElement(NS_ConvertUTF16toUTF8(url.unwrap())); + } + + aWriter.EndArray(); + } + } + aWriter.EndArray(); + } + aWriter.EndObject(); + } +} + +static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + ActivePS::DiscardExpiredPages(aLock); + for (const auto& page : ActivePS::ProfiledPages(aLock)) { + page->StreamJSON(aWriter); + } +} + +#if defined(GP_OS_android) +template <int N> +static bool StartsWith(const nsACString& string, const char (&prefix)[N]) { + if (N - 
1 > string.Length()) { + return false; + } + return memcmp(string.Data(), prefix, N - 1) == 0; +} + +static JS::ProfilingCategoryPair InferJavaCategory(nsACString& aName) { + if (aName.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) { + return JS::ProfilingCategoryPair::IDLE; + } + if (aName.EqualsLiteral("java.lang.Object.wait()")) { + return JS::ProfilingCategoryPair::JAVA_BLOCKED; + } + if (StartsWith(aName, "android.") || StartsWith(aName, "com.android.")) { + return JS::ProfilingCategoryPair::JAVA_ANDROID; + } + if (StartsWith(aName, "mozilla.") || StartsWith(aName, "org.mozilla.")) { + return JS::ProfilingCategoryPair::JAVA_MOZILLA; + } + if (StartsWith(aName, "java.") || StartsWith(aName, "sun.") || + StartsWith(aName, "com.sun.")) { + return JS::ProfilingCategoryPair::JAVA_LANGUAGE; + } + if (StartsWith(aName, "kotlin.") || StartsWith(aName, "kotlinx.")) { + return JS::ProfilingCategoryPair::JAVA_KOTLIN; + } + if (StartsWith(aName, "androidx.")) { + return JS::ProfilingCategoryPair::JAVA_ANDROIDX; + } + return JS::ProfilingCategoryPair::OTHER; +} + +// Marker type for Java markers without any details. +struct JavaMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("Java"); + } + static void StreamJSONMarkerData( + baseprofiler::SpliceableJSONWriter& aWriter) {} + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart, + MS::Location::MarkerTable}; + schema.SetAllLabels("{marker.name}"); + return schema; + } +}; + +// Marker type for Java markers with a detail field. +struct JavaMarkerWithDetails { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("JavaWithDetails"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + const ProfilerString8View& aText) { + // This (currently) needs to be called "name" to be searchable on the + // front-end. 
    aWriter.StringProperty("name", aText);
  }
  static MarkerSchema MarkerTypeDisplay() {
    using MS = MarkerSchema;
    MS schema{MS::Location::TimelineOverview, MS::Location::MarkerChart,
              MS::Location::MarkerTable};
    schema.SetTooltipLabel("{marker.name}");
    schema.SetChartLabel("{marker.data.name}");
    schema.SetTableLabel("{marker.name} - {marker.data.name}");
    schema.AddKeyLabelFormatSearchable("name", "Details", MS::Format::String,
                                       MS::Searchable::Searchable);
    return schema;
  }
};

// Pulls Java-side profiling data through GeckoJavaSampler:
// - appends the info of every registered Java thread to `javaThreads`;
// - copies the Java samples (thread id, time, frames) into `aProfileBuffer`;
// - drains the pending Java markers into `aProfileBuffer`'s chunked buffer.
static void CollectJavaThreadProfileData(
    nsTArray<java::GeckoJavaSampler::ThreadInfo::LocalRef>& javaThreads,
    ProfileBuffer& aProfileBuffer) {
  // Retrieve metadata about the threads.
  const auto threadCount = java::GeckoJavaSampler::GetRegisteredThreadCount();
  for (int i = 0; i < threadCount; i++) {
    javaThreads.AppendElement(
        java::GeckoJavaSampler::GetRegisteredThreadInfo(i));
  }

  // locked_profiler_start uses a sample count of 1000 for the Java thread.
  // This entry size is enough for now, but we might have to estimate it
  // if it becomes customizable.
  // Pass the samples.
  int sampleId = 0;
  while (true) {
    const auto threadId = java::GeckoJavaSampler::GetThreadId(sampleId);
    double sampleTime = java::GeckoJavaSampler::GetSampleTime(sampleId);
    // A zero thread id or a zero time ends the sample loop.
    if (threadId == 0 || sampleTime == 0.0) {
      break;
    }

    aProfileBuffer.AddThreadIdEntry(ProfilerThreadId::FromNumber(threadId));
    aProfileBuffer.AddEntry(ProfileBufferEntry::Time(sampleTime));
    int frameId = 0;
    while (true) {
      jni::String::LocalRef frameName =
          java::GeckoJavaSampler::GetFrameName(sampleId, frameId++);
      // A null frame name ends this sample's frames.
      if (!frameName) {
        break;
      }
      nsCString frameNameString = frameName->ToCString();

      auto categoryPair = InferJavaCategory(frameNameString);
      aProfileBuffer.CollectCodeLocation("", frameNameString.get(), 0, 0,
                                         Nothing(), Nothing(),
                                         Some(categoryPair));
    }
    sampleId++;
  }

  // Pass the markers now
  while (true) {
    // Gets the data from the Android UI thread only.
    java::GeckoJavaSampler::Marker::LocalRef marker =
        java::GeckoJavaSampler::PollNextMarker();
    if (!marker) {
      // All markers are transferred.
      break;
    }

    // Get all the marker information from the Java thread using JNI.
    const auto threadId = ProfilerThreadId::FromNumber(marker->GetThreadId());
    nsCString markerName = marker->GetMarkerName()->ToCString();
    jni::String::LocalRef text = marker->GetMarkerText();
    TimeStamp startTime =
        CorePS::ProcessStartTime() +
        TimeDuration::FromMilliseconds(marker->GetStartTime());

    double endTimeMs = marker->GetEndTime();
    // A marker can be either a duration with start and end, or a point in time
    // with only startTime. If endTime is 0, this means it's a point in time.
    TimeStamp endTime = endTimeMs == 0
                            ? startTime
                            : CorePS::ProcessStartTime() +
                                  TimeDuration::FromMilliseconds(endTimeMs);
    MarkerTiming timing = endTimeMs == 0
                              ? MarkerTiming::InstantAt(startTime)
                              : MarkerTiming::Interval(startTime, endTime);

    if (!text) {
      // This marker doesn't have a text.
      AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
                        geckoprofiler::category::JAVA_ANDROID,
                        {MarkerThreadId(threadId), std::move(timing)},
                        JavaMarker{});
    } else {
      // This marker has a text.
      AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
                        geckoprofiler::category::JAVA_ANDROID,
                        {MarkerThreadId(threadId), std::move(timing)},
                        JavaMarkerWithDetails{}, text->ToCString());
    }
  }
}
#endif

// Returns a new ProfilerCodeAddressService if the MOZ_PROFILER_SYMBOLICATE
// environment variable is set to a non-empty value, nullptr otherwise.
// The environment is only read on the first call; the answer is then reused.
UniquePtr<ProfilerCodeAddressService>
profiler_code_address_service_for_presymbolication() {
  static const bool preSymbolicate = []() {
    const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE");
    return symbolicate && symbolicate[0] != '\0';
  }();
  return preSymbolicate ? MakeUnique<ProfilerCodeAddressService>() : nullptr;
}

// Streams this process's profile into `aWriter`, in this order: "libs",
// "meta", "pages", profiler overheads, counters, "threads", "pausedRanges" and
// "profilingLog". It returns early, leaving the output incomplete, as soon as
// `aWriter.Failed()` is observed at one of the checkpoints below.
// Afterwards, CollectionStart/CollectionEnd entries are added to the buffer.
// `aProgressLogger` is updated along the way.
static void locked_profiler_stream_json_for_this_process(
    PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
    const PreRecordedMetaInformation& aPreRecordedMetaInformation,
    bool aIsShuttingDown, ProfilerCodeAddressService* aService,
    mozilla::ProgressLogger aProgressLogger) {
  LOG("locked_profiler_stream_json_for_this_process");

#ifdef DEBUG
  // Test-only slow-downs, enabled in non-parent processes through special
  // filter names.
  PRIntervalTime slowWithSleeps = 0;
  if (!XRE_IsParentProcess()) {
    for (const auto& filter : ActivePS::Filters(aLock)) {
      if (filter == "test-debug-child-slow-json") {
        LOG("test-debug-child-slow-json");
        // There are 10 slow-downs below, each will sleep 250ms, for a total of
        // 2.5s, which should trigger the first progress request after 1s, and
        // the next progress which will have advanced further, so this profile
        // shouldn't get dropped.
        slowWithSleeps = PR_MillisecondsToInterval(250);
      } else if (filter == "test-debug-child-very-slow-json") {
        LOG("test-debug-child-very-slow-json");
        // Wait for more than 2s without any progress, which should get this
        // profile discarded.
        PR_Sleep(PR_SecondsToInterval(5));
      }
    }
  }
#  define SLOW_DOWN_FOR_TESTING()                                         \
    if (slowWithSleeps != 0) {                                            \
      DEBUG_LOG("progress=%.0f%%, sleep...",                              \
                aProgressLogger.GetGlobalProgress().ToDouble() * 100.0);  \
      PR_Sleep(slowWithSleeps);                                           \
    }
#else  // #ifdef DEBUG
#  define SLOW_DOWN_FOR_TESTING() /* No slow-downs */
#endif  // #ifdef DEBUG #else

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  AUTO_PROFILER_STATS(locked_profiler_stream_json_for_this_process);

  // Recorded into the buffer at the end, as the CollectionStart entry.
  const double collectionStartMs = profiler_time();

  ProfileBuffer& buffer = ActivePS::Buffer(aLock);

  aProgressLogger.SetLocalProgress(1_pc, "Locked profile buffer");

  SLOW_DOWN_FOR_TESTING();

  // If there is a set "Window length", discard older data.
  Maybe<double> durationS = ActivePS::Duration(aLock);
  if (durationS.isSome()) {
    const double durationStartMs = collectionStartMs - *durationS * 1000;
    buffer.DiscardSamplesBeforeTime(durationStartMs);
  }
  aProgressLogger.SetLocalProgress(2_pc, "Discarded old data");

  if (aWriter.Failed()) {
    return;
  }
  SLOW_DOWN_FOR_TESTING();

#if defined(GP_OS_android)
  // Java thread profile data should be collected before serializing the meta
  // object. This is because Java thread adds some markers with marker schema
  // objects. And these objects should be added before the serialization of the
  // `profile.meta.markerSchema` array, so these marker schema objects can also
  // be serialized properly. That's why java thread profile data needs to be
  // done before everything.

  // We are allocating it chunk by chunk. So this will not allocate 64 MiB
  // at once. This size should be more than enough for java threads.
  // This buffer is being created for each process but Android has
  // relatively fewer processes compared to desktop, so it's okay here.
  mozilla::ProfileBufferChunkManagerWithLocalLimit javaChunkManager(
      64 * 1024 * 1024, 1024 * 1024);
  ProfileChunkedBuffer javaBufferManager(
      ProfileChunkedBuffer::ThreadSafety::WithoutMutex, javaChunkManager);
  ProfileBuffer javaBuffer(javaBufferManager);

  nsTArray<java::GeckoJavaSampler::ThreadInfo::LocalRef> javaThreads;

  if (ActivePS::FeatureJava(aLock)) {
    CollectJavaThreadProfileData(javaThreads, javaBuffer);
    aProgressLogger.SetLocalProgress(3_pc, "Collected Java thread");
  }
#endif

  // Put shared library info
  aWriter.StartArrayProperty("libs");
  AppendSharedLibraries(aWriter);
  aWriter.EndArray();
  aProgressLogger.SetLocalProgress(4_pc, "Wrote library information");

  if (aWriter.Failed()) {
    return;
  }
  SLOW_DOWN_FOR_TESTING();

  // Put meta data
  aWriter.StartObjectProperty("meta");
  {
    StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown,
                             aPreRecordedMetaInformation);
  }
  aWriter.EndObject();
  aProgressLogger.SetLocalProgress(5_pc, "Wrote profile metadata");

  if (aWriter.Failed()) {
    return;
  }
  SLOW_DOWN_FOR_TESTING();

  // Put page data
  aWriter.StartArrayProperty("pages");
  { StreamPages(aLock, aWriter); }
  aWriter.EndArray();
  aProgressLogger.SetLocalProgress(6_pc, "Wrote pages");

  buffer.StreamProfilerOverheadToJSON(
      aWriter, CorePS::ProcessStartTime(), aSinceTime,
      aProgressLogger.CreateSubLoggerTo(10_pc, "Wrote profiler overheads"));

  buffer.StreamCountersToJSON(
      aWriter, CorePS::ProcessStartTime(), aSinceTime,
      aProgressLogger.CreateSubLoggerTo(14_pc, "Wrote counters"));

  if (aWriter.Failed()) {
    return;
  }
  SLOW_DOWN_FOR_TESTING();

  // Lists the samples for each thread profile
  aWriter.StartArrayProperty("threads");
  {
    ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
    aProgressLogger.SetLocalProgress(15_pc, "Discarded expired profiles");

    ThreadRegistry::LockedRegistry lockedRegistry;
    ActivePS::ProfiledThreadList threads =
        ActivePS::ProfiledThreads(lockedRegistry, aLock);

    const uint32_t threadCount = uint32_t(threads.length());

    if (aWriter.Failed()) {
      return;
    }
    SLOW_DOWN_FOR_TESTING();

    // Prepare the streaming context for each thread.
    ProcessStreamingContext processStreamingContext(
        threadCount, aWriter.SourceFailureLatch(), CorePS::ProcessStartTime(),
        aSinceTime);
    for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
             20_pc, threadCount, "Preparing thread streaming contexts...")) {
      ActivePS::ProfiledThreadListElement& thread = threads[i];
      MOZ_RELEASE_ASSERT(thread.mProfiledThreadData);
      processStreamingContext.AddThreadStreamingContext(
          *thread.mProfiledThreadData, buffer, thread.mJSContext, aService,
          std::move(progressLogger));
      if (aWriter.Failed()) {
        return;
      }
    }

    SLOW_DOWN_FOR_TESTING();

    // Read the buffer once, and extract all samples and markers that the
    // context expects.
    buffer.StreamSamplesAndMarkersToJSON(
        processStreamingContext, aProgressLogger.CreateSubLoggerTo(
                                     "Processing samples and markers...", 80_pc,
                                     "Processed samples and markers"));

    if (aWriter.Failed()) {
      return;
    }
    SLOW_DOWN_FOR_TESTING();

    // Stream each thread from the pre-filled context.
    ThreadStreamingContext* const contextListBegin =
        processStreamingContext.begin();
    MOZ_ASSERT(uint32_t(processStreamingContext.end() - contextListBegin) ==
               threadCount);
    for (auto&& [i, progressLogger] : aProgressLogger.CreateLoopSubLoggersTo(
             92_pc, threadCount, "Streaming threads...")) {
      ThreadStreamingContext& threadStreamingContext = contextListBegin[i];
      threadStreamingContext.FinalizeWriter();
      threadStreamingContext.mProfiledThreadData.StreamJSON(
          std::move(threadStreamingContext), aWriter,
          CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock),
          CorePS::ProcessStartTime(), aService, std::move(progressLogger));
      if (aWriter.Failed()) {
        return;
      }
    }
    aProgressLogger.SetLocalProgress(92_pc, "Wrote samples and markers");

#if defined(GP_OS_android)
    // Stream the Java threads from the separate Java buffer filled earlier.
    if (ActivePS::FeatureJava(aLock)) {
      for (java::GeckoJavaSampler::ThreadInfo::LocalRef& threadInfo :
           javaThreads) {
        ProfiledThreadData threadData(ThreadRegistrationInfo{
            threadInfo->GetName()->ToCString().BeginReading(),
            ProfilerThreadId::FromNumber(threadInfo->GetId()), false,
            CorePS::ProcessStartTime()});

        threadData.StreamJSON(
            javaBuffer, nullptr, aWriter, CorePS::ProcessName(aLock),
            CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime,
            nullptr,
            aProgressLogger.CreateSubLoggerTo("Streaming Java thread...", 96_pc,
                                              "Streamed Java thread"));
      }
      if (aWriter.Failed()) {
        return;
      }
    } else {
      aProgressLogger.SetLocalProgress(96_pc, "No Java thread");
    }
#endif

    // Splice in the base profiler's threads, if there are any.
    UniquePtr<char[]> baseProfileThreads =
        ActivePS::MoveBaseProfileThreads(aLock);
    if (baseProfileThreads) {
      aWriter.Splice(MakeStringSpan(baseProfileThreads.get()));
      if (aWriter.Failed()) {
        return;
      }
      aProgressLogger.SetLocalProgress(97_pc, "Wrote baseprofiler data");
    } else {
      aProgressLogger.SetLocalProgress(97_pc, "No baseprofiler data");
    }
  }
  aWriter.EndArray();

  SLOW_DOWN_FOR_TESTING();

  aWriter.StartArrayProperty("pausedRanges");
  {
    buffer.StreamPausedRangesToJSON(
        aWriter, aSinceTime,
        aProgressLogger.CreateSubLoggerTo("Streaming pauses...", 99_pc,
                                          "Streamed pauses"));
  }
  aWriter.EndArray();

  if (aWriter.Failed()) {
    return;
  }

  // Stamp the end of the profiling log, then splice the log into the profile
  // under a key made from the current process id.
  ProfilingLog::Access([&](Json::Value& aProfilingLogObject) {
    aProfilingLogObject[Json::StaticString{
        "profilingLogEnd" TIMESTAMP_JSON_SUFFIX}] = ProfilingLog::Timestamp();

    aWriter.StartObjectProperty("profilingLog");
    {
      nsAutoCString pid;
      pid.AppendInt(int64_t(profiler_current_process_id().ToNumber()));
      Json::String logString = ToCompactString(aProfilingLogObject);
      aWriter.SplicedJSONProperty(pid, logString);
    }
    aWriter.EndObject();
  });

  const double collectionEndMs = profiler_time();

  // Record timestamps for the collection into the buffer, so that consumers
  // know why we didn't collect any samples for its duration.
  // We put these entries into the buffer after we've collected the profile,
  // so they'll be visible for the *next* profile collection (if they haven't
  // been overwritten due to buffer wraparound by then).
  buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
  buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));

#ifdef DEBUG
  if (slowWithSleeps != 0) {
    LOG("locked_profiler_stream_json_for_this_process done");
  }
#endif  // DEBUG
}

// Keep this internal function non-static, so it may be used by tests.
+bool do_profiler_stream_json_for_this_process( + SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + LOG("profiler_stream_json_for_this_process"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + const auto preRecordedMetaInformation = PreRecordMetaInformation(); + + aProgressLogger.SetLocalProgress(2_pc, "PreRecordMetaInformation done"); + + if (profiler_is_active()) { + invoke_profiler_state_change_callbacks(ProfilingState::GeneratingProfile); + } + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + locked_profiler_stream_json_for_this_process( + lock, aWriter, aSinceTime, preRecordedMetaInformation, aIsShuttingDown, + aService, + aProgressLogger.CreateSubLoggerFromTo( + 3_pc, "locked_profiler_stream_json_for_this_process started", 100_pc, + "locked_profiler_stream_json_for_this_process done")); + return !aWriter.Failed(); +} + +bool profiler_stream_json_for_this_process( + SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + MOZ_RELEASE_ASSERT( + !XRE_IsParentProcess() || NS_IsMainThread(), + "In the parent process, profiles should only be generated from the main " + "thread, otherwise they will be incomplete."); + return do_profiler_stream_json_for_this_process(aWriter, aSinceTime, + aIsShuttingDown, aService, + std::move(aProgressLogger)); +} + +// END saving/streaming code +//////////////////////////////////////////////////////////////////////// + +static char FeatureCategory(uint32_t aFeature) { + if (aFeature & DefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'D'; + } + return 'd'; + } + + if (aFeature & StartupExtraDefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'S'; + } + return 's'; + } + + if (aFeature & AvailableFeatures()) { + return '-'; + } + return 'x'; +} + +static void 
PrintUsage() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + + printf( + "\n" + "Profiler environment variable usage:\n" + "\n" + " MOZ_PROFILER_HELP\n" + " If set to any value, prints this message.\n" + " Use MOZ_BASE_PROFILER_HELP for BaseProfiler help.\n" + "\n" + " MOZ_LOG\n" + " Enables logging. The levels of logging available are\n" + " 'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n" + "\n" + " MOZ_PROFILER_STARTUP\n" + " If set to any value other than '' or '0'/'N'/'n', starts the\n" + " profiler immediately on start-up.\n" + " Useful if you want profile code that runs very early.\n" + "\n" + " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the number of entries per\n" + " process in the profiler's circular buffer when the profiler is first\n" + " started.\n" + " If unset, the platform default is used:\n" + " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n" + " (%u bytes per entry -> %u or %u total bytes per process)\n" + " Optional units in bytes: KB, KiB, MB, MiB, GB, GiB\n" + "\n" + " MOZ_PROFILER_STARTUP_DURATION=<1..>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time of\n" + " entries in the the profiler's circular buffer when the profiler is\n" + " first started, in seconds.\n" + " If unset, the life time of the entries will only be restricted by\n" + " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n" + " additional time duration restriction will be applied.\n" + "\n" + " MOZ_PROFILER_STARTUP_INTERVAL=<1..%d>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n" + " measured in milliseconds, when the profiler is first started.\n" + " If unset, the platform default is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n" + " the integer value of the features bitfield.\n" + " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is 
used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES=<Features>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n" + " a comma-separated list of strings.\n" + " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n" + " If unset, the platform default is used.\n" + "\n" + " Features: (x=unavailable, D/d=default/unavailable,\n" + " S/s=MOZ_PROFILER_STARTUP extra default/unavailable)\n", + unsigned(ActivePS::scMinimumBufferEntries), + unsigned(ActivePS::scMaximumBufferEntries), + unsigned(PROFILER_DEFAULT_ENTRIES.Value()), + unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value()), + unsigned(scBytesPerEntry), + unsigned(PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry), + unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * scBytesPerEntry), + PROFILER_MAX_INTERVAL); + +#define PRINT_FEATURE(n_, str_, Name_, desc_) \ + printf(" %c %7u: \"%s\" (%s)\n", FeatureCategory(ProfilerFeature::Name_), \ + ProfilerFeature::Name_, str_, desc_); + + PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE) + +#undef PRINT_FEATURE + + printf( + " - \"default\" (All above D+S defaults)\n" + "\n" + " MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n" + " comma-separated list of strings. A given thread will be sampled if\n" + " any of the filters is a case-insensitive substring of the thread\n" + " name. 
If unset, a default is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID=<Number>\n" + " This variable is used to propagate the activeTabID of\n" + " the profiler init params to subprocesses.\n" + "\n" + " MOZ_PROFILER_SHUTDOWN=<Filename>\n" + " If set, the profiler saves a profile to the named file on shutdown.\n" + " If the Filename contains \"%%p\", this will be replaced with the'\n" + " process id of the parent process.\n" + "\n" + " MOZ_PROFILER_SYMBOLICATE\n" + " If set, the profiler will pre-symbolicate profiles.\n" + " *Note* This will add a significant pause when gathering data, and\n" + " is intended mainly for local development.\n" + "\n" + " MOZ_PROFILER_LUL_TEST\n" + " If set to any value, runs LUL unit tests at startup.\n" + "\n" + " This platform %s native unwinding.\n" + "\n", +#if defined(HAVE_NATIVE_UNWIND) + "supports" +#else + "does not support" +#endif + ); +} + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler + +#if defined(GP_OS_linux) || defined(GP_OS_android) +struct SigHandlerCoordinator; +#endif + +// Sampler performs setup and teardown of the state required to sample with the +// profiler. Sampler may exist when ActivePS is not present. +// +// SuspendAndSampleAndResumeThread must only be called from a single thread, +// and must not sample the thread it is being called from. A separate Sampler +// instance must be used for each thread which wants to capture samples. + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// With the exception of SamplerThread, all Sampler objects must be Disable-d +// before releasing the lock which was used to create them. This avoids races +// on linux with the SIGPROF signal handler. + +class Sampler { + public: + // Sets up the profiler such that it can begin sampling. + explicit Sampler(PSLockRef aLock); + + // Disable the sampler, restoring it to its previous state. 
This must be + // called once, and only once, before the Sampler is destroyed. + void Disable(PSLockRef aLock); + + // This method suspends and resumes the samplee thread. It calls the passed-in + // function-like object aProcessRegs (passing it a populated |const + // Registers&| arg) while the samplee thread is suspended. Note that + // the aProcessRegs function must be very careful not to do anything that + // requires a lock, since we may have interrupted the thread at any point. + // As an example, you can't call TimeStamp::Now() since on windows it + // takes a lock on the performance counter. + // + // Func must be a function-like object of type `void()`. + template <typename Func> + void SuspendAndSampleAndResumeThread( + PSLockRef aLock, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + const TimeStamp& aNow, const Func& aProcessRegs); + + private: +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // Used to restore the SIGPROF handler when ours is removed. + struct sigaction mOldSigprofHandler; + + // This process' ID. Needed as an argument for tgkill in + // SuspendAndSampleAndResumeThread. + ProfilerProcessId mMyPid; + + // The sampler thread's ID. Used to assert that it is not sampling itself, + // which would lead to deadlock. + ProfilerThreadId mSamplerTid; + + public: + // This is the one-and-only variable used to communicate between the sampler + // thread and the samplee thread's signal handler. It's static because the + // samplee thread's signal handler is static. + static struct SigHandlerCoordinator* sSigHandlerCoordinator; +#endif +}; + +// END Sampler +//////////////////////////////////////////////////////////////////////// + +// Platform-specific function that retrieves per-thread CPU measurements. 
static RunningTimes GetThreadRunningTimesDiff(
    PSLockRef aLock,
    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);
// Platform-specific function that *may* discard CPU measurements since the
// previous call to GetThreadRunningTimesDiff, if the way to suspend threads on
// this platform may add running times to that thread.
// No-op otherwise, if suspending a thread doesn't make it work.
static void DiscardSuspendedThreadRunningTimes(
    PSLockRef aLock,
    ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData);

// Platform-specific function that retrieves process CPU measurements.
static RunningTimes GetProcessRunningTimesDiff(
    PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated);

// Template function to be used by `GetThreadRunningTimesDiff()` (unless some
// platform has a better way to achieve this).
// It helps perform CPU measurements and tie them to a timestamp, such that the
// measurements and timestamp are very close together.
// This is necessary, because the relative CPU usage is computed by dividing
// consecutive CPU measurements by their timestamp difference; if there was an
// unexpected big gap, it could skew this computation and produce impossible
// spikes that would hide the rest of the data. See bug 1685938 for more info.
// Note that this may call the measurement function more than once; it is
// assumed to normally be fast.
// This was verified experimentally, but there is currently no regression
// testing for it; see follow-up bug 1687402.
//
// aGetCPURunningTimesFunction is invoked with a `RunningTimes&` to fill in.
// Returns the filled RunningTimes (from the first attempt, or from the single
// retry if that retry was faster), with its post-measurement timestamp set.
template <typename GetCPURunningTimesFunction>
RunningTimes GetRunningTimesWithTightTimestamp(
    GetCPURunningTimesFunction&& aGetCPURunningTimesFunction) {
  // Once per process, compute a threshold over which running times and their
  // timestamp is considered too far apart.
  // (Function-local static: the calibrating lambda runs only on the first
  // call for each instantiation of this template.)
  static const TimeDuration scMaxRunningTimesReadDuration = [&]() {
    // Run the main CPU measurements + timestamp a number of times and capture
    // their durations.
    constexpr int loops = 128;
    TimeDuration durations[loops];
    RunningTimes runningTimes;
    TimeStamp before = TimeStamp::Now();
    for (int i = 0; i < loops; ++i) {
      AUTO_PROFILER_STATS(GetRunningTimes_MaxRunningTimesReadDuration);
      aGetCPURunningTimesFunction(runningTimes);
      const TimeStamp after = TimeStamp::Now();
      durations[i] = after - before;
      before = after;
    }
    // Move median duration to the middle.
    std::nth_element(&durations[0], &durations[loops / 2], &durations[loops]);
    // Use median*8 as cut-off point.
    // Typical durations should be around a microsecond, the cut-off should then
    // be around 10 microseconds, well below the expected minimum inter-sample
    // interval (observed as a few milliseconds), so overall this should keep
    // cpu/interval spikes small.
    // NOTE(review): the original sentence stopped after "spikes"; "small" is
    // the presumed intent -- confirm.
    return durations[loops / 2] * 8;
  }();

  // Record CPU measurements between two timestamps.
  RunningTimes runningTimes;
  TimeStamp before = TimeStamp::Now();
  aGetCPURunningTimesFunction(runningTimes);
  TimeStamp after = TimeStamp::Now();
  const TimeDuration duration = after - before;

  // In most cases, the above should be quick enough. But if not (e.g., because
  // of an OS context switch), repeat once:
  if (MOZ_UNLIKELY(duration > scMaxRunningTimesReadDuration)) {
    AUTO_PROFILER_STATS(GetRunningTimes_REDO);
    RunningTimes runningTimes2;
    aGetCPURunningTimesFunction(runningTimes2);
    TimeStamp after2 = TimeStamp::Now();
    // The retry is timed from the end of the first attempt.
    const TimeDuration duration2 = after2 - after;
    if (duration2 < duration) {
      // We did it faster, use the new results. (But it could still be slower
      // than expected, see note below for why it's acceptable.)
      // This must stay *after* the CPU measurements.
      runningTimes2.SetPostMeasurementTimeStamp(after2);
      return runningTimes2;
    }
    // Otherwise use the initial results, they were slow, but faster than the
    // second attempt.
    // This means that something bad happened twice in a row on the same thread!
    // So trying more times would be unlikely to get much better, and would be
    // more expensive than the precision is worth.
    // At worst, it means that a spike of activity may be reported in the next
    // time slice. But in the end, the cumulative work is conserved, so it
    // should still be visible at about the correct time in the graph.
    AUTO_PROFILER_STATS(GetRunningTimes_RedoWasWorse);
  }

  // This must stay *after* the CPU measurements.
  runningTimes.SetPostMeasurementTimeStamp(after);

  return runningTimes;
}

////////////////////////////////////////////////////////////////////////
// BEGIN SamplerThread

// The sampler thread controls sampling and runs whenever the profiler is
// active. It periodically runs through all registered threads, finds those
// that should be sampled, then pauses and samples them.

class SamplerThread {
 public:
  // Creates a sampler thread, but doesn't start it.
  SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                double aIntervalMilliseconds, uint32_t aFeatures);
  ~SamplerThread();

  // This runs on (is!) the sampler thread.
  void Run();

#if defined(GP_OS_windows)
  // This runs on (is!) the thread to spy on unregistered threads.
  void RunUnregisteredThreadSpy();
#endif

  // This runs on the main thread.
  void Stop(PSLockRef aLock);

  // Prepends aCallback to the list of post-sampling callbacks. The unnamed
  // PSLockRef parameter only serves as proof that the caller holds the
  // profiler lock.
  void AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&& aCallback) {
    // We are under lock, so it's safe to just modify the list pointer.
    // Also this means the sampler has not started its run yet, so any callback
    // added now will be invoked at the end of the next loop; this guarantees
    // that the callback will be invoked after at least one full sampling loop.
    mPostSamplingCallbackList = MakeUnique<PostSamplingCallbackListItem>(
        std::move(mPostSamplingCallbackList), std::move(aCallback));
  }

 private:
  void SpyOnUnregisteredThreads();

  // Item containing a post-sampling callback, and a tail-list of more items.
  // Using a linked list means no need to move items when adding more, and
  // "stealing" the whole list is one pointer move.
  struct PostSamplingCallbackListItem {
    // Item that was at the front of the list when this one was added (i.e.,
    // an older callback), or null.
    UniquePtr<PostSamplingCallbackListItem> mPrev;
    PostSamplingCallback mCallback;

    PostSamplingCallbackListItem(UniquePtr<PostSamplingCallbackListItem> aPrev,
                                 PostSamplingCallback&& aCallback)
        : mPrev(std::move(aPrev)), mCallback(std::move(aCallback)) {}
  };

  // Moves the whole callback list out to the caller, leaving the member in a
  // moved-from state.
  [[nodiscard]] UniquePtr<PostSamplingCallbackListItem>
  TakePostSamplingCallbacks(PSLockRef) {
    return std::move(mPostSamplingCallbackList);
  }

  // Invokes every callback in aCallbacks with aSamplingState, oldest first,
  // destroying the list in the process. Does nothing for a null list.
  static void InvokePostSamplingCallbacks(
      UniquePtr<PostSamplingCallbackListItem> aCallbacks,
      SamplingState aSamplingState) {
    if (!aCallbacks) {
      return;
    }
    // We want to drill down to the last element in this list, which is the
    // oldest one, so that we invoke them in FIFO order.
    // We don't expect many callbacks, so it's safe to recurse. Note that we're
    // moving-from the UniquePtr, so the tail will implicitly get destroyed.
    InvokePostSamplingCallbacks(std::move(aCallbacks->mPrev), aSamplingState);
    // We are going to destroy this item, so we can safely move-from the
    // callback before calling it (in case it has an rvalue-ref-qualified call
    // operator).
    std::move(aCallbacks->mCallback)(aSamplingState);
    // It may be tempting for a future maintainer to change aCallbacks into an
    // rvalue reference; this will remind them not to do that!
    static_assert(
        std::is_same_v<decltype(aCallbacks),
                       UniquePtr<PostSamplingCallbackListItem>>,
        "We need to capture the list by-value, to implicitly destroy it");
  }

  // This suspends the calling thread for the given number of microseconds.
  // Best effort timing.
  void SleepMicro(uint32_t aMicroseconds);

  // The sampler used to suspend and sample threads.
  Sampler mSampler;

  // The activity generation, for detecting when the sampler thread must stop.
  const uint32_t mActivityGeneration;

  // The interval between samples, measured in microseconds.
  const int mIntervalMicroseconds;

  // The OS-specific handle for the sampler thread.
#if defined(GP_OS_windows)
  HANDLE mThread;
  HANDLE mUnregisteredThreadSpyThread = nullptr;
  // State shared between the sampler thread, the spy thread, and the main
  // control; the prefix of each enumerator names who sets it.
  enum class SpyingState {
    NoSpying,
    Spy_Initializing,
    // Spy is waiting for SamplerToSpy_Start or MainToSpy_Shutdown.
    Spy_Waiting,
    // Sampler requests spy to start working. May be pre-empted by
    // MainToSpy_Shutdown.
    SamplerToSpy_Start,
    // Spy is currently working, cannot be interrupted, only the spy is allowed
    // to change the state again.
    Spy_Working,
    // Main control requests spy to shut down.
    MainToSpy_Shutdown,
    // Spy notified main control that it's out of the loop, about to exit.
    SpyToMain_ShuttingDown
  };
  SpyingState mSpyingState = SpyingState::NoSpying;
  // The sampler will increment this while the spy is working, then while the
  // spy is waiting the sampler will decrement it until <=0 before starting the
  // spy. This will ensure that the work doesn't take more than 50% of a CPU
  // core.
  int mDelaySpyStart = 0;
  Monitor mSpyingStateMonitor MOZ_UNANNOTATED{
      "SamplerThread::mSpyingStateMonitor"};
#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
    defined(GP_OS_android) || defined(GP_OS_freebsd)
  pthread_t mThread;
#endif

  // Post-sampling callbacks are kept in a simple linked list, which will be
  // stolen by the sampler thread at the end of its next run.
  UniquePtr<PostSamplingCallbackListItem> mPostSamplingCallbackList;

#if defined(GP_OS_windows)
  bool mNoTimerResolutionChange = true;
#endif

  // Record kept for one unregistered thread being spied on: its id, name, and
  // a CPU time value in nanoseconds.
  struct SpiedThread {
    base::ProcessId mThreadId;
    nsCString mName;
    uint64_t mCPUTimeNs;

    SpiedThread(base::ProcessId aThreadId, const nsACString& aName,
                uint64_t aCPUTimeNs)
        : mThreadId(aThreadId), mName(aName), mCPUTimeNs(aCPUTimeNs) {}

    // Comparisons with just a thread id, for easy searching in an array.
    friend bool operator==(const SpiedThread& aSpiedThread,
                           base::ProcessId aThreadId) {
      return aSpiedThread.mThreadId == aThreadId;
    }
    friend bool operator==(base::ProcessId aThreadId,
                           const SpiedThread& aSpiedThread) {
      return aThreadId == aSpiedThread.mThreadId;
    }
  };

  // Time at which mSpiedThreads was previously updated. Null before 1st update.
  TimeStamp mLastSpying;
  // Unregistered threads that have been found, and are being spied on.
  using SpiedThreads = AutoTArray<SpiedThread, 128>;
  SpiedThreads mSpiedThreads;

  // Not copyable.
  SamplerThread(const SamplerThread&) = delete;
  void operator=(const SamplerThread&) = delete;
};

// Forwards aCallback to the current sampler thread's callback list.
// Returns false (without consuming aCallback) if there is no ActivePS instance
// or it has no sampler thread; returns true otherwise.
// [[nodiscard]] static
bool ActivePS::AppendPostSamplingCallback(PSLockRef aLock,
                                          PostSamplingCallback&& aCallback) {
  if (!sInstance || !sInstance->mSamplerThread) {
    return false;
  }
  sInstance->mSamplerThread->AppendPostSamplingCallback(aLock,
                                                        std::move(aCallback));
  return true;
}

// This function is required because we need to create a SamplerThread within
// ActivePS's constructor, but SamplerThread is defined after ActivePS. It
// could probably be removed by moving some code around.
// Returns a SamplerThread allocated with `new`.
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
                                       double aInterval, uint32_t aFeatures) {
  return new SamplerThread(aLock, aGeneration, aInterval, aFeatures);
}

// This function is the sampler thread. This implementation is used for all
// targets.
void SamplerThread::Run() {
  PR_SetCurrentThreadName("SamplerThread");

  // Features won't change during this SamplerThread's lifetime, so we can read
  // them once and store them locally.
  const uint32_t features = []() -> uint32_t {
    PSAutoLock lock;
    if (!ActivePS::Exists(lock)) {
      // If there is no active profiler, it doesn't matter what we return,
      // because this thread will exit before any feature is used.
      return 0;
    }
    return ActivePS::Features(lock);
  }();

  // Not *no*-stack-sampling means we do want stack sampling.
+ const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features); + + const bool cpuUtilization = ProfilerFeature::HasCPUUtilization(features); + + // Use local ProfileBuffer and underlying buffer to capture the stack. + // (This is to avoid touching the core buffer lock while a thread is + // suspended, because that thread could be working with the core buffer as + // well. + mozilla::ProfileBufferChunkManagerSingle localChunkManager( + ProfileBufferChunkManager::scExpectedMaximumStackSize); + ProfileChunkedBuffer localBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager); + ProfileBuffer localProfileBuffer(localBuffer); + + // Will be kept between collections, to know what each collection does. + auto previousState = localBuffer.GetState(); + + // This will be filled at every loop, to be used by the next loop to compute + // the CPU utilization between samples. + RunningTimes processRunningTimes; + + // This will be set inside the loop, from inside the lock scope, to capture + // all callbacks added before that, but none after the lock is released. + UniquePtr<PostSamplingCallbackListItem> postSamplingCallbacks; + // This will be set inside the loop, before invoking callbacks outside. + SamplingState samplingState{}; + + const TimeDuration sampleInterval = + TimeDuration::FromMicroseconds(mIntervalMicroseconds); + const uint32_t minimumIntervalSleepUs = + static_cast<uint32_t>(mIntervalMicroseconds / 4); + + // This is the scheduled time at which each sampling loop should start. + // It will determine the ideal next sampling start by adding the expected + // interval, unless when sampling runs late -- See end of while() loop. + TimeStamp scheduledSampleStart = TimeStamp::Now(); + + while (true) { + const TimeStamp sampleStart = TimeStamp::Now(); + + // This scope is for |lock|. It ends before we sleep below. + { + // There should be no local callbacks left from a previous loop. 
+ MOZ_ASSERT(!postSamplingCallbacks); + + PSAutoLock lock; + TimeStamp lockAcquired = TimeStamp::Now(); + + // Move all the post-sampling callbacks locally, so that new ones cannot + // sneak in between the end of the lock scope and the invocation after it. + postSamplingCallbacks = TakePostSamplingCallbacks(lock); + + if (!ActivePS::Exists(lock)) { + // Exit the `while` loop, including the lock scope, before invoking + // callbacks and returning. + samplingState = SamplingState::JustStopped; + break; + } + + // At this point profiler_stop() might have been called, and + // profiler_start() might have been called on another thread. If this + // happens the generation won't match. + if (ActivePS::Generation(lock) != mActivityGeneration) { + samplingState = SamplingState::JustStopped; + // Exit the `while` loop, including the lock scope, before invoking + // callbacks and returning. + break; + } + + ActivePS::ClearExpiredExitProfiles(lock); + + TimeStamp expiredMarkersCleaned = TimeStamp::Now(); + + if (int(gSkipSampling) <= 0 && !ActivePS::IsSamplingPaused(lock)) { + double sampleStartDeltaMs = + (sampleStart - CorePS::ProcessStartTime()).ToMilliseconds(); + ProfileBuffer& buffer = ActivePS::Buffer(lock); + + // Before sampling counters, update the process CPU counter if active. 
+ if (ActivePS::ProcessCPUCounter* processCPUCounter = + ActivePS::MaybeProcessCPUCounter(lock); + processCPUCounter) { + RunningTimes processRunningTimesDiff = + GetProcessRunningTimesDiff(lock, processRunningTimes); + Maybe<uint64_t> cpu = processRunningTimesDiff.GetJsonThreadCPUDelta(); + if (cpu) { + processCPUCounter->Add(static_cast<int64_t>(*cpu)); + } + } + + if (PowerCounters* powerCounters = ActivePS::MaybePowerCounters(lock); + powerCounters) { + powerCounters->Sample(); + } + + // handle per-process generic counters + const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock); + for (auto& counter : counters) { + if (auto sample = counter->Sample(); sample.isSampleNew) { + // create Buffer entries for each counter + buffer.AddEntry(ProfileBufferEntry::CounterId(counter)); + buffer.AddEntry(ProfileBufferEntry::Time(sampleStartDeltaMs)); +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (ActivePS::IsMemoryCounter(counter)) { + // For the memory counter, substract the size of our buffer to + // avoid giving the misleading impression that the memory use + // keeps on growing when it's just the profiler session that's + // using a larger buffer as it gets longer. + sample.count -= static_cast<int64_t>( + ActivePS::ControlledChunkManager(lock).TotalSize()); + } +#endif + // In the future, we may support keyed counters - for example, + // counters with a key which is a thread ID. For "simple" counters + // we'll just use a key of 0. + buffer.AddEntry(ProfileBufferEntry::CounterKey(0)); + buffer.AddEntry(ProfileBufferEntry::Count(sample.count)); + if (sample.number) { + buffer.AddEntry(ProfileBufferEntry::Number(sample.number)); + } + } + } + TimeStamp countersSampled = TimeStamp::Now(); + + if (stackSampling || cpuUtilization) { + samplingState = SamplingState::SamplingCompleted; + + // Prevent threads from ending (or starting) and allow access to all + // OffThreadRef's. 
+ ThreadRegistry::LockedRegistry lockedRegistry; + + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + ThreadRegistration::UnlockedRWForLockedProfiler& + unlockedThreadData = + offThreadRef.UnlockedRWForLockedProfilerRef(); + ProfiledThreadData* profiledThreadData = + unlockedThreadData.GetProfiledThreadData(lock); + if (!profiledThreadData) { + // This thread is not being profiled, continue with the next one. + continue; + } + + const ThreadProfilingFeatures whatToProfile = + unlockedThreadData.ProfilingFeatures(); + const bool threadCPUUtilization = + cpuUtilization && + DoFeaturesIntersect(whatToProfile, + ThreadProfilingFeatures::CPUUtilization); + const bool threadStackSampling = + stackSampling && + DoFeaturesIntersect(whatToProfile, + ThreadProfilingFeatures::Sampling); + if (!threadCPUUtilization && !threadStackSampling) { + // Nothing to profile on this thread, continue with the next one. + continue; + } + + const ProfilerThreadId threadId = + unlockedThreadData.Info().ThreadId(); + + const RunningTimes runningTimesDiff = [&]() { + if (!threadCPUUtilization) { + // If we don't need CPU measurements, we only need a timestamp. + return RunningTimes(TimeStamp::Now()); + } + return GetThreadRunningTimesDiff(lock, unlockedThreadData); + }(); + + const TimeStamp& now = runningTimesDiff.PostMeasurementTimeStamp(); + double threadSampleDeltaMs = + (now - CorePS::ProcessStartTime()).ToMilliseconds(); + + // If the thread is asleep and has been sampled before in the same + // sleep episode, or otherwise(*) if there was zero CPU activity + // since the previous sampling, find and copy the previous sample, + // as that's cheaper than taking a new sample. 
+ // (*) Tech note: The asleep check is done first and always, because + // it is more reliable, and knows if it's the first asleep + // sample, which cannot be duplicated; if the test was the other + // way around, it could find zero CPU and then short-circuit + // that state-changing second-asleep-check operation, which + // could result in an unneeded sample. + // However we're using current running times (instead of copying the + // old ones) because some work could have happened. + if (threadStackSampling && + (unlockedThreadData.CanDuplicateLastSampleDueToSleep() || + runningTimesDiff.GetThreadCPUDelta() == Some(uint64_t(0)))) { + const bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample( + threadId, threadSampleDeltaMs, + profiledThreadData->LastSample(), runningTimesDiff); + if (dup_ok) { + continue; + } + } + + AUTO_PROFILER_STATS(gecko_SamplerThread_Run_DoPeriodicSample); + + // Record the global profiler buffer's range start now, before + // adding the first entry for this thread's sample. + const uint64_t bufferRangeStart = buffer.BufferRangeStart(); + + // Add the thread ID now, so we know its position in the main + // buffer, which is used by some JS data. + // (DoPeriodicSample only knows about the temporary local buffer.) + const uint64_t samplePos = buffer.AddThreadIdEntry(threadId); + profiledThreadData->LastSample() = Some(samplePos); + + // Also add the time, so it's always there after the thread ID, as + // expected by the parser. (Other stack data is optional.) + buffer.AddEntry(ProfileBufferEntry::TimeBeforeCompactStack( + threadSampleDeltaMs)); + + Maybe<double> unresponsiveDuration_ms; + + // If we have RunningTimes data, store it before the CompactStack. + // Note: It is not stored inside the CompactStack so that it doesn't + // get incorrectly duplicated when the thread is sleeping. 
+ if (!runningTimesDiff.IsEmpty()) { + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::RunningTimes, runningTimesDiff); + } + + if (threadStackSampling) { + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock + lockedThreadData = offThreadRef.LockedRWFromAnyThread(); + // Suspend the thread and collect its stack data in the local + // buffer. + mSampler.SuspendAndSampleAndResumeThread( + lock, lockedThreadData.DataCRef(), now, + [&](const Registers& aRegs, const TimeStamp& aNow) { + DoPeriodicSample(lock, lockedThreadData.DataCRef(), aRegs, + samplePos, bufferRangeStart, + localProfileBuffer); + + // For "eventDelay", we want the input delay - but if + // there are no events in the input queue (or even if there + // are), we're interested in how long the delay *would* be + // for an input event now, which would be the time to finish + // the current event + the delay caused by any events + // already in the input queue (plus any High priority + // events). Events at lower priorities (in a + // PrioritizedEventQueue) than Input count for input delay + // only for the duration that they're running, since when + // they finish, any queued input event would run. + // + // Unless we record the time state of all events and queue + // states at all times, this is hard to precisely calculate, + // but we can approximate it well in post-processing with + // RunningEventDelay and RunningEventStart. + // + // RunningEventDelay is the time duration the event was + // queued before starting execution. RunningEventStart is + // the time the event started. (Note: since we care about + // Input event delays on MainThread, for + // PrioritizedEventQueues we return 0 for RunningEventDelay + // if the currently running event has a lower priority than + // Input (since Input events won't queue behind them). 
+ // + // To directly measure this we would need to record the time + // at which the newest event currently in each queue at time + // X (the sample time) finishes running. This of course + // would require looking into the future, or recording all + // this state and then post-processing it later. If we were + // to trace every event start and end we could do this, but + // it would have significant overhead to do so (and buffer + // usage). From a recording of RunningEventDelays and + // RunningEventStarts we can infer the actual delay: + // + // clang-format off + // Event queue: <tail> D : C : B : A <head> + // Time inserted (ms): 40 : 20 : 10 : 0 + // Run Time (ms): 30 : 100 : 40 : 30 + // + // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 + // [A||||||||||||] + // ----------[B|||||||||||||||||] + // -------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||] + // -----------------------------------------------------------------[D|||||||||...] + // + // Calculate the delay of a new event added at time t: (run every sample) + // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart); + // effective_submission = now - TimeSinceRunningEventBlockedInputEvents; + // delta = (now - last_sample_time); + // last_sample_time = now; + // for (t=effective_submission to now) { + // delay[t] += delta; + // } + // + // Can be reduced in overhead by: + // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart); + // effective_submission = now - TimeSinceRunningEventBlockedInputEvents; + // if (effective_submission != last_submission) { + // delta = (now - last_submision); + // // this loop should be made to match each sample point in the range + // // intead of assuming 1ms sampling as this pseudocode does + // for (t=last_submission to effective_submission-1) { + // delay[t] += delta; + // delta -= 1; // assumes 1ms; adjust as needed to match for() + // } + // last_submission = 
effective_submission; + // } + // + // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final + // hypothetical Submission Running @ result + // event E + // 0 Empty A 0 30 0 0 @0=10 30 + // 10 B A 0 60 0 0 @0=20, @10=10 60 + // 20 B A 0 150 0 0 @0=30, @10=20, @20=10 150 + // 30 C B 20 140 10 30 @10=20, @20=10, @30=0 140 + // 40 C B 20 160 @10=30, @20=20... 160 + // 50 C B 20 150 150 + // 60 C B 20 140 @10=50, @20=40... 140 + // 70 D C 50 130 20 70 @20=50, @30=40... 130 + // ... + // 160 D C 50 40 @20=140, @30=130... 40 + // 170 <empty> D 140 30 40 @40=140, @50=130... (rounding) 30 + // 180 <empty> D 140 20 40 @40=150 20 + // 190 <empty> D 140 10 40 @40=160 10 + // 200 <empty> <empty> 0 0 NA 0 + // + // Function Delay(t) = the time between t and the time at which a hypothetical + // event e would start executing, if e was enqueued at time t. + // + // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running + // // instantly. + // Delay(0) = 30 // The hypothetical event e got enqueued just after A got + // // enqueued. It can start running at 30, when A is done. + // Delay(5) = 25 + // Delay(10) = 60 // Can start running at 70, after both A and B are done. + // Delay(19) = 51 + // Delay(20) = 150 // Can start running at 170, after A, B & C. + // Delay(25) = 145 + // Delay(30) = 170 // Can start running at 200, after A, B, C & D. + // Delay(120) = 80 + // Delay(200) = 0 // (assuming nothing was enqueued after D) + // + // For every event that gets enqueued, the Delay time will go up by the + // event's running time at the time at which the event is enqueued. + // The Delay function will be a sawtooth of the following shape: + // + // |\ |... 
+ // | \ | + // |\ | \ | + // | \ | \ | + // |\ | \ | \ | + // |\ | \| \| \ | + // | \| \ | + // _| \____| + // + // + // A more complex example with a PrioritizedEventQueue: + // + // Event queue: <tail> D : C : B : A <head> + // Time inserted (ms): 40 : 20 : 10 : 0 + // Run Time (ms): 30 : 100 : 40 : 30 + // Priority: Input: Norm: Norm: Norm + // + // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 + // [A||||||||||||] + // ----------[B|||||||||||||||||] + // ----------------------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||] + // ---------------[D||||||||||||] + // + // + // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final + // hypothetical Submission Running @ result + // event + // 0 Empty A 0 30 0 0 @0=10 30 + // 10 B A 0 20 0 0 @0=20, @10=10 20 + // 20 B A 0 10 0 0 @0=30, @10=20, @20=10 10 + // 30 C B 0 40 30 30 @30=10 40 + // 40 C B 0 60 30 @40=10, @30=20 60 + // 50 C B 0 50 30 @50=10, @40=20, @30=30 50 + // 60 C B 0 40 30 @60=10, @50=20, @40=30, @30=40 40 + // 70 C D 30 30 40 70 @60=20, @50=30, @40=40 30 + // 80 C D 30 20 40 70 ...@50=40, @40=50 20 + // 90 C D 30 10 40 70 ...@60=40, @50=50, @40=60 10 + // 100 <empty> C 0 100 100 100 @100=10 100 + // 110 <empty> C 0 90 100 100 @110=10, @100=20 90 + + // + // For PrioritizedEventQueue, the definition of the Delay(t) function is adjusted: the hypothetical event e has Input priority. + // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running + // // instantly. + // Delay(0) = 30 // The hypothetical input event e got enqueued just after A got + // // enqueued. It can start running at 30, when A is done. + // Delay(5) = 25 + // Delay(10) = 20 + // Delay(25) = 5 // B has been queued, but e does not need to wait for B because e has Input priority and B does not. + // // So e can start running at 30, when A is done. + // Delay(30) = 40 // Can start running at 70, after B is done. 
+ // Delay(40) = 60 // Can start at 100, after B and D are done (D is Input Priority) + // Delay(80) = 20 + // Delay(100) = 100 // Wait for C to finish + + // clang-format on + // + // Alternatively we could insert (recycled instead of + // allocated/freed) input events at every sample period + // (1ms...), and use them to back-calculate the delay. This + // might also be somewhat expensive, and would require + // guessing at the maximum delay, which would likely be in + // the seconds, and so you'd need 1000's of pre-allocated + // events per queue per thread - so there would be a memory + // impact as well. + + TimeDuration currentEventDelay; + TimeDuration currentEventRunning; + lockedThreadData->GetRunningEventDelay( + aNow, currentEventDelay, currentEventRunning); + + // Note: eventDelay is a different definition of + // responsiveness than the 16ms event injection. + + // Don't suppress 0's for now; that can be a future + // optimization. We probably want one zero to be stored + // before we start suppressing, which would be more + // complex. + unresponsiveDuration_ms = + Some(currentEventDelay.ToMilliseconds() + + currentEventRunning.ToMilliseconds()); + }); + + if (cpuUtilization) { + // Suspending the thread for sampling could have added some + // running time to it, discard any since the call to + // GetThreadRunningTimesDiff above. + DiscardSuspendedThreadRunningTimes(lock, unlockedThreadData); + } + + // If we got eventDelay data, store it before the CompactStack. + // Note: It is not stored inside the CompactStack so that it + // doesn't get incorrectly duplicated when the thread is sleeping. + if (unresponsiveDuration_ms.isSome()) { + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::UnresponsiveDurationMs, + *unresponsiveDuration_ms); + } + } + + // There *must* be a CompactStack after a TimeBeforeCompactStack; + // but note that other entries may have been concurrently inserted + // between the TimeBeforeCompactStack above and now. 
If the captured + // sample from `DoPeriodicSample` is complete, copy it into the + // global buffer, otherwise add an empty one to satisfy the parser + // that expects one. + auto state = localBuffer.GetState(); + if (NS_WARN_IF(state.mFailedPutBytes != + previousState.mFailedPutBytes)) { + LOG("Stack sample too big for local storage, failed to store %u " + "bytes", + unsigned(state.mFailedPutBytes - + previousState.mFailedPutBytes)); + // There *must* be a CompactStack after a TimeBeforeCompactStack, + // even an empty one. + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::CompactStack, + UniquePtr<ProfileChunkedBuffer>(nullptr)); + } else if (state.mRangeEnd - previousState.mRangeEnd >= + *profiler_get_core_buffer().BufferLength()) { + LOG("Stack sample too big for profiler storage, needed %u bytes", + unsigned(state.mRangeEnd - previousState.mRangeEnd)); + // There *must* be a CompactStack after a TimeBeforeCompactStack, + // even an empty one. + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::CompactStack, + UniquePtr<ProfileChunkedBuffer>(nullptr)); + } else { + profiler_get_core_buffer().PutObjects( + ProfileBufferEntry::Kind::CompactStack, localBuffer); + } + + // Clean up for the next run. + localBuffer.Clear(); + previousState = localBuffer.GetState(); + } + } else { + samplingState = SamplingState::NoStackSamplingCompleted; + } + +#if defined(USE_LUL_STACKWALK) + // The LUL unwind object accumulates frame statistics. Periodically we + // should poke it to give it a chance to print those statistics. This + // involves doing I/O (fprintf, __android_log_print, etc.) and so + // can't safely be done from the critical section inside + // SuspendAndSampleAndResumeThread, which is why it is done here. 
+ lul::LUL* lul = CorePS::Lul(); + if (lul) { + lul->MaybeShowStats(); + } +#endif + TimeStamp threadsSampled = TimeStamp::Now(); + + { + AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests); + ActivePS::FulfillChunkRequests(lock); + } + + buffer.CollectOverheadStats(sampleStartDeltaMs, + lockAcquired - sampleStart, + expiredMarkersCleaned - lockAcquired, + countersSampled - expiredMarkersCleaned, + threadsSampled - countersSampled); + } else { + samplingState = SamplingState::SamplingPaused; + } + } + // gPSMutex is not held after this point. + + // Invoke end-of-sampling callbacks outside of the locked scope. + InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), + samplingState); + + ProfilerChild::ProcessPendingUpdate(); + + if (ProfilerFeature::HasUnregisteredThreads(features)) { +#if defined(GP_OS_windows) + { + MonitorAutoLock spyingStateLock{mSpyingStateMonitor}; + switch (mSpyingState) { + case SpyingState::SamplerToSpy_Start: + case SpyingState::Spy_Working: + // If the spy is working (or about to work), record this loop + // iteration to delay the next start. + ++mDelaySpyStart; + break; + case SpyingState::Spy_Waiting: + // The Spy is idle, waiting for instructions. Should we delay? + if (--mDelaySpyStart <= 0) { + mDelaySpyStart = 0; + mSpyingState = SpyingState::SamplerToSpy_Start; + mSpyingStateMonitor.NotifyAll(); + } + break; + default: + // Otherwise the spy should be initializing or shutting down. + MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing || + mSpyingState == SpyingState::MainToSpy_Shutdown || + mSpyingState == SpyingState::SpyToMain_ShuttingDown); + break; + } + } +#else + // On non-Windows platforms, this is fast enough to run in this thread, + // each sampling loop. + SpyOnUnregisteredThreads(); +#endif + } + + // We expect the next sampling loop to start `sampleInterval` after this + // loop here was scheduled to start. 
+ scheduledSampleStart += sampleInterval; + + // Try to sleep until we reach that next scheduled time. + const TimeStamp beforeSleep = TimeStamp::Now(); + if (scheduledSampleStart >= beforeSleep) { + // There is still time before the next scheduled sample time. + const uint32_t sleepTimeUs = static_cast<uint32_t>( + (scheduledSampleStart - beforeSleep).ToMicroseconds()); + if (sleepTimeUs >= minimumIntervalSleepUs) { + SleepMicro(sleepTimeUs); + } else { + // If we're too close to that time, sleep the minimum amount of time. + // Note that the next scheduled start is not shifted, so at the end of + // the next loop, sleep may again be adjusted to get closer to schedule. + SleepMicro(minimumIntervalSleepUs); + } + } else { + // This sampling loop ended after the next sampling should have started! + // There is little point to try and keep up to schedule now, it would + // require more work, while it's likely we're late because the system is + // already busy. Try and restart a normal schedule from now. + scheduledSampleStart = beforeSleep + sampleInterval; + SleepMicro(static_cast<uint32_t>(sampleInterval.ToMicroseconds())); + } + } + + // End of `while` loop. We can only be here from a `break` inside the loop. + InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), samplingState); +} + +namespace geckoprofiler::markers { + +struct UnregisteredThreadLifetimeMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("UnregisteredThreadLifetime"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + base::ProcessId aThreadId, + const ProfilerString8View& aName, + const ProfilerString8View& aEndEvent) { + aWriter.IntProperty("Thread Id", aThreadId); + aWriter.StringProperty("Thread Name", aName.Length() != 0 + ? 
aName.AsSpan() + : MakeStringSpan("~Unnamed~")); + if (aEndEvent.Length() != 0) { + aWriter.StringProperty("End Event", aEndEvent); + } + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer, + MS::Searchable::Searchable); + schema.AddKeyFormatSearchable("Thread Name", MS::Format::String, + MS::Searchable::Searchable); + schema.AddKeyFormat("End Event", MS::Format::String); + schema.AddStaticLabelValue( + "Note", + "Start and end are approximate, based on first and last appearances."); + schema.SetChartLabel( + "{marker.data.Thread Name} (tid {marker.data.Thread Id})"); + schema.SetTableLabel("{marker.name} lifetime"); + return schema; + } +}; + +struct UnregisteredThreadCPUMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("UnregisteredThreadCPU"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + base::ProcessId aThreadId, + int64_t aCPUDiffNs, const TimeStamp& aStart, + const TimeStamp& aEnd) { + aWriter.IntProperty("Thread Id", aThreadId); + aWriter.IntProperty("CPU Time", aCPUDiffNs); + aWriter.DoubleProperty( + "CPU Utilization", + double(aCPUDiffNs) / ((aEnd - aStart).ToMicroseconds() * 1000.0)); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormatSearchable("Thread Id", MS::Format::Integer, + MS::Searchable::Searchable); + schema.AddKeyFormat("CPU Time", MS::Format::Nanoseconds); + schema.AddKeyFormat("CPU Utilization", MS::Format::Percentage); + schema.SetChartLabel("{marker.data.CPU Utilization}"); + schema.SetTableLabel( + "{marker.name} - Activity: {marker.data.CPU Utilization}"); + return schema; + } +}; + +} // namespace geckoprofiler::markers + +static bool IsThreadIdRegistered(ProfilerThreadId aThreadId) { + 
ThreadRegistry::LockedRegistry lockedRegistry;
  const auto registryEnd = lockedRegistry.end();
  // True when some entry of the locked registry reports the same thread id.
  return std::find_if(
             lockedRegistry.begin(), registryEnd,
             [aThreadId](const ThreadRegistry::OffThreadRef& aOffThreadRef) {
               return aOffThreadRef.UnlockedConstReaderCRef()
                          .Info()
                          .ThreadId() == aThreadId;
             }) != registryEnd;
}

// Builds a marker name of the form "tid <id>", followed by a space and aName
// when aName is not empty.
static nsAutoCString MakeThreadInfoMarkerName(base::ProcessId aThreadId,
                                              const nsACString& aName) {
  nsAutoCString markerName{"tid "};
  markerName.AppendInt(int64_t(aThreadId));
  if (!aName.IsEmpty()) {
    markerName.AppendLiteral(" ");
    markerName.Append(aName);
  }
  return markerName;
}

void SamplerThread::SpyOnUnregisteredThreads() {
  // Start time of this search; used as the start of the interval markers
  // recorded below.
  const TimeStamp unregisteredThreadSearchStart = TimeStamp::Now();

  // Build a single-entry request describing the current process.
  const base::ProcessId currentProcessId =
      base::ProcessId(profiler_current_process_id().ToNumber());
  nsTArray<ProcInfoRequest> request(1);
  request.EmplaceBack(
      /* aPid = */ currentProcessId,
      /* aProcessType = */ ProcType::Unknown,
      /* aOrigin = */ ""_ns,
      /* aWindowInfo = */ nsTArray<WindowInfo>{},
      /* aUtilityInfo = */ nsTArray<UtilityInfo>{},
      /* aChild = */ 0
#ifdef XP_MACOSX
      ,
      /* aChildTask = */ MACH_PORT_NULL
#endif  // XP_MACOSX
  );

  const ProcInfoPromise::ResolveOrRejectValue procInfoOrError =
      GetProcInfoSync(std::move(request));

  // On failure, record a marker covering the time spent and give up.
  if (!procInfoOrError.IsResolve()) {
    PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER,
                         MarkerOptions(MarkerThreadId::MainThread(),
                                       MarkerTiming::IntervalUntilNowFrom(
                                           unregisteredThreadSearchStart)),
                         "Could not retrieve any process information");
    return;
  }

  const auto& procInfoHashMap = procInfoOrError.ResolveValue();
  // Expecting the requested (current) process information to be present in the
  // hashmap.
+ const auto& procInfoPtr = + procInfoHashMap.readonlyThreadsafeLookup(currentProcessId); + if (!procInfoPtr) { + PROFILER_MARKER_TEXT("Failed unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread(), + MarkerTiming::IntervalUntilNowFrom( + unregisteredThreadSearchStart)), + "Could not retrieve information about this process"); + return; + } + + // Record the time spent so far, which is OS-bound... + PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread(), + MarkerTiming::IntervalUntilNowFrom( + unregisteredThreadSearchStart)), + "Work to discover threads"); + + // ... and record the time needed to process the data, which we can control. + AUTO_PROFILER_MARKER_TEXT( + "Unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread()), + "Work to process discovered threads and record unregistered ones"_ns); + + const Span<const mozilla::ThreadInfo> threads = procInfoPtr->value().threads; + + // mLastSpying timestamp should be null only at the beginning of a session, + // when mSpiedThreads is still empty. + MOZ_ASSERT_IF(mLastSpying.IsNull(), mSpiedThreads.IsEmpty()); + + const TimeStamp previousSpying = std::exchange(mLastSpying, TimeStamp::Now()); + + // Find threads that were spied on but are not present anymore. + const auto threadsBegin = threads.begin(); + const auto threadsEnd = threads.end(); + for (size_t spiedThreadIndexPlus1 = mSpiedThreads.Length(); + spiedThreadIndexPlus1 != 0; --spiedThreadIndexPlus1) { + const SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndexPlus1 - 1]; + if (std::find_if(threadsBegin, threadsEnd, + [spiedTid = spiedThread.mThreadId]( + const mozilla::ThreadInfo& aThreadInfo) { + return aThreadInfo.tid == spiedTid; + }) == threadsEnd) { + // This spied thread is gone. 
+ PROFILER_MARKER( + MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName), + PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + // Place the end between this update and the previous one. + MarkerTiming::IntervalEnd(previousSpying + + (mLastSpying - previousSpying) / + int64_t(2))), + UnregisteredThreadLifetimeMarker, spiedThread.mThreadId, + spiedThread.mName, "Thread disappeared"); + + // Don't spy on it anymore, assuming it won't come back. + mSpiedThreads.RemoveElementAt(spiedThreadIndexPlus1 - 1); + } + } + + for (const mozilla::ThreadInfo& threadInfo : threads) { + // Index of this encountered thread in mSpiedThreads, or NoIndex. + size_t spiedThreadIndex = mSpiedThreads.IndexOf(threadInfo.tid); + if (IsThreadIdRegistered(ProfilerThreadId::FromNumber(threadInfo.tid))) { + // This thread id is already officially registered. + if (spiedThreadIndex != SpiedThreads::NoIndex) { + // This now-registered thread was previously being spied. + SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex]; + PROFILER_MARKER( + MakeThreadInfoMarkerName(spiedThread.mThreadId, spiedThread.mName), + PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + // Place the end between this update and the previous one. + // TODO: Find the real time from the thread registration? + MarkerTiming::IntervalEnd(previousSpying + + (mLastSpying - previousSpying) / + int64_t(2))), + UnregisteredThreadLifetimeMarker, spiedThread.mThreadId, + spiedThread.mName, "Thread registered itself"); + + // Remove from mSpiedThreads, since it can be profiled normally. + mSpiedThreads.RemoveElement(threadInfo.tid); + } + } else { + // This thread id is not registered. + if (spiedThreadIndex == SpiedThreads::NoIndex) { + // This unregistered thread has not been spied yet, store it now. 
+ NS_ConvertUTF16toUTF8 name(threadInfo.name); + mSpiedThreads.EmplaceBack(threadInfo.tid, name, threadInfo.cpuTime); + + PROFILER_MARKER( + MakeThreadInfoMarkerName(threadInfo.tid, name), PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + // Place the start between this update and the previous one (or + // the start of this search if it's the first one). + MarkerTiming::IntervalStart( + mLastSpying - + (mLastSpying - (previousSpying.IsNull() + ? unregisteredThreadSearchStart + : previousSpying)) / + int64_t(2))), + UnregisteredThreadLifetimeMarker, threadInfo.tid, name, + /* aEndEvent */ ""); + } else { + // This unregistered thread was already being spied, record its work. + SpiedThread& spiedThread = mSpiedThreads[spiedThreadIndex]; + int64_t diffCPUTimeNs = + int64_t(threadInfo.cpuTime) - int64_t(spiedThread.mCPUTimeNs); + spiedThread.mCPUTimeNs = threadInfo.cpuTime; + if (diffCPUTimeNs != 0) { + PROFILER_MARKER( + MakeThreadInfoMarkerName(threadInfo.tid, spiedThread.mName), + PROFILER, + MarkerOptions( + MarkerThreadId::MainThread(), + MarkerTiming::Interval(previousSpying, mLastSpying)), + UnregisteredThreadCPUMarker, threadInfo.tid, diffCPUTimeNs, + previousSpying, mLastSpying); + } + } + } + } + + PROFILER_MARKER_TEXT("Unregistered thread search", PROFILER, + MarkerOptions(MarkerThreadId::MainThread(), + MarkerTiming::IntervalUntilNowFrom( + unregisteredThreadSearchStart)), + "Work to discover and record unregistered threads"); +} + +// We #include these files directly because it means those files can use +// declarations from this file trivially. These provide target-specific +// implementations of all SamplerThread methods except Run(). 
#if defined(GP_OS_windows)
#  include "platform-win32.cpp"
#elif defined(GP_OS_darwin)
#  include "platform-macos.cpp"
#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
#  include "platform-linux-android.cpp"
#else
#  error "bad platform"
#endif

// END SamplerThread
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN externally visible functions

MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)

// Memory reporter entry point: measures the profiler state (and, separately,
// LUL) while holding the profiler lock, then reports the sizes after the lock
// has been released. Must be called on the main thread.
NS_IMETHODIMP
GeckoProfilerReporter::CollectReports(nsIHandleReportCallback* aHandleReport,
                                      nsISupports* aData, bool aAnonymize) {
  MOZ_RELEASE_ASSERT(NS_IsMainThread());

  size_t profSize = 0;
  size_t lulSize = 0;

  {
    PSAutoLock lock;

    if (CorePS::Exists()) {
      CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize);
    }

    if (ActivePS::Exists(lock)) {
      profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
    }
  }

  MOZ_COLLECT_REPORT(
      "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize,
      "Memory used by the Gecko Profiler's global state (excluding memory used "
      "by LUL).");

#if defined(USE_LUL_STACKWALK)
  MOZ_COLLECT_REPORT(
      "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize,
      "Memory used by LUL, a stack unwinder used by the Gecko Profiler.");
#endif

  return NS_OK;
}

NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter)

// Maps one feature name to its feature bit(s).
// - "default" yields the default feature set (plus the startup extras when
//   aIsStartup is true), restricted to the available features.
// - A name matching an entry of PROFILER_FOR_EACH_FEATURE yields that
//   feature's bit.
// - Any other name prints a message and the usage text, and yields 0.
static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
  if (strcmp(aFeature, "default") == 0) {
    return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures())
                       : DefaultFeatures()) &
           AvailableFeatures();
  }

// Expands to one string comparison (and early return) per known feature.
#define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \
  if (strcmp(aFeature, str_) == 0) {              \
    return ProfilerFeature::Name_;                \
  }

  PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT)

#undef PARSE_FEATURE_BIT

  printf("\nUnrecognized feature \"%s\".\n\n", aFeature);
  // Since we may have an old feature we don't implement anymore, don't exit.
  PrintUsage();
  return 0;
}

// Returns the bitwise OR of ParseFeature() applied to each of the first
// aFeatureCount entries of aFeatures; 0 when aFeatureCount is 0.
uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
                                      uint32_t aFeatureCount,
                                      bool aIsStartup /* = false */) {
  uint32_t features = 0;
  for (size_t i = 0; i < aFeatureCount; i++) {
    features |= ParseFeature(aFeatures[i], aIsStartup);
  }
  return features;
}

static ProfilingStack* locked_register_thread(
    PSLockRef aLock, ThreadRegistry::OffThreadRef aOffThreadRef) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  VTUNE_REGISTER_THREAD(aOffThreadRef.UnlockedConstReaderCRef().Info().Name());

  if (ActivePS::Exists(aLock)) {
    ThreadProfilingFeatures threadProfilingFeatures =
        ActivePS::ProfilingFeaturesForThread(
            aLock, aOffThreadRef.UnlockedConstReaderCRef().Info());
    if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) {
      ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock
          lockedRWFromAnyThread = aOffThreadRef.LockedRWFromAnyThread();

      ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
          aLock, MakeUnique<ProfiledThreadData>(
                     aOffThreadRef.UnlockedConstReaderCRef().Info()));
      lockedRWFromAnyThread->SetProfilingFeaturesAndData(
          threadProfilingFeatures, profiledThreadData, aLock);

      if (ActivePS::FeatureJS(aLock)) {
        lockedRWFromAnyThread->StartJSSampling(ActivePS::JSFlags(aLock));
        if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread =
                lockedRWFromAnyThread.GetLockedRWOnThread();
            lockedRWOnThread) {
          // We can manually poll the current thread so it starts sampling
          // immediately.
lockedRWOnThread->PollJSSampling();
        }
        if (lockedRWFromAnyThread->GetJSContext()) {
          profiledThreadData->NotifyReceivedJSContext(
              ActivePS::Buffer(aLock).BufferRangeEnd());
        }
      }
    }
  }

  return &aOffThreadRef.UnlockedConstReaderAndAtomicRWRef().ProfilingStackRef();
}

// Notifies observers of aTopic (with optional aSubject) through the observer
// service. When called off the main thread, the notification is re-dispatched
// to the main thread instead of being sent directly.
static void NotifyObservers(const char* aTopic,
                            nsISupports* aSubject = nullptr) {
  if (!NS_IsMainThread()) {
    // Dispatch a task to the main thread that notifies observers.
    // If NotifyObservers is called both on and off the main thread within a
    // short time, the order of the notifications can be different from the
    // order of the calls to NotifyObservers.
    // Getting the order 100% right isn't that important at the moment, because
    // these notifications are only observed in the parent process, where the
    // profiler_* functions are currently only called on the main thread.
    // Hold a strong reference to the subject for the dispatched runnable.
    nsCOMPtr<nsISupports> subject = aSubject;
    NS_DispatchToMainThread(NS_NewRunnableFunction(
        "NotifyObservers", [=] { NotifyObservers(aTopic, subject); }));
    return;
  }

  // Nothing is sent when the observer service is not available.
  if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
    os->NotifyObservers(aSubject, aTopic, nullptr);
  }
}

// Packages the start parameters into an nsProfilerStartParams object, passes
// it to ProfilerParent::ProfilerStarted and to "profiler-started" observers,
// and returns the promise obtained from ProfilerParent::ProfilerStarted.
[[nodiscard]] static RefPtr<GenericPromise> NotifyProfilerStarted(
    const PowerOfTwo32& aCapacity, const Maybe<double>& aDuration,
    double aInterval, uint32_t aFeatures, const char** aFilters,
    uint32_t aFilterCount, uint64_t aActiveTabID) {
  // Copy the C-string filters into an owning array.
  nsTArray<nsCString> filtersArray;
  for (size_t i = 0; i < aFilterCount; ++i) {
    filtersArray.AppendElement(aFilters[i]);
  }

  nsCOMPtr<nsIProfilerStartParams> params = new nsProfilerStartParams(
      aCapacity.Value(), aDuration, aInterval, aFeatures,
      std::move(filtersArray), aActiveTabID);

  RefPtr<GenericPromise> startPromise = ProfilerParent::ProfilerStarted(params);
  NotifyObservers("profiler-started", params);
  return startPromise;
}

static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
                                  double aInterval, uint32_t aFeatures,
                                  const char** aFilters, uint32_t aFilterCount,
                                  uint64_t aActiveTabID,
                                  const Maybe<double>& aDuration);

// This basically duplicates AutoProfilerLabel's constructor.
// Pushes a label frame on the current thread's profiling stack and returns a
// pointer to that stack, or nullptr when the current thread has no
// registration.
static void* MozGlueLabelEnter(const char* aLabel, const char* aDynamicString,
                               void* aSp) {
  ThreadRegistration::OnThreadPtr onThreadPtr =
      ThreadRegistration::GetOnThreadPtr();
  if (!onThreadPtr) {
    return nullptr;
  }
  ProfilingStack& profilingStack =
      onThreadPtr->UnlockedConstReaderAndAtomicRWRef().ProfilingStackRef();
  profilingStack.pushLabelFrame(aLabel, aDynamicString, aSp,
                                JS::ProfilingCategoryPair::OTHER);
  return &profilingStack;
}

// This basically duplicates AutoProfilerLabel's destructor.
// Pops the top frame of the given profiling stack; a null pointer (as
// returned by MozGlueLabelEnter when nothing was pushed) is ignored.
static void MozGlueLabelExit(void* aProfilingStack) {
  if (aProfilingStack) {
    reinterpret_cast<ProfilingStack*>(aProfilingStack)->pop();
  }
}

static Vector<const char*> SplitAtCommas(const char* aString,
                                         UniquePtr<char[]>& aStorage) {
  // Copy the input, including its terminating null, into aStorage.
  size_t len = strlen(aString);
  aStorage = MakeUnique<char[]>(len + 1);
  PodCopy(aStorage.get(), aString, len + 1);

  // Iterate over all characters in aStorage and split at commas, by
  // overwriting commas with the null char.
  Vector<const char*> array;
  size_t currentElementStart = 0;
  for (size_t i = 0; i <= len; i++) {
    if (aStorage[i] == ',') {
      aStorage[i] = '\0';
    }
    if (aStorage[i] == '\0') {
      // Only add non-empty elements, otherwise ParseFeatures would later
      // complain about unrecognized features.
+ if (currentElementStart != i) { + MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart])); + } + currentElementStart = i + 1; + } + } + return array; +} + +void profiler_init_threadmanager() { + LOG("profiler_init_threadmanager"); + + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.WithLockedRWOnThread( + [](ThreadRegistration::LockedRWOnThread& aThreadData) { + if (!aThreadData.GetEventTarget()) { + aThreadData.ResetMainThread(NS_GetCurrentThreadNoCreate()); + } + }); + }); +} + +static const char* get_size_suffix(const char* str) { + const char* ptr = str; + + while (isdigit(*ptr)) { + ptr++; + } + + return ptr; +} + +void profiler_init(void* aStackTop) { + LOG("profiler_init"); + + profiler_init_main_thread_id(); + + VTUNE_INIT(); + + MOZ_RELEASE_ASSERT(!CorePS::Exists()); + + if (getenv("MOZ_PROFILER_HELP")) { + PrintUsage(); + exit(0); + } + + SharedLibraryInfo::Initialize(); + + uint32_t features = DefaultFeatures() & AvailableFeatures(); + + UniquePtr<char[]> filterStorage; + + Vector<const char*> filters; + MOZ_RELEASE_ASSERT(filters.append("GeckoMain")); + MOZ_RELEASE_ASSERT(filters.append("Compositor")); + MOZ_RELEASE_ASSERT(filters.append("Renderer")); + MOZ_RELEASE_ASSERT(filters.append("DOM Worker")); + + PowerOfTwo32 capacity = PROFILER_DEFAULT_ENTRIES; + Maybe<double> duration = Nothing(); + double interval = PROFILER_DEFAULT_INTERVAL; + uint64_t activeTabID = PROFILER_DEFAULT_ACTIVE_TAB_ID; + + ThreadRegistration::RegisterThread(kMainThreadName, aStackTop); + + { + PSAutoLock lock; + + // We've passed the possible failure point. Instantiate CorePS, which + // indicates that the profiler has initialized successfully. + CorePS::Create(lock); + + // Make sure threads already in the ThreadRegistry (like the main thread) + // get registered in CorePS as well. 
+ { + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + locked_register_thread(lock, offThreadRef); + } + } + + // Platform-specific initialization. + PlatformInit(lock); + +#if defined(GP_OS_android) + if (jni::IsAvailable()) { + GeckoJavaSampler::Init(); + } +#endif + + // (Linux-only) We could create CorePS::mLul and read unwind info into it + // at this point. That would match the lifetime implied by destruction of + // it in profiler_shutdown() just below. However, that gives a big delay on + // startup, even if no profiling is actually to be done. So, instead, it is + // created on demand at the first call to PlatformStart(). + + const char* startupEnv = getenv("MOZ_PROFILER_STARTUP"); + if (!startupEnv || startupEnv[0] == '\0' || + ((startupEnv[0] == '0' || startupEnv[0] == 'N' || + startupEnv[0] == 'n') && + startupEnv[1] == '\0')) { + return; + } + + LOG("- MOZ_PROFILER_STARTUP is set"); + + // Startup default capacity may be different. 
+ capacity = PROFILER_DEFAULT_STARTUP_ENTRIES; + + const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES"); + if (startupCapacity && startupCapacity[0] != '\0') { + errno = 0; + long capacityLong = strtol(startupCapacity, nullptr, 10); + std::string_view sizeSuffix = get_size_suffix(startupCapacity); + + if (sizeSuffix == "KB") { + capacityLong *= 1000 / scBytesPerEntry; + } else if (sizeSuffix == "KiB") { + capacityLong *= 1024 / scBytesPerEntry; + } else if (sizeSuffix == "MB") { + capacityLong *= (1000 * 1000) / scBytesPerEntry; + } else if (sizeSuffix == "MiB") { + capacityLong *= (1024 * 1024) / scBytesPerEntry; + } else if (sizeSuffix == "GB") { + capacityLong *= (1000 * 1000 * 1000) / scBytesPerEntry; + } else if (sizeSuffix == "GiB") { + capacityLong *= (1024 * 1024 * 1024) / scBytesPerEntry; + } else if (!sizeSuffix.empty()) { + LOG("- MOZ_PROFILER_STARTUP_ENTRIES unit must be one of the " + "following: KB, KiB, MB, MiB, GB, GiB"); + PrintUsage(); + exit(1); + } + + // `long` could be 32 or 64 bits, so we force a 64-bit comparison with + // the maximum 32-bit signed number (as more than that is clamped down to + // 2^31 anyway). 
+ if (errno == 0 && capacityLong > 0 && + static_cast<uint64_t>(capacityLong) <= + static_cast<uint64_t>(INT32_MAX)) { + capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries( + static_cast<uint32_t>(capacityLong))); + LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value())); + } else { + LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s", + startupCapacity); + PrintUsage(); + exit(1); + } + } + + const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION"); + if (startupDuration && startupDuration[0] != '\0') { + errno = 0; + double durationVal = PR_strtod(startupDuration, nullptr); + if (errno == 0 && durationVal >= 0.0) { + if (durationVal > 0.0) { + duration = Some(durationVal); + } + LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", durationVal); + } else { + LOG("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s", + startupDuration); + PrintUsage(); + exit(1); + } + } + + const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL"); + if (startupInterval && startupInterval[0] != '\0') { + errno = 0; + interval = PR_strtod(startupInterval, nullptr); + if (errno == 0 && interval > 0.0 && interval <= PROFILER_MAX_INTERVAL) { + LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval); + } else { + LOG("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s", + startupInterval); + PrintUsage(); + exit(1); + } + } + + features |= StartupExtraDefaultFeatures() & AvailableFeatures(); + + const char* startupFeaturesBitfield = + getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD"); + if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') { + errno = 0; + features = strtol(startupFeaturesBitfield, nullptr, 10); + if (errno == 0) { + LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features); + } else { + LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s", + startupFeaturesBitfield); + PrintUsage(); + exit(1); + } + } else { + const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES"); + if 
(startupFeatures) { + // Interpret startupFeatures as a list of feature strings, separated by + // commas. + UniquePtr<char[]> featureStringStorage; + Vector<const char*> featureStringArray = + SplitAtCommas(startupFeatures, featureStringStorage); + features = ParseFeaturesFromStringArray(featureStringArray.begin(), + featureStringArray.length(), + /* aIsStartup */ true); + LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features); + } + } + + const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS"); + if (startupFilters && startupFilters[0] != '\0') { + filters = SplitAtCommas(startupFilters, filterStorage); + LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters); + + if (mozilla::profiler::detail::FiltersExcludePid(filters)) { + LOG(" -> This process is excluded and won't be profiled"); + return; + } + } + + const char* startupActiveTabID = + getenv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID"); + if (startupActiveTabID && startupActiveTabID[0] != '\0') { + std::istringstream iss(startupActiveTabID); + iss >> activeTabID; + if (!iss.fail()) { + LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID = %" PRIu64, activeTabID); + } else { + LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID not a valid " + "uint64_t: %s", + startupActiveTabID); + PrintUsage(); + exit(1); + } + } + + locked_profiler_start(lock, capacity, interval, features, filters.begin(), + filters.length(), activeTabID, duration); + } + + // The GeckoMain thread registration happened too early to record a marker, + // so let's record it again now. + profiler_mark_thread_awake(); + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // Start counting memory allocations (outside of lock because this may call + // profiler_add_sampled_counter which would attempt to take the lock.) + ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks()); +#endif + + invoke_profiler_state_change_callbacks(ProfilingState::Started); + + // We do this with gPSMutex unlocked. 
The comment in profiler_stop() explains + // why. + Unused << NotifyProfilerStarted(capacity, duration, interval, features, + filters.begin(), filters.length(), 0); +} + +static void locked_profiler_save_profile_to_file( + PSLockRef aLock, const char* aFilename, + const PreRecordedMetaInformation& aPreRecordedMetaInformation, + bool aIsShuttingDown); + +static SamplerThread* locked_profiler_stop(PSLockRef aLock); + +void profiler_shutdown(IsFastShutdown aIsFastShutdown) { + LOG("profiler_shutdown"); + + VTUNE_SHUTDOWN(); + + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (profiler_is_active()) { + invoke_profiler_state_change_callbacks(ProfilingState::Stopping); + } + invoke_profiler_state_change_callbacks(ProfilingState::ShuttingDown); + + const auto preRecordedMetaInformation = PreRecordMetaInformation(); + + ProfilerParent::ProfilerWillStopIfStarted(); + + // If the profiler is active we must get a handle to the SamplerThread before + // ActivePS is destroyed, in order to delete it. + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Save the profile on shutdown if requested. + if (ActivePS::Exists(lock)) { + const char* filename = getenv("MOZ_PROFILER_SHUTDOWN"); + if (filename && filename[0] != '\0') { + locked_profiler_save_profile_to_file(lock, filename, + preRecordedMetaInformation, + /* aIsShuttingDown */ true); + } + if (aIsFastShutdown == IsFastShutdown::Yes) { + return; + } + + samplerThread = locked_profiler_stop(lock); + } else if (aIsFastShutdown == IsFastShutdown::Yes) { + return; + } + + CorePS::Destroy(lock); + } + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. + if (samplerThread) { + Unused << ProfilerParent::ProfilerStopped(); + NotifyObservers("profiler-stopped"); + delete samplerThread; + } + + // Reverse the registration done in profiler_init. 
+ ThreadRegistration::UnregisterThread(); +} + +static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter, + double aSinceTime, bool aIsShuttingDown, + ProfilerCodeAddressService* aService, + mozilla::ProgressLogger aProgressLogger) { + LOG("WriteProfileToJSONWriter"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + aWriter.Start(); + { + if (!profiler_stream_json_for_this_process( + aWriter, aSinceTime, aIsShuttingDown, aService, + aProgressLogger.CreateSubLoggerFromTo( + 0_pc, + "WriteProfileToJSONWriter: " + "profiler_stream_json_for_this_process started", + 100_pc, + "WriteProfileToJSONWriter: " + "profiler_stream_json_for_this_process done"))) { + return false; + } + + // Don't include profiles from other processes because this is a + // synchronous function. + aWriter.StartArrayProperty("processes"); + aWriter.EndArray(); + } + aWriter.End(); + return !aWriter.Failed(); +} + +void profiler_set_process_name(const nsACString& aProcessName, + const nsACString* aETLDplus1) { + LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.Data(), + aETLDplus1 ? 
aETLDplus1->Data() : "<none>"); + PSAutoLock lock; + CorePS::SetProcessName(lock, aProcessName); + if (aETLDplus1) { + CorePS::SetETLDplus1(lock, *aETLDplus1); + } +} + +UniquePtr<char[]> profiler_get_profile(double aSinceTime, + bool aIsShuttingDown) { + LOG("profiler_get_profile"); + + UniquePtr<ProfilerCodeAddressService> service = + profiler_code_address_service_for_presymbolication(); + + FailureLatchSource failureLatch; + SpliceableChunkedJSONWriter b{failureLatch}; + if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown, service.get(), + ProgressLogger{})) { + return nullptr; + } + return b.ChunkedWriteFunc().CopyData(); +} + +[[nodiscard]] bool profiler_get_profile_json( + SpliceableChunkedJSONWriter& aSpliceableChunkedJSONWriter, + double aSinceTime, bool aIsShuttingDown, + mozilla::ProgressLogger aProgressLogger) { + LOG("profiler_get_profile_json"); + + UniquePtr<ProfilerCodeAddressService> service = + profiler_code_address_service_for_presymbolication(); + + return WriteProfileToJSONWriter( + aSpliceableChunkedJSONWriter, aSinceTime, aIsShuttingDown, service.get(), + aProgressLogger.CreateSubLoggerFromTo( + 0.1_pc, "profiler_get_profile_json: WriteProfileToJSONWriter started", + 99.9_pc, "profiler_get_profile_json: WriteProfileToJSONWriter done")); +} + +void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration, + double* aInterval, uint32_t* aFeatures, + Vector<const char*>* aFilters, + uint64_t* aActiveTabID) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (NS_WARN_IF(!aCapacity) || NS_WARN_IF(!aDuration) || + NS_WARN_IF(!aInterval) || NS_WARN_IF(!aFeatures) || + NS_WARN_IF(!aFilters)) { + return; + } + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + *aCapacity = 0; + *aDuration = Nothing(); + *aInterval = 0; + *aFeatures = 0; + *aActiveTabID = 0; + aFilters->clear(); + return; + } + + *aCapacity = ActivePS::Capacity(lock).Value(); + *aDuration = ActivePS::Duration(lock); + *aInterval = ActivePS::Interval(lock); + 
*aFeatures = ActivePS::Features(lock); + *aActiveTabID = ActivePS::ActiveTabID(lock); + + const Vector<std::string>& filters = ActivePS::Filters(lock); + MOZ_ALWAYS_TRUE(aFilters->resize(filters.length())); + for (uint32_t i = 0; i < filters.length(); ++i) { + (*aFilters)[i] = filters[i].c_str(); + } +} + +ProfileBufferControlledChunkManager* profiler_get_controlled_chunk_manager() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + if (NS_WARN_IF(!ActivePS::Exists(lock))) { + return nullptr; + } + return &ActivePS::ControlledChunkManager(lock); +} + +namespace mozilla { + +void GetProfilerEnvVarsForChildProcess( + std::function<void(const char* key, const char* value)>&& aSetEnv) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + aSetEnv("MOZ_PROFILER_STARTUP", ""); + return; + } + + aSetEnv("MOZ_PROFILER_STARTUP", "1"); + + // If MOZ_PROFILER_SHUTDOWN is defined, make sure it's empty in children, so + // that they don't attempt to write over that file. + if (getenv("MOZ_PROFILER_SHUTDOWN")) { + aSetEnv("MOZ_PROFILER_SHUTDOWN", ""); + } + + // Hidden option to stop Base Profiler, mostly due to Talos intermittents, + // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3 + // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325. + if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) { + aSetEnv("MOZ_PROFILER_STARTUP_NO_BASE", "1"); + } + + auto capacityString = + Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value())); + aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get()); + + // Use AppendFloat instead of Smprintf with %f because the decimal + // separator used by %f is locale-dependent. But the string we produce needs + // to be parseable by strtod, which only accepts the period character as a + // decimal separator. AppendFloat always uses the period character. 
+ nsCString intervalString; + intervalString.AppendFloat(ActivePS::Interval(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.get()); + + auto featuresString = Smprintf("%d", ActivePS::Features(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get()); + + std::string filtersString; + const Vector<std::string>& filters = ActivePS::Filters(lock); + for (uint32_t i = 0; i < filters.length(); ++i) { + if (i != 0) { + filtersString += ","; + } + filtersString += filters[i]; + } + aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str()); + + auto activeTabIDString = Smprintf("%" PRIu64, ActivePS::ActiveTabID(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID", activeTabIDString.get()); +} + +} // namespace mozilla + +void profiler_received_exit_profile(const nsACString& aExitProfile) { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + return; + } + ActivePS::AddExitProfile(lock, aExitProfile); +} + +Vector<nsCString> profiler_move_exit_profiles() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + Vector<nsCString> profiles; + if (ActivePS::Exists(lock)) { + profiles = ActivePS::MoveExitProfiles(lock); + } + return profiles; +} + +static void locked_profiler_save_profile_to_file( + PSLockRef aLock, const char* aFilename, + const PreRecordedMetaInformation& aPreRecordedMetaInformation, + bool aIsShuttingDown = false) { + nsAutoCString processedFilename(aFilename); + const auto processInsertionIndex = processedFilename.Find("%p"); + if (processInsertionIndex != kNotFound) { + // Replace "%p" with the process id. 
+ nsAutoCString process; + process.AppendInt(profiler_current_process_id().ToNumber()); + processedFilename.Replace(processInsertionIndex, 2, process); + LOG("locked_profiler_save_profile_to_file(\"%s\" -> \"%s\")", aFilename, + processedFilename.get()); + } else { + LOG("locked_profiler_save_profile_to_file(\"%s\")", aFilename); + } + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + std::ofstream stream; + stream.open(processedFilename.get()); + if (stream.is_open()) { + OStreamJSONWriteFunc sw(stream); + SpliceableJSONWriter w(sw, FailureLatchInfallibleSource::Singleton()); + w.Start(); + { + locked_profiler_stream_json_for_this_process( + aLock, w, /* sinceTime */ 0, aPreRecordedMetaInformation, + aIsShuttingDown, nullptr, ProgressLogger{}); + + w.StartArrayProperty("processes"); + Vector<nsCString> exitProfiles = ActivePS::MoveExitProfiles(aLock); + for (auto& exitProfile : exitProfiles) { + if (!exitProfile.IsEmpty() && exitProfile[0] != '*') { + w.Splice(exitProfile); + } + } + w.EndArray(); + } + w.End(); + + stream.close(); + } +} + +void profiler_save_profile_to_file(const char* aFilename) { + LOG("profiler_save_profile_to_file(%s)", aFilename); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + const auto preRecordedMetaInformation = PreRecordMetaInformation(); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + locked_profiler_save_profile_to_file(lock, aFilename, + preRecordedMetaInformation); +} + +uint32_t profiler_get_available_features() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + return AvailableFeatures(); +} + +Maybe<ProfilerBufferInfo> profiler_get_buffer_info() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return Nothing(); + } + + return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo()); +} + +static void PollJSSamplingForCurrentThread() { + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + 
aOnThreadRef.WithLockedRWOnThread( + [](ThreadRegistration::LockedRWOnThread& aThreadData) { + aThreadData.PollJSSampling(); + }); + }); +} + +// When the profiler is started on a background thread, we can't synchronously +// call PollJSSampling on the main thread's ThreadInfo. And the next regular +// call to PollJSSampling on the main thread would only happen once the main +// thread triggers a JS interrupt callback. +// This means that all the JS execution between profiler_start() and the first +// JS interrupt would happen with JS sampling disabled, and we wouldn't get any +// JS function information for that period of time. +// So in order to start JS sampling as soon as possible, we dispatch a runnable +// to the main thread which manually calls PollJSSamplingForCurrentThread(). +// In some cases this runnable will lose the race with the next JS interrupt. +// That's fine; PollJSSamplingForCurrentThread() is immune to redundant calls. +static void TriggerPollJSSamplingOnMainThread() { + nsCOMPtr<nsIThread> mainThread; + nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread)); + if (NS_SUCCEEDED(rv) && mainThread) { + nsCOMPtr<nsIRunnable> task = + NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread", + []() { PollJSSamplingForCurrentThread(); }); + SchedulerGroup::Dispatch(TaskCategory::Other, task.forget()); + } +} + +static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity, + double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + uint64_t aActiveTabID, + const Maybe<double>& aDuration) { + TimeStamp profilingStartTime = TimeStamp::Now(); + + if (LOG_TEST) { + LOG("locked_profiler_start"); + LOG("- capacity = %u", unsigned(aCapacity.Value())); + LOG("- duration = %.2f", aDuration ? 
*aDuration : -1); + LOG("- interval = %.2f", aInterval); + LOG("- tab ID = %" PRIu64, aActiveTabID); + +#define LOG_FEATURE(n_, str_, Name_, desc_) \ + if (ProfilerFeature::Has##Name_(aFeatures)) { \ + LOG("- feature = %s", str_); \ + } + + PROFILER_FOR_EACH_FEATURE(LOG_FEATURE) + +#undef LOG_FEATURE + + for (uint32_t i = 0; i < aFilterCount; i++) { + LOG("- threads = %s", aFilters[i]); + } + } + + MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock)); + + // Do this before the Base Profiler is stopped, to keep the existing buffer + // (if any) alive for our use. + if (NS_IsMainThread()) { + mozilla::base_profiler_markers_detail::EnsureBufferForMainThreadAddMarker(); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("EnsureBufferForMainThreadAddMarker", + &mozilla::base_profiler_markers_detail:: + EnsureBufferForMainThreadAddMarker)); + } + + UniquePtr<ProfileBufferChunkManagerWithLocalLimit> baseChunkManager; + bool profilersHandOver = false; + if (baseprofiler::profiler_is_active()) { + // Note that we still hold the lock, so the sampler cannot run yet and + // interact negatively with the still-active BaseProfiler sampler. + // Assume that Base Profiler is active because of MOZ_PROFILER_STARTUP. + + // Take ownership of the chunk manager from the Base Profiler, to extend its + // lifetime during the new Gecko Profiler session. Since we're using the + // same core buffer, all the base profiler data remains. 
+ baseChunkManager = baseprofiler::detail::ExtractBaseProfilerChunkManager(); + + if (baseChunkManager) { + profilersHandOver = true; + if (const TimeStamp baseProfilingStartTime = + baseprofiler::detail::GetProfilingStartTime(); + !baseProfilingStartTime.IsNull()) { + profilingStartTime = baseProfilingStartTime; + } + + BASE_PROFILER_MARKER_TEXT( + "Profilers handover", PROFILER, MarkerTiming::IntervalStart(), + "Transition from Base to Gecko Profiler, some data may be missing"); + } + + // Now stop Base Profiler (BP), as further recording will be ignored anyway, + // and so that it won't clash with Gecko Profiler (GP) sampling starting + // after the lock is dropped. + // On Linux this is especially important to do before creating the GP + // sampler, because the BP sampler may send a signal (to stop threads to be + // sampled), which the GP would intercept before its own initialization is + // complete and ready to handle such signals. + // Note that even though `profiler_stop()` doesn't immediately destroy and + // join the sampler thread, it safely deactivates it in such a way that the + // thread will soon exit without doing any actual work. + // TODO: Allow non-sampling profiling to continue. + // TODO: Re-start BP after GP shutdown, to capture post-XPCOM shutdown. + baseprofiler::profiler_stop(); + } + +#if defined(GP_PLAT_amd64_windows) + InitializeWin64ProfilerHooks(); +#endif + + // Fall back to the default values if the passed-in values are unreasonable. + // We want to be able to store at least one full stack. + PowerOfTwo32 capacity = + (aCapacity.Value() >= + ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry) + ? aCapacity + : PROFILER_DEFAULT_ENTRIES; + Maybe<double> duration = aDuration; + + if (aDuration && *aDuration <= 0) { + duration = Nothing(); + } + + double interval = aInterval > 0 ? 
aInterval : PROFILER_DEFAULT_INTERVAL; + + ActivePS::Create(aLock, profilingStartTime, capacity, interval, aFeatures, + aFilters, aFilterCount, aActiveTabID, duration, + std::move(baseChunkManager)); + + // ActivePS::Create can only succeed or crash. + MOZ_ASSERT(ActivePS::Exists(aLock)); + + // Set up profiling for each registered thread, if appropriate. +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + bool isMainThreadBeingProfiled = false; +#endif + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + const ThreadRegistrationInfo& info = + offThreadRef.UnlockedConstReaderCRef().Info(); + + ThreadProfilingFeatures threadProfilingFeatures = + ActivePS::ProfilingFeaturesForThread(aLock, info); + if (threadProfilingFeatures != ThreadProfilingFeatures::NotProfiled) { + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData = + offThreadRef.LockedRWFromAnyThread(); + ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread( + aLock, MakeUnique<ProfiledThreadData>(info)); + lockedThreadData->SetProfilingFeaturesAndData(threadProfilingFeatures, + profiledThreadData, aLock); + lockedThreadData->GetNewCpuTimeInNs(); + if (ActivePS::FeatureJS(aLock)) { + lockedThreadData->StartJSSampling(ActivePS::JSFlags(aLock)); + if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread = + lockedThreadData.GetLockedRWOnThread(); + lockedRWOnThread) { + // We can manually poll the current thread so it starts sampling + // immediately. + lockedRWOnThread->PollJSSampling(); + } else if (info.IsMainThread()) { + // Dispatch a runnable to the main thread to call + // PollJSSampling(), so that we don't have wait for the next JS + // interrupt callback in order to start profiling JS. 
+ TriggerPollJSSamplingOnMainThread(); + } + } +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (info.IsMainThread()) { + isMainThreadBeingProfiled = true; + } +#endif + lockedThreadData->ReinitializeOnResume(); + if (ActivePS::FeatureJS(aLock) && lockedThreadData->GetJSContext()) { + profiledThreadData->NotifyReceivedJSContext(0); + } + } + } + + // Setup support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit); + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(aLock)) { + int javaInterval = interval; + // Java sampling doesn't accurately keep up with the sampling rate that is + // lower than 1ms. + if (javaInterval < 1) { + javaInterval = 1; + } + + JNIEnv* env = jni::GetEnvForThread(); + const auto& filters = ActivePS::Filters(aLock); + jni::ObjectArray::LocalRef javaFilters = + jni::ObjectArray::New<jni::String>(filters.length()); + for (size_t i = 0; i < filters.length(); i++) { + javaFilters->SetElement(i, jni::StringParam(filters[i].data(), env)); + } + + // Send the interval-relative entry count, but we have 100000 hard cap in + // the java code, it can't be more than that. + java::GeckoJavaSampler::Start( + javaFilters, javaInterval, + std::round((double)(capacity.Value()) * interval / + (double)(javaInterval))); + } +#endif + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (ActivePS::FeatureNativeAllocations(aLock)) { + if (isMainThreadBeingProfiled) { + mozilla::profiler::enable_native_allocations(); + } else { + NS_WARNING( + "The nativeallocations feature is turned on, but the main thread is " + "not being profiled. The allocations are only stored on the main " + "thread."); + } + } +#endif + + if (ProfilerFeature::HasAudioCallbackTracing(aFeatures)) { + StartAudioCallbackTracing(); + } + + // At the very end, set up RacyFeatures. 
+ RacyFeatures::SetActive(ActivePS::Features(aLock)); + + if (profilersHandOver) { + PROFILER_MARKER_UNTYPED("Profilers handover", PROFILER, + MarkerTiming::IntervalEnd()); + } +} + +RefPtr<GenericPromise> profiler_start(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, + uint64_t aActiveTabID, + const Maybe<double>& aDuration) { + LOG("profiler_start"); + + ProfilerParent::ProfilerWillStopIfStarted(); + + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + // Reset the current state if the profiler is running. + if (ActivePS::Exists(lock)) { + // Note: Not invoking callbacks with ProfilingState::Stopping, because + // we're under lock, and also it would not be useful: Any profiling data + // will be discarded, and we're immediately restarting the profiler below + // and then notifying ProfilingState::Started. + samplerThread = locked_profiler_stop(lock); + } + + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aActiveTabID, aDuration); + } + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // Start counting memory allocations (outside of lock because this may call + // profiler_add_sampled_counter which would attempt to take the lock.) + ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks()); +#endif + + invoke_profiler_state_change_callbacks(ProfilingState::Started); + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. 
+ if (samplerThread) { + Unused << ProfilerParent::ProfilerStopped(); + NotifyObservers("profiler-stopped"); + delete samplerThread; + } + return NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount, aActiveTabID); +} + +void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, uint64_t aActiveTabID, + const Maybe<double>& aDuration) { + LOG("profiler_ensure_started"); + + ProfilerParent::ProfilerWillStopIfStarted(); + + bool startedProfiler = false; + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + if (ActivePS::Exists(lock)) { + // The profiler is active. + if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount, aActiveTabID)) { + // Stop and restart with different settings. + // Note: Not invoking callbacks with ProfilingState::Stopping, because + // we're under lock, and also it would not be useful: Any profiling data + // will be discarded, and we're immediately restarting the profiler + // below and then notifying ProfilingState::Started. + samplerThread = locked_profiler_stop(lock); + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aActiveTabID, aDuration); + startedProfiler = true; + } + } else { + // The profiler is stopped. + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aActiveTabID, aDuration); + startedProfiler = true; + } + } + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. 
+ if (samplerThread) { + Unused << ProfilerParent::ProfilerStopped(); + NotifyObservers("profiler-stopped"); + delete samplerThread; + } + + if (startedProfiler) { + invoke_profiler_state_change_callbacks(ProfilingState::Started); + + Unused << NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount, aActiveTabID); + } +} + +[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) { + LOG("locked_profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + // At the very start, clear RacyFeatures. + RacyFeatures::SetInactive(); + + if (ActivePS::FeatureAudioCallbackTracing(aLock)) { + StopAudioCallbackTracing(); + } + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(aLock)) { + java::GeckoJavaSampler::Stop(); + } +#endif + + // Remove support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(nullptr, nullptr); + + // Stop sampling live threads. + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + if (offThreadRef.UnlockedRWForLockedProfilerRef().ProfilingFeatures() == + ThreadProfilingFeatures::NotProfiled) { + continue; + } + + ThreadRegistry::OffThreadRef::RWFromAnyThreadWithLock lockedThreadData = + offThreadRef.LockedRWFromAnyThread(); + + lockedThreadData->ClearProfilingFeaturesAndData(aLock); + + if (ActivePS::FeatureJS(aLock)) { + lockedThreadData->StopJSSampling(); + if (ThreadRegistration::LockedRWOnThread* lockedRWOnThread = + lockedThreadData.GetLockedRWOnThread(); + lockedRWOnThread) { + // We are on the thread, we can manually poll the current thread so it + // stops profiling immediately. + lockedRWOnThread->PollJSSampling(); + } else if (lockedThreadData->Info().IsMainThread()) { + // Dispatch a runnable to the main thread to call PollJSSampling(), + // so that we don't have to wait for the next JS interrupt callback in + // order to stop profiling JS. 
+ TriggerPollJSSamplingOnMainThread(); + } + } + } + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + if (ActivePS::FeatureNativeAllocations(aLock)) { + mozilla::profiler::disable_native_allocations(); + } +#endif + + // The Stop() call doesn't actually stop Run(); that happens in this + // function's caller when the sampler thread is destroyed. Stop() just gives + // the SamplerThread a chance to do some cleanup with gPSMutex locked. + SamplerThread* samplerThread = ActivePS::Destroy(aLock); + samplerThread->Stop(aLock); + + if (NS_IsMainThread()) { + mozilla::base_profiler_markers_detail:: + ReleaseBufferForMainThreadAddMarker(); + } else { + NS_DispatchToMainThread( + NS_NewRunnableFunction("ReleaseBufferForMainThreadAddMarker", + &mozilla::base_profiler_markers_detail:: + ReleaseBufferForMainThreadAddMarker)); + } + + return samplerThread; +} + +RefPtr<GenericPromise> profiler_stop() { + LOG("profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (profiler_is_active()) { + invoke_profiler_state_change_callbacks(ProfilingState::Stopping); + } + + ProfilerParent::ProfilerWillStopIfStarted(); + +#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // Remove the hooks early, as native allocations (if they are on) can be + // quite expensive. + mozilla::profiler::remove_memory_hooks(); +#endif + + SamplerThread* samplerThread; + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + + samplerThread = locked_profiler_stop(lock); + } + + // We notify observers with gPSMutex unlocked. Otherwise we might get a + // deadlock, if code run by these functions calls a profiler function that + // locks gPSMutex, for example when it wants to insert a marker. + // (This has been seen in practise in bug 1346356, when we were still firing + // these notifications synchronously.) 
+ RefPtr<GenericPromise> promise = ProfilerParent::ProfilerStopped(); + NotifyObservers("profiler-stopped"); + + // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we + // would be waiting here with gPSMutex locked for SamplerThread::Run() to + // return so the join operation within the destructor can complete, but Run() + // needs to lock gPSMutex to return. + // + // Because this call occurs with gPSMutex unlocked, it -- including the final + // iteration of Run()'s loop -- must be able to detect deactivation and return + // in a way that's safe with respect to other gPSMutex-locking operations + // that may have occurred in the meantime. + delete samplerThread; + + return promise; +} + +bool profiler_is_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsPaused(lock); +} + +/* [[nodiscard]] */ bool profiler_callback_after_sampling( + PostSamplingCallback&& aCallback) { + LOG("profiler_callback_after_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + return ActivePS::AppendPostSamplingCallback(lock, std::move(aCallback)); +} + +RefPtr<GenericPromise> profiler_pause() { + LOG("profiler_pause"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + invoke_profiler_state_change_callbacks(ProfilingState::Pausing); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused yet, so this is the first pause, let Java know. + // TODO: Distinguish Pause and PauseSampling in Java. 
+ java::GeckoJavaSampler::PauseSampling(); + } +#endif + + RacyFeatures::SetPaused(); + ActivePS::SetIsPaused(lock, true); + ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time())); + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. + RefPtr<GenericPromise> promise = ProfilerParent::ProfilerPaused(); + NotifyObservers("profiler-paused"); + return promise; +} + +RefPtr<GenericPromise> profiler_resume() { + LOG("profiler_resume"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::Resume(profiler_time())); + ActivePS::SetIsPaused(lock, false); + RacyFeatures::SetUnpaused(); + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused anymore, so this is the last unpause, let Java know. + // TODO: Distinguish Unpause and UnpauseSampling in Java. + java::GeckoJavaSampler::UnpauseSampling(); + } +#endif + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. 
+ RefPtr<GenericPromise> promise = ProfilerParent::ProfilerResumed(); + NotifyObservers("profiler-resumed"); + + invoke_profiler_state_change_callbacks(ProfilingState::Resumed); + + return promise; +} + +bool profiler_is_sampling_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsSamplingPaused(lock); +} + +RefPtr<GenericPromise> profiler_pause_sampling() { + LOG("profiler_pause_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused yet, so this is the first pause, let Java know. + // TODO: Distinguish Pause and PauseSampling in Java. + java::GeckoJavaSampler::PauseSampling(); + } +#endif + + RacyFeatures::SetSamplingPaused(); + ActivePS::SetIsSamplingPaused(lock, true); + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::PauseSampling(profiler_time())); + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. + RefPtr<GenericPromise> promise = ProfilerParent::ProfilerPausedSampling(); + NotifyObservers("profiler-paused-sampling"); + return promise; +} + +RefPtr<GenericPromise> profiler_resume_sampling() { + LOG("profiler_resume_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return GenericPromise::CreateAndResolve(/* unused */ true, __func__); + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::ResumeSampling(profiler_time())); + ActivePS::SetIsSamplingPaused(lock, false); + RacyFeatures::SetSamplingUnpaused(); + +#if defined(GP_OS_android) + if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) { + // Not paused anymore, so this is the last unpause, let Java know. 
+ // TODO: Distinguish Unpause and UnpauseSampling in Java. + java::GeckoJavaSampler::UnpauseSampling(); + } +#endif + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. + RefPtr<GenericPromise> promise = ProfilerParent::ProfilerResumedSampling(); + NotifyObservers("profiler-resumed-sampling"); + return promise; +} + +bool profiler_feature_active(uint32_t aFeature) { + // This function runs both on and off the main thread. + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // This function is hot enough that we use RacyFeatures, not ActivePS. + return RacyFeatures::IsActiveWithFeature(aFeature); +} + +void profiler_write_active_configuration(JSONWriter& aWriter) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + ActivePS::WriteActiveConfiguration(lock, aWriter); +} + +void profiler_add_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + locked_profiler_add_sampled_counter(lock, aCounter); +} + +void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + locked_profiler_remove_sampled_counter(lock, aCounter); +} + +ProfilingStack* profiler_register_thread(const char* aName, + void* aGuessStackTop) { + DEBUG_LOG("profiler_register_thread(%s)", aName); + + // This will call `ThreadRegistry::Register()` (see below). + return ThreadRegistration::RegisterThread(aName, aGuessStackTop); +} + +/* static */ +void ThreadRegistry::Register(ThreadRegistration::OnThreadRef aOnThreadRef) { + // Set the thread name (except for the main thread, which is controlled + // elsewhere, and influences the process name on some systems like Linux). + if (!aOnThreadRef.UnlockedConstReaderCRef().Info().IsMainThread()) { + // Make sure we have a nsThread wrapper for the current thread, and that + // NSPR knows its name. 
+ (void)NS_GetCurrentThread(); + NS_SetCurrentThreadName( + aOnThreadRef.UnlockedConstReaderCRef().Info().Name()); + } + + PSAutoLock lock; + + { + RegistryLockExclusive lock{sRegistryMutex}; + MOZ_RELEASE_ASSERT(sRegistryContainer.append(OffThreadRef{aOnThreadRef})); + } + + if (!CorePS::Exists()) { + // CorePS has not been created yet. + // If&when that happens, it will handle already-registered threads then. + return; + } + + (void)locked_register_thread(lock, OffThreadRef{aOnThreadRef}); +} + +void profiler_unregister_thread() { + // This will call `ThreadRegistry::Unregister()` (see below). + ThreadRegistration::UnregisterThread(); +} + +static void locked_unregister_thread( + PSLockRef lock, ThreadRegistration::OnThreadRef aOnThreadRef) { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut + // down. + return; + } + + // We don't call StopJSSampling() here; there's no point doing that for a JS + // thread that is in the process of disappearing. 
+ + ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData = + aOnThreadRef.LockedRWOnThread(); + + ProfiledThreadData* profiledThreadData = + lockedThreadData->GetProfiledThreadData(lock); + lockedThreadData->ClearProfilingFeaturesAndData(lock); + + MOZ_RELEASE_ASSERT( + lockedThreadData->Info().ThreadId() == profiler_current_thread_id(), + "Thread being unregistered has changed its TID"); + + DEBUG_LOG("profiler_unregister_thread: %s", lockedThreadData->Info().Name()); + + if (profiledThreadData && ActivePS::Exists(lock)) { + ActivePS::UnregisterThread(lock, profiledThreadData); + } +} + +/* static */ +void ThreadRegistry::Unregister(ThreadRegistration::OnThreadRef aOnThreadRef) { + PSAutoLock psLock; + locked_unregister_thread(psLock, aOnThreadRef); + + RegistryLockExclusive lock{sRegistryMutex}; + for (OffThreadRef& thread : sRegistryContainer) { + if (thread.IsPointingAt(*aOnThreadRef.mThreadRegistration)) { + sRegistryContainer.erase(&thread); + break; + } + } +} + +void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID, + const nsCString& aUrl, + uint64_t aEmbedderInnerWindowID, + bool aIsPrivateBrowsing) { + DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 + ", %s)", + aTabID, aInnerWindowID, aUrl.get(), aEmbedderInnerWindowID, + aIsPrivateBrowsing ? "true" : "false"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + // When a Browsing context is first loaded, the first url loaded in it will be + // about:blank. Because of that, this call keeps the first non-about:blank + // registration of window and discards the previous one. + RefPtr<PageInformation> pageInfo = new PageInformation( + aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID, aIsPrivateBrowsing); + CorePS::AppendRegisteredPage(lock, std::move(pageInfo)); + + // After appending the given page to CorePS, look for the expired + // pages and remove them if there are any. 
+ if (ActivePS::Exists(lock)) { + ActivePS::DiscardExpiredPages(lock); + } +} + +void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) { + PSAutoLock lock; + + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. + return; + } + + // During unregistration, if the profiler is active, we have to keep the + // page information since there may be some markers associated with the given + // page. But if profiler is not active. we have no reason to keep the + // page information here because there can't be any marker associated with it. + if (ActivePS::Exists(lock)) { + ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID); + } else { + CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID); + } +} + +void profiler_clear_all_pages() { + { + PSAutoLock lock; + + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut + // down. + return; + } + + CorePS::ClearRegisteredPages(lock); + if (ActivePS::Exists(lock)) { + ActivePS::ClearUnregisteredPages(lock); + } + } + + // gPSMutex must be unlocked when we notify, to avoid potential deadlocks. 
+ ProfilerParent::ClearAllPages(); +} + +namespace geckoprofiler::markers::detail { + +Maybe<uint64_t> profiler_get_inner_window_id_from_docshell( + nsIDocShell* aDocshell) { + Maybe<uint64_t> innerWindowID = Nothing(); + if (aDocshell) { + auto outerWindow = aDocshell->GetWindow(); + if (outerWindow) { + auto innerWindow = outerWindow->GetCurrentInnerWindow(); + if (innerWindow) { + innerWindowID = Some(innerWindow->WindowID()); + } + } + } + return innerWindowID; +} + +} // namespace geckoprofiler::markers::detail + +namespace geckoprofiler::markers { + +struct CPUAwakeMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("Awake"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + int64_t aCPUId +#ifdef GP_OS_darwin + , + uint32_t aQoS +#endif +#ifdef GP_OS_windows + , + int32_t aAbsolutePriority, + int32_t aRelativePriority, + int32_t aCurrentPriority +#endif + ) { +#ifndef GP_PLAT_arm64_darwin + aWriter.IntProperty("CPU Id", aCPUId); +#endif +#ifdef GP_OS_windows + if (aAbsolutePriority) { + aWriter.IntProperty("absPriority", aAbsolutePriority); + } + if (aCurrentPriority) { + aWriter.IntProperty("curPriority", aCurrentPriority); + } + aWriter.IntProperty("priority", aRelativePriority); +#endif +#ifdef GP_OS_darwin + const char* QoS = ""; + switch (aQoS) { + case QOS_CLASS_USER_INTERACTIVE: + QoS = "User Interactive"; + break; + case QOS_CLASS_USER_INITIATED: + QoS = "User Initiated"; + break; + case QOS_CLASS_DEFAULT: + QoS = "Default"; + break; + case QOS_CLASS_UTILITY: + QoS = "Utility"; + break; + case QOS_CLASS_BACKGROUND: + QoS = "Background"; + break; + default: + QoS = "Unspecified"; + } + + aWriter.StringProperty("QoS", + ProfilerString8View::WrapNullTerminatedString(QoS)); +#endif + } + + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormat("CPU Time", MS::Format::Duration); 
+#ifndef GP_PLAT_arm64_darwin + schema.AddKeyFormat("CPU Id", MS::Format::Integer); + schema.SetTableLabel("Awake - CPU Id = {marker.data.CPU Id}"); +#endif +#ifdef GP_OS_windows + schema.AddKeyLabelFormat("priority", "Relative Thread Priority", + MS::Format::Integer); + schema.AddKeyLabelFormat("absPriority", "Base Thread Priority", + MS::Format::Integer); + schema.AddKeyLabelFormat("curPriority", "Current Thread Priority", + MS::Format::Integer); +#endif +#ifdef GP_OS_darwin + schema.AddKeyLabelFormat("QoS", "Quality of Service", MS::Format::String); +#endif + return schema; + } +}; + +struct CPUAwakeMarkerEnd : public CPUAwakeMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("AwakeEnd"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + int64_t aCPUTimeNs) { + if (aCPUTimeNs) { + constexpr double NS_PER_MS = 1'000'000; + aWriter.DoubleProperty("CPU Time", double(aCPUTimeNs) / NS_PER_MS); + } + } +}; + +} // namespace geckoprofiler::markers + +void profiler_mark_thread_asleep() { + if (!profiler_thread_is_being_profiled_for_markers()) { + return; + } + + uint64_t cpuTimeNs = ThreadRegistration::WithOnThreadRefOr( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + return aOnThreadRef.UnlockedConstReaderAndAtomicRWRef() + .GetNewCpuTimeInNs(); + }, + 0); + PROFILER_MARKER("Awake", OTHER, MarkerTiming::IntervalEnd(), + CPUAwakeMarkerEnd, cpuTimeNs); +} + +void profiler_thread_sleep() { + profiler_mark_thread_asleep(); + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.UnlockedConstReaderAndAtomicRWRef().SetSleeping(); + }); +} + +#if defined(GP_OS_windows) +# if !defined(__MINGW32__) +enum { + ThreadBasicInformation, +}; +# endif + +struct THREAD_BASIC_INFORMATION { + NTSTATUS ExitStatus; + PVOID TebBaseAddress; + CLIENT_ID ClientId; + KAFFINITY AffMask; + DWORD Priority; + DWORD BasePriority; +}; +#endif + +static 
mozilla::Atomic<uint64_t, mozilla::MemoryOrdering::Relaxed> gWakeCount( + 0); + +namespace geckoprofiler::markers { +struct WakeUpCountMarker { + static constexpr Span<const char> MarkerTypeName() { + return MakeStringSpan("WakeUpCount"); + } + static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter, + int32_t aCount, + const ProfilerString8View& aType) { + aWriter.IntProperty("Count", aCount); + aWriter.StringProperty("label", aType); + } + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + schema.AddKeyFormat("Count", MS::Format::Integer); + schema.SetTooltipLabel("{marker.name} - {marker.data.label}"); + schema.SetTableLabel( + "{marker.name} - {marker.data.label}: {marker.data.count}"); + return schema; + } +}; +} // namespace geckoprofiler::markers + +void profiler_record_wakeup_count(const nsACString& aProcessType) { + static uint64_t previousThreadWakeCount = 0; + + uint64_t newWakeups = gWakeCount - previousThreadWakeCount; + if (newWakeups > 0) { + if (newWakeups < std::numeric_limits<int32_t>::max()) { + int32_t newWakeups32 = int32_t(newWakeups); + mozilla::glean::power::total_thread_wakeups.Add(newWakeups32); + mozilla::glean::power::wakeups_per_process_type.Get(aProcessType) + .Add(newWakeups32); + PROFILER_MARKER("Thread Wake-ups", OTHER, {}, WakeUpCountMarker, + newWakeups32, aProcessType); + } + + previousThreadWakeCount += newWakeups; + } + +#ifdef NIGHTLY_BUILD + ThreadRegistry::LockedRegistry lockedRegistry; + for (ThreadRegistry::OffThreadRef offThreadRef : lockedRegistry) { + const ThreadRegistry::UnlockedConstReaderAndAtomicRW& threadData = + offThreadRef.UnlockedConstReaderAndAtomicRWRef(); + threadData.RecordWakeCount(); + } +#endif +} + +void profiler_mark_thread_awake() { + ++gWakeCount; + if (!profiler_thread_is_being_profiled_for_markers()) { + return; + } + + int64_t cpuId = 0; +#if defined(GP_OS_windows) + cpuId = 
GetCurrentProcessorNumber(); +#elif defined(GP_OS_darwin) +# ifdef GP_PLAT_amd64_darwin + unsigned int eax, ebx, ecx, edx; + __cpuid_count(1, 0, eax, ebx, ecx, edx); + // Check if we have an APIC. + if ((edx & (1 << 9))) { + // APIC ID is bits 24-31 of EBX + cpuId = ebx >> 24; + } +# endif +#else + cpuId = sched_getcpu(); +#endif + +#if defined(GP_OS_windows) + LONG priority; + static const auto get_thread_information_fn = + reinterpret_cast<decltype(&::GetThreadInformation)>(::GetProcAddress( + ::GetModuleHandle(L"Kernel32.dll"), "GetThreadInformation")); + + if (!get_thread_information_fn || + !get_thread_information_fn(GetCurrentThread(), ThreadAbsoluteCpuPriority, + &priority, sizeof(priority))) { + priority = 0; + } + + static const auto nt_query_information_thread_fn = + reinterpret_cast<decltype(&::NtQueryInformationThread)>(::GetProcAddress( + ::GetModuleHandle(L"ntdll.dll"), "NtQueryInformationThread")); + + LONG currentPriority = 0; + if (nt_query_information_thread_fn) { + THREAD_BASIC_INFORMATION threadInfo; + auto status = (*nt_query_information_thread_fn)( + GetCurrentThread(), (THREADINFOCLASS)ThreadBasicInformation, + &threadInfo, sizeof(threadInfo), NULL); + if (NT_SUCCESS(status)) { + currentPriority = threadInfo.Priority; + } + } +#endif + PROFILER_MARKER( + "Awake", OTHER, MarkerTiming::IntervalStart(), CPUAwakeMarker, cpuId +#if defined(GP_OS_darwin) + , + qos_class_self() +#endif +#if defined(GP_OS_windows) + , + priority, GetThreadPriority(GetCurrentThread()), currentPriority +#endif + ); +} + +void profiler_thread_wake() { + profiler_mark_thread_awake(); + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.UnlockedConstReaderAndAtomicRWRef().SetAwake(); + }); +} + +void profiler_js_interrupt_callback() { + // This function runs on JS threads being sampled. 
+ PollJSSamplingForCurrentThread(); +} + +double profiler_time() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime(); + return delta.ToMilliseconds(); +} + +bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer, + StackCaptureOptions aCaptureOptions) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (!profiler_is_active() || + aCaptureOptions == StackCaptureOptions::NoStack) { + return false; + } + + return ThreadRegistration::WithOnThreadRefOr( + [&](ThreadRegistration::OnThreadRef aOnThreadRef) { + mozilla::Maybe<uint32_t> maybeFeatures = + RacyFeatures::FeaturesIfActiveAndUnpaused(); + if (!maybeFeatures) { + return false; + } + + ProfileBuffer profileBuffer(aChunkedBuffer); + + Registers regs; +#if defined(HAVE_NATIVE_UNWIND) + regs.SyncPopulate(); +#else + regs.Clear(); +#endif + + DoSyncSample(*maybeFeatures, + aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef(), + TimeStamp::Now(), regs, profileBuffer, aCaptureOptions); + + return true; + }, + // If this was called from a non-registered thread, return false and do no + // more work. This can happen from a memory hook. + false); +} + +UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + AUTO_PROFILER_LABEL("profiler_capture_backtrace", PROFILER); + + // Quick is-active check before allocating a buffer. 
+ if (!profiler_is_active()) { + return nullptr; + } + + auto buffer = MakeUnique<ProfileChunkedBuffer>( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + MakeUnique<ProfileBufferChunkManagerSingle>( + ProfileBufferChunkManager::scExpectedMaximumStackSize)); + + if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) { + return nullptr; + } + + return buffer; +} + +UniqueProfilerBacktrace profiler_get_backtrace() { + UniquePtr<ProfileChunkedBuffer> buffer = profiler_capture_backtrace(); + + if (!buffer) { + return nullptr; + } + + return UniqueProfilerBacktrace( + new ProfilerBacktrace("SyncProfile", std::move(buffer))); +} + +void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) { + delete aBacktrace; +} + +bool profiler_is_locked_on_current_thread() { + // This function is used to help users avoid calling `profiler_...` functions + // when the profiler may already have a lock in place, which would prevent a + // 2nd recursive lock (resulting in a crash or a never-ending wait), or a + // deadlock between any two mutexes. So we must return `true` for any of: + // - The main profiler mutex, used by most functions, and/or + // - The buffer mutex, used directly in some functions without locking the + // main mutex, e.g., marker-related functions. + // - The ProfilerParent or ProfilerChild mutex, used to store and process + // buffer chunk updates. + return PSAutoLock::IsLockedOnCurrentThread() || + ThreadRegistry::IsRegistryMutexLockedOnCurrentThread() || + ThreadRegistration::IsDataMutexLockedOnCurrentThread() || + profiler_get_core_buffer().IsThreadSafeAndLockedOnCurrentThread() || + ProfilerParent::IsLockedOnCurrentThread() || + ProfilerChild::IsLockedOnCurrentThread(); +} + +void profiler_set_js_context(JSContext* aCx) { + MOZ_ASSERT(aCx); + ThreadRegistration::WithOnThreadRef( + [&](ThreadRegistration::OnThreadRef aOnThreadRef) { + // The profiler mutex must be locked before the ThreadRegistration's. 
+ PSAutoLock lock; + aOnThreadRef.WithLockedRWOnThread( + [&](ThreadRegistration::LockedRWOnThread& aThreadData) { + aThreadData.SetJSContext(aCx); + + if (!ActivePS::Exists(lock) || !ActivePS::FeatureJS(lock)) { + return; + } + + // This call is on-thread, so we can call PollJSSampling() to + // start JS sampling immediately. + aThreadData.PollJSSampling(); + + if (ProfiledThreadData* profiledThreadData = + aThreadData.GetProfiledThreadData(lock); + profiledThreadData) { + profiledThreadData->NotifyReceivedJSContext( + ActivePS::Buffer(lock).BufferRangeEnd()); + } + }); + }); +} + +void profiler_clear_js_context() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + ThreadRegistration::WithOnThreadRef( + [](ThreadRegistration::OnThreadRef aOnThreadRef) { + JSContext* cx = + aOnThreadRef.UnlockedReaderAndAtomicRWOnThreadCRef().GetJSContext(); + if (!cx) { + return; + } + + // The profiler mutex must be locked before the ThreadRegistration's. + PSAutoLock lock; + ThreadRegistration::OnThreadRef::RWOnThreadWithLock lockedThreadData = + aOnThreadRef.LockedRWOnThread(); + + if (ProfiledThreadData* profiledThreadData = + lockedThreadData->GetProfiledThreadData(lock); + profiledThreadData && ActivePS::Exists(lock) && + ActivePS::FeatureJS(lock)) { + profiledThreadData->NotifyAboutToLoseJSContext( + cx, CorePS::ProcessStartTime(), ActivePS::Buffer(lock)); + + // Notify the JS context that profiling for this context has + // stopped. Do this by calling StopJSSampling and PollJSSampling + // before nulling out the JSContext. + lockedThreadData->StopJSSampling(); + lockedThreadData->PollJSSampling(); + + lockedThreadData->ClearJSContext(); + + // Tell the thread that we'd like to have JS sampling on this + // thread again, once it gets a new JSContext (if ever). + lockedThreadData->StartJSSampling(ActivePS::JSFlags(lock)); + } else { + // This thread is not being profiled or JS profiling is off, we only + // need to clear the context pointer. 
+ lockedThreadData->ClearJSContext(); + } + }); +} + +static void profiler_suspend_and_sample_thread( + const PSAutoLock* aLockIfAsynchronousSampling, + const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData, + JsFrame* aJsFrames, uint32_t aFeatures, ProfilerStackCollector& aCollector, + bool aSampleNative) { + const ThreadRegistrationInfo& info = aThreadData.Info(); + + if (info.IsMainThread()) { + aCollector.SetIsMainThread(); + } + + // Allocate the space for the native stack + NativeStack nativeStack; + + auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) { + // The target thread is now suspended. Collect a native backtrace, + // and call the callback. + StackWalkControl* stackWalkControlIfSupported = nullptr; +#if defined(HAVE_FASTINIT_NATIVE_UNWIND) + StackWalkControl stackWalkControl; + if constexpr (StackWalkControl::scIsSupported) { + if (aSampleNative) { + stackWalkControlIfSupported = &stackWalkControl; + } + } +#endif + const uint32_t jsFramesCount = + aJsFrames ? ExtractJsFrames(!aLockIfAsynchronousSampling, aThreadData, + aRegs, aCollector, aJsFrames, + stackWalkControlIfSupported) + : 0; + +#if defined(HAVE_FASTINIT_NATIVE_UNWIND) + if (aSampleNative) { + // We can only use FramePointerStackWalk or MozStackWalk from + // suspend_and_sample_thread as other stackwalking methods may not be + // initialized. 
+# if defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(aThreadData, aRegs, nativeStack, + stackWalkControlIfSupported); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(aThreadData, aRegs, nativeStack, + stackWalkControlIfSupported); +# else +# error "Invalid configuration" +# endif + + MergeStacks(aFeatures, !aLockIfAsynchronousSampling, aThreadData, aRegs, + nativeStack, aCollector, aJsFrames, jsFramesCount); + } else +#endif + { + MergeStacks(aFeatures, !aLockIfAsynchronousSampling, aThreadData, aRegs, + nativeStack, aCollector, aJsFrames, jsFramesCount); + + aCollector.CollectNativeLeafAddr((void*)aRegs.mPC); + } + }; + + if (!aLockIfAsynchronousSampling) { + // Sampling the current thread, do NOT suspend it! + Registers regs; +#if defined(HAVE_NATIVE_UNWIND) + regs.SyncPopulate(); +#else + regs.Clear(); +#endif + collectStack(regs, TimeStamp::Now()); + } else { + // Suspend, sample, and then resume the target thread. + Sampler sampler(*aLockIfAsynchronousSampling); + TimeStamp now = TimeStamp::Now(); + sampler.SuspendAndSampleAndResumeThread(*aLockIfAsynchronousSampling, + aThreadData, now, collectStack); + + // NOTE: Make sure to disable the sampler before it is destroyed, in + // case the profiler is running at the same time. + sampler.Disable(*aLockIfAsynchronousSampling); + } +} + +// NOTE: aCollector's methods will be called while the target thread is paused. +// Doing things in those methods like allocating -- which may try to claim +// locks -- is a surefire way to deadlock. +void profiler_suspend_and_sample_thread(ProfilerThreadId aThreadId, + uint32_t aFeatures, + ProfilerStackCollector& aCollector, + bool aSampleNative /* = true */) { + if (!aThreadId.IsSpecified() || aThreadId == profiler_current_thread_id()) { + // Sampling the current thread. Get its information from the TLS (no locking + // required.) 
+ ThreadRegistration::WithOnThreadRef( + [&](ThreadRegistration::OnThreadRef aOnThreadRef) { + aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread( + [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& + aThreadData) { + if (!aThreadData.GetJSContext()) { + // No JSContext, there is no JS frame buffer (and no need for + // it). + profiler_suspend_and_sample_thread( + /* aLockIfAsynchronousSampling = */ nullptr, aThreadData, + /* aJsFrames = */ nullptr, aFeatures, aCollector, + aSampleNative); + } else { + // JSContext is present, we need to lock the thread data to + // access the JS frame buffer. + aOnThreadRef.WithConstLockedRWOnThread( + [&](const ThreadRegistration::LockedRWOnThread& + aLockedThreadData) { + profiler_suspend_and_sample_thread( + /* aLockIfAsynchronousSampling = */ nullptr, + aThreadData, aLockedThreadData.GetJsFrameBuffer(), + aFeatures, aCollector, aSampleNative); + }); + } + }); + }); + } else { + // Lock the profiler before accessing the ThreadRegistry. + PSAutoLock lock; + ThreadRegistry::WithOffThreadRef( + aThreadId, [&](ThreadRegistry::OffThreadRef aOffThreadRef) { + aOffThreadRef.WithLockedRWFromAnyThread( + [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& + aThreadData) { + JsFrameBuffer& jsFrames = CorePS::JsFrames(lock); + profiler_suspend_and_sample_thread(&lock, aThreadData, jsFrames, + aFeatures, aCollector, + aSampleNative); + }); + }); + } +} + +// END externally visible functions +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/platform.h b/tools/profiler/core/platform.h new file mode 100644 index 0000000000..a8020e0748 --- /dev/null +++ b/tools/profiler/core/platform.h @@ -0,0 +1,379 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. 
+ +#ifndef TOOLS_PLATFORM_H_ +#define TOOLS_PLATFORM_H_ + +#include "PlatformMacros.h" + +#include "json/json.h" +#include "mozilla/Atomics.h" +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/Logging.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/ProfileBufferEntrySerialization.h" +#include "mozilla/ProfileJSONWriter.h" +#include "mozilla/ProfilerUtils.h" +#include "mozilla/ProgressLogger.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "nsString.h" + +#include <cstddef> +#include <cstdint> +#include <functional> + +class ProfilerCodeAddressService; + +namespace mozilla { +struct SymbolTable; +} + +extern mozilla::LazyLogModule gProfilerLog; + +// These are for MOZ_LOG="prof:3" or higher. It's the default logging level for +// the profiler, and should be used sparingly. +#define LOG_TEST MOZ_LOG_TEST(gProfilerLog, mozilla::LogLevel::Info) +#define LOG(arg, ...) \ + MOZ_LOG(gProfilerLog, mozilla::LogLevel::Info, \ + ("[%" PRIu64 "] " arg, \ + uint64_t(profiler_current_process_id().ToNumber()), ##__VA_ARGS__)) + +// These are for MOZ_LOG="prof:4" or higher. It should be used for logging that +// is somewhat more verbose than LOG. +#define DEBUG_LOG_TEST MOZ_LOG_TEST(gProfilerLog, mozilla::LogLevel::Debug) +#define DEBUG_LOG(arg, ...) \ + MOZ_LOG(gProfilerLog, mozilla::LogLevel::Debug, \ + ("[%" PRIu64 "] " arg, \ + uint64_t(profiler_current_process_id().ToNumber()), ##__VA_ARGS__)) + +typedef uint8_t* Address; + +// Stringify the given JSON value, in the most compact format. +// Note: Numbers are limited to a precision of 6 decimal digits, so that +// timestamps in ms have a precision in ns. +Json::String ToCompactString(const Json::Value& aJsonValue); + +// Profiling log stored in a Json::Value. The actual log only exists while the +// profiler is running, and will be inserted at the end of the JSON profile. 
+class ProfilingLog { + public: + // These will be called by ActivePS when the profiler starts/stops. + static void Init(); + static void Destroy(); + + // Access the profiling log JSON object, in order to modify it. + // Only calls the given function if the profiler is active. + // Thread-safe. But `aF` must not call other locking profiler functions. + // This is intended to capture some internal logging that doesn't belong in + // other places like markers. The log is accessible through the JS console on + // profiler.firefox.com, in the `profile.profilingLog` object; the data format + // is intentionally not defined, and not intended to be shown in the + // front-end. + // Please use caution not to output too much data. + template <typename F> + static void Access(F&& aF) { + mozilla::baseprofiler::detail::BaseProfilerAutoLock lock{gMutex}; + if (gLog) { + std::forward<F>(aF)(*gLog); + } + } + +#define DURATION_JSON_SUFFIX "_ms" + + // Convert a TimeDuration to the value to be stored in the log. + // Use DURATION_JSON_SUFFIX as suffix in the property name. + static Json::Value Duration(const mozilla::TimeDuration& aDuration) { + return Json::Value{aDuration.ToMilliseconds()}; + } + +#define TIMESTAMP_JSON_SUFFIX "_TSms" + + // Convert a TimeStamp to the value to be stored in the log. + // Use TIMESTAMP_JSON_SUFFIX as suffix in the property name. + static Json::Value Timestamp( + const mozilla::TimeStamp& aTimestamp = mozilla::TimeStamp::Now()) { + if (aTimestamp.IsNull()) { + return Json::Value{0.0}; + } + return Duration(aTimestamp - mozilla::TimeStamp::ProcessCreation()); + } + + static bool IsLockedOnCurrentThread(); + + private: + static mozilla::baseprofiler::detail::BaseProfilerMutex gMutex; + static mozilla::UniquePtr<Json::Value> gLog; +}; + +// ---------------------------------------------------------------------------- +// Miscellaneous + +// If positive, skip stack-sampling in the sampler thread loop. 
+// Users should increment it atomically when samplings should be avoided, and +// later decrement it back. Multiple uses can overlap. +// There could be a sampling in progress when this is first incremented, so if +// it is critical to prevent any sampling, lock the profiler mutex instead. +// Relaxed ordering, because it's used to request that the profiler pause +// future sampling; this is not time critical, nor dependent on anything else. +extern mozilla::Atomic<int, mozilla::MemoryOrdering::Relaxed> gSkipSampling; + +void AppendSharedLibraries(mozilla::JSONWriter& aWriter); + +// Convert the array of strings to a bitfield. +uint32_t ParseFeaturesFromStringArray(const char** aFeatures, + uint32_t aFeatureCount, + bool aIsStartup = false); + +// Add the begin/end 'Awake' markers for the thread. +void profiler_mark_thread_awake(); + +void profiler_mark_thread_asleep(); + +[[nodiscard]] bool profiler_get_profile_json( + SpliceableChunkedJSONWriter& aSpliceableChunkedJSONWriter, + double aSinceTime, bool aIsShuttingDown, + mozilla::ProgressLogger aProgressLogger); + +// Flags to conveniently track various JS instrumentations. +enum class JSInstrumentationFlags { + StackSampling = 0x1, + Allocations = 0x2, +}; + +// Write out the information of the active profiling configuration. +void profiler_write_active_configuration(mozilla::JSONWriter& aWriter); + +// Extract all received exit profiles that have not yet expired (i.e., they +// still intersect with this process' buffer range). +mozilla::Vector<nsCString> profiler_move_exit_profiles(); + +// If the "MOZ_PROFILER_SYMBOLICATE" env-var is set, we return a new +// ProfilerCodeAddressService object to use for local symbolication of profiles. +// This is off by default, and mainly intended for local development. 
+mozilla::UniquePtr<ProfilerCodeAddressService> +profiler_code_address_service_for_presymbolication(); + +extern "C" { +// This function is defined in the profiler rust module at +// tools/profiler/rust-helper. mozilla::SymbolTable and CompactSymbolTable +// have identical memory layout. +bool profiler_get_symbol_table(const char* debug_path, const char* breakpad_id, + mozilla::SymbolTable* symbol_table); + +bool profiler_demangle_rust(const char* mangled, char* buffer, size_t len); +} + +// For each running times value, call MACRO(index, name, unit, jsonProperty) +#define PROFILER_FOR_EACH_RUNNING_TIME(MACRO) \ + MACRO(0, ThreadCPU, Delta, threadCPUDelta) + +// This class contains all "running times" such as CPU usage measurements. +// All measurements are listed in `PROFILER_FOR_EACH_RUNNING_TIME` above. +// Each measurement is optional and only takes a value when explicitly set. +// Two RunningTimes object may be subtracted, to get the difference between +// known values. +class RunningTimes { + public: + constexpr RunningTimes() = default; + + // Constructor with only a timestamp, useful when no measurements will be + // taken. + constexpr explicit RunningTimes(const mozilla::TimeStamp& aTimeStamp) + : mPostMeasurementTimeStamp(aTimeStamp) {} + + constexpr void Clear() { *this = RunningTimes{}; } + + constexpr bool IsEmpty() const { return mKnownBits == 0; } + + // This should be called right after CPU measurements have been taken. + void SetPostMeasurementTimeStamp(const mozilla::TimeStamp& aTimeStamp) { + mPostMeasurementTimeStamp = aTimeStamp; + } + + const mozilla::TimeStamp& PostMeasurementTimeStamp() const { + return mPostMeasurementTimeStamp; + } + + // Should be filled for any registered thread. 
+ +#define RUNNING_TIME_MEMBER(index, name, unit, jsonProperty) \ + constexpr bool Is##name##unit##Known() const { \ + return (mKnownBits & mGot##name##unit) != 0; \ + } \ + \ + constexpr void Clear##name##unit() { \ + m##name##unit = 0; \ + mKnownBits &= ~mGot##name##unit; \ + } \ + \ + constexpr void Reset##name##unit(uint64_t a##name##unit) { \ + m##name##unit = a##name##unit; \ + mKnownBits |= mGot##name##unit; \ + } \ + \ + constexpr void Set##name##unit(uint64_t a##name##unit) { \ + MOZ_ASSERT(!Is##name##unit##Known(), #name #unit " already set"); \ + Reset##name##unit(a##name##unit); \ + } \ + \ + constexpr mozilla::Maybe<uint64_t> Get##name##unit() const { \ + if (Is##name##unit##Known()) { \ + return mozilla::Some(m##name##unit); \ + } \ + return mozilla::Nothing{}; \ + } \ + \ + constexpr mozilla::Maybe<uint64_t> GetJson##name##unit() const { \ + if (Is##name##unit##Known()) { \ + return mozilla::Some(ConvertRawToJson(m##name##unit)); \ + } \ + return mozilla::Nothing{}; \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_MEMBER) + +#undef RUNNING_TIME_MEMBER + + // Take values from another RunningTimes. + RunningTimes& TakeFrom(RunningTimes& aOther) { + if (!aOther.IsEmpty()) { +#define RUNNING_TIME_TAKE(index, name, unit, jsonProperty) \ + if (aOther.Is##name##unit##Known()) { \ + Set##name##unit(std::exchange(aOther.m##name##unit, 0)); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_TAKE) + +#undef RUNNING_TIME_TAKE + + aOther.mKnownBits = 0; + } + return *this; + } + + // Difference from `aBefore` to `this`. Any unknown makes the result unknown. + // PostMeasurementTimeStamp set to `this` PostMeasurementTimeStamp, to keep + // the most recent timestamp associated with the end of the interval over + // which the difference applies. 
+ RunningTimes operator-(const RunningTimes& aBefore) const { + RunningTimes diff; + diff.mPostMeasurementTimeStamp = mPostMeasurementTimeStamp; +#define RUNNING_TIME_SUB(index, name, unit, jsonProperty) \ + if (Is##name##unit##Known() && aBefore.Is##name##unit##Known()) { \ + diff.Set##name##unit(m##name##unit - aBefore.m##name##unit); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SUB) + +#undef RUNNING_TIME_SUB + return diff; + } + + private: + friend mozilla::ProfileBufferEntryWriter::Serializer<RunningTimes>; + friend mozilla::ProfileBufferEntryReader::Deserializer<RunningTimes>; + + // Platform-dependent. + static uint64_t ConvertRawToJson(uint64_t aRawValue); + + mozilla::TimeStamp mPostMeasurementTimeStamp; + + uint32_t mKnownBits = 0u; + +#define RUNNING_TIME_MEMBER(index, name, unit, jsonProperty) \ + static constexpr uint32_t mGot##name##unit = 1u << index; \ + uint64_t m##name##unit = 0; + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_MEMBER) + +#undef RUNNING_TIME_MEMBER +}; + +template <> +struct mozilla::ProfileBufferEntryWriter::Serializer<RunningTimes> { + static Length Bytes(const RunningTimes& aRunningTimes) { + Length bytes = 0; + +#define RUNNING_TIME_SERIALIZATION_BYTES(index, name, unit, jsonProperty) \ + if (aRunningTimes.Is##name##unit##Known()) { \ + bytes += ULEB128Size(aRunningTimes.m##name##unit); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SERIALIZATION_BYTES) + +#undef RUNNING_TIME_SERIALIZATION_BYTES + return ULEB128Size(aRunningTimes.mKnownBits) + bytes; + } + + static void Write(ProfileBufferEntryWriter& aEW, + const RunningTimes& aRunningTimes) { + aEW.WriteULEB128(aRunningTimes.mKnownBits); + +#define RUNNING_TIME_SERIALIZE(index, name, unit, jsonProperty) \ + if (aRunningTimes.Is##name##unit##Known()) { \ + aEW.WriteULEB128(aRunningTimes.m##name##unit); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_SERIALIZE) + +#undef RUNNING_TIME_SERIALIZE + } +}; + +template <> +struct 
mozilla::ProfileBufferEntryReader::Deserializer<RunningTimes> { + static void ReadInto(ProfileBufferEntryReader& aER, + RunningTimes& aRunningTimes) { + aRunningTimes = Read(aER); + } + + static RunningTimes Read(ProfileBufferEntryReader& aER) { + // Start with empty running times, everything is cleared. + RunningTimes times; + + // This sets all the bits into mKnownBits, we don't need to modify it + // further. + times.mKnownBits = aER.ReadULEB128<uint32_t>(); + + // For each member that should be known, read its value. +#define RUNNING_TIME_DESERIALIZE(index, name, unit, jsonProperty) \ + if (times.Is##name##unit##Known()) { \ + times.m##name##unit = aER.ReadULEB128<decltype(times.m##name##unit)>(); \ + } + + PROFILER_FOR_EACH_RUNNING_TIME(RUNNING_TIME_DESERIALIZE) + +#undef RUNNING_TIME_DESERIALIZE + + return times; + } +}; + +#endif /* ndef TOOLS_PLATFORM_H_ */ diff --git a/tools/profiler/core/shared-libraries-linux.cc b/tools/profiler/core/shared-libraries-linux.cc new file mode 100644 index 0000000000..db980db55a --- /dev/null +++ b/tools/profiler/core/shared-libraries-linux.cc @@ -0,0 +1,253 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "shared-libraries.h" + +#define PATH_MAX_TOSTRING(x) #x +#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x) +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <fstream> +#include "platform.h" +#include "shared-libraries.h" +#include "GeckoProfiler.h" +#include "mozilla/Sprintf.h" +#include "mozilla/Unused.h" +#include "nsDebug.h" +#include "nsNativeCharsetUtils.h" +#include <nsTArray.h> + +#include "common/linux/file_id.h" +#include <algorithm> +#include <dlfcn.h> +#if defined(GP_OS_linux) || defined(GP_OS_android) +# include <features.h> +#endif +#include <sys/types.h> + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include <link.h> // dl_phdr_info +#else +# error "Unexpected configuration" +#endif + +#if defined(GP_OS_android) +extern "C" MOZ_EXPORT __attribute__((weak)) int dl_iterate_phdr( + int (*callback)(struct dl_phdr_info* info, size_t size, void* data), + void* data); +#endif + +struct LoadedLibraryInfo { + LoadedLibraryInfo(const char* aName, unsigned long aBaseAddress, + unsigned long aFirstMappingStart, + unsigned long aLastMappingEnd) + : mName(aName), + mBaseAddress(aBaseAddress), + mFirstMappingStart(aFirstMappingStart), + mLastMappingEnd(aLastMappingEnd) {} + + nsCString mName; + unsigned long mBaseAddress; + unsigned long mFirstMappingStart; + unsigned long mLastMappingEnd; +}; + +static nsCString IDtoUUIDString( + const google_breakpad::wasteful_vector<uint8_t>& aIdentifier) { + using namespace google_breakpad; + + nsCString uuid; + const std::string str = FileID::ConvertIdentifierToUUIDString(aIdentifier); + uuid.Append(str.c_str(), str.size()); + // This is '0', not '\0', since it represents the breakpad id age. 
+ uuid.Append('0'); + return uuid; +} + +// Get the breakpad Id for the binary file pointed by bin_name +static nsCString getId(const char* bin_name) { + using namespace google_breakpad; + + PageAllocator allocator; + auto_wasteful_vector<uint8_t, kDefaultBuildIdSize> identifier(&allocator); + + FileID file_id(bin_name); + if (file_id.ElfFileIdentifier(identifier)) { + return IDtoUUIDString(identifier); + } + + return ""_ns; +} + +static SharedLibrary SharedLibraryAtPath(const char* path, + unsigned long libStart, + unsigned long libEnd, + unsigned long offset = 0) { + nsAutoString pathStr; + mozilla::Unused << NS_WARN_IF( + NS_FAILED(NS_CopyNativeToUnicode(nsDependentCString(path), pathStr))); + + nsAutoString nameStr = pathStr; + int32_t pos = nameStr.RFindChar('/'); + if (pos != kNotFound) { + nameStr.Cut(0, pos + 1); + } + + return SharedLibrary(libStart, libEnd, offset, getId(path), nameStr, pathStr, + nameStr, pathStr, ""_ns, ""); +} + +static int dl_iterate_callback(struct dl_phdr_info* dl_info, size_t size, + void* data) { + auto libInfoList = reinterpret_cast<nsTArray<LoadedLibraryInfo>*>(data); + + if (dl_info->dlpi_phnum <= 0) return 0; + + unsigned long baseAddress = dl_info->dlpi_addr; + unsigned long firstMappingStart = -1; + unsigned long lastMappingEnd = 0; + + for (size_t i = 0; i < dl_info->dlpi_phnum; i++) { + if (dl_info->dlpi_phdr[i].p_type != PT_LOAD) { + continue; + } + unsigned long start = dl_info->dlpi_addr + dl_info->dlpi_phdr[i].p_vaddr; + unsigned long end = start + dl_info->dlpi_phdr[i].p_memsz; + if (start < firstMappingStart) { + firstMappingStart = start; + } + if (end > lastMappingEnd) { + lastMappingEnd = end; + } + } + + libInfoList->AppendElement(LoadedLibraryInfo( + dl_info->dlpi_name, baseAddress, firstMappingStart, lastMappingEnd)); + + return 0; +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + SharedLibraryInfo info; + +#if defined(GP_OS_linux) + // We need to find the name of the executable (exeName, 
exeNameLen) and the + // address of its executable section (exeExeAddr) in the running image. + char exeName[PATH_MAX]; + memset(exeName, 0, sizeof(exeName)); + + ssize_t exeNameLen = readlink("/proc/self/exe", exeName, sizeof(exeName) - 1); + if (exeNameLen == -1) { + // readlink failed for whatever reason. Note this, but keep going. + exeName[0] = '\0'; + exeNameLen = 0; + LOG("SharedLibraryInfo::GetInfoForSelf(): readlink failed"); + } else { + // Assert no buffer overflow. + MOZ_RELEASE_ASSERT(exeNameLen >= 0 && + exeNameLen < static_cast<ssize_t>(sizeof(exeName))); + } + + unsigned long exeExeAddr = 0; +#endif + +#if defined(GP_OS_android) + // If dl_iterate_phdr doesn't exist, we give up immediately. + if (!dl_iterate_phdr) { + // On ARM Android, dl_iterate_phdr is provided by the custom linker. + // So if libxul was loaded by the system linker (e.g. as part of + // xpcshell when running tests), it won't be available and we should + // not call it. + return info; + } +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) + // Read info from /proc/self/maps. We ignore most of it. + pid_t pid = profiler_current_process_id().ToNumber(); + char path[PATH_MAX]; + SprintfLiteral(path, "/proc/%d/maps", pid); + std::ifstream maps(path); + std::string line; + while (std::getline(maps, line)) { + int ret; + unsigned long start; + unsigned long end; + char perm[6 + 1] = ""; + unsigned long offset; + char modulePath[PATH_MAX + 1] = ""; + ret = sscanf(line.c_str(), + "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n", + &start, &end, perm, &offset, modulePath); + if (!strchr(perm, 'x')) { + // Ignore non executable entries + continue; + } + if (ret != 5 && ret != 4) { + LOG("SharedLibraryInfo::GetInfoForSelf(): " + "reading /proc/self/maps failed"); + continue; + } + +# if defined(GP_OS_linux) + // Try to establish the main executable's load address. 
+ if (exeNameLen > 0 && strcmp(modulePath, exeName) == 0) { + exeExeAddr = start; + } +# elif defined(GP_OS_android) + // Use /proc/pid/maps to get the dalvik-jit section since it has no + // associated phdrs. + if (0 == strcmp(modulePath, "/dev/ashmem/dalvik-jit-code-cache")) { + info.AddSharedLibrary( + SharedLibraryAtPath(modulePath, start, end, offset)); + if (info.GetSize() > 10000) { + LOG("SharedLibraryInfo::GetInfoForSelf(): " + "implausibly large number of mappings acquired"); + break; + } + } +# endif + } +#endif + + nsTArray<LoadedLibraryInfo> libInfoList; + + // We collect the bulk of the library info using dl_iterate_phdr. + dl_iterate_phdr(dl_iterate_callback, &libInfoList); + + for (const auto& libInfo : libInfoList) { + info.AddSharedLibrary( + SharedLibraryAtPath(libInfo.mName.get(), libInfo.mFirstMappingStart, + libInfo.mLastMappingEnd, + libInfo.mFirstMappingStart - libInfo.mBaseAddress)); + } + +#if defined(GP_OS_linux) + // Make another pass over the information we just harvested from + // dl_iterate_phdr. If we see a nameless object mapped at what we earlier + // established to be the main executable's load address, attach the + // executable's name to that entry. + for (size_t i = 0; i < info.GetSize(); i++) { + SharedLibrary& lib = info.GetMutableEntry(i); + if (lib.GetStart() <= exeExeAddr && exeExeAddr <= lib.GetEnd() && + lib.GetNativeDebugPath().empty()) { + lib = SharedLibraryAtPath(exeName, lib.GetStart(), lib.GetEnd(), + lib.GetOffset()); + + // We only expect to see one such entry. 
+ break; + } + } +#endif + + return info; +} + +void SharedLibraryInfo::Initialize() { /* do nothing */ +} diff --git a/tools/profiler/core/shared-libraries-macos.cc b/tools/profiler/core/shared-libraries-macos.cc new file mode 100644 index 0000000000..606677de9d --- /dev/null +++ b/tools/profiler/core/shared-libraries-macos.cc @@ -0,0 +1,204 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "shared-libraries.h" + +#include "ClearOnShutdown.h" +#include "mozilla/StaticMutex.h" +#include "mozilla/Unused.h" +#include "nsNativeCharsetUtils.h" +#include <AvailabilityMacros.h> + +#include <dlfcn.h> +#include <mach-o/arch.h> +#include <mach-o/dyld_images.h> +#include <mach-o/dyld.h> +#include <mach-o/loader.h> +#include <mach/mach_init.h> +#include <mach/mach_traps.h> +#include <mach/task_info.h> +#include <mach/task.h> +#include <sstream> +#include <stdlib.h> +#include <string.h> +#include <vector> + +// Architecture specific abstraction. 
+#if defined(GP_ARCH_x86) +typedef mach_header platform_mach_header; +typedef segment_command mach_segment_command_type; +# define MACHO_MAGIC_NUMBER MH_MAGIC +# define CMD_SEGMENT LC_SEGMENT +# define seg_size uint32_t +#else +typedef mach_header_64 platform_mach_header; +typedef segment_command_64 mach_segment_command_type; +# define MACHO_MAGIC_NUMBER MH_MAGIC_64 +# define CMD_SEGMENT LC_SEGMENT_64 +# define seg_size uint64_t +#endif + +struct NativeSharedLibrary { + const platform_mach_header* header; + std::string path; +}; +static std::vector<NativeSharedLibrary>* sSharedLibrariesList = nullptr; +static mozilla::StaticMutex sSharedLibrariesMutex MOZ_UNANNOTATED; + +static void SharedLibraryAddImage(const struct mach_header* mh, + intptr_t vmaddr_slide) { + // NOTE: Presumably for backwards-compatibility reasons, this function accepts + // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast + // it to the right type here. + auto header = reinterpret_cast<const platform_mach_header*>(mh); + + Dl_info info; + if (!dladdr(header, &info)) { + return; + } + + mozilla::StaticMutexAutoLock lock(sSharedLibrariesMutex); + if (!sSharedLibrariesList) { + return; + } + + NativeSharedLibrary lib = {header, info.dli_fname}; + sSharedLibrariesList->push_back(lib); +} + +static void SharedLibraryRemoveImage(const struct mach_header* mh, + intptr_t vmaddr_slide) { + // NOTE: Presumably for backwards-compatibility reasons, this function accepts + // a mach_header even on 64-bit where it ought to be a mach_header_64. We cast + // it to the right type here. 
+ auto header = reinterpret_cast<const platform_mach_header*>(mh); + + mozilla::StaticMutexAutoLock lock(sSharedLibrariesMutex); + if (!sSharedLibrariesList) { + return; + } + + uint32_t count = sSharedLibrariesList->size(); + for (uint32_t i = 0; i < count; ++i) { + if ((*sSharedLibrariesList)[i].header == header) { + sSharedLibrariesList->erase(sSharedLibrariesList->begin() + i); + return; + } + } +} + +void SharedLibraryInfo::Initialize() { + // NOTE: We intentionally leak this memory here. We're allocating dynamically + // in order to avoid static initializers. + sSharedLibrariesList = new std::vector<NativeSharedLibrary>(); + + _dyld_register_func_for_add_image(SharedLibraryAddImage); + _dyld_register_func_for_remove_image(SharedLibraryRemoveImage); +} + +static void addSharedLibrary(const platform_mach_header* header, + const char* path, SharedLibraryInfo& info) { + const struct load_command* cmd = + reinterpret_cast<const struct load_command*>(header + 1); + + seg_size size = 0; + unsigned long long start = reinterpret_cast<unsigned long long>(header); + // Find the cmd segment in the macho image. It will contain the offset we care + // about. 
+ const uint8_t* uuid_bytes = nullptr; + for (unsigned int i = 0; + cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0); + ++i) { + if (cmd->cmd == CMD_SEGMENT) { + const mach_segment_command_type* seg = + reinterpret_cast<const mach_segment_command_type*>(cmd); + + if (!strcmp(seg->segname, "__TEXT")) { + size = seg->vmsize; + } + } else if (cmd->cmd == LC_UUID) { + const uuid_command* ucmd = reinterpret_cast<const uuid_command*>(cmd); + uuid_bytes = ucmd->uuid; + } + + cmd = reinterpret_cast<const struct load_command*>( + reinterpret_cast<const char*>(cmd) + cmd->cmdsize); + } + + nsAutoCString uuid; + if (uuid_bytes != nullptr) { + uuid.AppendPrintf( + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "%02X" + "0" /* breakpad id age */, + uuid_bytes[0], uuid_bytes[1], uuid_bytes[2], uuid_bytes[3], + uuid_bytes[4], uuid_bytes[5], uuid_bytes[6], uuid_bytes[7], + uuid_bytes[8], uuid_bytes[9], uuid_bytes[10], uuid_bytes[11], + uuid_bytes[12], uuid_bytes[13], uuid_bytes[14], uuid_bytes[15]); + } + + nsAutoString pathStr; + mozilla::Unused << NS_WARN_IF( + NS_FAILED(NS_CopyNativeToUnicode(nsDependentCString(path), pathStr))); + + nsAutoString nameStr = pathStr; + int32_t pos = nameStr.RFindChar('/'); + if (pos != kNotFound) { + nameStr.Cut(0, pos + 1); + } + + const NXArchInfo* archInfo = + NXGetArchInfoFromCpuType(header->cputype, header->cpusubtype); + + info.AddSharedLibrary(SharedLibrary(start, start + size, 0, uuid, nameStr, + pathStr, nameStr, pathStr, ""_ns, + archInfo ? archInfo->name : "")); +} + +// Translate the statically stored sSharedLibrariesList information into a +// SharedLibraryInfo object. 
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + mozilla::StaticMutexAutoLock lock(sSharedLibrariesMutex); + SharedLibraryInfo sharedLibraryInfo; + + for (auto& info : *sSharedLibrariesList) { + addSharedLibrary(info.header, info.path.c_str(), sharedLibraryInfo); + } + + // Add the entry for dyld itself. + // We only support macOS 10.12+, which corresponds to dyld version 15+. + // dyld version 15 added the dyldPath property. + task_dyld_info_data_t task_dyld_info; + mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; + if (task_info(mach_task_self(), TASK_DYLD_INFO, (task_info_t)&task_dyld_info, + &count) != KERN_SUCCESS) { + return sharedLibraryInfo; + } + + struct dyld_all_image_infos* aii = + (struct dyld_all_image_infos*)task_dyld_info.all_image_info_addr; + if (aii->version >= 15) { + const platform_mach_header* header = + reinterpret_cast<const platform_mach_header*>( + aii->dyldImageLoadAddress); + addSharedLibrary(header, aii->dyldPath, sharedLibraryInfo); + } + + return sharedLibraryInfo; +} diff --git a/tools/profiler/core/shared-libraries-win32.cc b/tools/profiler/core/shared-libraries-win32.cc new file mode 100644 index 0000000000..b8b61a2a9e --- /dev/null +++ b/tools/profiler/core/shared-libraries-win32.cc @@ -0,0 +1,143 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <windows.h> + +#include "shared-libraries.h" +#include "nsWindowsHelpers.h" +#include "mozilla/NativeNt.h" +#include "mozilla/WindowsEnumProcessModules.h" +#include "mozilla/WindowsProcessMitigations.h" +#include "mozilla/WindowsVersion.h" +#include "nsPrintfCString.h" + +static bool IsModuleUnsafeToLoad(const nsAString& aModuleName) { +#if defined(_M_AMD64) || defined(_M_IX86) + // Hackaround for Bug 1607574. 
Nvidia's shim driver nvd3d9wrap[x].dll detours + // LoadLibraryExW and it causes AV when the following conditions are met. + // 1. LoadLibraryExW was called for "detoured.dll" + // 2. nvinit[x].dll was unloaded + // 3. OS version is older than 6.2 +# if defined(_M_AMD64) + LPCWSTR kNvidiaShimDriver = L"nvd3d9wrapx.dll"; + LPCWSTR kNvidiaInitDriver = L"nvinitx.dll"; +# elif defined(_M_IX86) + LPCWSTR kNvidiaShimDriver = L"nvd3d9wrap.dll"; + LPCWSTR kNvidiaInitDriver = L"nvinit.dll"; +# endif + if (aModuleName.LowerCaseEqualsLiteral("detoured.dll") && + !mozilla::IsWin8OrLater() && ::GetModuleHandleW(kNvidiaShimDriver) && + !::GetModuleHandleW(kNvidiaInitDriver)) { + return true; + } +#endif // defined(_M_AMD64) || defined(_M_IX86) + + // Hackaround for Bug 1723868. There is no safe way to prevent the module + // Microsoft's VP9 Video Decoder from being unloaded because mfplat.dll may + // have posted more than one task to unload the module in the work queue + // without calling LoadLibrary. + if (aModuleName.LowerCaseEqualsLiteral("msvp9dec_store.dll")) { + return true; + } + + return false; +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() { + SharedLibraryInfo sharedLibraryInfo; + + auto addSharedLibraryFromModuleInfo = [&sharedLibraryInfo]( + const wchar_t* aModulePath, + HMODULE aModule) { + nsDependentSubstring moduleNameStr( + mozilla::nt::GetLeafName(nsDependentString(aModulePath))); + + // If the module is unsafe to call LoadLibraryEx for, we skip. + if (IsModuleUnsafeToLoad(moduleNameStr)) { + return; + } + + // If EAF+ is enabled, parsing ntdll's PE header causes a crash. + if (mozilla::IsEafPlusEnabled() && + moduleNameStr.LowerCaseEqualsLiteral("ntdll.dll")) { + return; + } + + // Load the module again to make sure that its handle will remain + // valid as we attempt to read the PDB information from it. We load the + // DLL as a datafile so that we don't end up running the newly loaded + // module's DllMain function. 
If the original handle |aModule| is valid, + // LoadLibraryEx just increments its refcount. + // LOAD_LIBRARY_AS_IMAGE_RESOURCE is needed to read information from the + // sections (not PE headers) which should be relocated by the loader, + // otherwise GetPdbInfo() will cause a crash. + nsModuleHandle handleLock(::LoadLibraryExW( + aModulePath, NULL, + LOAD_LIBRARY_AS_DATAFILE | LOAD_LIBRARY_AS_IMAGE_RESOURCE)); + if (!handleLock) { + return; + } + + mozilla::nt::PEHeaders headers(handleLock.get()); + if (!headers) { + return; + } + + mozilla::Maybe<mozilla::Range<const uint8_t>> bounds = headers.GetBounds(); + if (!bounds) { + return; + } + + // Put the original |aModule| into SharedLibrary, but we get debug info + // from |handleLock| as |aModule| might be inaccessible. + const uintptr_t modStart = reinterpret_cast<uintptr_t>(aModule); + const uintptr_t modEnd = modStart + bounds->length(); + + nsAutoCString breakpadId; + nsAutoString pdbPathStr; + if (const auto* debugInfo = headers.GetPdbInfo()) { + MOZ_ASSERT(breakpadId.IsEmpty()); + const GUID& pdbSig = debugInfo->pdbSignature; + breakpadId.AppendPrintf( + "%08lX" // m0 + "%04X%04X" // m1,m2 + "%02X%02X%02X%02X%02X%02X%02X%02X" // m3 + "%X", // pdbAge + pdbSig.Data1, pdbSig.Data2, pdbSig.Data3, pdbSig.Data4[0], + pdbSig.Data4[1], pdbSig.Data4[2], pdbSig.Data4[3], pdbSig.Data4[4], + pdbSig.Data4[5], pdbSig.Data4[6], pdbSig.Data4[7], debugInfo->pdbAge); + + // The PDB file name could be different from module filename, + // so report both + // e.g. 
The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb + pdbPathStr = NS_ConvertUTF8toUTF16(debugInfo->pdbFileName); + } + + nsAutoCString versionStr; + uint64_t version; + if (headers.GetVersionInfo(version)) { + versionStr.AppendPrintf("%u.%u.%u.%u", + static_cast<uint32_t>((version >> 48) & 0xFFFFu), + static_cast<uint32_t>((version >> 32) & 0xFFFFu), + static_cast<uint32_t>((version >> 16) & 0xFFFFu), + static_cast<uint32_t>(version & 0xFFFFu)); + } + + const nsString& pdbNameStr = + PromiseFlatString(mozilla::nt::GetLeafName(pdbPathStr)); + SharedLibrary shlib(modStart, modEnd, + 0, // DLLs are always mapped at offset 0 on Windows + breakpadId, PromiseFlatString(moduleNameStr), + nsDependentString(aModulePath), pdbNameStr, pdbPathStr, + versionStr, ""); + sharedLibraryInfo.AddSharedLibrary(shlib); + }; + + mozilla::EnumerateProcessModules(addSharedLibraryFromModuleInfo); + return sharedLibraryInfo; +} + +void SharedLibraryInfo::Initialize() { /* do nothing */ +} diff --git a/tools/profiler/core/vtune/ittnotify.h b/tools/profiler/core/vtune/ittnotify.h new file mode 100644 index 0000000000..f1d65b3328 --- /dev/null +++ b/tools/profiler/core/vtune/ittnotify.h @@ -0,0 +1,4123 @@ +/* <copyright> + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + Contact Information: + http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/ + + BSD LICENSE + + Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+</copyright> */ +#ifndef _ITTNOTIFY_H_ +#define _ITTNOTIFY_H_ + +/** +@file +@brief Public User API functions and types +@mainpage + +The ITT API is used to annotate a user's program with additional information +that can be used by correctness and performance tools. The user inserts +calls in their program. Those calls generate information that is collected +at runtime, and used by Intel(R) Threading Tools. + +@section API Concepts +The following general concepts are used throughout the API. + +@subsection Unicode Support +Many API functions take character string arguments. On Windows, there +are two versions of each such function. The function name is suffixed +by W if Unicode support is enabled, and by A otherwise. Any API function +that takes a character string argument adheres to this convention. + +@subsection Conditional Compilation +Many users prefer having an option to modify ITT API code when linking it +inside their runtimes. The ITT API header file provides a mechanism to replace +ITT API function names inside your code with empty strings. To do this, +define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the +static library from the linker script. + +@subsection Domains +[see domains] +Domains provide a way to separate notification for different modules or +libraries in a program. Domains are specified by dotted character strings, +e.g. TBB.Internal.Control. + +A mechanism (to be specified) is provided to enable and disable +domains. By default, all domains are enabled. +@subsection Named Entities and Instances +Named entities (frames, regions, tasks, and markers) communicate +information about the program to the analysis tools. A named entity often +refers to a section of program code, or to some set of logical concepts +that the programmer wants to group together. + +Named entities relate to the programmer's static view of the program. When +the program actually executes, many instances of a given named entity +may be created. 
+ +The API annotations denote instances of named entities. The actual +named entities are displayed using the analysis tools. In other words, +the named entities come into existence when instances are created. + +Instances of named entities may have instance identifiers (IDs). Some +API calls use instance identifiers to create relationships between +different instances of named entities. Other API calls associate data +with instances of named entities. + +Some named entities must always have instance IDs. In particular, regions +and frames always have IDs. Tasks and markers need IDs only if the ID is +needed in another API call (such as adding a relation or metadata). + +The lifetime of instance IDs is distinct from the lifetime of +instances. This allows various relationships to be specified separately +from the actual execution of instances. This flexibility comes at the +expense of extra API calls. + +The same ID may not be reused for different instances, unless a previous +[ref] __itt_id_destroy call for that ID has been issued. 
+*/ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS_FREEBSD +# define ITT_OS_FREEBSD 4 +#endif /* ITT_OS_FREEBSD */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# elif defined( __FreeBSD__ ) +# define ITT_OS ITT_OS_FREEBSD +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM_FREEBSD +# define ITT_PLATFORM_FREEBSD 4 +#endif /* ITT_PLATFORM_FREEBSD */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# elif ITT_OS==ITT_OS_FREEBSD +# define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include <stddef.h> +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <tchar.h> +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdint.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef ITTAPI_CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define ITTAPI_CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define ITTAPI_CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define ITTAPI_CDECL /* actual only on x86 platform */ 
+# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* ITTAPI_CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL + +/* TODO: Temporary for compatibility! */ +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. + */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# warning "Deprecated API is used. 
Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# include "vtune/legacy/ittnotify.h" +#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ + +/** @cond exclude_from_documentation */ +/* Helper macro for joining tokens */ +#define ITT_JOIN_AUX(p,n) p##n +#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) + +#ifdef ITT_MAJOR +#undef ITT_MAJOR +#endif +#ifdef ITT_MINOR +#undef ITT_MINOR +#endif +#define ITT_MAJOR 3 +#define ITT_MINOR 0 + +/* Standard versioning of a token with major and minor version numbers */ +#define ITT_VERSIONIZE(x) \ + ITT_JOIN(x, \ + ITT_JOIN(_, \ + ITT_JOIN(ITT_MAJOR, \ + ITT_JOIN(_, ITT_MINOR)))) + +#ifndef INTEL_ITTNOTIFY_PREFIX +# define INTEL_ITTNOTIFY_PREFIX __itt_ +#endif /* INTEL_ITTNOTIFY_PREFIX */ +#ifndef INTEL_ITTNOTIFY_POSTFIX +# define INTEL_ITTNOTIFY_POSTFIX _ptr_ +#endif /* INTEL_ITTNOTIFY_POSTFIX */ + +#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) + +#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) +#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) + +#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 
(void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) + +#ifdef ITT_STUB +#undef ITT_STUB +#endif +#ifdef ITT_STUBV +#undef ITT_STUBV +#endif +#define ITT_STUBV(api,type,name,args) \ + typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ + extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); +#define ITT_STUB ITT_STUBV +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup public Public API + * @{ + * @} + */ + +/** + * @defgroup control Collection Control + * @ingroup public + * General behavior: application continues to run, but no profiling information is being collected + * + * Pausing occurs not only for the current thread but for all process as well as spawned processes + * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: + * - Does not analyze or report errors that involve memory access. + * - Other errors are reported as usual. Pausing data collection in + * Intel(R) Parallel Inspector and Intel(R) Inspector XE + * only pauses tracing and analyzing memory access. + * It does not pause tracing or analyzing threading APIs. + * . 
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Does continue to record when new threads are started. + * . + * - Other effects: + * - Possible reduction of runtime overhead. + * . + * @{ + */ +/** @brief Pause collection */ +void ITTAPI __itt_pause(void); +/** @brief Resume collection */ +void ITTAPI __itt_resume(void); +/** @brief Detach collection */ +void ITTAPI __itt_detach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, resume, (void)) +ITT_STUBV(ITTAPI, void, detach, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#define __itt_detach ITTNOTIFY_VOID(detach) +#define __itt_detach_ptr ITTNOTIFY_NAME(detach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_pause() +#define __itt_pause_ptr 0 +#define __itt_resume() +#define __itt_resume_ptr 0 +#define __itt_detach() +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_pause_ptr 0 +#define __itt_resume_ptr 0 +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} control group */ +/** @endcond */ + +/** + * @defgroup threads Threads + * @ingroup public + * Give names to threads + * @{ + */ +/** + * @brief Sets thread name of calling thread + * @param[in] name - name of thread + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_thread_set_nameA(const char *name); +void ITTAPI __itt_thread_set_nameW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_thread_set_name __itt_thread_set_nameW +# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr +#else /* UNICODE */ +# define __itt_thread_set_name __itt_thread_set_nameA +# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr 
+#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_thread_set_name(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name)) +ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA) +#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA) +#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW) +#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name) +#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA(name) +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW(name) +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name(name) +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @brief Mark current thread as ignored from this point on, for the duration of its 
existence. + */ +void ITTAPI __itt_thread_ignore(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, thread_ignore, (void)) +#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore) +#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thread_ignore() +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} threads group */ + +/** + * @defgroup suppress Error suppression + * @ingroup public + * General behavior: application continues to run, but errors are suppressed + * + * @{ + */ + +/*****************************************************************//** + * @name group of functions used for error suppression in correctness tools + *********************************************************************/ +/** @{ */ +/** + * @hideinitializer + * @brief possible value for suppression mask + */ +#define __itt_suppress_all_errors 0x7fffffff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from threading analysis) + */ +#define __itt_suppress_threading_errors 0x000000ff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from memory analysis) + */ +#define __itt_suppress_memory_errors 0x0000ff00 + +/** + * @brief Start suppressing errors identified in mask on this thread + */ +void ITTAPI __itt_suppress_push(unsigned int mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) +#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) +#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_push(mask) +#define 
__itt_suppress_push_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effects of the matching call to __itt_suppress_push + */ +void ITTAPI __itt_suppress_pop(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_pop, (void)) +#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop) +#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_pop() +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum __itt_suppress_mode { + __itt_unsuppress_range, + __itt_suppress_range +} __itt_suppress_mode_t; + +/** + * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask + */ +void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range) +#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_mark_range(mask) +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effect of a matching call to __itt_suppress_mark_range. 
If no matching + * call is found, nothing is changed. + */ +void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range) +#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* No-op stub: must accept the same four arguments as the real API so call sites still preprocess. */ +#define __itt_suppress_clear_range(mode, mask, address, size) +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ +/** @} suppress group */ + +/** + * @defgroup sync Synchronization + * @ingroup public + * Indicate user-written synchronization code + * @{ + */ +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_barrier 1 + +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_mutex 2 + +/** +@brief Name a synchronization object +@param[in] addr Handle for the synchronization object. You should +use a real address to uniquely identify the synchronization object. +@param[in] objtype null-terminated object type string. If NULL is +passed, the name will be "User Synchronization". +@param[in] objname null-terminated object name string. If NULL, +no name will be assigned to the object. 
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex] + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute); +void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_create __itt_sync_createW +# define __itt_sync_create_ptr __itt_sync_createW_ptr +#else /* UNICODE */ +# define __itt_sync_create __itt_sync_createA +# define __itt_sync_create_ptr __itt_sync_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute)) +ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA) +#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA) +#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW) +#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create ITTNOTIFY_VOID(sync_create) +#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA(addr, objtype, objname, attribute) +#define 
__itt_sync_createA_ptr 0 +#define __itt_sync_createW(addr, objtype, objname, attribute) +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create(addr, objtype, objname, attribute) +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA_ptr 0 +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** +@brief Rename a synchronization object + +You can use the rename call to assign or reassign a name to a given +synchronization object. +@param[in] addr handle for the synchronization object. +@param[in] name null-terminated object name string. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_renameA(void *addr, const char *name); +void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_rename __itt_sync_renameW +# define __itt_sync_rename_ptr __itt_sync_renameW_ptr +#else /* UNICODE */ +# define __itt_sync_rename __itt_sync_renameA +# define __itt_sync_rename_ptr __itt_sync_renameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_rename(void *addr, const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name)) +ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN 
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA) +#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA) +#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW) +#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename) +#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA(addr, name) +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW(addr, name) +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename(addr, name) +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + @brief Destroy a synchronization object. + @param addr Handle for the synchronization object. 
+ */ +void ITTAPI __itt_sync_destroy(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr)) +#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy) +#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_destroy(addr) +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/*****************************************************************//** + * @name group of functions is used for performance measurement tools + *********************************************************************/ +/** @{ */ +/** + * @brief Enter spin loop on user-defined sync object + */ +void ITTAPI __itt_sync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr)) +#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare) +#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_prepare(addr) +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Quit spin loop without acquiring spin object + */ +void ITTAPI __itt_sync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr)) +#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel) +#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_cancel(addr) +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* 
INTEL_NO_MACRO_BODY */ +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Successful spin loop completion (sync object acquired) + */ +void ITTAPI __itt_sync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr)) +#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired) +#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_acquired(addr) +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Start sync object releasing code. Is called before the lock release call. + */ +void ITTAPI __itt_sync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr)) +#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing) +#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_releasing(addr) +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** @} sync group */ + +/**************************************************************//** + * @name group of functions is used for correctness checking tools + ******************************************************************/ +/** @{ */ +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. 
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_prepare(void* addr); + */ +void ITTAPI __itt_fsync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr)) +#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare) +#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_prepare(addr) +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. 
+ * @see void __itt_sync_cancel(void *addr); + */ +void ITTAPI __itt_fsync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr)) +#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel) +#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_cancel(addr) +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_acquired(void *addr); + */ +void ITTAPI __itt_fsync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr)) +#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired) +#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_acquired(addr) +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does not require spinning. 
+ * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_releasing(void* addr); + */ +void ITTAPI __itt_fsync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr)) +#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing) +#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_releasing(addr) +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** + * @defgroup model Modeling by Intel(R) Parallel Advisor + * @ingroup public + * This is the subset of itt used for modeling by Intel(R) Parallel Advisor. + * This API is called ONLY using annotate.h, by "Annotation" macros + * the user places in their sources during the parallelism modeling steps. + * + * site_begin/end and task_begin/end take the address of handle variables, + * which are writeable by the API. Handles must be 0 initialized prior + * to the first call to begin, or may cause a run-time failure. + * The handles are initialized in a multi-thread safe way by the API if + * the handle is 0. The commonly expected idiom is one static handle to + * identify a site or task. If a site or task of the same name has already + * been started during this collection, the same handle MAY be returned, + * but is not required to be - it is unspecified if data merging is done + * based on name. These routines also take an instance variable. 
Like + * the lexical instance, these must be 0 initialized. Unlike the lexical + * instance, this is used to track a single dynamic instance. + * + * API used by the Intel(R) Parallel Advisor to describe potential concurrency + * and related activities. User-added source annotations expand to calls + * to these procedures to enable modeling of a hypothetical concurrent + * execution serially. + * @{ + */ +#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL) + +typedef void* __itt_model_site; /*!< @brief handle for lexical site */ +typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */ +typedef void* __itt_model_task; /*!< @brief handle for lexical task */ +typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum { + __itt_model_disable_observation, + __itt_model_disable_collection +} __itt_model_disable; + +#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */ + +/** + * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support. + * + * site_begin/end model a potential concurrency site. + * site instances may be recursively nested with themselves. + * site_end exits the most recently started but unended site for the current + * thread. The handle passed to end may be used to validate structure. + * Instances of a site encountered on different threads concurrently + * are considered completely distinct. If the site name for two different + * lexical sites match, it is unspecified whether they are treated as the + * same or different for data presentation. 
+ */ +void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_site_beginW(const wchar_t *name); +#endif +void ITTAPI __itt_model_site_beginA(const char *name); +void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen); +void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance); +void ITTAPI __itt_model_site_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen)) +ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance)) +ITT_STUBV(ITTAPI, void, model_site_end_2, (void)) +#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin) +#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW) +#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW) +#endif +#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA) +#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA) +#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL) +#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL) +#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end) +#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end) +#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2) +#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2) 
+#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_site_begin(site, instance, name) +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW(name) +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA(name) +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL(name, siteNameLen) +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end(site, instance) +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2() +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support + * + * task_begin/end model a potential task, which is contained within the most + * closely enclosing dynamic site. task_end exits the most recently started + * but unended task. The handle passed to end may be used to validate + * structure. It is unspecified if bad dynamic nesting is detected. If it + * is, it should be encoded in the resulting data collection. The collector + * should not fail due to construct nesting issues, nor attempt to directly + * indicate the problem. 
+ */ +void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_task_beginW(const wchar_t *name); +void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); +#endif +void ITTAPI __itt_model_task_beginA(const char *name); +void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_iteration_taskA(const char *name); +void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); +void ITTAPI __itt_model_task_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) +ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) +#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) +#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) +#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) +#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) +#define __itt_model_iteration_taskW_ptr 
ITTNOTIFY_NAME(model_iteration_taskW) +#endif +#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) +#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) +#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) +#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) +#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) +#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) +#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) +#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) +#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) +#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end) +#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) +#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* API compiled out: every task annotation expands to nothing and every *_ptr to 0. */ +#define __itt_model_task_begin(task, instance, name) +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW(name) +#define __itt_model_task_beginW_ptr 0 +/* iteration_taskW is declared next to task_beginW, so it needs the same no-op fallbacks. */ +#define __itt_model_iteration_taskW(name) +#define __itt_model_iteration_taskW_ptr 0 +#endif +#define __itt_model_task_beginA(name) +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL(name, taskNameLen) +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA(name) +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL(name, taskNameLen) +#define __itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end(task, instance) +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2() +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +/* Macro bodies suppressed: only the *_ptr names are defined, all as 0. */ +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW_ptr 0 +#define __itt_model_iteration_taskW_ptr 0 +#endif +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA_ptr 0 +#define
__itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support + * + * lock_acquire/release model a potential lock for both lockset and + * performance modeling. Each unique address is modeled as a separate + * lock, with invalid addresses being valid lock IDs. Specifically: + * no storage is accessed by the API at the specified address - it is only + * used for lock identification. Lock acquires may be self-nested and are + * unlocked by a corresponding number of releases. + * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing, + * but may not have identical semantics.) + */ +void ITTAPI __itt_model_lock_acquire(void *lock); +void ITTAPI __itt_model_lock_acquire_2(void *lock); +void ITTAPI __itt_model_lock_release(void *lock); +void ITTAPI __itt_model_lock_release_2(void *lock); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) +#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) +#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) +#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) +#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) +#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) +#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) +#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) +#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_lock_acquire(lock) +#define 
__itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2(lock) +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release(lock) +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2(lock) +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support + * + * record_allocation/deallocation describe user-defined memory allocator + * behavior, which may be required for correctness modeling to understand + * when storage is not expected to be actually reused across threads. + */ +void ITTAPI __itt_model_record_allocation (void *addr, size_t size); +void ITTAPI __itt_model_record_deallocation(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size)) +ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr)) +#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation) +#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation) +#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation) +#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_record_allocation(addr, size) +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation(addr) +#define __itt_model_record_deallocation_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation_ptr 0 +#endif /* 
INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_INDUCTION_USES support + * + * Note particular storage is inductive through the end of the current site + */ +void ITTAPI __itt_model_induction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size)) +#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses) +#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_induction_uses(addr, size) +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_REDUCTION_USES support + * + * Note particular storage is used for reduction through the end + * of the current site + */ +void ITTAPI __itt_model_reduction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size)) +#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses) +#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_reduction_uses(addr, size) +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_OBSERVE_USES support + * + * Have correctness modeling record observations about uses of storage + * through the end of the current site + */ +void ITTAPI __itt_model_observe_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY 
+#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size)) +#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses) +#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_observe_uses(addr, size) +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_CLEAR_USES support + * + * Clear the special handling of a piece of storage related to induction, + * reduction or observe_uses + */ +void ITTAPI __itt_model_clear_uses(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) +#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses) +#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_clear_uses(addr) +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support + * + * disable_push/disable_pop push and pop disabling based on a parameter. + * Disabling observations stops processing of memory references during + * correctness modeling, and all annotations that occur in the disabled + * region. This allows description of code that is expected to be handled + * specially during conversion to parallelism or that is not recognized + * by tools (e.g. some kinds of synchronization operations.) + * This mechanism causes all annotations in the disabled region, other + * than disable_push and disable_pop, to be ignored. 
(For example, this + * might validly be used to disable an entire parallel site and the contained + * tasks and locking in it for data collection purposes.) + * The disable for collection is a more expensive operation, but reduces + * collector overhead significantly. This applies to BOTH correctness data + * collection and performance data collection. For example, a site + * containing a task might only enable data collection for the first 10 + * iterations. Both performance and correctness data should reflect this, + * and the program should run as close to full speed as possible when + * collection is disabled. + */ +void ITTAPI __itt_model_disable_push(__itt_model_disable x); +void ITTAPI __itt_model_disable_pop(void); +void ITTAPI __itt_model_aggregate_task(size_t x); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) +ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) +#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) +#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) +#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) +#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) +#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) +#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_disable_push(x) +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop() +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task(x) +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** 
@endcond */ +/** @} model group */ + +/** + * @defgroup heap Heap + * @ingroup public + * Heap group + * @{ + */ + +typedef void* __itt_heap_function; + +/** + * @brief Create an identification for heap function + * @return non-zero identifier or NULL + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain); +__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_heap_function_create __itt_heap_function_createW +# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr +#else +# define __itt_heap_function_create __itt_heap_function_createA +# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain)) +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA) +#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA) +#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW) +#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create 
ITTNOTIFY_DATA(heap_function_create) +#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create(name, domain) (__itt_heap_function)0 +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA_ptr 0 +#define __itt_heap_function_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation begin occurrence. + */ +void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized)) +#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin) +#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_begin(h, size, initialized) +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an allocation end occurrence. 
+ */ +void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized)) +#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end) +#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_allocate_end(h, addr, size, initialized) +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_allocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free begin occurrence. + */ +void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin) +#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_begin(h, addr) +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a free end occurrence.
+ */ +void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) +#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end) +#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_free_end(h, addr) +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_free_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation begin occurrence. + */ +void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin) +#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_begin(h, addr, new_size, initialized) +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record a reallocation end occurrence.
+ */ +void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized)) +#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end) +#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized) +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reallocate_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access begin */ +void ITTAPI __itt_heap_internal_access_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void)) +#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin) +#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_begin() +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief internal access end */ +void ITTAPI __itt_heap_internal_access_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void)) +#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end) +#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end) +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_end() +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth begin */ +void ITTAPI __itt_heap_record_memory_growth_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) +#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) +#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_begin() +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth end */ +void ITTAPI __itt_heap_record_memory_growth_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) +#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) +#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_end() +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Specify the type of heap detection/reporting to modify. + */ +/** + * @hideinitializer + * @brief Report on memory leaks. 
+ */ +#define __itt_heap_leaks 0x00000001 + +/** + * @hideinitializer + * @brief Report on memory growth. + */ +#define __itt_heap_growth 0x00000002 + + +/** + * @brief heap reset detection + * @param[in] reset_mask Mask selecting the detection kinds to reset (presumably built from __itt_heap_leaks / __itt_heap_growth; confirm against the collector documentation). + */ +void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) +#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) +#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The no-op fallback accepts the same single argument as the real call, so call sites still preprocess when the API is compiled out. */ +#define __itt_heap_reset_detection(reset_mask) +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief report + * @param[in] record_mask Mask selecting what to report (presumably built from __itt_heap_leaks / __itt_heap_growth; confirm against the collector documentation). + */ +void ITTAPI __itt_heap_record(unsigned int record_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) +#define __itt_heap_record ITTNOTIFY_VOID(heap_record) +#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* The no-op fallback accepts the same single argument as the real call, so call sites still preprocess when the API is compiled out. */ +#define __itt_heap_record(record_mask) +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} heap group */ +/** @endcond */ +/* ========================================================================== */ + +/** + * @defgroup domains Domains + * @ingroup public + * Domains group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_domain +{ + volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */ + const char* nameA; /*!< Copy of original name in ASCII.
*/ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_domain* next; +} __itt_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup domains + * @brief Create a domain. + * Create domain using some domain name: the URI naming style is recommended. + * Because the set of domains is expected to be static over the application's + * execution time, there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of + * which thread created the domain. This call is thread-safe. + * @param[in] name name of domain + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_domain* ITTAPI __itt_domain_createA(const char *name); +__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_domain_create __itt_domain_createW +# define __itt_domain_create_ptr __itt_domain_createW_ptr +#else /* UNICODE */ +# define __itt_domain_create __itt_domain_createA +# define __itt_domain_create_ptr __itt_domain_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_domain* ITTAPI __itt_domain_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA) +#define 
__itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA) +#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW) +#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create ITTNOTIFY_DATA(domain_create) +#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA(name) (__itt_domain*)0 +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW(name) (__itt_domain*)0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create(name) (__itt_domain*)0 +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} domains group */ + +/** + * @defgroup ids IDs + * @ingroup public + * IDs group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_id +{ + unsigned long long d1, d2, d3; +} __itt_id; + +#pragma pack(pop) +/** @endcond */ + +/* All-zero ID. Declared static so that each translation unit including this header gets its own copy; a non-static const object defined in a header is a duplicate external definition when the header is included from more than one C file. */ +static const __itt_id __itt_null = { 0, 0, 0 }; + +/** + * @ingroup ids + * @brief A convenience function is provided to create an ID without domain control. + * @details This is a convenience function to initialize an __itt_id structure. This function + * does not affect the collector runtime in any way. After you make the ID with this + * function, you still must create it with the __itt_id_create function before using the ID + * to identify a named entity. + * @param[in] addr The address of object; high QWORD of the ID value.
+ * @param[in] extra The extra data to uniquely identify the object; low QWORD of the ID value. + */ + +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) +{ + /* Start from the all-zero ID, then fill in the caller-supplied fields: d1 takes the pointer value (converted through uintptr_t), d2 takes extra. */ + __itt_id id = __itt_null; + id.d1 = (unsigned long long)((uintptr_t)addr); + id.d2 = (unsigned long long)extra; + id.d3 = (unsigned long long)0; /* Reserved. Must be zero */ + return id; +} + +/** + * @ingroup ids + * @brief Create an instance of identifier. + * This establishes the beginning of the lifetime of an instance of + * the given ID in the trace. Once this lifetime starts, the ID + * can be used to tag named entity instances in calls such as + * __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * Instance IDs are not domain specific! + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x) +#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create(domain,id) +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup ids + * @brief Destroy an instance of identifier. + * This ends the lifetime of the current instance of the given ID value in the trace. + * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x) +#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_destroy(domain,id) +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} ids group */ + +/** + * @defgroup handles String Handles + * @ingroup public + * String Handles group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_string_handle +{ + const char* strA; /*!< Copy of original string in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* strW; /*!< Copy of original string in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* strW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_string_handle* next; +} __itt_string_handle; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup handles + * @brief Create a string handle. + * Create and return handle value that can be associated with a string. + * Consecutive calls to __itt_string_handle_create with the same name + * return the same value. Because the set of string handles is expected to remain + * static during the application's execution time, there is no mechanism to destroy a string handle. 
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created + * the string handle. This call is thread-safe. + * @param[in] name The input string + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name); +__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_string_handle_create __itt_string_handle_createW +# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr +#else /* UNICODE */ +# define __itt_string_handle_create __itt_string_handle_createA +# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA) +#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA) +#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW) +#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create) +#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* 
INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA(name) (__itt_string_handle*)0 +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW(name) (__itt_string_handle*)0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create(name) (__itt_string_handle*)0 +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} handles group */ + +/** @cond exclude_from_documentation */ +typedef unsigned long long __itt_timestamp; +/** @endcond */ + +#define __itt_timestamp_none ((__itt_timestamp)-1LL) + +/** @cond exclude_from_gpa_documentation */ + +/** + * @ingroup timestamps + * @brief Return timestamp corresponding to the current moment. + * This returns the timestamp in the format that is the most relevant for the current + * host or platform (RDTSC, QPC, and others). You can use the "<" operator to + * compare __itt_timestamp values. 
+ */ +__itt_timestamp ITTAPI __itt_get_timestamp(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) +#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) +#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_get_timestamp() +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} timestamps */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @defgroup regions Regions + * @ingroup public + * Regions group + * @{ + */ +/** + * @ingroup regions + * @brief Begin of region instance. + * Successive calls to __itt_region_begin with the same ID are ignored + * until a call to __itt_region_end with the same ID + * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance. Must not be __itt_null + * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null + * @param[in] name The name of this region + */ +void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup regions + * @brief End of region instance. + * The first call to __itt_region_end with a given ID ends the + * region. Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_region_begin call. 
+ * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance + */ +void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id)) +#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z) +#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin) +#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x) +#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_region_begin(d,x,y,z) +#define __itt_region_begin_ptr 0 +#define __itt_region_end(d,x) +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_region_begin_ptr 0 +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} regions group */ + +/** + * @defgroup frames Frames + * @ingroup public + * Frames are similar to regions, but are intended to be easier to use and to implement. + * In particular: + * - Frames always represent periods of elapsed time + * - By default, frames have no nesting relationships + * @{ + */ + +/** + * @ingroup frames + * @brief Begin a frame instance. + * Successive calls to __itt_frame_begin with the + * same ID are ignored until a call to __itt_frame_end with the same ID. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + */ +void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief End a frame instance. + * The first call to __itt_frame_end with a given ID + * ends the frame. 
Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_frame_begin call. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL for current + */ +void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief Submits a frame instance. + * Successive calls to __itt_frame_begin or __itt_frame_submit with the + * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit + * with the same ID. + * Passing special __itt_timestamp_none value as "end" argument means + * take the current timestamp as the end timestamp. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + * @param[in] begin Timestamp of the beginning of the frame + * @param[in] end Timestamp of the end of the frame + */ +void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, + __itt_timestamp begin, __itt_timestamp end); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) +#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) +#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) +#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) +#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) +#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) +#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_frame_begin_v3(domain,id) +#define __itt_frame_begin_v3_ptr 0 +#define 
__itt_frame_end_v3(domain,id) +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3(domain,id,begin,end) +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_frame_begin_v3_ptr 0 +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} frames group */ +/** @endcond */ + +/** + * @defgroup taskgroup Task Group + * @ingroup public + * Task Group + * @{ + */ +/** + * @ingroup task_groups + * @brief Denotes a task_group instance. + * Successive calls to __itt_task_group with the same ID are ignored. + * @param[in] domain The domain for this task_group instance + * @param[in] id The instance ID for this task_group instance. Must not be __itt_null. + * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null. + * @param[in] name The name of this task_group + */ +void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z) +#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_group(d,x,y,z) +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_group_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} taskgroup group */ + +/** + * @defgroup tasks Tasks + * @ingroup public + * A task instance represents a piece of work performed by a particular + * thread for a period of time. A call to __itt_task_begin creates a + * task instance. This becomes the current instance for that task on that + * thread. 
A following call to __itt_task_end on the same thread ends the + * instance. There may be multiple simultaneous instances of tasks with the + * same name on different threads. If an ID is specified, the task instance + * receives that ID. Nested tasks are allowed. + * + * Note: The task is defined by the bracketing of __itt_task_begin and + * __itt_task_end on the same thread. If some scheduling mechanism causes + * task switching (the thread executes a different user task) or task + * switching (the user task switches to a different thread) then this breaks + * the notion of current instance. Additional API calls are required to + * deal with that possibility. + * @{ + */ + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup tasks + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] taskid The identifier for this task instance (may be 0) + * @param[in] parentid The parent of this task (may be 0) + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup tasks + * @brief End the current task instance. + * @param[in] domain The domain for this task + */ +void ITTAPI __itt_task_end(const __itt_domain *domain); + +/** + * @ingroup tasks + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. 
+ * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup tasks + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain)) +ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid)) +#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z) +#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin) +#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z) +#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn) +#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d) +#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end) +#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z) +#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped) +#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x) +#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin(domain,id,parentid,name) +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn(domain,id,parentid,fn) +#define __itt_task_begin_fn_ptr 0 
+#define __itt_task_end(domain) +#define __itt_task_end_ptr 0 +#define __itt_task_begin_overlapped(domain,taskid,parentid,name) +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_end_overlapped(domain,taskid) +#define __itt_task_end_overlapped_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ptr 0 +#define __itt_task_begin_fn_ptr 0 +#define __itt_task_end_ptr 0 +#define __itt_task_begin_overlapped_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} tasks group */ + + +/** + * @defgroup markers Markers + * Markers represent a single discrete event in time. Markers have a scope, + * described by an enumerated type __itt_scope. Markers are created by + * the API call __itt_marker. A marker instance can be given an ID for use in + * adding metadata. + * @{ + */ + +/** + * @brief Describes the scope of an event object in the trace. + */ +typedef enum +{ + __itt_scope_unknown = 0, + __itt_scope_global, + __itt_scope_track_group, + __itt_scope_track, + __itt_scope_task, + __itt_scope_marker +} __itt_scope; + +/** @cond exclude_from_documentation */ +#define __itt_marker_scope_unknown __itt_scope_unknown +#define __itt_marker_scope_global __itt_scope_global +#define __itt_marker_scope_process __itt_scope_track_group +#define __itt_marker_scope_thread __itt_scope_track +#define __itt_marker_scope_task __itt_scope_task +/** @endcond */ + +/** + * @ingroup markers + * @brief Create a marker instance + * @param[in] domain The domain for this marker + * @param[in] id The instance ID for this marker or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker, (const 
__itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z) +#define __itt_marker_ptr ITTNOTIFY_NAME(marker) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker(domain,id,name,scope) +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} markers group */ + +/** + * @defgroup metadata Metadata + * The metadata API is used to attach extra information to named + * entities. Metadata can be attached to an identified named entity by ID, + * or to the current entity (which is always a task). + * + * Conceptually metadata has a type (what kind of metadata), a key (the + * name of the metadata), and a value (the actual data). The encoding of + * the value depends on the type of the metadata. + * + * The type of metadata is specified by an enumerated type __itt_metadata_type. + * @{ + */ + +/** + * @ingroup parameters + * @brief describes the type of metadata + */ +typedef enum { + __itt_metadata_unknown = 0, + __itt_metadata_u64, /**< Unsigned 64-bit integer */ + __itt_metadata_s64, /**< Signed 64-bit integer */ + __itt_metadata_u32, /**< Unsigned 32-bit integer */ + __itt_metadata_s32, /**< Signed 32-bit integer */ + __itt_metadata_u16, /**< Unsigned 16-bit integer */ + __itt_metadata_s16, /**< Signed 16-bit integer */ + __itt_metadata_float, /**< Signed 32-bit floating-point */ + __itt_metadata_double /**< Signed 64-bit floating-point */ +} __itt_metadata_type; + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. + * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b) +#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add(d,x,y,z,a,b) +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add __itt_metadata_str_addW +# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add __itt_metadata_str_addA +# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) 
ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a) +#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA) +#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a) +#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a) +#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW(d,x,y,z,a) +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. 
+ * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
+ * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, 
metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA) +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} metadata group */ + +/** + * @defgroup relations Relations + * Instances of named entities can be explicitly associated with other + * instances using instance IDs and the relationship API calls. 
+ * + * @{ + */ + +/** + * @ingroup relations + * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation. + * Relations between instances can be added with an API call. The relation + * API uses instance IDs. Relations can be added before or after the actual + * instances are created and persist independently of the instances. This + * is the motivation for having different lifetimes for instance IDs and + * the actual instances. + */ +typedef enum +{ + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ +} __itt_relation; + +/** + * @ingroup relations + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail); + +/** + * @ingroup relations + * @brief Add a relation between two instance identifiers. 
+ * @param[in] domain The domain controlling this call + * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y) +#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current) +#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z) +#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current(d,x,y) +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add(d,x,y,z) +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} relations group */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_info +{ + unsigned long long clock_freq; /*!< Clock domain frequency */ + unsigned long long clock_base; /*!< Clock domain base timestamp */ +} __itt_clock_info; + +#pragma pack(pop) +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_domain +{ + __itt_clock_info 
info; /*!< Most recent clock domain info */ + __itt_get_clock_info_fn fn; /*!< Callback function pointer */ + void* fn_data; /*!< Input argument for the callback function */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_clock_domain* next; +} __itt_clock_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Create a clock domain. + * Certain applications require the capability to trace their application using + * a clock domain different than the CPU, for instance the instrumentation of events + * that occur on a GPU. + * Because the set of domains is expected to be static over the application's execution time, + * there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of which thread created + * the domain. This call is thread-safe. + * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps + * @param[in] fn_data Argument for a callback function; may be NULL + */ +__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data)) +#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create) +#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0 +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Recalculate clock domain frequencies and clock base timestamps. 
+ */ +void ITTAPI __itt_clock_domain_reset(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, clock_domain_reset, (void)) +#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset) +#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_reset() +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Create an instance of identifier. This establishes the beginning of the lifetime of + * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to + * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** + * @ingroup clockdomain + * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the + * given ID value in the trace. Any relationships that are established after this lifetime ends are + * invalid. This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z) +#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex) +#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z) +#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create_ex(domain,clock_domain,timestamp,id) +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id) +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, or __itt_null + * @param[in] parentid The parent of this task, or __itt_null + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup clockdomain + * @brief End the current task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ */ +void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp)) +#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b) +#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex) +#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b) +#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex) +#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y) +#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name) +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn) +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex(domain,clock_domain,timestamp) +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup counters Counters + * @ingroup public + * Counters are user-defined objects with a monotonically increasing + * value. Counter values are 64-bit unsigned integers. 
+ * Counters have names that can be displayed in + * the tools. + * @{ + */ + +/** + * @brief opaque structure for counter identification + */ +/** @cond exclude_from_documentation */ + +typedef struct ___itt_counter* __itt_counter; + +/** + * @brief Create an unsigned 64 bits integer counter with given name/domain + * + * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer + * + * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); +__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create __itt_counter_createW +# define __itt_counter_create_ptr __itt_counter_createW_ptr +#else /* UNICODE */ +# define __itt_counter_create __itt_counter_createA +# define __itt_counter_create_ptr __itt_counter_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) 
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) +#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) +#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create ITTNOTIFY_DATA(counter_create) +#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +/* Disabled-API stubs yield a null counter so they remain usable in expression context. */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA(name, domain) (__itt_counter)0 +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW(name, domain) (__itt_counter)0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create(name, domain) (__itt_counter)0 +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Increment the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) +#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) +#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc(id) +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Increment the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 
bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) +#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_delta(id, value) +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Decrement the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id)) +#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec) +#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec(id) +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Decrement the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta) +#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define 
__itt_counter_dec_delta(id, value) +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup counters + * @brief Increment a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls increment the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Increment a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. 
+ * @param[in] name The name of the counter + * @param[in] delta The amount by which to increment the counter + */ +void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) +#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) +#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) +#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_v3(domain,name) +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3(domain,name,delta) +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + +/** + * @ingroup counters + * @brief Decrement a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls decrement the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Decrement a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. 
+ * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + * @param[in] delta The amount by which to decrement the counter + */ +void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x) +#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3) +#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y) +#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec_v3(domain,name) +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3(domain,name,delta) +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} counters group */ + + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr)) +#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value) +#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value(id, value_ptr) +#define __itt_counter_set_value_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ptr 0 +#endif /* 
INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr)) +#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex) +#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create a typed counter with given name/domain + * + * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type); +__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create_typed __itt_counter_create_typedW +# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr +#else /* UNICODE */ +# define __itt_counter_create_typed __itt_counter_create_typedA +# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI 
__itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type)) +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA) +#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA) +#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW) +#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed) +#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +/* Disabled-API stubs yield a null counter so they remain usable in expression context. */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA(name, domain, type) (__itt_counter)0 +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW(name, domain, type) (__itt_counter)0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed(name, domain, type) (__itt_counter)0 +#define __itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define 
__itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or + * __itt_counter_create_typed() + */ +void ITTAPI __itt_counter_destroy(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) +#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) +#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_destroy(id) +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} counters group */ + +/** + * @ingroup markers + * @brief Create a marker instance. + * @param[in] domain The domain for this marker + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The instance ID for this marker, or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b) +#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope) +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail); + +/** + * @ingroup clockdomain + * @brief Add a relation between two instance identifiers. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a) +#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex) +#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b) +#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail) +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail) +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef enum ___itt_track_group_type +{ + __itt_track_group_type_normal = 0 +} __itt_track_group_type; +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track_group +{ + __itt_string_handle* name; /*!< Name of the track 
group */ + struct ___itt_track* track; /*!< List of child tracks */ + __itt_track_group_type tgtype; /*!< Type of the track group */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track_group* next; +} __itt_track_group; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Placeholder for custom track types. Currently, "normal" custom track + * is the only available track type. + */ +typedef enum ___itt_track_type +{ + __itt_track_type_normal = 0 +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + , __itt_track_type_queue +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ +} __itt_track_type; + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track +{ + __itt_string_handle* name; /*!< Name of the track */ + __itt_track_group* group; /*!< Parent group to a track */ + __itt_track_type ttype; /*!< Type of the track */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track* next; +} __itt_track; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Create logical track group. + */ +__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type)) +#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create) +#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +/* No-op stub: takes both arguments of the real API and yields a null group. */ +#define __itt_track_group_create(name,track_group_type) (__itt_track_group*)0 +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create logical track. 
+ */ +__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type)) +#define __itt_track_create ITTNOTIFY_DATA(track_create) +#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_create(track_group,name,track_type) (__itt_track*)0 +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the logical track. + */ +void ITTAPI __itt_set_track(__itt_track* track); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track)) +#define __itt_set_track ITTNOTIFY_VOID(set_track) +#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_set_track(track) +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/* ========================================================================== */ +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup events Events + * @ingroup public + * Events group + * @{ + */ +/** @brief user event type */ +typedef int __itt_event; + +/** + * @brief Create an event notification + * @note name or namelen being null/name and namelen not matching, user event feature not enabled + * @return non-zero event identifier upon success and __itt_err otherwise + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_event LIBITTAPI __itt_event_createA(const char *name, int 
namelen); +__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_event_create __itt_event_createW +# define __itt_event_create_ptr __itt_event_createW_ptr +#else +# define __itt_event_create __itt_event_createA +# define __itt_event_create_ptr __itt_event_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA ITTNOTIFY_DATA(event_createA) +#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) +#define __itt_event_createW ITTNOTIFY_DATA(event_createW) +#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create ITTNOTIFY_DATA(event_create) +#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA(name, namelen) (__itt_event)0 +#define __itt_event_createA_ptr 0 +#define __itt_event_createW(name, namelen) (__itt_event)0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create(name, namelen) (__itt_event)0 +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* 
INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA_ptr 0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event occurrence. + * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_start(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) +#define __itt_event_start ITTNOTIFY_DATA(event_start) +#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_start(event) (int)0 +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event end occurrence. + * @note It is optional if events do not have durations. 
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_end(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) +#define __itt_event_end ITTNOTIFY_DATA(event_end) +#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_end(event) (int)0 +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} events group */ + + +/** + * @defgroup arrays Arrays Visualizer + * @ingroup public + * Visualize arrays + * @{ + */ + +/** + * @enum __itt_av_data_type + * @brief Defines types of arrays data (for C/C++ intrinsic types) + */ +typedef enum +{ + __itt_e_first = 0, + __itt_e_char = 0, /* 1-byte integer */ + __itt_e_uchar, /* 1-byte unsigned integer */ + __itt_e_int16, /* 2-byte integer */ + __itt_e_uint16, /* 2-byte unsigned integer */ + __itt_e_int32, /* 4-byte integer */ + __itt_e_uint32, /* 4-byte unsigned integer */ + __itt_e_int64, /* 8-byte integer */ + __itt_e_uint64, /* 8-byte unsigned integer */ + __itt_e_float, /* 4-byte floating */ + __itt_e_double, /* 8-byte floating */ + __itt_e_last = __itt_e_double +} __itt_av_data_type; + +/** + * @brief Save an array data to a file. + * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only). + * @param[in] data - pointer to the array data + * @param[in] rank - the rank of the array + * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. 
+ * The size of dimensions must be equal to the rank + * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) + * @param[in] filePath - the file path; the output format is defined by the file extension + * @param[in] columnOrder - defines how the array is stored in the linear memory. + * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C). + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_av_save __itt_av_saveW +# define __itt_av_save_ptr __itt_av_saveW_ptr +#else /* UNICODE */ +# define __itt_av_save __itt_av_saveA +# define __itt_av_save_ptr __itt_av_saveA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) +#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) +#define __itt_av_saveW 
ITTNOTIFY_DATA(av_saveW) +#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save ITTNOTIFY_DATA(av_save) +#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +/* No-op stubs: accept the same six arguments as the real API and yield an int, like the other int-returning stubs in this header. */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0 +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0 +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +void ITTAPI __itt_enable_attach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, enable_attach, (void)) +#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) +#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_enable_attach() +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** @} arrays group */ + +/** @endcond */ + +/** + * @brief Module load info + * This API is used to report necessary information in case of module relocation + * @param[in] start_addr - relocated module start address + * @param[in] end_addr - relocated module end address + * @param[in] path - file system path to the module + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, 
const char *path); +void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_module_load __itt_module_loadW +# define __itt_module_load_ptr __itt_module_loadW_ptr +#else /* UNICODE */ +# define __itt_module_load __itt_module_loadA +# define __itt_module_load_ptr __itt_module_loadA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path)) +ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA) +#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA) +#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW) +#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load ITTNOTIFY_VOID(module_load) +#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA(start_addr, end_addr, path) +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW(start_addr, end_addr, path) +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load(start_addr, end_addr, path) +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* 
INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_H_ */ + +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + +#ifndef _ITTNOTIFY_PRIVATE_ +#define _ITTNOTIFY_PRIVATE_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @ingroup clockdomain + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. + * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid)) +#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b) +#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex) +#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z) +#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name) +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid) +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup makrs_internal Marks + * @ingroup internal + * Marks group + * @warning Internal API: + * - It is not shipped to outside of Intel + * - It is delivered to internal Intel teams using e-mail or SVN access only + * @{ + */ +/** @brief user mark type */ +typedef int __itt_mark_type; + +/** + * @brief Creates a user mark type with the specified name using char or Unicode string. 
+ * @param[in] name - name of mark to create + * @return Returns a handle to the mark type + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_mark_type ITTAPI __itt_mark_createA(const char *name); +__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_create __itt_mark_createW +# define __itt_mark_create_ptr __itt_mark_createW_ptr +#else /* UNICODE */ +# define __itt_mark_create __itt_mark_createA +# define __itt_mark_create_ptr __itt_mark_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_mark_type ITTAPI __itt_mark_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA) +#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA) +#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW) +#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create ITTNOTIFY_DATA(mark_create) +#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA(name) (__itt_mark_type)0 +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW(name) (__itt_mark_type)0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create(name) (__itt_mark_type)0 +#define __itt_mark_create_ptr 0 +#endif /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_createA_ptr 0 +#define __itt_mark_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string. + * + * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign. + * - The call is "synchronous" - function returns after mark is actually added to results. + * - This function is useful, for example, to mark different phases of application + * (beginning of the next mark automatically means the end of the current region). + * - Can be used together with "continuous" marks (see below) at the same collection session + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @param[in] parameter - string parameter of mark + * @return Returns zero value in case of success, non-zero value otherwise. 
+ */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark __itt_markW +# define __itt_mark_ptr __itt_markW_ptr +#else /* UNICODE */ +# define __itt_mark __itt_markA +# define __itt_mark_ptr __itt_markA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA ITTNOTIFY_DATA(markA) +#define __itt_markA_ptr ITTNOTIFY_NAME(markA) +#define __itt_markW ITTNOTIFY_DATA(markW) +#define __itt_markW_ptr ITTNOTIFY_NAME(markW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark ITTNOTIFY_DATA(mark) +#define __itt_mark_ptr ITTNOTIFY_NAME(mark) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA(mt, parameter) (int)0 +#define __itt_markA_ptr 0 +#define __itt_markW(mt, parameter) (int)0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark(mt, parameter) (int)0 +#define __itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA_ptr 0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define 
__itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create a "discrete" user event type (mark) for process + * rather than for one thread + * @see int __itt_mark(__itt_mark_type mt, const char* parameter); + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_global __itt_mark_globalW +# define __itt_mark_global_ptr __itt_mark_globalW_ptr +#else /* UNICODE */ +# define __itt_mark_global __itt_mark_globalA +# define __itt_mark_global_ptr __itt_mark_globalA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA) +#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA) +#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW) +#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global ITTNOTIFY_DATA(mark_global) +#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define 
__itt_mark_globalA(mt, parameter) (int)0 +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW(mt, parameter) (int)0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global(mt, parameter) (int)0 +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates an "end" point for "continuous" mark with specified name. + * + * - Returns zero value in case of success, non-zero value otherwise. + * Also returns non-zero value when preceding "begin" point for the + * mark with the same name failed to be created or not created. + * - The mark of "continuous" type is placed to collection results in + * case of success. It appears in overtime view(s) as a special tick + * sign (different from "discrete" mark) together with line from + * corresponding "begin" mark to "end" mark. + * @note Continuous marks can overlap and be nested inside each other. + * Discrete mark can be nested inside marked region + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @return Returns zero value in case of success, non-zero value otherwise. 
+ */ +int ITTAPI __itt_mark_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt)) +#define __itt_mark_off ITTNOTIFY_DATA(mark_off) +#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_off(mt) (int)0 +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create an "end" point for mark of process + * @see int __itt_mark_off(__itt_mark_type mt); + */ +int ITTAPI __itt_mark_global_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt)) +#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off) +#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_global_off(mt) (int)0 +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} marks group */ + +/** + * @defgroup counters_internal Counters + * @ingroup internal + * Counters group + * @{ + */ + + +/** + * @defgroup stitch Stack Stitching + * @ingroup internal + * Stack Stitching group + * @{ + */ +/** + * @brief opaque structure for caller (stitch point) identification + */ +typedef struct ___itt_caller *__itt_caller; + +/** + * @brief Create the stitch point, i.e. a point in call stack where other stacks should be stitched to. + * The function returns a unique identifier which is used to match the cut points with corresponding stitch points. 
+ */ +__itt_caller ITTAPI __itt_stack_caller_create(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) +#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create) +#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_create() (__itt_caller)0 +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create() + */ +void ITTAPI __itt_stack_caller_destroy(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id)) +#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy) +#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_destroy(id) +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Sets the cut point. Stack from each event which occurs after this call will be cut + * at the same stack level the function was called and stitched to the corresponding stitch point. 
+ */ +void ITTAPI __itt_stack_callee_enter(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id)) +#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter) +#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_enter(id) +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter(). + */ +void ITTAPI __itt_stack_callee_leave(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id)) +#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave) +#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_leave(id) +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} stitch group */ + +/* ***************************************************************************************************************************** */ + +#include <stdarg.h> + +/** @cond exclude_from_documentation */ +typedef enum __itt_error_code +{ + __itt_error_success = 0, /*!< no error */ + __itt_error_no_module = 1, /*!< module can't be loaded */ + /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ + __itt_error_no_symbol = 2, /*!< symbol not found */ + /* %1$s -- library name, %2$s -- symbol name. 
*/ + __itt_error_unknown_group = 3, /*!< unknown group specified */ + /* %1$s -- env var name, %2$s -- group name. */ + __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */ + /* %1$s -- env var name, %2$d -- system error. */ + __itt_error_env_too_long = 5, /*!< variable value too long */ + /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */ + __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */ + /* %1$s -- function name, %2$d -- errno. */ +} __itt_error_code; + +typedef void (__itt_error_handler_t)(__itt_error_code code, va_list); +__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*); + +const char* ITTAPI __itt_api_version(void); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) +void __itt_error_handler(__itt_error_code code, va_list args); +extern const int ITTNOTIFY_NAME(err); +#define __itt_err ITTNOTIFY_NAME(err) +ITT_STUB(ITTAPI, const char*, api_version, (void)) +#define __itt_api_version ITTNOTIFY_DATA(api_version) +#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_api_version() (const char*)0 +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_PRIVATE_ */ + +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ |