diff options
Diffstat (limited to 'mozglue/baseprofiler/public/BaseProfilingStack.h')
-rw-r--r-- | mozglue/baseprofiler/public/BaseProfilingStack.h | 520 |
1 files changed, 520 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/public/BaseProfilingStack.h b/mozglue/baseprofiler/public/BaseProfilingStack.h new file mode 100644 index 0000000000..214fc1ebbf --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilingStack.h @@ -0,0 +1,520 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BaseProfilingStack_h +#define BaseProfilingStack_h + +#include "BaseProfilingCategory.h" + +#include "mozilla/Atomics.h" + +#include "BaseProfiler.h" + +#ifndef MOZ_GECKO_PROFILER +# error Do not #include this header when MOZ_GECKO_PROFILER is not #defined. +#endif + +#include <algorithm> +#include <stdint.h> + +// This file defines the classes ProfilingStack and ProfilingStackFrame. +// The ProfilingStack manages an array of ProfilingStackFrames. +// It keeps track of the "label stack" and the JS interpreter stack. +// The two stack types are interleaved. +// +// Usage: +// +// ProfilingStack* profilingStack = ...; +// +// // For label frames: +// profilingStack->pushLabelFrame(...); +// // Execute some code. When finished, pop the frame: +// profilingStack->pop(); +// +// // For JS stack frames: +// profilingStack->pushJSFrame(...); +// // Execute some code. When finished, pop the frame: +// profilingStack->pop(); +// +// +// Concurrency considerations +// +// A thread's profiling stack (and the frames inside it) is only modified by +// that thread. However, the profiling stack can be *read* by a different +// thread, the sampler thread: Whenever the profiler wants to sample a given +// thread A, the following happens: +// (1) Thread A is suspended. +// (2) The sampler thread (thread S) reads the ProfilingStack of thread A, +// including all ProfilingStackFrames that are currently in that stack +// (profilingStack->frames[0..profilingStack->stackSize()]). +// (3) Thread A is resumed. +// +// Thread suspension is achieved using platform-specific APIs; refer to each +// platform's Sampler::SuspendAndSampleAndResumeThread implementation in +// platform-*.cpp for details. +// +// When the thread is suspended, the values in profilingStack->stackPointer and +// in the stack frame range +// profilingStack->frames[0..profilingStack->stackPointer] need to be in a +// consistent state, so that thread S does not read partially- constructed stack +// frames. More specifically, we have two requirements: +// (1) When adding a new frame at the top of the stack, its ProfilingStackFrame +// data needs to be put in place *before* the stackPointer is incremented, +// and the compiler + CPU need to know that this order matters. +// (2) When popping an frame from the stack and then preparing the +// ProfilingStackFrame data for the next frame that is about to be pushed, +// the decrement of the stackPointer in pop() needs to happen *before* the +// ProfilingStackFrame for the new frame is being popuplated, and the +// compiler + CPU need to know that this order matters. +// +// We can express the relevance of these orderings in multiple ways. +// Option A is to make stackPointer an atomic with SequentiallyConsistent +// memory ordering. This would ensure that no writes in thread A would be +// reordered across any writes to stackPointer, which satisfies requirements +// (1) and (2) at the same time. Option A is the simplest. +// Option B is to use ReleaseAcquire memory ordering both for writes to +// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores +// ensure that all writes that happened *before this write in program order* are +// not reordered to happen after this write. ReleaseAcquire ordering places no +// requirements on the ordering of writes that happen *after* this write in +// program order. +// Using release-stores for writes to stackPointer expresses requirement (1), +// and using release-stores for writes to the ProfilingStackFrame fields +// expresses requirement (2). +// +// Option B is more complicated than option A, but has much better performance +// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching +// from option A to option B reduced the overhead of pushing+popping a +// ProfilingStackFrame by 10 nanoseconds. +// On x86/64, release-stores require no explicit hardware barriers or lock +// instructions. +// On ARM/64, option B may be slower than option A, because the compiler will +// generate hardware barriers for every single release-store instead of just +// for the writes to stackPointer. However, the actual performance impact of +// this has not yet been measured on ARM, so we're currently using option B +// everywhere. This is something that we may want to change in the future once +// we've done measurements. + +namespace mozilla { +namespace baseprofiler { + +// A call stack can be specified to the JS engine such that all JS entry/exits +// to functions push/pop a stack frame to/from the specified stack. +// +// For more detailed information, see vm/GeckoProfiler.h. +// +class ProfilingStackFrame { + // A ProfilingStackFrame represents either a label frame or a JS frame. + + // WARNING WARNING WARNING + // + // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so + // that writes to these fields are release-writes, which ensures that + // earlier writes in this thread don't get reordered after the writes to + // these fields. In particular, the decrement of the stack pointer in + // ProfilingStack::pop() is a write that *must* happen before the values in + // this ProfilingStackFrame are changed. Otherwise, the sampler thread might + // see an inconsistent state where the stack pointer still points to a + // ProfilingStackFrame which has already been popped off the stack and whose + // fields have now been partially repopulated with new values. + // See the "Concurrency considerations" paragraph at the top of this file + // for more details. + + // Descriptive label for this stack frame. Must be a static string! Can be + // an empty string, but not a null pointer. + Atomic<const char*, ReleaseAcquire> label_; + + // An additional descriptive string of this frame which is combined with + // |label_| in profiler output. Need not be (and usually isn't) static. Can + // be null. + Atomic<const char*, ReleaseAcquire> dynamicString_; + + // Stack pointer for non-JS stack frames, the script pointer otherwise. + Atomic<void*, ReleaseAcquire> spOrScript; + + // ID of the JS Realm for JS stack frames. + // Must not be used on non-JS frames; it'll contain either the default 0, + // or a leftover value from a previous JS stack frame that was using this + // ProfilingStackFrame object. + mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_; + + // The bytecode offset for JS stack frames. + // Must not be used on non-JS frames; it'll contain either the default 0, + // or a leftover value from a previous JS stack frame that was using this + // ProfilingStackFrame object. + Atomic<int32_t, ReleaseAcquire> pcOffsetIfJS_; + + // Bits 0...8 hold the Flags. Bits 9...31 hold the category pair. + Atomic<uint32_t, ReleaseAcquire> flagsAndCategoryPair_; + + public: + ProfilingStackFrame() = default; + ProfilingStackFrame& operator=(const ProfilingStackFrame& other) { + label_ = other.label(); + dynamicString_ = other.dynamicString(); + void* spScript = other.spOrScript; + spOrScript = spScript; + int32_t offsetIfJS = other.pcOffsetIfJS_; + pcOffsetIfJS_ = offsetIfJS; + int64_t realmID = other.realmID_; + realmID_ = realmID; + uint32_t flagsAndCategory = other.flagsAndCategoryPair_; + flagsAndCategoryPair_ = flagsAndCategory; + return *this; + } + + // Reserve up to 16 bits for flags, and 16 for category pair. + enum class Flags : uint32_t { + // The first three flags describe the kind of the frame and are + // mutually exclusive. (We still give them individual bits for + // simplicity.) + + // A regular label frame. These usually come from AutoProfilerLabel. + IS_LABEL_FRAME = 1 << 0, + + // A special frame indicating the start of a run of JS profiling stack + // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp + // field. These frames are needed to get correct ordering between JS + // and LABEL frames because JS frames don't carry sp information. + // SP is short for "stack pointer". + IS_SP_MARKER_FRAME = 1 << 1, + + // A JS frame. + IS_JS_FRAME = 1 << 2, + + // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME + // frames can have this flag set and unset during their lifetime. + // JS_OSR frames are ignored. + JS_OSR = 1 << 3, + + // The next three are mutually exclusive. + // By default, for profiling stack frames that have both a label and a + // dynamic string, the two strings are combined into one string of the + // form "<label> <dynamicString>" during JSON serialization. The + // following flags can be used to change this preset. + STRING_TEMPLATE_METHOD = 1 << 4, // "<label>.<dynamicString>" + STRING_TEMPLATE_GETTER = 1 << 5, // "get <label>.<dynamicString>" + STRING_TEMPLATE_SETTER = 1 << 6, // "set <label>.<dynamicString>" + + // If set, causes this stack frame to be marked as "relevantForJS" in + // the profile JSON, which will make it show up in the "JS only" call + // tree view. + RELEVANT_FOR_JS = 1 << 7, + + // If set, causes the label on this ProfilingStackFrame to be ignored + // and to be replaced by the subcategory's label. + LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8, + + // Frame dynamic string does not contain user data. + NONSENSITIVE = 1 << 9, + + // A JS Baseline Interpreter frame. + IS_BLINTERP_FRAME = 1 << 10, + + FLAGS_BITCOUNT = 16, + FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1 + }; + + static_assert( + uint32_t(ProfilingCategoryPair::LAST) <= + (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)), + "Too many category pairs to fit into u32 with together with the " + "reserved bits for the flags"); + + bool isLabelFrame() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME); + } + + bool isSpMarkerFrame() const { + return uint32_t(flagsAndCategoryPair_) & + uint32_t(Flags::IS_SP_MARKER_FRAME); + } + + bool isJsFrame() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME); + } + + bool isOSRFrame() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR); + } + + void setIsOSRFrame(bool isOSR) { + if (isOSR) { + flagsAndCategoryPair_ = + uint32_t(flagsAndCategoryPair_) | uint32_t(Flags::JS_OSR); + } else { + flagsAndCategoryPair_ = + uint32_t(flagsAndCategoryPair_) & ~uint32_t(Flags::JS_OSR); + } + } + + const char* label() const { + uint32_t flagsAndCategoryPair = flagsAndCategoryPair_; + if (flagsAndCategoryPair & + uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) { + auto categoryPair = ProfilingCategoryPair( + flagsAndCategoryPair >> uint32_t(Flags::FLAGS_BITCOUNT)); + return GetProfilingCategoryPairInfo(categoryPair).mLabel; + } + return label_; + } + + const char* dynamicString() const { return dynamicString_; } + + void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp, + ProfilingCategoryPair aCategoryPair, uint32_t aFlags) { + label_ = aLabel; + dynamicString_ = aDynamicString; + spOrScript = sp; + // pcOffsetIfJS_ is not set and must not be used on label frames. + flagsAndCategoryPair_ = + uint32_t(Flags::IS_LABEL_FRAME) | + (uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | aFlags; + MOZ_ASSERT(isLabelFrame()); + } + + void initSpMarkerFrame(void* sp) { + label_ = ""; + dynamicString_ = nullptr; + spOrScript = sp; + // pcOffsetIfJS_ is not set and must not be used on sp marker frames. + flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) | + (uint32_t(ProfilingCategoryPair::OTHER) + << uint32_t(Flags::FLAGS_BITCOUNT)); + MOZ_ASSERT(isSpMarkerFrame()); + } + + void initJsFrame(const char* aLabel, const char* aDynamicString, + void* /* JSScript* */ aScript, int32_t aOffset, + uint64_t aRealmID) { + label_ = aLabel; + dynamicString_ = aDynamicString; + spOrScript = aScript; + pcOffsetIfJS_ = aOffset; + realmID_ = aRealmID; + flagsAndCategoryPair_ = + uint32_t(Flags::IS_JS_FRAME) | (uint32_t(ProfilingCategoryPair::JS) + << uint32_t(Flags::FLAGS_BITCOUNT)); + MOZ_ASSERT(isJsFrame()); + } + + uint32_t flags() const { + return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK); + } + + ProfilingCategoryPair categoryPair() const { + return ProfilingCategoryPair(flagsAndCategoryPair_ >> + uint32_t(Flags::FLAGS_BITCOUNT)); + } + + uint64_t realmID() const { return realmID_; } + + void* stackAddress() const { + MOZ_ASSERT(!isJsFrame()); + return spOrScript; + } + + // Note that the pointer returned might be invalid. + void* rawScript() const { + MOZ_ASSERT(isJsFrame()); + return spOrScript; + } + void setRawScript(void* aScript) { + MOZ_ASSERT(isJsFrame()); + spOrScript = aScript; + } + + int32_t pcOffset() const { + MOZ_ASSERT(isJsFrame()); + return pcOffsetIfJS_; + } + + void setPCOffset(int32_t aOffset) { + MOZ_ASSERT(isJsFrame()); + pcOffsetIfJS_ = aOffset; + } + + // The offset of a pc into a script's code can actually be 0, so to + // signify a nullptr pc, use a -1 index. This is checked against in + // pc() and setPC() to set/get the right pc. + static const int32_t NullPCOffset = -1; +}; + +// Each thread has its own ProfilingStack. That thread modifies the +// ProfilingStack, pushing and popping elements as necessary. +// +// The ProfilingStack is also read periodically by the profiler's sampler +// thread. This happens only when the thread that owns the ProfilingStack is +// suspended. So there are no genuine parallel accesses. +// +// However, it is possible for pushing/popping to be interrupted by a periodic +// sample. Because of this, we need pushing/popping to be effectively atomic. +// +// - When pushing a new frame, we increment the stack pointer -- making the new +// frame visible to the sampler thread -- only after the new frame has been +// fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so +// the increment is a release-store, which ensures that this store is not +// reordered before the writes of the frame. +// +// - When popping an old frame, the only operation is the decrementing of the +// stack pointer, which is obviously atomic. +// +class ProfilingStack final { + public: + ProfilingStack() = default; + + MFBT_API ~ProfilingStack(); + + void pushLabelFrame(const char* label, const char* dynamicString, void* sp, + ProfilingCategoryPair categoryPair, uint32_t flags = 0) { + // This thread is the only one that ever changes the value of + // stackPointer. + // Store the value of the atomic in a non-atomic local variable so that + // the compiler won't generate two separate loads from the atomic for + // the size check and the frames[] array indexing operation. + uint32_t stackPointerVal = stackPointer; + + if (MOZ_UNLIKELY(stackPointerVal >= capacity)) { + ensureCapacitySlow(); + } + frames[stackPointerVal].initLabelFrame(label, dynamicString, sp, + categoryPair, flags); + + // This must happen at the end! The compiler will not reorder this + // update because stackPointer is Atomic<..., ReleaseAcquire>, so any + // the writes above will not be reordered below the stackPointer store. + // Do the read and the write as two separate statements, in order to + // make it clear that we don't need an atomic increment, which would be + // more expensive on x86 than the separate operations done here. + // However, don't use stackPointerVal here; instead, allow the compiler + // to turn this store into a non-atomic increment instruction which + // takes up less code size. + stackPointer = stackPointer + 1; + } + + void pushSpMarkerFrame(void* sp) { + uint32_t oldStackPointer = stackPointer; + + if (MOZ_UNLIKELY(oldStackPointer >= capacity)) { + ensureCapacitySlow(); + } + frames[oldStackPointer].initSpMarkerFrame(sp); + + // This must happen at the end, see the comment in pushLabelFrame. + stackPointer = oldStackPointer + 1; + } + + void pushJsOffsetFrame(const char* label, const char* dynamicString, + void* script, int32_t offset, uint64_t aRealmID) { + // This thread is the only one that ever changes the value of + // stackPointer. Only load the atomic once. + uint32_t oldStackPointer = stackPointer; + + if (MOZ_UNLIKELY(oldStackPointer >= capacity)) { + ensureCapacitySlow(); + } + frames[oldStackPointer].initJsFrame(label, dynamicString, script, offset, + aRealmID); + + // This must happen at the end, see the comment in pushLabelFrame. + stackPointer = stackPointer + 1; + } + + void pop() { + MOZ_ASSERT(stackPointer > 0); + // Do the read and the write as two separate statements, in order to + // make it clear that we don't need an atomic decrement, which would be + // more expensive on x86 than the separate operations done here. + // This thread is the only one that ever changes the value of + // stackPointer. + uint32_t oldStackPointer = stackPointer; + stackPointer = oldStackPointer - 1; + } + + uint32_t stackSize() const { return stackPointer; } + uint32_t stackCapacity() const { return capacity; } + + private: + // Out of line path for expanding the buffer, since otherwise this would get + // inlined in every DOM WebIDL call. + MFBT_API MOZ_COLD void ensureCapacitySlow(); + + // No copying. + ProfilingStack(const ProfilingStack&) = delete; + void operator=(const ProfilingStack&) = delete; + + // No moving either. + ProfilingStack(ProfilingStack&&) = delete; + void operator=(ProfilingStack&&) = delete; + + uint32_t capacity = 0; + + public: + // The pointer to the stack frames, this is read from the profiler thread and + // written from the current thread. + // + // This is effectively a unique pointer. + Atomic<ProfilingStackFrame*, SequentiallyConsistent> frames{nullptr}; + + // This may exceed the capacity, so instead use the stackSize() method to + // determine the number of valid frames in stackFrames. When this is less + // than stackCapacity(), it refers to the first free stackframe past the top + // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack + // frame). + // + // WARNING WARNING WARNING + // + // This is an atomic variable that uses ReleaseAcquire memory ordering. + // See the "Concurrency considerations" paragraph at the top of this file + // for more details. + Atomic<uint32_t, ReleaseAcquire> stackPointer{0}; +}; + +class AutoGeckoProfilerEntry; +class GeckoProfilerEntryMarker; +class GeckoProfilerBaselineOSRMarker; + +class GeckoProfilerThread { + friend class AutoGeckoProfilerEntry; + friend class GeckoProfilerEntryMarker; + friend class GeckoProfilerBaselineOSRMarker; + + ProfilingStack* profilingStack_; + + // Same as profilingStack_ if the profiler is currently active, otherwise + // null. + ProfilingStack* profilingStackIfEnabled_; + + public: + MFBT_API GeckoProfilerThread(); + + uint32_t stackPointer() { + MOZ_ASSERT(infraInstalled()); + return profilingStack_->stackPointer; + } + ProfilingStackFrame* stack() { return profilingStack_->frames; } + ProfilingStack* getProfilingStack() { return profilingStack_; } + ProfilingStack* getProfilingStackIfEnabled() { + return profilingStackIfEnabled_; + } + + /* + * True if the profiler infrastructure is setup. Should be true in builds + * that include profiler support except during early startup or late + * shutdown. Unrelated to the presence of the Gecko Profiler addon. + */ + bool infraInstalled() { return profilingStack_ != nullptr; } + + MFBT_API void setProfilingStack(ProfilingStack* profilingStack, bool enabled); + void enable(bool enable) { + profilingStackIfEnabled_ = enable ? profilingStack_ : nullptr; + } +}; + +} // namespace baseprofiler +} // namespace mozilla + +#endif /* BaseProfilingStack_h */ |