summaryrefslogtreecommitdiffstats
path: root/mozglue/baseprofiler/public/BaseProfilingStack.h
diff options
context:
space:
mode:
Diffstat (limited to 'mozglue/baseprofiler/public/BaseProfilingStack.h')
-rw-r--r--mozglue/baseprofiler/public/BaseProfilingStack.h520
1 files changed, 520 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/public/BaseProfilingStack.h b/mozglue/baseprofiler/public/BaseProfilingStack.h
new file mode 100644
index 0000000000..214fc1ebbf
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingStack.h
@@ -0,0 +1,520 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingStack_h
+#define BaseProfilingStack_h
+
+#include "BaseProfilingCategory.h"
+
+#include "mozilla/Atomics.h"
+
+#include "BaseProfiler.h"
+
+#ifndef MOZ_GECKO_PROFILER
+# error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include <algorithm>
+#include <stdint.h>
+
+// This file defines the classes ProfilingStack and ProfilingStackFrame.
+// The ProfilingStack manages an array of ProfilingStackFrames.
+// It keeps track of the "label stack" and the JS interpreter stack.
+// The two stack types are interleaved.
+//
+// Usage:
+//
+// ProfilingStack* profilingStack = ...;
+//
+// // For label frames:
+// profilingStack->pushLabelFrame(...);
+// // Execute some code. When finished, pop the frame:
+// profilingStack->pop();
+//
+// // For JS stack frames:
+// profilingStack->pushJSFrame(...);
+// // Execute some code. When finished, pop the frame:
+// profilingStack->pop();
+//
+//
+// Concurrency considerations
+//
+// A thread's profiling stack (and the frames inside it) is only modified by
+// that thread. However, the profiling stack can be *read* by a different
+// thread, the sampler thread: Whenever the profiler wants to sample a given
+// thread A, the following happens:
+// (1) Thread A is suspended.
+// (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
+// including all ProfilingStackFrames that are currently in that stack
+// (profilingStack->frames[0..profilingStack->stackSize()]).
+// (3) Thread A is resumed.
+//
+// Thread suspension is achieved using platform-specific APIs; refer to each
+// platform's Sampler::SuspendAndSampleAndResumeThread implementation in
+// platform-*.cpp for details.
+//
+// When the thread is suspended, the values in profilingStack->stackPointer and
+// in the stack frame range
+// profilingStack->frames[0..profilingStack->stackPointer] need to be in a
+// consistent state, so that thread S does not read partially- constructed stack
+// frames. More specifically, we have two requirements:
+// (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
+// data needs to be put in place *before* the stackPointer is incremented,
+// and the compiler + CPU need to know that this order matters.
+// (2) When popping an frame from the stack and then preparing the
+// ProfilingStackFrame data for the next frame that is about to be pushed,
+// the decrement of the stackPointer in pop() needs to happen *before* the
+// ProfilingStackFrame for the new frame is being popuplated, and the
+// compiler + CPU need to know that this order matters.
+//
+// We can express the relevance of these orderings in multiple ways.
+// Option A is to make stackPointer an atomic with SequentiallyConsistent
+// memory ordering. This would ensure that no writes in thread A would be
+// reordered across any writes to stackPointer, which satisfies requirements
+// (1) and (2) at the same time. Option A is the simplest.
+// Option B is to use ReleaseAcquire memory ordering both for writes to
+// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
+// ensure that all writes that happened *before this write in program order* are
+// not reordered to happen after this write. ReleaseAcquire ordering places no
+// requirements on the ordering of writes that happen *after* this write in
+// program order.
+// Using release-stores for writes to stackPointer expresses requirement (1),
+// and using release-stores for writes to the ProfilingStackFrame fields
+// expresses requirement (2).
+//
+// Option B is more complicated than option A, but has much better performance
+// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
+// from option A to option B reduced the overhead of pushing+popping a
+// ProfilingStackFrame by 10 nanoseconds.
+// On x86/64, release-stores require no explicit hardware barriers or lock
+// instructions.
+// On ARM/64, option B may be slower than option A, because the compiler will
+// generate hardware barriers for every single release-store instead of just
+// for the writes to stackPointer. However, the actual performance impact of
+// this has not yet been measured on ARM, so we're currently using option B
+// everywhere. This is something that we may want to change in the future once
+// we've done measurements.
+
+namespace mozilla {
+namespace baseprofiler {
+
+// A call stack can be specified to the JS engine such that all JS entry/exits
+// to functions push/pop a stack frame to/from the specified stack.
+//
+// For more detailed information, see vm/GeckoProfiler.h.
+//
+class ProfilingStackFrame {
+ // A ProfilingStackFrame represents either a label frame or a JS frame.
+
+ // WARNING WARNING WARNING
+ //
+ // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
+ // that writes to these fields are release-writes, which ensures that
+ // earlier writes in this thread don't get reordered after the writes to
+ // these fields. In particular, the decrement of the stack pointer in
+ // ProfilingStack::pop() is a write that *must* happen before the values in
+ // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
+ // see an inconsistent state where the stack pointer still points to a
+ // ProfilingStackFrame which has already been popped off the stack and whose
+ // fields have now been partially repopulated with new values.
+ // See the "Concurrency considerations" paragraph at the top of this file
+ // for more details.
+
+ // Descriptive label for this stack frame. Must be a static string! Can be
+ // an empty string, but not a null pointer.
+ Atomic<const char*, ReleaseAcquire> label_;
+
+ // An additional descriptive string of this frame which is combined with
+ // |label_| in profiler output. Need not be (and usually isn't) static. Can
+ // be null.
+ Atomic<const char*, ReleaseAcquire> dynamicString_;
+
+ // Stack pointer for non-JS stack frames, the script pointer otherwise.
+ Atomic<void*, ReleaseAcquire> spOrScript;
+
+ // ID of the JS Realm for JS stack frames.
+ // Must not be used on non-JS frames; it'll contain either the default 0,
+ // or a leftover value from a previous JS stack frame that was using this
+ // ProfilingStackFrame object.
+ mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_;
+
+ // The bytecode offset for JS stack frames.
+ // Must not be used on non-JS frames; it'll contain either the default 0,
+ // or a leftover value from a previous JS stack frame that was using this
+ // ProfilingStackFrame object.
+ Atomic<int32_t, ReleaseAcquire> pcOffsetIfJS_;
+
+ // Bits 0...8 hold the Flags. Bits 9...31 hold the category pair.
+ Atomic<uint32_t, ReleaseAcquire> flagsAndCategoryPair_;
+
+ public:
+ ProfilingStackFrame() = default;
+ ProfilingStackFrame& operator=(const ProfilingStackFrame& other) {
+ label_ = other.label();
+ dynamicString_ = other.dynamicString();
+ void* spScript = other.spOrScript;
+ spOrScript = spScript;
+ int32_t offsetIfJS = other.pcOffsetIfJS_;
+ pcOffsetIfJS_ = offsetIfJS;
+ int64_t realmID = other.realmID_;
+ realmID_ = realmID;
+ uint32_t flagsAndCategory = other.flagsAndCategoryPair_;
+ flagsAndCategoryPair_ = flagsAndCategory;
+ return *this;
+ }
+
+ // Reserve up to 16 bits for flags, and 16 for category pair.
+ enum class Flags : uint32_t {
+ // The first three flags describe the kind of the frame and are
+ // mutually exclusive. (We still give them individual bits for
+ // simplicity.)
+
+ // A regular label frame. These usually come from AutoProfilerLabel.
+ IS_LABEL_FRAME = 1 << 0,
+
+ // A special frame indicating the start of a run of JS profiling stack
+ // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp
+ // field. These frames are needed to get correct ordering between JS
+ // and LABEL frames because JS frames don't carry sp information.
+ // SP is short for "stack pointer".
+ IS_SP_MARKER_FRAME = 1 << 1,
+
+ // A JS frame.
+ IS_JS_FRAME = 1 << 2,
+
+ // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME
+ // frames can have this flag set and unset during their lifetime.
+ // JS_OSR frames are ignored.
+ JS_OSR = 1 << 3,
+
+ // The next three are mutually exclusive.
+ // By default, for profiling stack frames that have both a label and a
+ // dynamic string, the two strings are combined into one string of the
+ // form "<label> <dynamicString>" during JSON serialization. The
+ // following flags can be used to change this preset.
+ STRING_TEMPLATE_METHOD = 1 << 4, // "<label>.<dynamicString>"
+ STRING_TEMPLATE_GETTER = 1 << 5, // "get <label>.<dynamicString>"
+ STRING_TEMPLATE_SETTER = 1 << 6, // "set <label>.<dynamicString>"
+
+ // If set, causes this stack frame to be marked as "relevantForJS" in
+ // the profile JSON, which will make it show up in the "JS only" call
+ // tree view.
+ RELEVANT_FOR_JS = 1 << 7,
+
+ // If set, causes the label on this ProfilingStackFrame to be ignored
+ // and to be replaced by the subcategory's label.
+ LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8,
+
+ // Frame dynamic string does not contain user data.
+ NONSENSITIVE = 1 << 9,
+
+ // A JS Baseline Interpreter frame.
+ IS_BLINTERP_FRAME = 1 << 10,
+
+ FLAGS_BITCOUNT = 16,
+ FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1
+ };
+
+ static_assert(
+ uint32_t(ProfilingCategoryPair::LAST) <=
+ (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)),
+ "Too many category pairs to fit into u32 with together with the "
+ "reserved bits for the flags");
+
+ bool isLabelFrame() const {
+ return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME);
+ }
+
+ bool isSpMarkerFrame() const {
+ return uint32_t(flagsAndCategoryPair_) &
+ uint32_t(Flags::IS_SP_MARKER_FRAME);
+ }
+
+ bool isJsFrame() const {
+ return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME);
+ }
+
+ bool isOSRFrame() const {
+ return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR);
+ }
+
+ void setIsOSRFrame(bool isOSR) {
+ if (isOSR) {
+ flagsAndCategoryPair_ =
+ uint32_t(flagsAndCategoryPair_) | uint32_t(Flags::JS_OSR);
+ } else {
+ flagsAndCategoryPair_ =
+ uint32_t(flagsAndCategoryPair_) & ~uint32_t(Flags::JS_OSR);
+ }
+ }
+
+ const char* label() const {
+ uint32_t flagsAndCategoryPair = flagsAndCategoryPair_;
+ if (flagsAndCategoryPair &
+ uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) {
+ auto categoryPair = ProfilingCategoryPair(
+ flagsAndCategoryPair >> uint32_t(Flags::FLAGS_BITCOUNT));
+ return GetProfilingCategoryPairInfo(categoryPair).mLabel;
+ }
+ return label_;
+ }
+
+ const char* dynamicString() const { return dynamicString_; }
+
+ void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
+ ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {
+ label_ = aLabel;
+ dynamicString_ = aDynamicString;
+ spOrScript = sp;
+ // pcOffsetIfJS_ is not set and must not be used on label frames.
+ flagsAndCategoryPair_ =
+ uint32_t(Flags::IS_LABEL_FRAME) |
+ (uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | aFlags;
+ MOZ_ASSERT(isLabelFrame());
+ }
+
+ void initSpMarkerFrame(void* sp) {
+ label_ = "";
+ dynamicString_ = nullptr;
+ spOrScript = sp;
+ // pcOffsetIfJS_ is not set and must not be used on sp marker frames.
+ flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) |
+ (uint32_t(ProfilingCategoryPair::OTHER)
+ << uint32_t(Flags::FLAGS_BITCOUNT));
+ MOZ_ASSERT(isSpMarkerFrame());
+ }
+
+ void initJsFrame(const char* aLabel, const char* aDynamicString,
+ void* /* JSScript* */ aScript, int32_t aOffset,
+ uint64_t aRealmID) {
+ label_ = aLabel;
+ dynamicString_ = aDynamicString;
+ spOrScript = aScript;
+ pcOffsetIfJS_ = aOffset;
+ realmID_ = aRealmID;
+ flagsAndCategoryPair_ =
+ uint32_t(Flags::IS_JS_FRAME) | (uint32_t(ProfilingCategoryPair::JS)
+ << uint32_t(Flags::FLAGS_BITCOUNT));
+ MOZ_ASSERT(isJsFrame());
+ }
+
+ uint32_t flags() const {
+ return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK);
+ }
+
+ ProfilingCategoryPair categoryPair() const {
+ return ProfilingCategoryPair(flagsAndCategoryPair_ >>
+ uint32_t(Flags::FLAGS_BITCOUNT));
+ }
+
+ uint64_t realmID() const { return realmID_; }
+
+ void* stackAddress() const {
+ MOZ_ASSERT(!isJsFrame());
+ return spOrScript;
+ }
+
+ // Note that the pointer returned might be invalid.
+ void* rawScript() const {
+ MOZ_ASSERT(isJsFrame());
+ return spOrScript;
+ }
+ void setRawScript(void* aScript) {
+ MOZ_ASSERT(isJsFrame());
+ spOrScript = aScript;
+ }
+
+ int32_t pcOffset() const {
+ MOZ_ASSERT(isJsFrame());
+ return pcOffsetIfJS_;
+ }
+
+ void setPCOffset(int32_t aOffset) {
+ MOZ_ASSERT(isJsFrame());
+ pcOffsetIfJS_ = aOffset;
+ }
+
+ // The offset of a pc into a script's code can actually be 0, so to
+ // signify a nullptr pc, use a -1 index. This is checked against in
+ // pc() and setPC() to set/get the right pc.
+ static const int32_t NullPCOffset = -1;
+};
+
+// Each thread has its own ProfilingStack. That thread modifies the
+// ProfilingStack, pushing and popping elements as necessary.
+//
+// The ProfilingStack is also read periodically by the profiler's sampler
+// thread. This happens only when the thread that owns the ProfilingStack is
+// suspended. So there are no genuine parallel accesses.
+//
+// However, it is possible for pushing/popping to be interrupted by a periodic
+// sample. Because of this, we need pushing/popping to be effectively atomic.
+//
+// - When pushing a new frame, we increment the stack pointer -- making the new
+// frame visible to the sampler thread -- only after the new frame has been
+// fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
+// the increment is a release-store, which ensures that this store is not
+// reordered before the writes of the frame.
+//
+// - When popping an old frame, the only operation is the decrementing of the
+// stack pointer, which is obviously atomic.
+//
+class ProfilingStack final {
+ public:
+ ProfilingStack() = default;
+
+ MFBT_API ~ProfilingStack();
+
+ void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
+ ProfilingCategoryPair categoryPair, uint32_t flags = 0) {
+ // This thread is the only one that ever changes the value of
+ // stackPointer.
+ // Store the value of the atomic in a non-atomic local variable so that
+ // the compiler won't generate two separate loads from the atomic for
+ // the size check and the frames[] array indexing operation.
+ uint32_t stackPointerVal = stackPointer;
+
+ if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
+ ensureCapacitySlow();
+ }
+ frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
+ categoryPair, flags);
+
+ // This must happen at the end! The compiler will not reorder this
+ // update because stackPointer is Atomic<..., ReleaseAcquire>, so any
+ // the writes above will not be reordered below the stackPointer store.
+ // Do the read and the write as two separate statements, in order to
+ // make it clear that we don't need an atomic increment, which would be
+ // more expensive on x86 than the separate operations done here.
+ // However, don't use stackPointerVal here; instead, allow the compiler
+ // to turn this store into a non-atomic increment instruction which
+ // takes up less code size.
+ stackPointer = stackPointer + 1;
+ }
+
+ void pushSpMarkerFrame(void* sp) {
+ uint32_t oldStackPointer = stackPointer;
+
+ if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
+ ensureCapacitySlow();
+ }
+ frames[oldStackPointer].initSpMarkerFrame(sp);
+
+ // This must happen at the end, see the comment in pushLabelFrame.
+ stackPointer = oldStackPointer + 1;
+ }
+
+ void pushJsOffsetFrame(const char* label, const char* dynamicString,
+ void* script, int32_t offset, uint64_t aRealmID) {
+ // This thread is the only one that ever changes the value of
+ // stackPointer. Only load the atomic once.
+ uint32_t oldStackPointer = stackPointer;
+
+ if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
+ ensureCapacitySlow();
+ }
+ frames[oldStackPointer].initJsFrame(label, dynamicString, script, offset,
+ aRealmID);
+
+ // This must happen at the end, see the comment in pushLabelFrame.
+ stackPointer = stackPointer + 1;
+ }
+
+ void pop() {
+ MOZ_ASSERT(stackPointer > 0);
+ // Do the read and the write as two separate statements, in order to
+ // make it clear that we don't need an atomic decrement, which would be
+ // more expensive on x86 than the separate operations done here.
+ // This thread is the only one that ever changes the value of
+ // stackPointer.
+ uint32_t oldStackPointer = stackPointer;
+ stackPointer = oldStackPointer - 1;
+ }
+
+ uint32_t stackSize() const { return stackPointer; }
+ uint32_t stackCapacity() const { return capacity; }
+
+ private:
+ // Out of line path for expanding the buffer, since otherwise this would get
+ // inlined in every DOM WebIDL call.
+ MFBT_API MOZ_COLD void ensureCapacitySlow();
+
+ // No copying.
+ ProfilingStack(const ProfilingStack&) = delete;
+ void operator=(const ProfilingStack&) = delete;
+
+ // No moving either.
+ ProfilingStack(ProfilingStack&&) = delete;
+ void operator=(ProfilingStack&&) = delete;
+
+ uint32_t capacity = 0;
+
+ public:
+ // The pointer to the stack frames, this is read from the profiler thread and
+ // written from the current thread.
+ //
+ // This is effectively a unique pointer.
+ Atomic<ProfilingStackFrame*, SequentiallyConsistent> frames{nullptr};
+
+ // This may exceed the capacity, so instead use the stackSize() method to
+ // determine the number of valid frames in stackFrames. When this is less
+ // than stackCapacity(), it refers to the first free stackframe past the top
+ // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack
+ // frame).
+ //
+ // WARNING WARNING WARNING
+ //
+ // This is an atomic variable that uses ReleaseAcquire memory ordering.
+ // See the "Concurrency considerations" paragraph at the top of this file
+ // for more details.
+ Atomic<uint32_t, ReleaseAcquire> stackPointer{0};
+};
+
+class AutoGeckoProfilerEntry;
+class GeckoProfilerEntryMarker;
+class GeckoProfilerBaselineOSRMarker;
+
+class GeckoProfilerThread {
+ friend class AutoGeckoProfilerEntry;
+ friend class GeckoProfilerEntryMarker;
+ friend class GeckoProfilerBaselineOSRMarker;
+
+ ProfilingStack* profilingStack_;
+
+ // Same as profilingStack_ if the profiler is currently active, otherwise
+ // null.
+ ProfilingStack* profilingStackIfEnabled_;
+
+ public:
+ MFBT_API GeckoProfilerThread();
+
+ uint32_t stackPointer() {
+ MOZ_ASSERT(infraInstalled());
+ return profilingStack_->stackPointer;
+ }
+ ProfilingStackFrame* stack() { return profilingStack_->frames; }
+ ProfilingStack* getProfilingStack() { return profilingStack_; }
+ ProfilingStack* getProfilingStackIfEnabled() {
+ return profilingStackIfEnabled_;
+ }
+
+ /*
+ * True if the profiler infrastructure is setup. Should be true in builds
+ * that include profiler support except during early startup or late
+ * shutdown. Unrelated to the presence of the Gecko Profiler addon.
+ */
+ bool infraInstalled() { return profilingStack_ != nullptr; }
+
+ MFBT_API void setProfilingStack(ProfilingStack* profilingStack, bool enabled);
+ void enable(bool enable) {
+ profilingStackIfEnabled_ = enable ? profilingStack_ : nullptr;
+ }
+};
+
+} // namespace baseprofiler
+} // namespace mozilla
+
+#endif /* BaseProfilingStack_h */