/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef BaseProfilingStack_h #define BaseProfilingStack_h #include "BaseProfilingCategory.h" #include "mozilla/Atomics.h" #include "BaseProfiler.h" #ifndef MOZ_GECKO_PROFILER # error Do not #include this header when MOZ_GECKO_PROFILER is not #defined. #endif #include #include // This file defines the classes ProfilingStack and ProfilingStackFrame. // The ProfilingStack manages an array of ProfilingStackFrames. // It keeps track of the "label stack" and the JS interpreter stack. // The two stack types are interleaved. // // Usage: // // ProfilingStack* profilingStack = ...; // // // For label frames: // profilingStack->pushLabelFrame(...); // // Execute some code. When finished, pop the frame: // profilingStack->pop(); // // // For JS stack frames: // profilingStack->pushJSFrame(...); // // Execute some code. When finished, pop the frame: // profilingStack->pop(); // // // Concurrency considerations // // A thread's profiling stack (and the frames inside it) is only modified by // that thread. However, the profiling stack can be *read* by a different // thread, the sampler thread: Whenever the profiler wants to sample a given // thread A, the following happens: // (1) Thread A is suspended. // (2) The sampler thread (thread S) reads the ProfilingStack of thread A, // including all ProfilingStackFrames that are currently in that stack // (profilingStack->frames[0..profilingStack->stackSize()]). // (3) Thread A is resumed. // // Thread suspension is achieved using platform-specific APIs; refer to each // platform's Sampler::SuspendAndSampleAndResumeThread implementation in // platform-*.cpp for details. 
// // When the thread is suspended, the values in profilingStack->stackPointer and // in the stack frame range // profilingStack->frames[0..profilingStack->stackPointer] need to be in a // consistent state, so that thread S does not read partially-constructed stack // frames. More specifically, we have two requirements: // (1) When adding a new frame at the top of the stack, its ProfilingStackFrame // data needs to be put in place *before* the stackPointer is incremented, // and the compiler + CPU need to know that this order matters. // (2) When popping a frame from the stack and then preparing the // ProfilingStackFrame data for the next frame that is about to be pushed, // the decrement of the stackPointer in pop() needs to happen *before* the // ProfilingStackFrame for the new frame is being populated, and the // compiler + CPU need to know that this order matters. // // We can express the relevance of these orderings in multiple ways. // Option A is to make stackPointer an atomic with SequentiallyConsistent // memory ordering. This would ensure that no writes in thread A would be // reordered across any writes to stackPointer, which satisfies requirements // (1) and (2) at the same time. Option A is the simplest. // Option B is to use ReleaseAcquire memory ordering both for writes to // stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores // ensure that all writes that happened *before this write in program order* are // not reordered to happen after this write. ReleaseAcquire ordering places no // requirements on the ordering of writes that happen *after* this write in // program order. // Using release-stores for writes to stackPointer expresses requirement (1), // and using release-stores for writes to the ProfilingStackFrame fields // expresses requirement (2). 
// // Option B is more complicated than option A, but has much better performance // on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching // from option A to option B reduced the overhead of pushing+popping a // ProfilingStackFrame by 10 nanoseconds. // On x86/64, release-stores require no explicit hardware barriers or lock // instructions. // On ARM/64, option B may be slower than option A, because the compiler will // generate hardware barriers for every single release-store instead of just // for the writes to stackPointer. However, the actual performance impact of // this has not yet been measured on ARM, so we're currently using option B // everywhere. This is something that we may want to change in the future once // we've done measurements. namespace mozilla { namespace baseprofiler { // A call stack can be specified to the JS engine such that all JS entry/exits // to functions push/pop a stack frame to/from the specified stack. // // For more detailed information, see vm/GeckoProfiler.h. // class ProfilingStackFrame { // A ProfilingStackFrame represents either a label frame or a JS frame. // WARNING WARNING WARNING // // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so // that writes to these fields are release-writes, which ensures that // earlier writes in this thread don't get reordered after the writes to // these fields. In particular, the decrement of the stack pointer in // ProfilingStack::pop() is a write that *must* happen before the values in // this ProfilingStackFrame are changed. Otherwise, the sampler thread might // see an inconsistent state where the stack pointer still points to a // ProfilingStackFrame which has already been popped off the stack and whose // fields have now been partially repopulated with new values. // See the "Concurrency considerations" paragraph at the top of this file // for more details. // Descriptive label for this stack frame. Must be a static string! 
Can be // an empty string, but not a null pointer. Atomic label_; // An additional descriptive string of this frame which is combined with // |label_| in profiler output. Need not be (and usually isn't) static. Can // be null. Atomic dynamicString_; // Stack pointer for non-JS stack frames, the script pointer otherwise. Atomic spOrScript; // ID of the JS Realm for JS stack frames. // Must not be used on non-JS frames; it'll contain either the default 0, // or a leftover value from a previous JS stack frame that was using this // ProfilingStackFrame object. mozilla::Atomic realmID_; // The bytecode offset for JS stack frames. // Must not be used on non-JS frames; it'll contain either the default 0, // or a leftover value from a previous JS stack frame that was using this // ProfilingStackFrame object. Atomic pcOffsetIfJS_; // Bits 0...8 hold the Flags. Bits 9...31 hold the category pair. Atomic flagsAndCategoryPair_; public: ProfilingStackFrame() = default; ProfilingStackFrame& operator=(const ProfilingStackFrame& other) { label_ = other.label(); dynamicString_ = other.dynamicString(); void* spScript = other.spOrScript; spOrScript = spScript; int32_t offsetIfJS = other.pcOffsetIfJS_; pcOffsetIfJS_ = offsetIfJS; int64_t realmID = other.realmID_; realmID_ = realmID; uint32_t flagsAndCategory = other.flagsAndCategoryPair_; flagsAndCategoryPair_ = flagsAndCategory; return *this; } // Reserve up to 16 bits for flags, and 16 for category pair. enum class Flags : uint32_t { // The first three flags describe the kind of the frame and are // mutually exclusive. (We still give them individual bits for // simplicity.) // A regular label frame. These usually come from AutoProfilerLabel. IS_LABEL_FRAME = 1 << 0, // A special frame indicating the start of a run of JS profiling stack // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp // field. 
These frames are needed to get correct ordering between JS // and LABEL frames because JS frames don't carry sp information. // SP is short for "stack pointer". IS_SP_MARKER_FRAME = 1 << 1, // A JS frame. IS_JS_FRAME = 1 << 2, // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME // frames can have this flag set and unset during their lifetime. // JS_OSR frames are ignored. JS_OSR = 1 << 3, // The next three are mutually exclusive. // By default, for profiling stack frames that have both a label and a // dynamic string, the two strings are combined into one string of the // form "