From 2aa4a82499d4becd2284cdb482213d541b8804dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 16:29:10 +0200 Subject: Adding upstream version 86.0.1. Signed-off-by: Daniel Baumann --- mozglue/baseprofiler/public/BaseProfilingStack.h | 520 +++++++++++++++++++++++ 1 file changed, 520 insertions(+) create mode 100644 mozglue/baseprofiler/public/BaseProfilingStack.h (limited to 'mozglue/baseprofiler/public/BaseProfilingStack.h') diff --git a/mozglue/baseprofiler/public/BaseProfilingStack.h b/mozglue/baseprofiler/public/BaseProfilingStack.h new file mode 100644 index 0000000000..214fc1ebbf --- /dev/null +++ b/mozglue/baseprofiler/public/BaseProfilingStack.h @@ -0,0 +1,520 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BaseProfilingStack_h +#define BaseProfilingStack_h + +#include "BaseProfilingCategory.h" + +#include "mozilla/Atomics.h" + +#include "BaseProfiler.h" + +#ifndef MOZ_GECKO_PROFILER +# error Do not #include this header when MOZ_GECKO_PROFILER is not #defined. +#endif + +#include +#include + +// This file defines the classes ProfilingStack and ProfilingStackFrame. +// The ProfilingStack manages an array of ProfilingStackFrames. +// It keeps track of the "label stack" and the JS interpreter stack. +// The two stack types are interleaved. +// +// Usage: +// +// ProfilingStack* profilingStack = ...; +// +// // For label frames: +// profilingStack->pushLabelFrame(...); +// // Execute some code. When finished, pop the frame: +// profilingStack->pop(); +// +// // For JS stack frames: +// profilingStack->pushJSFrame(...); +// // Execute some code. When finished, pop the frame: +// profilingStack->pop(); +// +// +// Concurrency considerations +// +// A thread's profiling stack (and the frames inside it) is only modified by +// that thread. However, the profiling stack can be *read* by a different +// thread, the sampler thread: Whenever the profiler wants to sample a given +// thread A, the following happens: +// (1) Thread A is suspended. +// (2) The sampler thread (thread S) reads the ProfilingStack of thread A, +// including all ProfilingStackFrames that are currently in that stack +// (profilingStack->frames[0..profilingStack->stackSize()]). +// (3) Thread A is resumed. +// +// Thread suspension is achieved using platform-specific APIs; refer to each +// platform's Sampler::SuspendAndSampleAndResumeThread implementation in +// platform-*.cpp for details. +// +// When the thread is suspended, the values in profilingStack->stackPointer and +// in the stack frame range +// profilingStack->frames[0..profilingStack->stackPointer] need to be in a +// consistent state, so that thread S does not read partially- constructed stack +// frames. More specifically, we have two requirements: +// (1) When adding a new frame at the top of the stack, its ProfilingStackFrame +// data needs to be put in place *before* the stackPointer is incremented, +// and the compiler + CPU need to know that this order matters. +// (2) When popping an frame from the stack and then preparing the +// ProfilingStackFrame data for the next frame that is about to be pushed, +// the decrement of the stackPointer in pop() needs to happen *before* the +// ProfilingStackFrame for the new frame is being popuplated, and the +// compiler + CPU need to know that this order matters. +// +// We can express the relevance of these orderings in multiple ways. +// Option A is to make stackPointer an atomic with SequentiallyConsistent +// memory ordering. This would ensure that no writes in thread A would be +// reordered across any writes to stackPointer, which satisfies requirements +// (1) and (2) at the same time. Option A is the simplest. +// Option B is to use ReleaseAcquire memory ordering both for writes to +// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores +// ensure that all writes that happened *before this write in program order* are +// not reordered to happen after this write. ReleaseAcquire ordering places no +// requirements on the ordering of writes that happen *after* this write in +// program order. +// Using release-stores for writes to stackPointer expresses requirement (1), +// and using release-stores for writes to the ProfilingStackFrame fields +// expresses requirement (2). +// +// Option B is more complicated than option A, but has much better performance +// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching +// from option A to option B reduced the overhead of pushing+popping a +// ProfilingStackFrame by 10 nanoseconds. +// On x86/64, release-stores require no explicit hardware barriers or lock +// instructions. +// On ARM/64, option B may be slower than option A, because the compiler will +// generate hardware barriers for every single release-store instead of just +// for the writes to stackPointer. However, the actual performance impact of +// this has not yet been measured on ARM, so we're currently using option B +// everywhere. This is something that we may want to change in the future once +// we've done measurements. + +namespace mozilla { +namespace baseprofiler { + +// A call stack can be specified to the JS engine such that all JS entry/exits +// to functions push/pop a stack frame to/from the specified stack. +// +// For more detailed information, see vm/GeckoProfiler.h. +// +class ProfilingStackFrame { + // A ProfilingStackFrame represents either a label frame or a JS frame. + + // WARNING WARNING WARNING + // + // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so + // that writes to these fields are release-writes, which ensures that + // earlier writes in this thread don't get reordered after the writes to + // these fields. In particular, the decrement of the stack pointer in + // ProfilingStack::pop() is a write that *must* happen before the values in + // this ProfilingStackFrame are changed. Otherwise, the sampler thread might + // see an inconsistent state where the stack pointer still points to a + // ProfilingStackFrame which has already been popped off the stack and whose + // fields have now been partially repopulated with new values. + // See the "Concurrency considerations" paragraph at the top of this file + // for more details. + + // Descriptive label for this stack frame. Must be a static string! Can be + // an empty string, but not a null pointer. + Atomic label_; + + // An additional descriptive string of this frame which is combined with + // |label_| in profiler output. Need not be (and usually isn't) static. Can + // be null. + Atomic dynamicString_; + + // Stack pointer for non-JS stack frames, the script pointer otherwise. + Atomic spOrScript; + + // ID of the JS Realm for JS stack frames. + // Must not be used on non-JS frames; it'll contain either the default 0, + // or a leftover value from a previous JS stack frame that was using this + // ProfilingStackFrame object. + mozilla::Atomic realmID_; + + // The bytecode offset for JS stack frames. + // Must not be used on non-JS frames; it'll contain either the default 0, + // or a leftover value from a previous JS stack frame that was using this + // ProfilingStackFrame object. + Atomic pcOffsetIfJS_; + + // Bits 0...8 hold the Flags. Bits 9...31 hold the category pair. + Atomic flagsAndCategoryPair_; + + public: + ProfilingStackFrame() = default; + ProfilingStackFrame& operator=(const ProfilingStackFrame& other) { + label_ = other.label(); + dynamicString_ = other.dynamicString(); + void* spScript = other.spOrScript; + spOrScript = spScript; + int32_t offsetIfJS = other.pcOffsetIfJS_; + pcOffsetIfJS_ = offsetIfJS; + int64_t realmID = other.realmID_; + realmID_ = realmID; + uint32_t flagsAndCategory = other.flagsAndCategoryPair_; + flagsAndCategoryPair_ = flagsAndCategory; + return *this; + } + + // Reserve up to 16 bits for flags, and 16 for category pair. + enum class Flags : uint32_t { + // The first three flags describe the kind of the frame and are + // mutually exclusive. (We still give them individual bits for + // simplicity.) + + // A regular label frame. These usually come from AutoProfilerLabel. + IS_LABEL_FRAME = 1 << 0, + + // A special frame indicating the start of a run of JS profiling stack + // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp + // field. These frames are needed to get correct ordering between JS + // and LABEL frames because JS frames don't carry sp information. + // SP is short for "stack pointer". + IS_SP_MARKER_FRAME = 1 << 1, + + // A JS frame. + IS_JS_FRAME = 1 << 2, + + // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME + // frames can have this flag set and unset during their lifetime. + // JS_OSR frames are ignored. + JS_OSR = 1 << 3, + + // The next three are mutually exclusive. + // By default, for profiling stack frames that have both a label and a + // dynamic string, the two strings are combined into one string of the + // form "