/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ // The Gecko Profiler is an always-on profiler that takes fast and low overhead // samples of the program execution using only userspace functionality for // portability. The goal of this module is to provide performance data in a // generic cross-platform way without requiring custom tools or kernel support. // // Samples are collected to form a timeline with optional timeline event // (markers) used for filtering. The samples include both native stacks and // platform-independent "label stack" frames. #ifndef BaseProfilerState_h #define BaseProfilerState_h // This header contains most functions that give information about the Base // Profiler: Whether it is active or not, paused, the selected features, and // some generic process and thread information. // It is safe to include unconditionally, but uses of structs and functions must // be guarded by `#ifdef MOZ_GECKO_PROFILER`. #include "mozilla/BaseProfilerUtils.h" #ifndef MOZ_GECKO_PROFILER # define AUTO_PROFILER_STATS(name) namespace mozilla::baseprofiler { [[nodiscard]] inline bool profiler_is_active() { return false; } [[nodiscard]] inline bool profiler_is_active_and_unpaused() { return false; } } // namespace mozilla::baseprofiler #else // !MOZ_GECKO_PROFILER # include "mozilla/Atomics.h" # include "mozilla/Maybe.h" # include # include // Uncomment the following line to display profiler runtime statistics at // shutdown. // # define PROFILER_RUNTIME_STATS # ifdef PROFILER_RUNTIME_STATS # include "mozilla/TimeStamp.h" # endif namespace mozilla::baseprofiler { # ifdef PROFILER_RUNTIME_STATS // This class gathers durations and displays some basic stats when destroyed. // It is intended to be used as a static variable (see `AUTO_PROFILER_STATS` // below), to display stats at the end of the program. class StaticBaseProfilerStats { public: explicit StaticBaseProfilerStats(const char* aName) : mName(aName) {} ~StaticBaseProfilerStats() { // Using unsigned long long for computations and printfs. using ULL = unsigned long long; ULL n = static_cast(mNumberDurations); if (n != 0) { ULL sumNs = static_cast(mSumDurationsNs); printf( "[%d] Profiler stats `%s`: %llu ns / %llu = %llu ns, max %llu ns\n", int(profiler_current_process_id().ToNumber()), mName, sumNs, n, sumNs / n, static_cast(mLongestDurationNs)); } else { printf("[%d] Profiler stats `%s`: (nothing)\n", int(profiler_current_process_id().ToNumber()), mName); } } void AddDurationFrom(TimeStamp aStart) { DurationNs duration = static_cast( (TimeStamp::Now() - aStart).ToMicroseconds() * 1000 + 0.5); mSumDurationsNs += duration; ++mNumberDurations; // Update mLongestDurationNs if this one is longer. for (;;) { DurationNs longest = mLongestDurationNs; if (MOZ_LIKELY(longest >= duration)) { // This duration is not the longest, nothing to do. break; } if (MOZ_LIKELY(mLongestDurationNs.compareExchange(longest, duration))) { // Successfully updated `mLongestDurationNs` with the new value. break; } // Otherwise someone else just updated `mLongestDurationNs`, we need to // try again by looping. } } private: using DurationNs = uint64_t; using Count = uint32_t; Atomic mSumDurationsNs{0}; Atomic mLongestDurationNs{0}; Atomic mNumberDurations{0}; const char* mName; }; // RAII object that measure its scoped lifetime duration and reports it to a // `StaticBaseProfilerStats`. class MOZ_RAII AutoProfilerStats { public: explicit AutoProfilerStats(StaticBaseProfilerStats& aStats) : mStats(aStats), mStart(TimeStamp::Now()) {} ~AutoProfilerStats() { mStats.AddDurationFrom(mStart); } private: StaticBaseProfilerStats& mStats; TimeStamp mStart; }; // Macro that should be used to collect basic statistics from measurements of // block durations, from where this macro is, until the end of its enclosing // scope. The name is used in the static variable name and when displaying stats // at the end of the program; Another location could use the same name but their // stats will not be combined, so use different name if these locations should // be distinguished. # define AUTO_PROFILER_STATS(name) \ static ::mozilla::baseprofiler::StaticBaseProfilerStats sStat##name( \ #name); \ ::mozilla::baseprofiler::AutoProfilerStats autoStat##name(sStat##name); # else // PROFILER_RUNTIME_STATS # define AUTO_PROFILER_STATS(name) # endif // PROFILER_RUNTIME_STATS else //--------------------------------------------------------------------------- // Profiler features //--------------------------------------------------------------------------- # if defined(__APPLE__) && defined(__aarch64__) # define POWER_HELP "Sample per process power use" # elif defined(__APPLE__) && defined(__x86_64__) # define POWER_HELP \ "Record the power used by the entire system with each sample." # elif defined(__linux__) && defined(__x86_64__) # define POWER_HELP \ "Record the power used by the entire system with each sample. " \ "Only available with Intel CPUs and requires setting " \ "the sysctl kernel.perf_event_paranoid to 0." # elif defined(_MSC_VER) # define POWER_HELP \ "Record the value of every energy meter available on the system with " \ "each sample. Only available on Windows 11 with Intel CPUs." # else # define POWER_HELP "Not supported on this platform." # endif // Higher-order macro containing all the feature info in one place. Define // |MACRO| appropriately to extract the relevant parts. Note that the number // values are used internally only and so can be changed without consequence. // Any changes to this list should also be applied to the feature list in // toolkit/components/extensions/schemas/geckoProfiler.json. // *** Synchronize with lists in ProfilerState.h and geckoProfiler.json *** # define BASE_PROFILER_FOR_EACH_FEATURE(MACRO) \ MACRO(0, "java", Java, "Profile Java code, Android only") \ \ MACRO(1, "js", JS, \ "Get the JS engine to expose the JS stack to the profiler") \ \ MACRO(2, "mainthreadio", MainThreadIO, "Add main thread file I/O") \ \ MACRO(3, "fileio", FileIO, \ "Add file I/O from all profiled threads, implies mainthreadio") \ \ MACRO(4, "fileioall", FileIOAll, \ "Add file I/O from all threads, implies fileio") \ \ MACRO(5, "noiostacks", NoIOStacks, \ "File I/O markers do not capture stacks, to reduce overhead") \ \ MACRO(6, "screenshots", Screenshots, \ "Take a snapshot of the window on every composition") \ \ MACRO(7, "seqstyle", SequentialStyle, \ "Disable parallel traversal in styling") \ \ MACRO(8, "stackwalk", StackWalk, \ "Walk the C++ stack, not available on all platforms") \ \ MACRO(9, "jsallocations", JSAllocations, \ "Have the JavaScript engine track allocations") \ \ MACRO(10, "nostacksampling", NoStackSampling, \ "Disable all stack sampling: Cancels \"js\", \"stackwalk\" and " \ "labels") \ \ MACRO(11, "preferencereads", PreferenceReads, \ "Track when preferences are read") \ \ MACRO(12, "nativeallocations", NativeAllocations, \ "Collect the stacks from a smaller subset of all native " \ "allocations, biasing towards collecting larger allocations") \ \ MACRO(13, "ipcmessages", IPCMessages, \ "Have the IPC layer track cross-process messages") \ \ MACRO(14, "audiocallbacktracing", AudioCallbackTracing, \ "Audio callback tracing") \ \ MACRO(15, "cpu", CPUUtilization, "CPU utilization") \ \ MACRO(16, "notimerresolutionchange", NoTimerResolutionChange, \ "Do not adjust the timer resolution for fast sampling, so that " \ "other Firefox timers do not get affected") \ \ MACRO(17, "cpuallthreads", CPUAllThreads, \ "Sample the CPU utilization of all registered threads") \ \ MACRO(18, "samplingallthreads", SamplingAllThreads, \ "Sample the stacks of all registered threads") \ \ MACRO(19, "markersallthreads", MarkersAllThreads, \ "Record markers from all registered threads") \ \ MACRO(20, "unregisteredthreads", UnregisteredThreads, \ "Discover and profile unregistered threads -- beware: expensive!") \ \ MACRO(21, "processcpu", ProcessCPU, \ "Sample the CPU utilization of each process") \ \ MACRO(22, "power", Power, POWER_HELP) // *** Synchronize with lists in ProfilerState.h and geckoProfiler.json *** struct ProfilerFeature { # define DECLARE(n_, str_, Name_, desc_) \ static constexpr uint32_t Name_ = (1u << n_); \ [[nodiscard]] static constexpr bool Has##Name_(uint32_t aFeatures) { \ return aFeatures & Name_; \ } \ static constexpr void Set##Name_(uint32_t& aFeatures) { \ aFeatures |= Name_; \ } \ static constexpr void Clear##Name_(uint32_t& aFeatures) { \ aFeatures &= ~Name_; \ } // Define a bitfield constant, a getter, and two setters for each feature. BASE_PROFILER_FOR_EACH_FEATURE(DECLARE) # undef DECLARE }; namespace detail { // RacyFeatures is only defined in this header file so that its methods can // be inlined into profiler_is_active(). Please do not use anything from the // detail namespace outside the profiler. // Within the profiler's code, the preferred way to check profiler activeness // and features is via ActivePS(). However, that requires locking gPSMutex. // There are some hot operations where absolute precision isn't required, so we // duplicate the activeness/feature state in a lock-free manner in this class. class RacyFeatures { public: MFBT_API static void SetActive(uint32_t aFeatures); MFBT_API static void SetInactive(); MFBT_API static void SetPaused(); MFBT_API static void SetUnpaused(); MFBT_API static void SetSamplingPaused(); MFBT_API static void SetSamplingUnpaused(); [[nodiscard]] MFBT_API static bool IsActive(); [[nodiscard]] MFBT_API static bool IsActiveWithFeature(uint32_t aFeature); // True if profiler is active, and not fully paused. // Note that periodic sampling *could* be paused! [[nodiscard]] MFBT_API static bool IsActiveAndUnpaused(); // True if profiler is active, and sampling is not paused (though generic // `SetPaused()` or specific `SetSamplingPaused()`). [[nodiscard]] MFBT_API static bool IsActiveAndSamplingUnpaused(); private: static constexpr uint32_t Active = 1u << 31; static constexpr uint32_t Paused = 1u << 30; static constexpr uint32_t SamplingPaused = 1u << 29; // Ensure Active/Paused don't overlap with any of the feature bits. # define NO_OVERLAP(n_, str_, Name_, desc_) \ static_assert(ProfilerFeature::Name_ != SamplingPaused, \ "bad feature value"); BASE_PROFILER_FOR_EACH_FEATURE(NO_OVERLAP); # undef NO_OVERLAP // We combine the active bit with the feature bits so they can be read or // written in a single atomic operation. // TODO: Could this be MFBT_DATA for better inlining optimization? static Atomic sActiveAndFeatures; }; MFBT_API bool IsThreadBeingProfiled(); } // namespace detail //--------------------------------------------------------------------------- // Get information from the profiler //--------------------------------------------------------------------------- // Is the profiler active? Note: the return value of this function can become // immediately out-of-date. E.g. the profile might be active but then // profiler_stop() is called immediately afterward. One common and reasonable // pattern of usage is the following: // // if (profiler_is_active()) { // ExpensiveData expensiveData = CreateExpensiveData(); // PROFILER_OPERATION(expensiveData); // } // // where PROFILER_OPERATION is a no-op if the profiler is inactive. In this // case the profiler_is_active() check is just an optimization -- it prevents // us calling CreateExpensiveData() unnecessarily in most cases, but the // expensive data will end up being created but not used if another thread // stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION // calls. [[nodiscard]] inline bool profiler_is_active() { return baseprofiler::detail::RacyFeatures::IsActive(); } // Same as profiler_is_active(), but also checks if the profiler is not paused. [[nodiscard]] inline bool profiler_is_active_and_unpaused() { return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused(); } // Is the profiler active and unpaused, and is the current thread being // profiled? (Same caveats and recommented usage as profiler_is_active().) [[nodiscard]] inline bool profiler_thread_is_being_profiled() { return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused() && baseprofiler::detail::IsThreadBeingProfiled(); } // Is the profiler active and paused? Returns false if the profiler is inactive. [[nodiscard]] MFBT_API bool profiler_is_paused(); // Is the profiler active and sampling is paused? Returns false if the profiler // is inactive. [[nodiscard]] MFBT_API bool profiler_is_sampling_paused(); // Is the current thread sleeping? [[nodiscard]] MFBT_API bool profiler_thread_is_sleeping(); // Get all the features supported by the profiler that are accepted by // profiler_start(). The result is the same whether the profiler is active or // not. [[nodiscard]] MFBT_API uint32_t profiler_get_available_features(); // Check if a profiler feature (specified via the ProfilerFeature type) is // active. Returns false if the profiler is inactive. Note: the return value // can become immediately out-of-date, much like the return value of // profiler_is_active(). [[nodiscard]] MFBT_API bool profiler_feature_active(uint32_t aFeature); // Returns true if any of the profiler mutexes are currently locked *on the // current thread*. This may be used by re-entrant code that may call profiler // functions while the same of a different profiler mutex is locked, which could // deadlock. [[nodiscard]] bool profiler_is_locked_on_current_thread(); } // namespace mozilla::baseprofiler #endif // !MOZ_GECKO_PROFILER #endif // BaseProfilerState_h