diff options
Diffstat (limited to 'mozglue/baseprofiler/core/platform.cpp')
-rw-r--r-- | mozglue/baseprofiler/core/platform.cpp | 3824 |
1 files changed, 3824 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/core/platform.cpp b/mozglue/baseprofiler/core/platform.cpp new file mode 100644 index 0000000000..dd3bfe6674 --- /dev/null +++ b/mozglue/baseprofiler/core/platform.cpp @@ -0,0 +1,3824 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// There are three kinds of samples done by the profiler. +// +// - A "periodic" sample is the most complex kind. It is done in response to a +// timer while the profiler is active. It involves writing a stack trace plus +// a variety of other values (memory measurements, responsiveness +// measurements, etc.) into the main ProfileBuffer. The sampling is done from +// off-thread, and so SuspendAndSampleAndResumeThread() is used to get the +// register values. +// +// - A "synchronous" sample is a simpler kind. It is done in response to an API +// call (profiler_get_backtrace()). It involves writing a stack trace and +// little else into a temporary ProfileBuffer, and wrapping that up in a +// ProfilerBacktrace that can be subsequently used in a marker. The sampling +// is done on-thread, and so REGISTERS_SYNC_POPULATE() is used to get the +// register values. +// +// - A "backtrace" sample is the simplest kind. It is done in response to an +// API call (profiler_suspend_and_sample_thread()). It involves getting a +// stack trace via a ProfilerStackCollector; it does not write to a +// ProfileBuffer. The sampling is done from off-thread, and so uses +// SuspendAndSampleAndResumeThread() to get the register values. + +#include "platform.h" + +#include <algorithm> +#include <errno.h> +#include <fstream> +#include <ostream> +#include <set> +#include <sstream> +#include <string_view> + +// #include "memory_hooks.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/AutoProfilerLabel.h" +#include "mozilla/BaseAndGeckoProfilerDetail.h" +#include "mozilla/BaseProfilerDetail.h" +#include "mozilla/DoubleConversion.h" +#include "mozilla/Printf.h" +#include "mozilla/ProfileBufferChunkManagerSingle.h" +#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h" +#include "mozilla/ProfileChunkedBuffer.h" +#include "mozilla/Services.h" +#include "mozilla/Span.h" +#include "mozilla/StackWalk.h" +#ifdef XP_WIN +# include "mozilla/StackWalkThread.h" +#endif +#include "mozilla/StaticPtr.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/TimeStamp.h" + +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include "prdtoa.h" +#include "prtime.h" + +#include "BaseProfiler.h" +#include "BaseProfilingCategory.h" +#include "PageInformation.h" +#include "ProfiledThreadData.h" +#include "ProfilerBacktrace.h" +#include "ProfileBuffer.h" +#include "RegisteredThread.h" +#include "BaseProfilerSharedLibraries.h" +#include "ThreadInfo.h" +#include "VTuneProfiler.h" + +// Win32 builds always have frame pointers, so FramePointerStackWalk() always +// works. +#if defined(GP_PLAT_x86_windows) +# define HAVE_NATIVE_UNWIND +# define USE_FRAME_POINTER_STACK_WALK +#endif + +// Win64 builds always omit frame pointers, so we use the slower +// MozStackWalk(), which works in that case. +#if defined(GP_PLAT_amd64_windows) +# define HAVE_NATIVE_UNWIND +# define USE_MOZ_STACK_WALK +#endif + +// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower +// MozStackWalk(). +#if defined(GP_PLAT_arm64_windows) +# define HAVE_NATIVE_UNWIND +# define USE_MOZ_STACK_WALK +#endif + +// Mac builds use FramePointerStackWalk(). Even if we build without +// frame pointers, we'll still get useful stacks in system libraries +// because those always have frame pointers. +// We don't use MozStackWalk() on Mac. +#if defined(GP_OS_darwin) +# define HAVE_NATIVE_UNWIND +# define USE_FRAME_POINTER_STACK_WALK +#endif + +// No stack-walking in baseprofiler on linux, android, bsd. +// APIs now make it easier to capture backtraces from the Base Profiler, which +// is currently not supported on these platform, and would lead to a MOZ_CRASH +// in REGISTERS_SYNC_POPULATE(). `#if 0` added in bug 1658232, follow-up bugs +// should be referenced in meta bug 1557568. +#if 0 +// Android builds use the ARM Exception Handling ABI to unwind. +# if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) +# define HAVE_NATIVE_UNWIND +# define USE_EHABI_STACKWALK +# include "EHABIStackWalk.h" +# endif + +// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks. +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ + defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ + defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \ + defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \ + defined(GP_PLAT_arm64_freebsd) +# define HAVE_NATIVE_UNWIND +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" + +// On linux we use LUL for periodic samples and synchronous samples, but we use +// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled. +// (See the comment at the top of the file for a definition of +// periodic/synchronous/backtrace.). +// +// FramePointerStackWalk can produce incomplete stacks when the current entry is +// in a shared library without framepointers, however LUL can take a long time +// to initialize, which is undesirable for consumers of +// profiler_suspend_and_sample_thread like the Background Hang Reporter. +# if defined(MOZ_PROFILING) +# define USE_FRAME_POINTER_STACK_WALK +# endif +# endif +#endif + +// We can only stackwalk without expensive initialization on platforms which +// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires +// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of +// which can be expensive. +#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) +# define HAVE_FASTINIT_NATIVE_UNWIND +#endif + +#ifdef MOZ_VALGRIND +# include <valgrind/memcheck.h> +#else +# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0) +#endif + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include <ucontext.h> +#endif + +namespace mozilla { +namespace baseprofiler { + +using detail::RacyFeatures; + +bool LogTest(int aLevelToTest) { + static const int maxLevel = getenv("MOZ_BASE_PROFILER_VERBOSE_LOGGING") ? 5 + : getenv("MOZ_BASE_PROFILER_DEBUG_LOGGING") ? 4 + : getenv("MOZ_BASE_PROFILER_LOGGING") ? 3 + : 0; + return aLevelToTest <= maxLevel; +} + +void PrintToConsole(const char* aFmt, ...) { + va_list args; + va_start(args, aFmt); +#if defined(ANDROID) + __android_log_vprint(ANDROID_LOG_INFO, "Gecko", aFmt, args); +#else + vfprintf(stderr, aFmt, args); +#endif + va_end(args); +} + +ProfileChunkedBuffer& profiler_get_core_buffer() { + // This needs its own mutex, because it is used concurrently from functions + // guarded by gPSMutex as well as others without safety (e.g., + // profiler_add_marker). It is *not* used inside the critical section of the + // sampler, because mutexes cannot be used there. + static ProfileChunkedBuffer sProfileChunkedBuffer{ + ProfileChunkedBuffer::ThreadSafety::WithMutex}; + return sProfileChunkedBuffer; +} + +Atomic<int, MemoryOrdering::Relaxed> gSkipSampling; + +constexpr static bool ValidateFeatures() { + int expectedFeatureNumber = 0; + + // Feature numbers should start at 0 and increase by 1 each. +#define CHECK_FEATURE(n_, str_, Name_, desc_) \ + if ((n_) != expectedFeatureNumber) { \ + return false; \ + } \ + ++expectedFeatureNumber; + + BASE_PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE) + +#undef CHECK_FEATURE + + return true; +} + +static_assert(ValidateFeatures(), "Feature list is invalid"); + +// Return all features that are available on this platform. +static uint32_t AvailableFeatures() { + uint32_t features = 0; + +#define ADD_FEATURE(n_, str_, Name_, desc_) \ + ProfilerFeature::Set##Name_(features); + + // Add all the possible features. + BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE) + +#undef ADD_FEATURE + + // Now remove features not supported on this platform/configuration. + ProfilerFeature::ClearJava(features); + ProfilerFeature::ClearJS(features); + ProfilerFeature::ClearScreenshots(features); +#if !defined(HAVE_NATIVE_UNWIND) + ProfilerFeature::ClearStackWalk(features); +#endif +#if !defined(GP_OS_windows) + ProfilerFeature::ClearNoTimerResolutionChange(features); +#endif + + return features; +} + +// Default features common to all contexts (even if not available). +static constexpr uint32_t DefaultFeatures() { + return ProfilerFeature::Java | ProfilerFeature::JS | + ProfilerFeature::StackWalk | ProfilerFeature::CPUUtilization | + ProfilerFeature::ProcessCPU; +} + +// Extra default features when MOZ_PROFILER_STARTUP is set (even if not +// available). +static constexpr uint32_t StartupExtraDefaultFeatures() { + // Enable mainthreadio by default for startup profiles as startup is heavy on + // I/O operations, and main thread I/O is really important to see there. + return ProfilerFeature::MainThreadIO | ProfilerFeature::IPCMessages; +} + +// The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS. +// Use `PSAutoLock lock;` to take the lock until the end of the enclosing block. +// External profilers may use this same lock for their own data, but as the lock +// is non-recursive, *only* `f(PSLockRef, ...)` functions below should be +// called, to avoid double-locking. +class MOZ_RAII PSAutoLock { + public: + PSAutoLock() : mLock(gPSMutex) {} + + PSAutoLock(const PSAutoLock&) = delete; + void operator=(const PSAutoLock&) = delete; + + [[nodiscard]] static bool IsLockedOnCurrentThread() { + return gPSMutex.IsLockedOnCurrentThread(); + } + + private: + static detail::BaseProfilerMutex gPSMutex; + detail::BaseProfilerAutoLock mLock; +}; + +detail::BaseProfilerMutex PSAutoLock::gPSMutex{"Base Profiler mutex"}; + +// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's +// fields. +typedef const PSAutoLock& PSLockRef; + +#define PS_GET(type_, name_) \ + static type_ name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_LOCKLESS(type_, name_) \ + static type_ name_() { \ + MOZ_ASSERT(sInstance); \ + return sInstance->m##name_; \ + } + +#define PS_GET_AND_SET(type_, name_) \ + PS_GET(type_, name_) \ + static void Set##name_(PSLockRef, type_ a##name_) { \ + MOZ_ASSERT(sInstance); \ + sInstance->m##name_ = a##name_; \ + } + +// All functions in this file can run on multiple threads unless they have an +// NS_IsMainThread() assertion. + +// This class contains the profiler's core global state, i.e. that which is +// valid even when the profiler is not active. Most profile operations can't do +// anything useful when this class is not instantiated, so we release-assert +// its non-nullness in all such operations. +// +// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a +// PSAutoLock reference as an argument as proof that the gPSMutex is currently +// locked. This makes it clear when gPSMutex is locked and helps avoid +// accidental unlocked accesses to global state. There are ways to circumvent +// this mechanism, but please don't do so without *very* good reason and a +// detailed explanation. +// +// The exceptions to this rule: +// +// - mProcessStartTime, because it's immutable; +// +// - each thread's RacyRegisteredThread object is accessible without locking via +// TLSRegisteredThread::RacyRegisteredThread(). +class CorePS { + private: + CorePS() + : mProcessStartTime(TimeStamp::ProcessCreation()) +#ifdef USE_LUL_STACKWALK + , + mLul(nullptr) +#endif + { + } + + ~CorePS() {} + + public: + static void Create(PSLockRef aLock) { + MOZ_ASSERT(!sInstance); + sInstance = new CorePS(); + } + + static void Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + delete sInstance; + sInstance = nullptr; + } + + // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex + // being locked. This is because CorePS is instantiated so early on the main + // thread that we don't have to worry about it being racy. + static bool Exists() { return !!sInstance; } + + static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf, + size_t& aProfSize, size_t& aLulSize) { + MOZ_ASSERT(sInstance); + + aProfSize += aMallocSizeOf(sInstance); + + for (auto& registeredThread : sInstance->mRegisteredThreads) { + aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf); + } + + for (auto& registeredPage : sInstance->mRegisteredPages) { + aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf); + } + + // Measurement of the following things may be added later if DMD finds it + // is worthwhile: + // - CorePS::mRegisteredThreads itself (its elements' children are + // measured above) + // - CorePS::mRegisteredPages itself (its elements' children are + // measured above) + // - CorePS::mInterposeObserver + +#if defined(USE_LUL_STACKWALK) + if (sInstance->mLul) { + aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf); + } +#endif + } + + // No PSLockRef is needed for this field because it's immutable. + PS_GET_LOCKLESS(const TimeStamp&, ProcessStartTime) + + PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads) + + static void AppendRegisteredThread( + PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) { + MOZ_ASSERT(sInstance); + MOZ_RELEASE_ASSERT( + sInstance->mRegisteredThreads.append(std::move(aRegisteredThread))); + } + + static void RemoveRegisteredThread(PSLockRef, + RegisteredThread* aRegisteredThread) { + MOZ_ASSERT(sInstance); + // Remove aRegisteredThread from mRegisteredThreads. + for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) { + if (rt.get() == aRegisteredThread) { + sInstance->mRegisteredThreads.erase(&rt); + return; + } + } + } + + PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages) + + static void AppendRegisteredPage(PSLockRef, + RefPtr<PageInformation>&& aRegisteredPage) { + MOZ_ASSERT(sInstance); + struct RegisteredPageComparator { + PageInformation* aA; + bool operator()(PageInformation* aB) const { return aA->Equals(aB); } + }; + + auto foundPageIter = std::find_if( + sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(), + RegisteredPageComparator{aRegisteredPage.get()}); + + if (foundPageIter != sInstance->mRegisteredPages.end()) { + if ((*foundPageIter)->Url() == "about:blank") { + // When a BrowsingContext is loaded, the first url loaded in it will be + // about:blank, and if the principal matches, the first document loaded + // in it will share an inner window. That's why we should delete the + // intermittent about:blank if they share the inner window. + sInstance->mRegisteredPages.erase(foundPageIter); + } else { + // Do not register the same page again. + return; + } + } + MOZ_RELEASE_ASSERT( + sInstance->mRegisteredPages.append(std::move(aRegisteredPage))); + } + + static void RemoveRegisteredPage(PSLockRef, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + // Remove RegisteredPage from mRegisteredPages by given inner window ID. + sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) { + return rd->InnerWindowID() == aRegisteredInnerWindowID; + }); + } + + static void ClearRegisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mRegisteredPages.clear(); + } + + PS_GET(const Vector<BaseProfilerCount*>&, Counters) + + static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) { + MOZ_ASSERT(sInstance); + // we don't own the counter; they may be stored in static objects + MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter)); + } + + static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) { + // we may be called to remove a counter after the profiler is stopped or + // late in shutdown. + if (sInstance) { + auto* counter = std::find(sInstance->mCounters.begin(), + sInstance->mCounters.end(), aCounter); + MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end()); + sInstance->mCounters.erase(counter); + } + } + +#ifdef USE_LUL_STACKWALK + static lul::LUL* Lul(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mLul.get(); + } + static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) { + MOZ_ASSERT(sInstance); + sInstance->mLul = std::move(aLul); + } +#endif + + PS_GET_AND_SET(const std::string&, ProcessName) + PS_GET_AND_SET(const std::string&, ETLDplus1) + + private: + // The singleton instance + static CorePS* sInstance; + + // The time that the process started. + const TimeStamp mProcessStartTime; + + // Info on all the registered threads. + // ThreadIds in mRegisteredThreads are unique. + Vector<UniquePtr<RegisteredThread>> mRegisteredThreads; + + // Info on all the registered pages. + // InnerWindowIDs in mRegisteredPages are unique. + Vector<RefPtr<PageInformation>> mRegisteredPages; + + // Non-owning pointers to all active counters + Vector<BaseProfilerCount*> mCounters; + +#ifdef USE_LUL_STACKWALK + // LUL's state. Null prior to the first activation, non-null thereafter. + UniquePtr<lul::LUL> mLul; +#endif + + // Process name, provided by child process initialization code. + std::string mProcessName; + // Private name, provided by child process initialization code (eTLD+1 in + // fission) + std::string mETLDplus1; +}; + +CorePS* CorePS::sInstance = nullptr; + +class SamplerThread; + +static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, + double aInterval, uint32_t aFeatures); + +struct LiveProfiledThreadData { + RegisteredThread* mRegisteredThread; + UniquePtr<ProfiledThreadData> mProfiledThreadData; +}; + +// The buffer size is provided as a number of "entries", this is their size in +// bytes. +constexpr static uint32_t scBytesPerEntry = 8; + +// This class contains the profiler's global state that is valid only when the +// profiler is active. When not instantiated, the profiler is inactive. +// +// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as +// CorePS. +// +class ActivePS { + private: + // We need to decide how many chunks of what size we want to fit in the given + // total maximum capacity for this process, in the (likely) context of + // multiple processes doing the same choice and having an inter-process + // mechanism to control the overal memory limit. + + // Minimum chunk size allowed, enough for at least one stack. + constexpr static uint32_t scMinimumChunkSize = + 2 * ProfileBufferChunkManager::scExpectedMaximumStackSize; + + // Ideally we want at least 2 unreleased chunks to work with (1 current and 1 + // next), and 2 released chunks (so that one can be recycled when old, leaving + // one with some data). + constexpr static uint32_t scMinimumNumberOfChunks = 4; + + // And we want to limit chunks to a maximum size, which is a compromise + // between: + // - A big size, which helps with reducing the rate of allocations and IPCs. + // - A small size, which helps with equalizing the duration of recorded data + // (as the inter-process controller will discard the oldest chunks in all + // Firefox processes). + constexpr static uint32_t scMaximumChunkSize = 1024 * 1024; + + public: + // We should be able to store at least the minimum number of the smallest- + // possible chunks. + constexpr static uint32_t scMinimumBufferSize = + scMinimumNumberOfChunks * scMinimumChunkSize; + constexpr static uint32_t scMinimumBufferEntries = + scMinimumBufferSize / scBytesPerEntry; + + // Limit to 2GiB. + constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u; + constexpr static uint32_t scMaximumBufferEntries = + scMaximumBufferSize / scBytesPerEntry; + + constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) { + if (aEntries <= scMinimumBufferEntries) { + return scMinimumBufferEntries; + } + if (aEntries >= scMaximumBufferEntries) { + return scMaximumBufferEntries; + } + return aEntries; + } + + private: + constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) { + return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) * + scBytesPerEntry / scMinimumNumberOfChunks, + size_t(scMaximumChunkSize))); + } + + static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) { + // Filter out any features unavailable in this platform/configuration. + aFeatures &= AvailableFeatures(); + + // Some features imply others. + if (aFeatures & ProfilerFeature::FileIOAll) { + aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO; + } else if (aFeatures & ProfilerFeature::FileIO) { + aFeatures |= ProfilerFeature::MainThreadIO; + } + + return aFeatures; + } + + ActivePS(PSLockRef aLock, const TimeStamp& aProfilingStartTime, + PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + const Maybe<double>& aDuration) + : mProfilingStartTime(aProfilingStartTime), + mGeneration(sNextGeneration++), + mCapacity(aCapacity), + mDuration(aDuration), + mInterval(aInterval), + mFeatures(AdjustFeatures(aFeatures, aFilterCount)), + mProfileBufferChunkManager( + MakeUnique<ProfileBufferChunkManagerWithLocalLimit>( + size_t(ClampToAllowedEntries(aCapacity.Value())) * + scBytesPerEntry, + ChunkSizeForEntries(aCapacity.Value()))), + mProfileBuffer([this]() -> ProfileChunkedBuffer& { + ProfileChunkedBuffer& buffer = profiler_get_core_buffer(); + buffer.SetChunkManager(*mProfileBufferChunkManager); + return buffer; + }()), + // The new sampler thread doesn't start sampling immediately because the + // main loop within Run() is blocked until this function's caller + // unlocks gPSMutex. + mSamplerThread( + NewSamplerThread(aLock, mGeneration, aInterval, aFeatures)), + mIsPaused(false), + mIsSamplingPaused(false) { + // Deep copy and lower-case aFilters. + MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount)); + MOZ_ALWAYS_TRUE(mFiltersLowered.resize(aFilterCount)); + for (uint32_t i = 0; i < aFilterCount; ++i) { + mFilters[i] = aFilters[i]; + mFiltersLowered[i].reserve(mFilters[i].size()); + std::transform(mFilters[i].cbegin(), mFilters[i].cend(), + std::back_inserter(mFiltersLowered[i]), ::tolower); + } + } + + ~ActivePS() { + if (mProfileBufferChunkManager) { + // We still control the chunk manager, remove it from the core buffer. + profiler_get_core_buffer().ResetChunkManager(); + } + } + + bool ThreadSelected(const char* aThreadName) { + if (mFiltersLowered.empty()) { + return true; + } + + std::string name = aThreadName; + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + + for (const auto& filter : mFiltersLowered) { + if (filter == "*") { + return true; + } + + // Crude, non UTF-8 compatible, case insensitive substring search + if (name.find(filter) != std::string::npos) { + return true; + } + + // If the filter is "pid:<my pid>", profile all threads. + if (mozilla::profiler::detail::FilterHasPid(filter.c_str())) { + return true; + } + } + + return false; + } + + public: + static void Create(PSLockRef aLock, const TimeStamp& aProfilingStartTime, + PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, const Maybe<double>& aDuration) { + MOZ_ASSERT(!sInstance); + sInstance = new ActivePS(aLock, aProfilingStartTime, aCapacity, aInterval, + aFeatures, aFilters, aFilterCount, aDuration); + } + + [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + auto samplerThread = sInstance->mSamplerThread; + delete sInstance; + sInstance = nullptr; + + return samplerThread; + } + + static bool Exists(PSLockRef) { return !!sInstance; } + + static bool Equals(PSLockRef, PowerOfTwo32 aCapacity, + const Maybe<double>& aDuration, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount) { + MOZ_ASSERT(sInstance); + if (sInstance->mCapacity != aCapacity || + sInstance->mDuration != aDuration || + sInstance->mInterval != aInterval || + sInstance->mFeatures != aFeatures || + sInstance->mFilters.length() != aFilterCount) { + return false; + } + + for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) { + if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) { + return false; + } + } + return true; + } + + static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) { + MOZ_ASSERT(sInstance); + + size_t n = aMallocSizeOf(sInstance); + + n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mLiveProfiledThreads (both the array itself, and the contents) + // - mDeadProfiledThreads (both the array itself, and the contents) + // + + return n; + } + + static UniquePtr<ProfileBufferChunkManagerWithLocalLimit> + ExtractBaseProfilerChunkManager(PSLockRef) { + MOZ_ASSERT(sInstance); + return std::move(sInstance->mProfileBufferChunkManager); + } + + static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) { + MOZ_ASSERT(sInstance); + return sInstance->ThreadSelected(aInfo->Name()); + } + + PS_GET_LOCKLESS(TimeStamp, ProfilingStartTime) + + PS_GET(uint32_t, Generation) + + PS_GET(PowerOfTwo32, Capacity) + + PS_GET(Maybe<double>, Duration) + + PS_GET(double, Interval) + + PS_GET(uint32_t, Features) + +#define PS_GET_FEATURE(n_, str_, Name_, desc_) \ + static bool Feature##Name_(PSLockRef) { \ + MOZ_ASSERT(sInstance); \ + return ProfilerFeature::Has##Name_(sInstance->mFeatures); \ + } + + BASE_PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE) + +#undef PS_GET_FEATURE + + PS_GET(const Vector<std::string>&, Filters) + PS_GET(const Vector<std::string>&, FiltersLowered) + + static void FulfillChunkRequests(PSLockRef) { + MOZ_ASSERT(sInstance); + if (sInstance->mProfileBufferChunkManager) { + sInstance->mProfileBufferChunkManager->FulfillChunkRequests(); + } + } + + static ProfileBuffer& Buffer(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mProfileBuffer; + } + + static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + return sInstance->mLiveProfiledThreads; + } + + // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs + // for all threads that should be included in a profile, both for threads + // that are still registered, and for threads that have been unregistered but + // still have data in the buffer. + // For threads that have already been unregistered, the RegisteredThread + // pointer will be null. + // The returned array is sorted by thread register time. + // Do not hold on to the return value across thread registration or profiler + // restarts. + static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> + ProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array; + MOZ_RELEASE_ASSERT( + array.initCapacity(sInstance->mLiveProfiledThreads.length() + + sInstance->mDeadProfiledThreads.length())); + for (auto& t : sInstance->mLiveProfiledThreads) { + MOZ_RELEASE_ASSERT(array.append( + std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get()))); + } + for (auto& t : sInstance->mDeadProfiledThreads) { + MOZ_RELEASE_ASSERT( + array.append(std::make_pair((RegisteredThread*)nullptr, t.get()))); + } + + std::sort(array.begin(), array.end(), + [](const std::pair<RegisteredThread*, ProfiledThreadData*>& a, + const std::pair<RegisteredThread*, ProfiledThreadData*>& b) { + return a.second->Info()->RegisterTime() < + b.second->Info()->RegisterTime(); + }); + return array; + } + + static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + Vector<RefPtr<PageInformation>> array; + for (auto& d : CorePS::RegisteredPages(aLock)) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + for (auto& d : sInstance->mDeadProfiledPages) { + MOZ_RELEASE_ASSERT(array.append(d)); + } + // We don't need to sort the pages like threads since we won't show them + // as a list. + return array; + } + + // Do a linear search through mLiveProfiledThreads to find the + // ProfiledThreadData object for a RegisteredThread. + static ProfiledThreadData* GetProfiledThreadData( + PSLockRef, RegisteredThread* aRegisteredThread) { + MOZ_ASSERT(sInstance); + for (const LiveProfiledThreadData& thread : + sInstance->mLiveProfiledThreads) { + if (thread.mRegisteredThread == aRegisteredThread) { + return thread.mProfiledThreadData.get(); + } + } + return nullptr; + } + + static ProfiledThreadData* AddLiveProfiledThread( + PSLockRef, RegisteredThread* aRegisteredThread, + UniquePtr<ProfiledThreadData>&& aProfiledThreadData) { + MOZ_ASSERT(sInstance); + MOZ_RELEASE_ASSERT( + sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{ + aRegisteredThread, std::move(aProfiledThreadData)})); + + // Return a weak pointer to the ProfiledThreadData object. + return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get(); + } + + static void UnregisterThread(PSLockRef aLockRef, + RegisteredThread* aRegisteredThread) { + MOZ_ASSERT(sInstance); + + DiscardExpiredDeadProfiledThreads(aLockRef); + + // Find the right entry in the mLiveProfiledThreads array and remove the + // element, moving the ProfiledThreadData object for the thread into the + // mDeadProfiledThreads array. + // The thread's RegisteredThread object gets destroyed here. + for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) { + LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i]; + if (thread.mRegisteredThread == aRegisteredThread) { + thread.mProfiledThreadData->NotifyUnregistered( + sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append( + std::move(thread.mProfiledThreadData))); + sInstance->mLiveProfiledThreads.erase( + &sInstance->mLiveProfiledThreads[i]); + return; + } + } + } + + PS_GET_AND_SET(bool, IsPaused) + + // True if sampling is paused (though generic `SetIsPaused()` or specific + // `SetIsSamplingPaused()`). + static bool IsSamplingPaused(PSLockRef lock) { + MOZ_ASSERT(sInstance); + return IsPaused(lock) || sInstance->mIsSamplingPaused; + } + + static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) { + MOZ_ASSERT(sInstance); + sInstance->mIsSamplingPaused = aIsSamplingPaused; + } + + static void DiscardExpiredDeadProfiledThreads(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead threads that were unregistered before bufferRangeStart. + sInstance->mDeadProfiledThreads.eraseIf( + [bufferRangeStart]( + const UniquePtr<ProfiledThreadData>& aProfiledThreadData) { + Maybe<uint64_t> bufferPosition = + aProfiledThreadData->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this thread"); + return *bufferPosition < bufferRangeStart; + }); + } + + static void UnregisterPage(PSLockRef aLock, + uint64_t aRegisteredInnerWindowID) { + MOZ_ASSERT(sInstance); + auto& registeredPages = CorePS::RegisteredPages(aLock); + for (size_t i = 0; i < registeredPages.length(); i++) { + RefPtr<PageInformation>& page = registeredPages[i]; + if (page->InnerWindowID() == aRegisteredInnerWindowID) { + page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd()); + MOZ_RELEASE_ASSERT( + sInstance->mDeadProfiledPages.append(std::move(page))); + registeredPages.erase(®isteredPages[i--]); + } + } + } + + static void DiscardExpiredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard any dead pages that were unregistered before + // bufferRangeStart. + sInstance->mDeadProfiledPages.eraseIf( + [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) { + Maybe<uint64_t> bufferPosition = + aProfiledPage->BufferPositionWhenUnregistered(); + MOZ_RELEASE_ASSERT(bufferPosition, + "should have unregistered this page"); + return *bufferPosition < bufferRangeStart; + }); + } + + static void ClearUnregisteredPages(PSLockRef) { + MOZ_ASSERT(sInstance); + sInstance->mDeadProfiledPages.clear(); + } + + static void ClearExpiredExitProfiles(PSLockRef) { + MOZ_ASSERT(sInstance); + uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart(); + // Discard exit profiles that were gathered before our buffer RangeStart. + sInstance->mExitProfiles.eraseIf( + [bufferRangeStart](const ExitProfile& aExitProfile) { + return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart; + }); + } + + static void AddExitProfile(PSLockRef aLock, const std::string& aExitProfile) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append( + ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()})); + } + + static Vector<std::string> MoveExitProfiles(PSLockRef aLock) { + MOZ_ASSERT(sInstance); + + ClearExpiredExitProfiles(aLock); + + Vector<std::string> profiles; + MOZ_RELEASE_ASSERT( + profiles.initCapacity(sInstance->mExitProfiles.length())); + for (auto& profile : sInstance->mExitProfiles) { + MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON))); + } + sInstance->mExitProfiles.clear(); + return profiles; + } + + private: + // The singleton instance. + static ActivePS* sInstance; + + const TimeStamp mProfilingStartTime; + + // We need to track activity generations. If we didn't we could have the + // following scenario. + // + // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks + // gPSMutex, deletes the SamplerThread (which does a join). + // + // - profiler_start() runs on a different thread, locks gPSMutex, + // re-instantiates ActivePS, unlocks gPSMutex -- all before the join + // completes. + // + // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated, + // and continues as if the start/stop pair didn't occur. Also + // profiler_stop() is stuck, unable to finish. + // + // By checking ActivePS *and* the generation, we can avoid this scenario. + // sNextGeneration is used to track the next generation number; it is static + // because it must persist across different ActivePS instantiations. + const uint32_t mGeneration; + static uint32_t sNextGeneration; + + // The maximum number of 8-byte entries in mProfileBuffer. + const PowerOfTwo32 mCapacity; + + // The maximum duration of entries in mProfileBuffer, in seconds. + const Maybe<double> mDuration; + + // The interval between samples, measured in milliseconds. + const double mInterval; + + // The profile features that are enabled. + const uint32_t mFeatures; + + // Substrings of names of threads we want to profile. + Vector<std::string> mFilters; + Vector<std::string> mFiltersLowered; + + // The chunk manager used by `mProfileBuffer` below. + // May become null if it gets transferred to the Gecko Profiler. + UniquePtr<ProfileBufferChunkManagerWithLocalLimit> mProfileBufferChunkManager; + + // The buffer into which all samples are recorded. + ProfileBuffer mProfileBuffer; + + // ProfiledThreadData objects for any threads that were profiled at any point + // during this run of the profiler: + // - mLiveProfiledThreads contains all threads that are still registered, and + // - mDeadProfiledThreads contains all threads that have already been + // unregistered but for which there is still data in the profile buffer. + Vector<LiveProfiledThreadData> mLiveProfiledThreads; + Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads; + + // Info on all the dead pages. + // Registered pages are being moved to this array after unregistration. + // We are keeping them in case we need them in the profile data. + // We are removing them when we ensure that we won't need them anymore. + Vector<RefPtr<PageInformation>> mDeadProfiledPages; + + // The current sampler thread. This class is not responsible for destroying + // the SamplerThread object; the Destroy() method returns it so the caller + // can destroy it. + SamplerThread* const mSamplerThread; + + // Is the profiler fully paused? + bool mIsPaused; + + // Is the profiler periodic sampling paused? + bool mIsSamplingPaused; + + struct ExitProfile { + std::string mJSON; + uint64_t mBufferPositionAtGatherTime; + }; + Vector<ExitProfile> mExitProfiles; +}; + +ActivePS* ActivePS::sInstance = nullptr; +uint32_t ActivePS::sNextGeneration = 0; + +#undef PS_GET +#undef PS_GET_LOCKLESS +#undef PS_GET_AND_SET + +namespace detail { + +TimeStamp GetProfilingStartTime() { + if (!CorePS::Exists()) { + return {}; + } + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + return {}; + } + return ActivePS::ProfilingStartTime(); +} + +[[nodiscard]] MFBT_API UniquePtr<ProfileBufferChunkManagerWithLocalLimit> +ExtractBaseProfilerChunkManager() { + PSAutoLock lock; + if (MOZ_UNLIKELY(!ActivePS::Exists(lock))) { + return nullptr; + } + return ActivePS::ExtractBaseProfilerChunkManager(lock); +} + +} // namespace detail + +Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0); + +/* static */ +void RacyFeatures::SetActive(uint32_t aFeatures) { + sActiveAndFeatures = Active | aFeatures; +} + +/* static */ +void RacyFeatures::SetInactive() { sActiveAndFeatures = 0; } + +/* static */ +bool RacyFeatures::IsActive() { return uint32_t(sActiveAndFeatures) & Active; } + +/* static */ +void RacyFeatures::SetPaused() { sActiveAndFeatures |= Paused; } + +/* static */ +void RacyFeatures::SetUnpaused() { sActiveAndFeatures &= ~Paused; } + +/* static */ +void RacyFeatures::SetSamplingPaused() { sActiveAndFeatures |= SamplingPaused; } + +/* static */ +void RacyFeatures::SetSamplingUnpaused() { + sActiveAndFeatures &= ~SamplingPaused; +} + +/* static */ +bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && (af & aFeature); +} + +/* static */ +bool RacyFeatures::IsActiveWithoutFeature(uint32_t aFeature) { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && !(af & aFeature); +} + +/* static */ +bool RacyFeatures::IsActiveAndUnpaused() { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && !(af & Paused); +} + +/* static */ +bool RacyFeatures::IsActiveAndSamplingUnpaused() { + uint32_t af = sActiveAndFeatures; // copy it first + return (af & Active) && !(af & (Paused | SamplingPaused)); +} + +// Each live thread has a RegisteredThread, and we store a reference to it in +// TLS. This class encapsulates that TLS. +class TLSRegisteredThread { + public: + static bool Init(PSLockRef) { + bool ok1 = sRegisteredThread.init(); + bool ok2 = AutoProfilerLabel::sProfilingStack.init(); + return ok1 && ok2; + } + + // Get the entire RegisteredThread. Accesses are guarded by gPSMutex. + static class RegisteredThread* RegisteredThread(PSLockRef) { + return sRegisteredThread.get(); + } + + // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex. + static class RacyRegisteredThread* RacyRegisteredThread() { + class RegisteredThread* registeredThread = sRegisteredThread.get(); + return registeredThread ? ®isteredThread->RacyRegisteredThread() + : nullptr; + } + + // Get only the ProfilingStack. Accesses are not guarded by gPSMutex. + // RacyRegisteredThread() can also be used to get the ProfilingStack, but that + // is marginally slower because it requires an extra pointer indirection. + static ProfilingStack* Stack() { + return AutoProfilerLabel::sProfilingStack.get(); + } + + static void SetRegisteredThread(PSLockRef, + class RegisteredThread* aRegisteredThread) { + sRegisteredThread.set(aRegisteredThread); + AutoProfilerLabel::sProfilingStack.set( + aRegisteredThread + ? &aRegisteredThread->RacyRegisteredThread().ProfilingStack() + : nullptr); + } + + private: + // This is a non-owning reference to the RegisteredThread; + // CorePS::mRegisteredThreads is the owning reference. On thread + // deregistration, this reference is cleared and the RegisteredThread is + // destroyed. + static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread; +}; + +MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread; + +/* static */ +ProfilingStack* AutoProfilerLabel::GetProfilingStack() { + return sProfilingStack.get(); +} + +// Although you can access a thread's ProfilingStack via +// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer +// directly to the ProfilingStack. Here's why. +// +// - We need to be able to push to and pop from the ProfilingStack in +// AutoProfilerLabel. +// +// - The class functions are hot and must be defined in BaseProfiler.h so they +// can be inlined. +// +// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in +// BaseProfiler.h. +// +// This second pointer isn't ideal, but does provide a way to satisfy those +// constraints. TLSRegisteredThread is responsible for updating it. +MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack; + +namespace detail { + +[[nodiscard]] MFBT_API TimeStamp GetThreadRegistrationTime() { + if (!CorePS::Exists()) { + return {}; + } + + PSAutoLock lock; + + RegisteredThread* registeredThread = + TLSRegisteredThread::RegisteredThread(lock); + if (!registeredThread) { + return {}; + } + + return registeredThread->Info()->RegisterTime(); +} + +} // namespace detail + +// The name of the main thread. +static const char* const kMainThreadName = "GeckoMain"; + +//////////////////////////////////////////////////////////////////////// +// BEGIN sampling/unwinding code + +// The registers used for stack unwinding and a few other sampling purposes. +// The ctor does nothing; users are responsible for filling in the fields. +class Registers { + public: + Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {} + + void Clear() { memset(this, 0, sizeof(*this)); } + + // These fields are filled in by + // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace + // samples, and by REGISTERS_SYNC_POPULATE for synchronous samples. + Address mPC; // Instruction pointer. + Address mSP; // Stack pointer. + Address mFP; // Frame pointer. + Address mLR; // ARM link register. +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // This contains all the registers, which means it duplicates the four fields + // above. This is ok. + ucontext_t* mContext; // The context from the signal handler. +#endif +}; + +// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time +// looping on corrupted stacks. +static const size_t MAX_NATIVE_FRAMES = 1024; + +struct NativeStack { + void* mPCs[MAX_NATIVE_FRAMES]; + void* mSPs[MAX_NATIVE_FRAMES]; + size_t mCount; // Number of frames filled. + + NativeStack() : mPCs(), mSPs(), mCount(0) {} +}; + +// Merges the profiling stack and native stack, outputting the details to +// aCollector. +static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, const NativeStack& aNativeStack, + ProfilerStackCollector& aCollector) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + const ProfilingStack& profilingStack = + aRegisteredThread.RacyRegisteredThread().ProfilingStack(); + const ProfilingStackFrame* profilingStackFrames = profilingStack.frames; + uint32_t profilingStackFrameCount = profilingStack.stackSize(); + + Maybe<uint64_t> samplePosInBuffer; + if (!aIsSynchronous) { + // aCollector.SamplePositionInBuffer() will return Nothing() when + // profiler_suspend_and_sample_thread is called from the background hang + // reporter. + samplePosInBuffer = aCollector.SamplePositionInBuffer(); + } + // While the profiling stack array is ordered oldest-to-youngest, the JS and + // native arrays are ordered youngest-to-oldest. We must add frames to aInfo + // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS + // and native arrays backwards. Note: this means the terminating condition + // jsIndex and nativeIndex is being < 0. + uint32_t profilingStackIndex = 0; + int32_t nativeIndex = aNativeStack.mCount - 1; + + uint8_t* lastLabelFrameStackAddr = nullptr; + + // Iterate as long as there is at least one frame remaining. + while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) { + // There are 1 to 3 frames available. Find and add the oldest. + uint8_t* profilingStackAddr = nullptr; + uint8_t* nativeStackAddr = nullptr; + + if (profilingStackIndex != profilingStackFrameCount) { + const ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + if (profilingStackFrame.isLabelFrame() || + profilingStackFrame.isSpMarkerFrame()) { + lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress(); + } + + // Skip any JS_OSR frames. Such frames are used when the JS interpreter + // enters a jit frame on a loop edge (via on-stack-replacement, or OSR). + // To avoid both the profiling stack frame and jit frame being recorded + // (and showing up twice), the interpreter marks the interpreter + // profiling stack frame as JS_OSR to ensure that it doesn't get counted. + if (profilingStackFrame.isOSRFrame()) { + profilingStackIndex++; + continue; + } + + MOZ_ASSERT(lastLabelFrameStackAddr); + profilingStackAddr = lastLabelFrameStackAddr; + } + + if (nativeIndex >= 0) { + nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex]; + } + + // If there's a native stack frame which has the same SP as a profiling + // stack frame, pretend we didn't see the native stack frame. Ditto for a + // native stack frame which has the same SP as a JS stack frame. In effect + // this means profiling stack frames or JS frames trump conflicting native + // frames. + if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) { + nativeStackAddr = nullptr; + nativeIndex--; + MOZ_ASSERT(profilingStackAddr); + } + + // Sanity checks. + MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr); + + // Check to see if profiling stack frame is top-most. + if (profilingStackAddr > nativeStackAddr) { + MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount); + const ProfilingStackFrame& profilingStackFrame = + profilingStackFrames[profilingStackIndex]; + + // Sp marker frames are just annotations and should not be recorded in + // the profile. + if (!profilingStackFrame.isSpMarkerFrame()) { + if (aIsSynchronous && profilingStackFrame.categoryPair() == + ProfilingCategoryPair::PROFILER) { + // For stacks captured synchronously (ie. marker stacks), stop + // walking the stack as soon as we enter the profiler category, + // to avoid showing profiler internal code in marker stacks. + return; + } + aCollector.CollectProfilingStackFrame(profilingStackFrame); + } + profilingStackIndex++; + continue; + } + + // If we reach here, there must be a native stack frame and it must be the + // greatest frame. + if (nativeStackAddr) { + MOZ_ASSERT(nativeIndex >= 0); + void* addr = (void*)aNativeStack.mPCs[nativeIndex]; + aCollector.CollectNativeLeafAddr(addr); + } + if (nativeIndex >= 0) { + nativeIndex--; + } + } +} + +#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK) +static HANDLE GetThreadHandle(PlatformData* aData); +#endif + +#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK) +static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, + void* aClosure) { + NativeStack* nativeStack = static_cast<NativeStack*>(aClosure); + MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES); + nativeStack->mSPs[nativeStack->mCount] = aSP; + nativeStack->mPCs[nativeStack->mCount] = aPC; + nativeStack->mCount++; +} +#endif + +#if defined(USE_FRAME_POINTER_STACK_WALK) +static void DoFramePointerBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Start with the current function. We use 0 as the frame number here because + // the FramePointerStackWalk() call below will use 1..N. This is a bit weird + // but it doesn't matter because StackWalkCallback() doesn't use the frame + // number argument. + StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack); + + uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount); + + const void* stackEnd = aRegisteredThread.StackTop(); + if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) { + FramePointerStackWalk(StackWalkCallback, maxFrames, &aNativeStack, + reinterpret_cast<void**>(aRegs.mFP), + const_cast<void*>(stackEnd)); + } +} +#endif + +#if defined(USE_MOZ_STACK_WALK) +static void DoMozStackWalkBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + // Start with the current function. We use 0 as the frame number here because + // the MozStackWalkThread() call below will use 1..N. This is a bit weird but + // it doesn't matter because StackWalkCallback() doesn't use the frame number + // argument. + StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack); + + uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount); + + HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData()); + MOZ_ASSERT(thread); + MozStackWalkThread(StackWalkCallback, maxFrames, &aNativeStack, thread, + /* context */ nullptr); +} +#endif + +#ifdef USE_EHABI_STACKWALK +static void DoEHABIBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + aNativeStack.mCount = + EHABIStackWalk(aRegs.mContext->uc_mcontext, + const_cast<void*>(aRegisteredThread.StackTop()), + aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES); +} +#endif + +#ifdef USE_LUL_STACKWALK + +// See the comment at the callsite for why this function is necessary. +# if defined(MOZ_HAVE_ASAN_IGNORE) +MOZ_ASAN_IGNORE static void ASAN_memcpy(void* aDst, const void* aSrc, + size_t aLen) { + // The obvious thing to do here is call memcpy(). However, although + // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the + // false positive still manifests! So we must implement memcpy() ourselves + // within this function. + char* dst = static_cast<char*>(aDst); + const char* src = static_cast<const char*>(aSrc); + + for (size_t i = 0; i < aLen; i++) { + dst[i] = src[i]; + } +} +# endif + +static void DoLULBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, NativeStack& aNativeStack) { + // WARNING: this function runs within the profiler's "critical section". + // WARNING: this function might be called while the profiler is inactive, and + // cannot rely on ActivePS. + + const mcontext_t* mc = &aRegs.mContext->uc_mcontext; + + lul::UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); + +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]); +# elif defined(GP_PLAT_amd64_freebsd) + startRegs.xip = lul::TaggedUWord(mc->mc_rip); + startRegs.xsp = lul::TaggedUWord(mc->mc_rsp); + startRegs.xbp = lul::TaggedUWord(mc->mc_rbp); +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + startRegs.r15 = lul::TaggedUWord(mc->arm_pc); + startRegs.r14 = lul::TaggedUWord(mc->arm_lr); + startRegs.r13 = lul::TaggedUWord(mc->arm_sp); + startRegs.r12 = lul::TaggedUWord(mc->arm_ip); + startRegs.r11 = lul::TaggedUWord(mc->arm_fp); + startRegs.r7 = lul::TaggedUWord(mc->arm_r7); +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.x29 = lul::TaggedUWord(mc->regs[29]); + startRegs.x30 = lul::TaggedUWord(mc->regs[30]); + startRegs.sp = lul::TaggedUWord(mc->sp); +# elif defined(GP_PLAT_arm64_freebsd) + startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr); + startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]); + startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr); + startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp); +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]); +# elif defined(GP_PLAT_mips64_linux) + startRegs.pc = lul::TaggedUWord(mc->pc); + startRegs.sp = lul::TaggedUWord(mc->gregs[29]); + startRegs.fp = lul::TaggedUWord(mc->gregs[30]); +# else +# error "Unknown plat" +# endif + + // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the + // stack's registered top point. Do some basic sanity checks too. This + // assumes that the TaggedUWord holding the stack pointer value is valid, but + // it should be, since it was constructed that way in the code just above. + + // We could construct |stackImg| so that LUL reads directly from the stack in + // question, rather than from a copy of it. That would reduce overhead and + // space use a bit. However, it gives a problem with dynamic analysis tools + // (ASan, TSan, Valgrind) which is that such tools will report invalid or + // racing memory accesses, and such accesses will be reported deep inside LUL. + // By taking a copy here, we can either sanitise the copy (for Valgrind) or + // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have + // to try and suppress errors inside LUL. + // + // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks + // observed in some minutes of testing, whilst keeping the size of this + // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in + // practice are small, 4KB or less, and so the copy costs are insignificant + // compared to other profiler overhead. + // + // |stackImg| is allocated on this (the sampling thread's) stack. That + // implies that the frame for this function is at least N_STACK_BYTES large. + // In general it would be considered unacceptable to have such a large frame + // on a stack, but it only exists for the unwinder thread, and so is not + // expected to be a problem. Allocating it on the heap is troublesome because + // this function runs whilst the sampled thread is suspended, so any heap + // allocation risks deadlock. Allocating it as a global variable is not + // thread safe, which would be a problem if we ever allow multiple sampler + // threads. Hence allocating it on the stack seems to be the least-worst + // option. + + lul::StackImage stackImg; + + { +# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \ + defined(GP_PLAT_amd64_freebsd) + uintptr_t rEDZONE_SIZE = 128; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \ + defined(GP_PLAT_arm64_freebsd) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(GP_PLAT_mips64_linux) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE; +# else +# error "Unknown plat" +# endif + uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop()); + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = 0; + if (start < end) { + nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + stackImg.mLen = nToCopy; + stackImg.mStartAvma = start; + if (nToCopy > 0) { + // If this is a vanilla memcpy(), ASAN makes the following complaint: + // + // ERROR: AddressSanitizer: stack-buffer-underflow ... + // ... + // HINT: this may be a false positive if your program uses some custom + // stack unwind mechanism or swapcontext + // + // This code is very much a custom stack unwind mechanism! So we use an + // alternative memcpy() implementation that is ignored by ASAN. +# if defined(MOZ_HAVE_ASAN_IGNORE) + ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# else + memcpy(&stackImg.mContents[0], (void*)start, nToCopy); +# endif + (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy); + } + } + + size_t framePointerFramesAcquired = 0; + lul::LUL* lul = CorePS::Lul(aLock); + lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs), + reinterpret_cast<uintptr_t*>(aNativeStack.mSPs), + &aNativeStack.mCount, &framePointerFramesAcquired, + MAX_NATIVE_FRAMES, &startRegs, &stackImg); + + // Update stats in the LUL stats object. Unfortunately this requires + // three global memory operations. + lul->mStats.mContext += 1; + lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired; + lul->mStats.mFP += framePointerFramesAcquired; +} + +#endif + +#ifdef HAVE_NATIVE_UNWIND +static void DoNativeBacktrace(PSLockRef aLock, + const RegisteredThread& aRegisteredThread, + const Registers& aRegs, + NativeStack& aNativeStack) { + // This method determines which stackwalker is used for periodic and + // synchronous samples. (Backtrace samples are treated differently, see + // profiler_suspend_and_sample_thread() for details). The only part of the + // ordering that matters is that LUL must precede FRAME_POINTER, because on + // Linux they can both be present. +# if defined(USE_LUL_STACKWALK) + DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# elif defined(USE_EHABI_STACKWALK) + DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# elif defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack); +# else +# error "Invalid configuration" +# endif +} +#endif + +// Writes some components shared by periodic and synchronous profiles to +// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample() +// and DoPeriodicSample().) +// +// The grammar for entry sequences is in a comment above +// ProfileBuffer::StreamSamplesToJSON. +static inline void DoSharedSample( + PSLockRef aLock, bool aIsSynchronous, RegisteredThread& aRegisteredThread, + const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart, + ProfileBuffer& aBuffer, + StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_ASSERT(!aBuffer.IsThreadSafe(), + "Mutexes cannot be used inside this critical section"); + + MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock)); + + ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart); + NativeStack nativeStack; +#if defined(HAVE_NATIVE_UNWIND) + if (ActivePS::FeatureStackWalk(aLock) && + aCaptureOptions == StackCaptureOptions::Full) { + DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack); + + MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread, + aRegs, nativeStack, collector); + } else +#endif + { + MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread, + aRegs, nativeStack, collector); + + // We can't walk the whole native stack, but we can record the top frame. + if (aCaptureOptions == StackCaptureOptions::Full) { + aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC)); + } + } +} + +// Writes the components of a synchronous sample to the given ProfileBuffer. +static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread, + const TimeStamp& aNow, const Registers& aRegs, + ProfileBuffer& aBuffer, + StackCaptureOptions aCaptureOptions) { + // WARNING: this function runs within the profiler's "critical section". + + MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack, + "DoSyncSample should not be called when no capture is needed"); + + const uint64_t bufferRangeStart = aBuffer.BufferRangeStart(); + + const uint64_t samplePos = + aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId()); + + TimeDuration delta = aNow - CorePS::ProcessStartTime(); + aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + + DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs, + samplePos, bufferRangeStart, aBuffer, aCaptureOptions); +} + +// Writes the components of a periodic sample to ActivePS's ProfileBuffer. +// The ThreadId entry is already written in the main ProfileBuffer, its location +// is `aSamplePos`, we can write the rest to `aBuffer` (which may be different). +static void DoPeriodicSample(PSLockRef aLock, + RegisteredThread& aRegisteredThread, + ProfiledThreadData& aProfiledThreadData, + const Registers& aRegs, uint64_t aSamplePos, + uint64_t aBufferRangeStart, + ProfileBuffer& aBuffer) { + // WARNING: this function runs within the profiler's "critical section". + + DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs, + aSamplePos, aBufferRangeStart, aBuffer); +} + +// END sampling/unwinding code +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN saving/streaming code + +const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL; + +static int64_t SafeJSInteger(uint64_t aValue) { + return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1; +} + +static void AddSharedLibraryInfoToStream(JSONWriter& aWriter, + const SharedLibrary& aLib) { + aWriter.StartObjectElement(); + aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart())); + aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd())); + aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset())); + aWriter.StringProperty("name", aLib.GetModuleName()); + aWriter.StringProperty("path", aLib.GetModulePath()); + aWriter.StringProperty("debugName", aLib.GetDebugName()); + aWriter.StringProperty("debugPath", aLib.GetDebugPath()); + aWriter.StringProperty("breakpadId", aLib.GetBreakpadId()); + aWriter.StringProperty("codeId", aLib.GetCodeId()); + aWriter.StringProperty("arch", aLib.GetArch()); + aWriter.EndObject(); +} + +void AppendSharedLibraries(JSONWriter& aWriter) { + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + info.SortByAddress(); + for (size_t i = 0; i < info.GetSize(); i++) { + AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i)); + } +} + +static void StreamCategories(SpliceableJSONWriter& aWriter) { + // Same order as ProfilingCategory. Format: + // [ + // { + // name: "Idle", + // color: "transparent", + // subcategories: ["Other"], + // }, + // { + // name: "Other", + // color: "grey", + // subcategories: [ + // "JSM loading", + // "Subprocess launching", + // "DLL loading" + // ] + // }, + // ... + // ] + +#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \ + aWriter.Start(); \ + aWriter.StringProperty("name", labelAsString); \ + aWriter.StringProperty("color", color); \ + aWriter.StartArrayProperty("subcategories"); +#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \ + aWriter.StringElement(labelAsString); +#define CATEGORY_JSON_END_CATEGORY \ + aWriter.EndArray(); \ + aWriter.EndObject(); + + MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY, + CATEGORY_JSON_SUBCATEGORY, + CATEGORY_JSON_END_CATEGORY) + +#undef CATEGORY_JSON_BEGIN_CATEGORY +#undef CATEGORY_JSON_SUBCATEGORY +#undef CATEGORY_JSON_END_CATEGORY +} + +static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) { + // Get an array view with all registered marker-type-specific functions. + base_profiler_markers_detail::Streaming::LockedMarkerTypeFunctionsList + markerTypeFunctionsArray; + // List of streamed marker names, this is used to spot duplicates. + std::set<std::string> names; + // Stream the display schema for each different one. (Duplications may come + // from the same code potentially living in different libraries.) + for (const auto& markerTypeFunctions : markerTypeFunctionsArray) { + auto name = markerTypeFunctions.mMarkerTypeNameFunction(); + // std::set.insert(T&&) returns a pair, its `second` is true if the element + // was actually inserted (i.e., it was not there yet.) + const bool didInsert = + names.insert(std::string(name.data(), name.size())).second; + if (didInsert) { + markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name); + } + } +} + +static int64_t MicrosecondsSince1970(); + +static void StreamMetaJSCustomObject(PSLockRef aLock, + SpliceableJSONWriter& aWriter, + bool aIsShuttingDown) { + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + aWriter.IntProperty("version", 27); + + // The "startTime" field holds the number of milliseconds since midnight + // January 1, 1970 GMT. This grotty code computes (Now - (Now - + // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form. + // Note: This is the only absolute time in the profile! All other timestamps + // are relative to this startTime. + TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime(); + aWriter.DoubleProperty( + "startTime", MicrosecondsSince1970() / 1000.0 - delta.ToMilliseconds()); + + aWriter.DoubleProperty("profilingStartTime", (ActivePS::ProfilingStartTime() - + CorePS::ProcessStartTime()) + .ToMilliseconds()); + + if (const TimeStamp contentEarliestTime = + ActivePS::Buffer(aLock) + .UnderlyingChunkedBuffer() + .GetEarliestChunkStartTimeStamp(); + !contentEarliestTime.IsNull()) { + aWriter.DoubleProperty( + "contentEarliestTime", + (contentEarliestTime - CorePS::ProcessStartTime()).ToMilliseconds()); + } else { + aWriter.NullProperty("contentEarliestTime"); + } + + const double profilingEndTime = profiler_time(); + aWriter.DoubleProperty("profilingEndTime", profilingEndTime); + + if (aIsShuttingDown) { + aWriter.DoubleProperty("shutdownTime", profilingEndTime); + } else { + aWriter.NullProperty("shutdownTime"); + } + + aWriter.StartArrayProperty("categories"); + StreamCategories(aWriter); + aWriter.EndArray(); + + aWriter.StartArrayProperty("markerSchema"); + StreamMarkerSchema(aWriter); + aWriter.EndArray(); + + if (!profiler_is_main_thread()) { + // Leave the rest of the properties out if we're not on the main thread. + // At the moment, the only case in which this function is called on a + // background thread is if we're in a content process and are going to + // send this profile to the parent process. In that case, the parent + // process profile's "meta" object already has the rest of the properties, + // and the parent process profile is dumped on that process's main thread. + return; + } + + aWriter.DoubleProperty("interval", ActivePS::Interval(aLock)); + aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock)); + +#ifdef DEBUG + aWriter.IntProperty("debug", 1); +#else + aWriter.IntProperty("debug", 0); +#endif + + aWriter.IntProperty("gcpoison", 0); + + aWriter.IntProperty("asyncstack", 0); + + aWriter.IntProperty("processType", 0); +} + +static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + ActivePS::DiscardExpiredPages(aLock); + for (const auto& page : ActivePS::ProfiledPages(aLock)) { + page->StreamJSON(aWriter); + } +} + +static void locked_profiler_stream_json_for_this_process( + PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime, + bool aIsShuttingDown, bool aOnlyThreads = false) { + LOG("locked_profiler_stream_json_for_this_process"); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + AUTO_PROFILER_STATS(base_locked_profiler_stream_json_for_this_process); + + const double collectionStartMs = profiler_time(); + + ProfileBuffer& buffer = ActivePS::Buffer(aLock); + + // If there is a set "Window length", discard older data. + Maybe<double> durationS = ActivePS::Duration(aLock); + if (durationS.isSome()) { + const double durationStartMs = collectionStartMs - *durationS * 1000; + buffer.DiscardSamplesBeforeTime(durationStartMs); + } + + if (!aOnlyThreads) { + // Put shared library info + aWriter.StartArrayProperty("libs"); + AppendSharedLibraries(aWriter); + aWriter.EndArray(); + + // Put meta data + aWriter.StartObjectProperty("meta"); + { StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown); } + aWriter.EndObject(); + + // Put page data + aWriter.StartArrayProperty("pages"); + { StreamPages(aLock, aWriter); } + aWriter.EndArray(); + + buffer.StreamProfilerOverheadToJSON(aWriter, CorePS::ProcessStartTime(), + aSinceTime); + buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(), + aSinceTime); + + // Lists the samples for each thread profile + aWriter.StartArrayProperty("threads"); + } + + // if aOnlyThreads is true, the only output will be the threads array items. + { + ActivePS::DiscardExpiredDeadProfiledThreads(aLock); + Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> threads = + ActivePS::ProfiledThreads(aLock); + for (auto& thread : threads) { + ProfiledThreadData* profiledThreadData = thread.second; + profiledThreadData->StreamJSON( + buffer, aWriter, CorePS::ProcessName(aLock), CorePS::ETLDplus1(aLock), + CorePS::ProcessStartTime(), aSinceTime); + } + } + + if (!aOnlyThreads) { + aWriter.EndArray(); + + aWriter.StartArrayProperty("pausedRanges"); + { buffer.StreamPausedRangesToJSON(aWriter, aSinceTime); } + aWriter.EndArray(); + } + + const double collectionEndMs = profiler_time(); + + // Record timestamps for the collection into the buffer, so that consumers + // know why we didn't collect any samples for its duration. + // We put these entries into the buffer after we've collected the profile, + // so they'll be visible for the *next* profile collection (if they haven't + // been overwritten due to buffer wraparound by then). + buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs)); + buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs)); +} + +bool profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter, + double aSinceTime, + bool aIsShuttingDown, + bool aOnlyThreads) { + LOG("profiler_stream_json_for_this_process"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime, + aIsShuttingDown, aOnlyThreads); + return true; +} + +// END saving/streaming code +//////////////////////////////////////////////////////////////////////// + +static char FeatureCategory(uint32_t aFeature) { + if (aFeature & DefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'D'; + } + return 'd'; + } + + if (aFeature & StartupExtraDefaultFeatures()) { + if (aFeature & AvailableFeatures()) { + return 'S'; + } + return 's'; + } + + if (aFeature & AvailableFeatures()) { + return '-'; + } + return 'x'; +} + +static void PrintUsage() { + PrintToConsole( + "\n" + "Profiler environment variable usage:\n" + "\n" + " MOZ_BASE_PROFILER_HELP\n" + " If set to any value, prints this message.\n" + " (Only BaseProfiler features are known here; Use MOZ_PROFILER_HELP\n" + " for Gecko Profiler help, with more features).\n" + "\n" + " MOZ_BASE_PROFILER_{,DEBUG_,VERBOSE}LOGGING\n" + " Enables BaseProfiler logging to stdout. The levels of logging\n" + " available are MOZ_BASE_PROFILER_LOGGING' (least verbose),\n" + " '..._DEBUG_LOGGING', '..._VERBOSE_LOGGING' (most verbose)\n" + "\n" + " MOZ_PROFILER_STARTUP\n" + " If set to any value other than '' or '0'/'N'/'n', starts the\n" + " profiler immediately on start-up.\n" + " Useful if you want profile code that runs very early.\n" + "\n" + " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the number of entries\n" + " per process in the profiler's circular buffer when the profiler is\n" + " first started.\n" + " If unset, the platform default is used:\n" + " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n" + " (%u bytes per entry -> %u or %u total bytes per process)\n" + " Optional units in bytes: KB, KiB, MB, MiB, GB, GiB\n" + "\n" + " MOZ_PROFILER_STARTUP_DURATION=<1..>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time\n" + " of entries in the the profiler's circular buffer when the profiler\n" + " is first started, in seconds.\n" + " If unset, the life time of the entries will only be restricted by\n" + " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n" + " additional time duration restriction will be applied.\n" + "\n" + " MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n" + " measured in milliseconds, when the profiler is first started.\n" + " If unset, the platform default is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n" + " features, as the integer value of the features bitfield.\n" + " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n" + "\n" + " MOZ_PROFILER_STARTUP_FEATURES=<Features>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n" + " features, as a comma-separated list of strings.\n" + " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n" + " If unset, the platform default is used.\n" + "\n" + " Features: (x=unavailable, D/d=default/unavailable,\n" + " S/s=MOZ_PROFILER_STARTUP extra " + "default/unavailable)\n", + unsigned(ActivePS::scMinimumBufferEntries), + unsigned(ActivePS::scMaximumBufferEntries), + unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value()), + unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value()), + unsigned(scBytesPerEntry), + unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry), + unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * + scBytesPerEntry)); + +#define PRINT_FEATURE(n_, str_, Name_, desc_) \ + PrintToConsole(" %c %7u: \"%s\" (%s)\n", \ + FeatureCategory(ProfilerFeature::Name_), \ + ProfilerFeature::Name_, str_, desc_); + + BASE_PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE) + +#undef PRINT_FEATURE + + PrintToConsole( + " - \"default\" (All above D+S defaults)\n" + "\n" + " MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n" + " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as " + "a\n" + " comma-separated list of strings. A given thread will be sampled if\n" + " any of the filters is a case-insensitive substring of the thread\n" + " name. If unset, a default is used.\n" + "\n" + " MOZ_PROFILER_SHUTDOWN\n" + " If set, the profiler saves a profile to the named file on shutdown.\n" + "\n" + " MOZ_PROFILER_SYMBOLICATE\n" + " If set, the profiler will pre-symbolicate profiles.\n" + " *Note* This will add a significant pause when gathering data, and\n" + " is intended mainly for local development.\n" + "\n" + " MOZ_PROFILER_LUL_TEST\n" + " If set to any value, runs LUL unit tests at startup.\n" + "\n" + " This platform %s native unwinding.\n" + "\n", +#if defined(HAVE_NATIVE_UNWIND) + "supports" +#else + "does not support" +#endif + ); +} + +//////////////////////////////////////////////////////////////////////// +// BEGIN Sampler + +#if defined(GP_OS_linux) || defined(GP_OS_android) +struct SigHandlerCoordinator; +#endif + +// Sampler performs setup and teardown of the state required to sample with the +// profiler. Sampler may exist when ActivePS is not present. +// +// SuspendAndSampleAndResumeThread must only be called from a single thread, +// and must not sample the thread it is being called from. A separate Sampler +// instance must be used for each thread which wants to capture samples. + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// With the exception of SamplerThread, all Sampler objects must be Disable-d +// before releasing the lock which was used to create them. This avoids races +// on linux with the SIGPROF signal handler. + +class Sampler { + public: + // Sets up the profiler such that it can begin sampling. + explicit Sampler(PSLockRef aLock); + + // Disable the sampler, restoring it to its previous state. This must be + // called once, and only once, before the Sampler is destroyed. + void Disable(PSLockRef aLock); + + // This method suspends and resumes the samplee thread. It calls the passed-in + // function-like object aProcessRegs (passing it a populated |const + // Registers&| arg) while the samplee thread is suspended. + // + // Func must be a function-like object of type `void()`. + template <typename Func> + void SuspendAndSampleAndResumeThread( + PSLockRef aLock, const RegisteredThread& aRegisteredThread, + const TimeStamp& aNow, const Func& aProcessRegs); + + private: +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + // Used to restore the SIGPROF handler when ours is removed. + struct sigaction mOldSigprofHandler; + + // This process' ID. Needed as an argument for tgkill in + // SuspendAndSampleAndResumeThread. + BaseProfilerProcessId mMyPid; + + // The sampler thread's ID. Used to assert that it is not sampling itself, + // which would lead to deadlock. + BaseProfilerThreadId mSamplerTid; + + public: + // This is the one-and-only variable used to communicate between the sampler + // thread and the samplee thread's signal handler. It's static because the + // samplee thread's signal handler is static. + static struct SigHandlerCoordinator* sSigHandlerCoordinator; +#endif +}; + +// END Sampler +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN SamplerThread + +// The sampler thread controls sampling and runs whenever the profiler is +// active. It periodically runs through all registered threads, finds those +// that should be sampled, then pauses and samples them. + +class SamplerThread { + public: + // Creates a sampler thread, but doesn't start it. + SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, + double aIntervalMilliseconds, uint32_t aFeatures); + ~SamplerThread(); + + // This runs on (is!) the sampler thread. + void Run(); + + // This runs on the main thread. + void Stop(PSLockRef aLock); + + private: + // This suspends the calling thread for the given number of microseconds. + // Best effort timing. + void SleepMicro(uint32_t aMicroseconds); + + // The sampler used to suspend and sample threads. + Sampler mSampler; + + // The activity generation, for detecting when the sampler thread must stop. + const uint32_t mActivityGeneration; + + // The interval between samples, measured in microseconds. + const int mIntervalMicroseconds; + + // The OS-specific handle for the sampler thread. +#if defined(GP_OS_windows) + HANDLE mThread; +#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \ + defined(GP_OS_android) || defined(GP_OS_freebsd) + pthread_t mThread; +#endif + +#if defined(GP_OS_windows) + bool mNoTimerResolutionChange = true; +#endif + + SamplerThread(const SamplerThread&) = delete; + void operator=(const SamplerThread&) = delete; +}; + +// This function is required because we need to create a SamplerThread within +// ActivePS's constructor, but SamplerThread is defined after ActivePS. It +// could probably be removed by moving some code around. +static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, + double aInterval, uint32_t aFeatures) { + return new SamplerThread(aLock, aGeneration, aInterval, aFeatures); +} + +// This function is the sampler thread. This implementation is used for all +// targets. +void SamplerThread::Run() { + // TODO: If possible, name this thread later on, after NSPR becomes available. + // PR_SetCurrentThreadName("SamplerThread"); + + // Features won't change during this SamplerThread's lifetime, so we can read + // them once and store them locally. + const uint32_t features = []() -> uint32_t { + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + // If there is no active profiler, it doesn't matter what we return, + // because this thread will exit before any feature is used. + return 0; + } + return ActivePS::Features(lock); + }(); + + // Not *no*-stack-sampling means we do want stack sampling. + const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features); + + // Use local BlocksRingBuffer&ProfileBuffer to capture the stack. + // (This is to avoid touching the CorePS::CoreBuffer lock while + // a thread is suspended, because that thread could be working with + // the CorePS::CoreBuffer as well.) + ProfileBufferChunkManagerSingle localChunkManager( + ProfileBufferChunkManager::scExpectedMaximumStackSize); + ProfileChunkedBuffer localBuffer( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager); + ProfileBuffer localProfileBuffer(localBuffer); + + // Will be kept between collections, to know what each collection does. + auto previousState = localBuffer.GetState(); + + // This will be positive if we are running behind schedule (sampling less + // frequently than desired) and negative if we are ahead of schedule. + TimeDuration lastSleepOvershoot = 0; + TimeStamp sampleStart = TimeStamp::Now(); + + while (true) { + // This scope is for |lock|. It ends before we sleep below. + { + PSAutoLock lock; + TimeStamp lockAcquired = TimeStamp::Now(); + + if (!ActivePS::Exists(lock)) { + return; + } + + // At this point profiler_stop() might have been called, and + // profiler_start() might have been called on another thread. If this + // happens the generation won't match. + if (ActivePS::Generation(lock) != mActivityGeneration) { + return; + } + + ActivePS::ClearExpiredExitProfiles(lock); + + TimeStamp expiredMarkersCleaned = TimeStamp::Now(); + + if (int(gSkipSampling) <= 0 && !ActivePS::IsSamplingPaused(lock)) { + TimeDuration delta = sampleStart - CorePS::ProcessStartTime(); + ProfileBuffer& buffer = ActivePS::Buffer(lock); + + // handle per-process generic counters + const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock); + for (auto& counter : counters) { + // create Buffer entries for each counter + buffer.AddEntry(ProfileBufferEntry::CounterId(counter)); + buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + // XXX support keyed maps of counts + // In the future, we'll support keyed counters - for example, counters + // with a key which is a thread ID. For "simple" counters we'll just + // use a key of 0. + int64_t count; + uint64_t number; + counter->Sample(count, number); + buffer.AddEntry(ProfileBufferEntry::CounterKey(0)); + buffer.AddEntry(ProfileBufferEntry::Count(count)); + if (number) { + buffer.AddEntry(ProfileBufferEntry::Number(number)); + } + } + TimeStamp countersSampled = TimeStamp::Now(); + + if (stackSampling) { + const Vector<LiveProfiledThreadData>& liveThreads = + ActivePS::LiveProfiledThreads(lock); + + for (auto& thread : liveThreads) { + RegisteredThread* registeredThread = thread.mRegisteredThread; + ProfiledThreadData* profiledThreadData = + thread.mProfiledThreadData.get(); + RefPtr<ThreadInfo> info = registeredThread->Info(); + + // If the thread is asleep and has been sampled before in the same + // sleep episode, find and copy the previous sample, as that's + // cheaper than taking a new sample. + if (registeredThread->RacyRegisteredThread() + .CanDuplicateLastSampleDueToSleep()) { + bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample( + info->ThreadId(), CorePS::ProcessStartTime(), + profiledThreadData->LastSample()); + if (dup_ok) { + continue; + } + } + + AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample); + + TimeStamp now = TimeStamp::Now(); + + // Record the global profiler buffer's range start now, before + // adding the first entry for this thread's sample. + const uint64_t bufferRangeStart = buffer.BufferRangeStart(); + + // Add the thread ID now, so we know its position in the main + // buffer, which is used by some JS data. (DoPeriodicSample only + // knows about the temporary local buffer.) + const uint64_t samplePos = + buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId()); + profiledThreadData->LastSample() = Some(samplePos); + + // Also add the time, so it's always there after the thread ID, as + // expected by the parser. (Other stack data is optional.) + TimeDuration delta = now - CorePS::ProcessStartTime(); + buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds())); + + mSampler.SuspendAndSampleAndResumeThread( + lock, *registeredThread, now, + [&](const Registers& aRegs, const TimeStamp& aNow) { + DoPeriodicSample(lock, *registeredThread, *profiledThreadData, + aRegs, samplePos, bufferRangeStart, + localProfileBuffer); + }); + + // If data is complete, copy it into the global buffer. + auto state = localBuffer.GetState(); + if (state.mClearedBlockCount != previousState.mClearedBlockCount) { + LOG("Stack sample too big for local storage, needed %u bytes", + unsigned(state.mRangeEnd - previousState.mRangeEnd)); + } else if (state.mRangeEnd - previousState.mRangeEnd >= + *profiler_get_core_buffer().BufferLength()) { + LOG("Stack sample too big for profiler storage, needed %u bytes", + unsigned(state.mRangeEnd - previousState.mRangeEnd)); + } else { + profiler_get_core_buffer().AppendContents(localBuffer); + } + + // Clean up for the next run. + localBuffer.Clear(); + previousState = localBuffer.GetState(); + } + } + +#if defined(USE_LUL_STACKWALK) + // The LUL unwind object accumulates frame statistics. Periodically we + // should poke it to give it a chance to print those statistics. This + // involves doing I/O (fprintf, __android_log_print, etc.) and so + // can't safely be done from the critical section inside + // SuspendAndSampleAndResumeThread, which is why it is done here. + lul::LUL* lul = CorePS::Lul(lock); + if (lul) { + lul->MaybeShowStats(); + } +#endif + TimeStamp threadsSampled = TimeStamp::Now(); + + { + AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests); + ActivePS::FulfillChunkRequests(lock); + } + + buffer.CollectOverheadStats(delta, lockAcquired - sampleStart, + expiredMarkersCleaned - lockAcquired, + countersSampled - expiredMarkersCleaned, + threadsSampled - countersSampled); + } + } + // gPSMutex is not held after this point. + + // Calculate how long a sleep to request. After the sleep, measure how + // long we actually slept and take the difference into account when + // calculating the sleep interval for the next iteration. This is an + // attempt to keep "to schedule" in the presence of inaccuracy of the + // actual sleep intervals. + TimeStamp targetSleepEndTime = + sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds); + TimeStamp beforeSleep = TimeStamp::Now(); + TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep; + double sleepTime = std::max( + 0.0, (targetSleepDuration - lastSleepOvershoot).ToMicroseconds()); + SleepMicro(static_cast<uint32_t>(sleepTime)); + sampleStart = TimeStamp::Now(); + lastSleepOvershoot = + sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime)); + } +} + +// Temporary closing namespaces from enclosing platform.cpp. +} // namespace baseprofiler +} // namespace mozilla + +// We #include these files directly because it means those files can use +// declarations from this file trivially. These provide target-specific +// implementations of all SamplerThread methods except Run(). +#if defined(GP_OS_windows) +# include "platform-win32.cpp" +#elif defined(GP_OS_darwin) +# include "platform-macos.cpp" +#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) +# include "platform-linux-android.cpp" +#else +# error "bad platform" +#endif + +namespace mozilla { +namespace baseprofiler { + +UniquePlatformData AllocPlatformData(BaseProfilerThreadId aThreadId) { + return UniquePlatformData(new PlatformData(aThreadId)); +} + +void PlatformDataDestructor::operator()(PlatformData* aData) { delete aData; } + +// END SamplerThread +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// BEGIN externally visible functions + +static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) { + if (strcmp(aFeature, "default") == 0) { + return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures()) + : DefaultFeatures()) & + AvailableFeatures(); + } + +#define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \ + if (strcmp(aFeature, str_) == 0) { \ + return ProfilerFeature::Name_; \ + } + + BASE_PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT) + +#undef PARSE_FEATURE_BIT + + PrintToConsole("\nUnrecognized feature \"%s\".\n\n", aFeature); + // Since we may have an old feature we don't implement anymore, don't exit. + PrintUsage(); + return 0; +} + +uint32_t ParseFeaturesFromStringArray(const char** aFeatures, + uint32_t aFeatureCount, + bool aIsStartup /* = false */) { + uint32_t features = 0; + for (size_t i = 0; i < aFeatureCount; i++) { + features |= ParseFeature(aFeatures[i], aIsStartup); + } + return features; +} + +// Find the RegisteredThread for the current thread. This should only be called +// in places where TLSRegisteredThread can't be used. +static RegisteredThread* FindCurrentThreadRegisteredThread(PSLockRef aLock) { + BaseProfilerThreadId id = profiler_current_thread_id(); + const Vector<UniquePtr<RegisteredThread>>& registeredThreads = + CorePS::RegisteredThreads(aLock); + for (auto& registeredThread : registeredThreads) { + if (registeredThread->Info()->ThreadId() == id) { + return registeredThread.get(); + } + } + + return nullptr; +} + +static ProfilingStack* locked_register_thread(PSLockRef aLock, + const char* aName, + void* aStackTop) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + MOZ_ASSERT(!FindCurrentThreadRegisteredThread(aLock)); + + VTUNE_REGISTER_THREAD(aName); + + if (!TLSRegisteredThread::Init(aLock)) { + return nullptr; + } + + RefPtr<ThreadInfo> info = new ThreadInfo(aName, profiler_current_thread_id(), + profiler_is_main_thread()); + UniquePtr<RegisteredThread> registeredThread = + MakeUnique<RegisteredThread>(info, aStackTop); + + TLSRegisteredThread::SetRegisteredThread(aLock, registeredThread.get()); + + if (ActivePS::Exists(aLock) && ActivePS::ShouldProfileThread(aLock, info)) { + registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true); + ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(), + MakeUnique<ProfiledThreadData>(info)); + } + + ProfilingStack* profilingStack = + ®isteredThread->RacyRegisteredThread().ProfilingStack(); + + CorePS::AppendRegisteredThread(aLock, std::move(registeredThread)); + + return profilingStack; +} + +static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity, + double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + const Maybe<double>& aDuration); + +static Vector<const char*> SplitAtCommas(const char* aString, + UniquePtr<char[]>& aStorage) { + size_t len = strlen(aString); + aStorage = MakeUnique<char[]>(len + 1); + PodCopy(aStorage.get(), aString, len + 1); + + // Iterate over all characters in aStorage and split at commas, by + // overwriting commas with the null char. + Vector<const char*> array; + size_t currentElementStart = 0; + for (size_t i = 0; i <= len; i++) { + if (aStorage[i] == ',') { + aStorage[i] = '\0'; + } + if (aStorage[i] == '\0') { + // Only add non-empty elements, otherwise ParseFeatures would later + // complain about unrecognized features. + if (currentElementStart != i) { + MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart])); + } + currentElementStart = i + 1; + } + } + return array; +} + +static const char* get_size_suffix(const char* str) { + const char* ptr = str; + + while (isdigit(*ptr)) { + ptr++; + } + + return ptr; +} + +void profiler_init(void* aStackTop) { + LOG("profiler_init"); + + profiler_init_main_thread_id(); + + VTUNE_INIT(); + + MOZ_RELEASE_ASSERT(!CorePS::Exists()); + + if (getenv("MOZ_BASE_PROFILER_HELP")) { + PrintUsage(); + exit(0); + } + + SharedLibraryInfo::Initialize(); + + uint32_t features = DefaultFeatures() & AvailableFeatures(); + + UniquePtr<char[]> filterStorage; + + Vector<const char*> filters; + MOZ_RELEASE_ASSERT(filters.append(kMainThreadName)); + + PowerOfTwo32 capacity = BASE_PROFILER_DEFAULT_ENTRIES; + Maybe<double> duration = Nothing(); + double interval = BASE_PROFILER_DEFAULT_INTERVAL; + + { + PSAutoLock lock; + + // We've passed the possible failure point. Instantiate CorePS, which + // indicates that the profiler has initialized successfully. + CorePS::Create(lock); + + Unused << locked_register_thread(lock, kMainThreadName, aStackTop); + + // Platform-specific initialization. + PlatformInit(lock); + + // (Linux-only) We could create CorePS::mLul and read unwind info into it + // at this point. That would match the lifetime implied by destruction of + // it in profiler_shutdown() just below. However, that gives a big delay on + // startup, even if no profiling is actually to be done. So, instead, it is + // created on demand at the first call to PlatformStart(). + + const char* startupEnv = getenv("MOZ_PROFILER_STARTUP"); + if (!startupEnv || startupEnv[0] == '\0' || + ((startupEnv[0] == '0' || startupEnv[0] == 'N' || + startupEnv[0] == 'n') && + startupEnv[1] == '\0')) { + return; + } + + // Hidden option to stop Base Profiler, mostly due to Talos intermittents, + // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3 + // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325. + if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) { + return; + } + + LOG("- MOZ_PROFILER_STARTUP is set"); + + // Startup default capacity may be different. + capacity = BASE_PROFILER_DEFAULT_STARTUP_ENTRIES; + + const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES"); + if (startupCapacity && startupCapacity[0] != '\0') { + errno = 0; + long capacityLong = strtol(startupCapacity, nullptr, 10); + std::string_view sizeSuffix = get_size_suffix(startupCapacity); + + if (sizeSuffix == "KB") { + capacityLong *= 1000 / scBytesPerEntry; + } else if (sizeSuffix == "KiB") { + capacityLong *= 1024 / scBytesPerEntry; + } else if (sizeSuffix == "MB") { + capacityLong *= (1000 * 1000) / scBytesPerEntry; + } else if (sizeSuffix == "MiB") { + capacityLong *= (1024 * 1024) / scBytesPerEntry; + } else if (sizeSuffix == "GB") { + capacityLong *= (1000 * 1000 * 1000) / scBytesPerEntry; + } else if (sizeSuffix == "GiB") { + capacityLong *= (1024 * 1024 * 1024) / scBytesPerEntry; + } else if (!sizeSuffix.empty()) { + PrintToConsole( + "- MOZ_PROFILER_STARTUP_ENTRIES unit must be one of the " + "following: KB, KiB, MB, MiB, GB, GiB"); + PrintUsage(); + exit(1); + } + + // `long` could be 32 or 64 bits, so we force a 64-bit comparison with + // the maximum 32-bit signed number (as more than that is clamped down to + // 2^31 anyway). + if (errno == 0 && capacityLong > 0 && + static_cast<uint64_t>(capacityLong) <= + static_cast<uint64_t>(INT32_MAX)) { + capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries( + static_cast<uint32_t>(capacityLong))); + LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value())); + } else { + PrintToConsole("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s", + startupCapacity); + PrintUsage(); + exit(1); + } + } + + const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION"); + if (startupDuration && startupDuration[0] != '\0') { + // The duration is a floating point number. Use StringToDouble rather than + // strtod, so that "." is used as the decimal separator regardless of OS + // locale. + auto durationVal = StringToDouble(std::string(startupDuration)); + if (durationVal && *durationVal >= 0.0) { + if (*durationVal > 0.0) { + duration = Some(*durationVal); + } + LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", *durationVal); + } else { + PrintToConsole("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s", + startupDuration); + PrintUsage(); + exit(1); + } + } + + const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL"); + if (startupInterval && startupInterval[0] != '\0') { + // The interval is a floating point number. Use StringToDouble rather than + // strtod, so that "." is used as the decimal separator regardless of OS + // locale. + auto intervalValue = StringToDouble(MakeStringSpan(startupInterval)); + if (intervalValue && *intervalValue > 0.0 && *intervalValue <= 1000.0) { + interval = *intervalValue; + LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval); + } else { + PrintToConsole("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s", + startupInterval); + PrintUsage(); + exit(1); + } + } + + features |= StartupExtraDefaultFeatures() & AvailableFeatures(); + + const char* startupFeaturesBitfield = + getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD"); + if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') { + errno = 0; + features = strtol(startupFeaturesBitfield, nullptr, 10); + if (errno == 0) { + LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features); + } else { + PrintToConsole( + "- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s", + startupFeaturesBitfield); + PrintUsage(); + exit(1); + } + } else { + const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES"); + if (startupFeatures) { + // Interpret startupFeatures as a list of feature strings, separated by + // commas. + UniquePtr<char[]> featureStringStorage; + Vector<const char*> featureStringArray = + SplitAtCommas(startupFeatures, featureStringStorage); + features = ParseFeaturesFromStringArray(featureStringArray.begin(), + featureStringArray.length(), + /* aIsStartup */ true); + LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features); + } + } + + const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS"); + if (startupFilters && startupFilters[0] != '\0') { + filters = SplitAtCommas(startupFilters, filterStorage); + LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters); + + if (mozilla::profiler::detail::FiltersExcludePid(filters)) { + LOG(" -> This process is excluded and won't be profiled"); + return; + } + } + + locked_profiler_start(lock, capacity, interval, features, filters.begin(), + filters.length(), duration); + } + + // TODO: Install memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // // start counting memory allocations (outside of lock because this may + // call + // // profiler_add_sampled_counter which would attempt to take the lock.) + // mozilla::profiler::install_memory_counter(true); + // #endif +} + +static void locked_profiler_save_profile_to_file(PSLockRef aLock, + const char* aFilename, + bool aIsShuttingDown); + +static SamplerThread* locked_profiler_stop(PSLockRef aLock); + +void profiler_shutdown() { + LOG("profiler_shutdown"); + + VTUNE_SHUTDOWN(); + + MOZ_RELEASE_ASSERT(profiler_is_main_thread()); + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // If the profiler is active we must get a handle to the SamplerThread before + // ActivePS is destroyed, in order to delete it. + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Save the profile on shutdown if requested. + if (ActivePS::Exists(lock)) { + const char* filename = getenv("MOZ_PROFILER_SHUTDOWN"); + if (filename && filename[0] != '\0') { + locked_profiler_save_profile_to_file(lock, filename, + /* aIsShuttingDown */ true); + } + + samplerThread = locked_profiler_stop(lock); + } + + CorePS::Destroy(lock); + + // We just destroyed CorePS and the ThreadInfos it contains, so we can + // clear this thread's TLSRegisteredThread. + TLSRegisteredThread::SetRegisteredThread(lock, nullptr); + } + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. + if (samplerThread) { + delete samplerThread; + } +} + +static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter, + double aSinceTime, bool aIsShuttingDown, + bool aOnlyThreads = false) { + LOG("WriteProfileToJSONWriter"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (!aOnlyThreads) { + aWriter.Start(); + { + if (!profiler_stream_json_for_this_process( + aWriter, aSinceTime, aIsShuttingDown, aOnlyThreads)) { + return false; + } + + // Don't include profiles from other processes because this is a + // synchronous function. + aWriter.StartArrayProperty("processes"); + aWriter.EndArray(); + } + aWriter.End(); + } else { + aWriter.StartBareList(); + if (!profiler_stream_json_for_this_process(aWriter, aSinceTime, + aIsShuttingDown, aOnlyThreads)) { + return false; + } + aWriter.EndBareList(); + } + return true; +} + +void profiler_set_process_name(const std::string& aProcessName, + const std::string* aETLDplus1) { + LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.c_str(), + aETLDplus1 ? aETLDplus1->c_str() : "<none>"); + PSAutoLock lock; + CorePS::SetProcessName(lock, aProcessName); + if (aETLDplus1) { + CorePS::SetETLDplus1(lock, *aETLDplus1); + } +} + +UniquePtr<char[]> profiler_get_profile(double aSinceTime, bool aIsShuttingDown, + bool aOnlyThreads) { + LOG("profiler_get_profile"); + + SpliceableChunkedJSONWriter b{FailureLatchInfallibleSource::Singleton()}; + if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown, aOnlyThreads)) { + return nullptr; + } + return b.ChunkedWriteFunc().CopyData(); +} + +void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration, + double* aInterval, uint32_t* aFeatures, + Vector<const char*>* aFilters) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + if (!aCapacity || !aDuration || !aInterval || !aFeatures || !aFilters) { + return; + } + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + *aCapacity = 0; + *aDuration = Nothing(); + *aInterval = 0; + *aFeatures = 0; + aFilters->clear(); + return; + } + + *aCapacity = ActivePS::Capacity(lock).Value(); + *aDuration = ActivePS::Duration(lock); + *aInterval = ActivePS::Interval(lock); + *aFeatures = ActivePS::Features(lock); + + const Vector<std::string>& filters = ActivePS::Filters(lock); + MOZ_ALWAYS_TRUE(aFilters->resize(filters.length())); + for (uint32_t i = 0; i < filters.length(); ++i) { + (*aFilters)[i] = filters[i].c_str(); + } +} + +void GetProfilerEnvVarsForChildProcess( + std::function<void(const char* key, const char* value)>&& aSetEnv) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + aSetEnv("MOZ_PROFILER_STARTUP", ""); + return; + } + + aSetEnv("MOZ_PROFILER_STARTUP", "1"); + auto capacityString = + Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value())); + aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get()); + + // Use AppendFloat instead of Smprintf with %f because the decimal + // separator used by %f is locale-dependent. But the string we produce needs + // to be parseable by strtod, which only accepts the period character as a + // decimal separator. AppendFloat always uses the period character. + std::string intervalString = std::to_string(ActivePS::Interval(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.c_str()); + + auto featuresString = Smprintf("%d", ActivePS::Features(lock)); + aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get()); + + std::string filtersString; + const Vector<std::string>& filters = ActivePS::Filters(lock); + for (uint32_t i = 0; i < filters.length(); ++i) { + filtersString += filters[i]; + if (i != filters.length() - 1) { + filtersString += ","; + } + } + aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str()); +} + +void profiler_received_exit_profile(const std::string& aExitProfile) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + if (!ActivePS::Exists(lock)) { + return; + } + ActivePS::AddExitProfile(lock, aExitProfile); +} + +Vector<std::string> profiler_move_exit_profiles() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + PSAutoLock lock; + Vector<std::string> profiles; + if (ActivePS::Exists(lock)) { + profiles = ActivePS::MoveExitProfiles(lock); + } + return profiles; +} + +static void locked_profiler_save_profile_to_file(PSLockRef aLock, + const char* aFilename, + bool aIsShuttingDown = false) { + LOG("locked_profiler_save_profile_to_file(%s)", aFilename); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + std::ofstream stream; + stream.open(aFilename); + if (stream.is_open()) { + OStreamJSONWriteFunc jw(stream); + SpliceableJSONWriter w(jw, FailureLatchInfallibleSource::Singleton()); + w.Start(); + { + locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0, + aIsShuttingDown); + + w.StartArrayProperty("processes"); + Vector<std::string> exitProfiles = ActivePS::MoveExitProfiles(aLock); + for (auto& exitProfile : exitProfiles) { + if (!exitProfile.empty()) { + w.Splice(exitProfile); + } + } + w.EndArray(); + } + w.End(); + + stream.close(); + } +} + +void baseprofiler_save_profile_to_file(const char* aFilename) { + LOG("baseprofiler_save_profile_to_file(%s)", aFilename); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + locked_profiler_save_profile_to_file(lock, aFilename); +} + +uint32_t profiler_get_available_features() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + return AvailableFeatures(); +} + +Maybe<ProfilerBufferInfo> profiler_get_buffer_info() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return Nothing(); + } + + return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo()); +} + +// This basically duplicates AutoProfilerLabel's constructor. +static void* MozGlueBaseLabelEnter(const char* aLabel, + const char* aDynamicString, void* aSp) { + ProfilingStack* profilingStack = AutoProfilerLabel::sProfilingStack.get(); + if (profilingStack) { + profilingStack->pushLabelFrame(aLabel, aDynamicString, aSp, + ProfilingCategoryPair::OTHER); + } + return profilingStack; +} + +// This basically duplicates AutoProfilerLabel's destructor. +static void MozGlueBaseLabelExit(void* sProfilingStack) { + if (sProfilingStack) { + reinterpret_cast<ProfilingStack*>(sProfilingStack)->pop(); + } +} + +static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity, + double aInterval, uint32_t aFeatures, + const char** aFilters, uint32_t aFilterCount, + const Maybe<double>& aDuration) { + const TimeStamp profilingStartTime = TimeStamp::Now(); + + if (LOG_TEST) { + LOG("locked_profiler_start"); + LOG("- capacity = %d", int(aCapacity.Value())); + LOG("- duration = %.2f", aDuration ? *aDuration : -1); + LOG("- interval = %.2f", aInterval); + +#define LOG_FEATURE(n_, str_, Name_, desc_) \ + if (ProfilerFeature::Has##Name_(aFeatures)) { \ + LOG("- feature = %s", str_); \ + } + + BASE_PROFILER_FOR_EACH_FEATURE(LOG_FEATURE) + +#undef LOG_FEATURE + + for (uint32_t i = 0; i < aFilterCount; i++) { + LOG("- threads = %s", aFilters[i]); + } + } + + MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock)); + + mozilla::base_profiler_markers_detail::EnsureBufferForMainThreadAddMarker(); + +#if defined(GP_PLAT_amd64_windows) + InitializeWin64ProfilerHooks(); +#endif + + // Fall back to the default values if the passed-in values are unreasonable. + // We want to be able to store at least one full stack. + // TODO: Review magic numbers. + PowerOfTwo32 capacity = + (aCapacity.Value() >= + ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry) + ? aCapacity + : BASE_PROFILER_DEFAULT_ENTRIES; + Maybe<double> duration = aDuration; + + if (aDuration && *aDuration <= 0) { + duration = Nothing(); + } + double interval = aInterval > 0 ? aInterval : BASE_PROFILER_DEFAULT_INTERVAL; + + ActivePS::Create(aLock, profilingStartTime, capacity, interval, aFeatures, + aFilters, aFilterCount, duration); + + // Set up profiling for each registered thread, if appropriate. + const Vector<UniquePtr<RegisteredThread>>& registeredThreads = + CorePS::RegisteredThreads(aLock); + for (auto& registeredThread : registeredThreads) { + RefPtr<ThreadInfo> info = registeredThread->Info(); + + if (ActivePS::ShouldProfileThread(aLock, info)) { + registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true); + ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(), + MakeUnique<ProfiledThreadData>(info)); + registeredThread->RacyRegisteredThread().ReinitializeOnResume(); + } + } + + // Setup support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(MozGlueBaseLabelEnter, MozGlueBaseLabelExit); + + // At the very end, set up RacyFeatures. + RacyFeatures::SetActive(ActivePS::Features(aLock)); +} + +void profiler_start(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, const Maybe<double>& aDuration) { + LOG("profiler_start"); + + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + // Reset the current state if the profiler is running. + if (ActivePS::Exists(lock)) { + samplerThread = locked_profiler_stop(lock); + } + + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aDuration); + } + + // TODO: Install memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // // start counting memory allocations (outside of lock because this may + // call + // // profiler_add_sampled_counter which would attempt to take the lock.) + // mozilla::profiler::install_memory_counter(true); + // #endif + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. + if (samplerThread) { + delete samplerThread; + } +} + +void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval, + uint32_t aFeatures, const char** aFilters, + uint32_t aFilterCount, + const Maybe<double>& aDuration) { + LOG("profiler_ensure_started"); + + // bool startedProfiler = false; (See TODO below) + SamplerThread* samplerThread = nullptr; + { + PSAutoLock lock; + + // Initialize if necessary. + if (!CorePS::Exists()) { + profiler_init(nullptr); + } + + if (ActivePS::Exists(lock)) { + // The profiler is active. + if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures, + aFilters, aFilterCount)) { + // Stop and restart with different settings. + samplerThread = locked_profiler_stop(lock); + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aDuration); + // startedProfiler = true; (See TODO below) + } + } else { + // The profiler is stopped. + locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters, + aFilterCount, aDuration); + // startedProfiler = true; (See TODO below) + } + } + + // TODO: Install memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // // start counting memory allocations (outside of lock because this may + // // call profiler_add_sampled_counter which would attempt to take the + // // lock.) + // mozilla::profiler::install_memory_counter(true); + // #endif + + // We do these operations with gPSMutex unlocked. The comments in + // profiler_stop() explain why. + if (samplerThread) { + delete samplerThread; + } +} + +[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) { + LOG("locked_profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock)); + + // At the very start, clear RacyFeatures. + RacyFeatures::SetInactive(); + + // TODO: Uninstall memory counter if it is possible from mozglue. + // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY) + // mozilla::profiler::install_memory_counter(false); + // #endif + + // Remove support for pushing/popping labels in mozglue. + RegisterProfilerLabelEnterExit(nullptr, nullptr); + + // Stop sampling live threads. + const Vector<LiveProfiledThreadData>& liveProfiledThreads = + ActivePS::LiveProfiledThreads(aLock); + for (auto& thread : liveProfiledThreads) { + RegisteredThread* registeredThread = thread.mRegisteredThread; + registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false); + } + + // The Stop() call doesn't actually stop Run(); that happens in this + // function's caller when the sampler thread is destroyed. Stop() just gives + // the SamplerThread a chance to do some cleanup with gPSMutex locked. + SamplerThread* samplerThread = ActivePS::Destroy(aLock); + samplerThread->Stop(aLock); + + mozilla::base_profiler_markers_detail::ReleaseBufferForMainThreadAddMarker(); + + return samplerThread; +} + +void profiler_stop() { + LOG("profiler_stop"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + SamplerThread* samplerThread; + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + samplerThread = locked_profiler_stop(lock); + } + + // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we + // would be waiting here with gPSMutex locked for SamplerThread::Run() to + // return so the join operation within the destructor can complete, but Run() + // needs to lock gPSMutex to return. + // + // Because this call occurs with gPSMutex unlocked, it -- including the final + // iteration of Run()'s loop -- must be able detect deactivation and return + // in a way that's safe with respect to other gPSMutex-locking operations + // that may have occurred in the meantime. + delete samplerThread; +} + +bool profiler_is_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsPaused(lock); +} + +void profiler_pause() { + LOG("profiler_pause"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + RacyFeatures::SetPaused(); + ActivePS::SetIsPaused(lock, true); + ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time())); + } +} + +void profiler_resume() { + LOG("profiler_resume"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::Resume(profiler_time())); + ActivePS::SetIsPaused(lock, false); + RacyFeatures::SetUnpaused(); + } +} + +bool profiler_is_sampling_paused() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return false; + } + + return ActivePS::IsSamplingPaused(lock); +} + +void profiler_pause_sampling() { + LOG("profiler_pause_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + RacyFeatures::SetSamplingPaused(); + ActivePS::SetIsSamplingPaused(lock, true); + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::PauseSampling(profiler_time())); + } +} + +void profiler_resume_sampling() { + LOG("profiler_resume_sampling"); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + { + PSAutoLock lock; + + if (!ActivePS::Exists(lock)) { + return; + } + + ActivePS::Buffer(lock).AddEntry( + ProfileBufferEntry::ResumeSampling(profiler_time())); + ActivePS::SetIsSamplingPaused(lock, false); + RacyFeatures::SetSamplingUnpaused(); + } +} + +bool profiler_feature_active(uint32_t aFeature) { + // This function runs both on and off the main thread. + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // This function is hot enough that we use RacyFeatures, not ActivePS. + return RacyFeatures::IsActiveWithFeature(aFeature); +} + +bool profiler_active_without_feature(uint32_t aFeature) { + // This function runs both on and off the main thread. + + // This function is hot enough that we use RacyFeatures, not ActivePS. + return RacyFeatures::IsActiveWithoutFeature(aFeature); +} + +void profiler_add_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + CorePS::AppendCounter(lock, aCounter); +} + +void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) { + DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel); + PSAutoLock lock; + // Note: we don't enforce a final sample, though we could do so if the + // profiler was active + CorePS::RemoveCounter(lock, aCounter); +} + +ProfilingStack* profiler_register_thread(const char* aName, + void* aGuessStackTop) { + DEBUG_LOG("profiler_register_thread(%s)", aName); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (RegisteredThread* thread = FindCurrentThreadRegisteredThread(lock); + thread) { + LOG("profiler_register_thread(%s) - thread %" PRIu64 + " already registered as %s", + aName, uint64_t(profiler_current_thread_id().ToNumber()), + thread->Info()->Name()); + // TODO: Use new name. This is currently not possible because the + // RegisteredThread's ThreadInfo cannot be changed. + // In the meantime, we record a marker that could be used in the frontend. + std::string text("Thread "); + text += std::to_string(profiler_current_thread_id().ToNumber()); + text += " \""; + text += thread->Info()->Name(); + text += "\" attempted to re-register as \""; + text += aName; + text += "\""; + BASE_PROFILER_MARKER_TEXT("profiler_register_thread again", OTHER_Profiling, + MarkerThreadId::MainThread(), text); + + return &thread->RacyRegisteredThread().ProfilingStack(); + } + + void* stackTop = GetStackTop(aGuessStackTop); + return locked_register_thread(lock, aName, stackTop); +} + +void profiler_unregister_thread() { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. + return; + } + + PSAutoLock lock; + + RegisteredThread* registeredThread = FindCurrentThreadRegisteredThread(lock); + MOZ_RELEASE_ASSERT(registeredThread == + TLSRegisteredThread::RegisteredThread(lock)); + if (registeredThread) { + RefPtr<ThreadInfo> info = registeredThread->Info(); + + DEBUG_LOG("profiler_unregister_thread: %s", info->Name()); + + if (ActivePS::Exists(lock)) { + ActivePS::UnregisterThread(lock, registeredThread); + } + + // Clear the pointer to the RegisteredThread object that we're about to + // destroy. + TLSRegisteredThread::SetRegisteredThread(lock, nullptr); + + // Remove the thread from the list of registered threads. This deletes the + // registeredThread object. + CorePS::RemoveRegisteredThread(lock, registeredThread); + } else { + LOG("profiler_unregister_thread() - thread %" PRIu64 + " already unregistered", + uint64_t(profiler_current_thread_id().ToNumber())); + // We cannot record a marker on this thread because it was already + // unregistered. Send it to the main thread (unless this *is* already the + // main thread, which has been unregistered); this may be useful to catch + // mismatched register/unregister pairs in Firefox. + if (BaseProfilerThreadId tid = profiler_current_thread_id(); + tid != profiler_main_thread_id()) { + BASE_PROFILER_MARKER_TEXT( + "profiler_unregister_thread again", OTHER_Profiling, + MarkerThreadId::MainThread(), + std::to_string(profiler_current_thread_id().ToNumber())); + } + // There are two ways FindCurrentThreadRegisteredThread() might have failed. + // + // - TLSRegisteredThread::Init() failed in locked_register_thread(). + // + // - We've already called profiler_unregister_thread() for this thread. + // (Whether or not it should, this does happen in practice.) + // + // Either way, TLSRegisteredThread should be empty. + MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock)); + } +} + +void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID, + const std::string& aUrl, + uint64_t aEmbedderInnerWindowID) { + DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")", + aTabID, aInnerWindowID, aUrl.c_str(), aEmbedderInnerWindowID); + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + // When a Browsing context is first loaded, the first url loaded in it will be + // about:blank. Because of that, this call keeps the first non-about:blank + // registration of window and discards the previous one. + RefPtr<PageInformation> pageInfo = + new PageInformation(aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID); + CorePS::AppendRegisteredPage(lock, std::move(pageInfo)); + + // After appending the given page to CorePS, look for the expired + // pages and remove them if there are any. + if (ActivePS::Exists(lock)) { + ActivePS::DiscardExpiredPages(lock); + } +} + +void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. + return; + } + + PSAutoLock lock; + + // During unregistration, if the profiler is active, we have to keep the + // page information since there may be some markers associated with the given + // page. But if profiler is not active. we have no reason to keep the + // page information here because there can't be any marker associated with it. + if (ActivePS::Exists(lock)) { + ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID); + } else { + CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID); + } +} + +void profiler_clear_all_pages() { + if (!CorePS::Exists()) { + // This function can be called after the main thread has already shut down. + return; + } + + { + PSAutoLock lock; + CorePS::ClearRegisteredPages(lock); + if (ActivePS::Exists(lock)) { + ActivePS::ClearUnregisteredPages(lock); + } + } +} + +void profiler_thread_sleep() { + // This function runs both on and off the main thread. + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + if (!racyRegisteredThread) { + return; + } + + racyRegisteredThread->SetSleeping(); +} + +void profiler_thread_wake() { + // This function runs both on and off the main thread. + + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + if (!racyRegisteredThread) { + return; + } + + racyRegisteredThread->SetAwake(); +} + +bool detail::IsThreadBeingProfiled() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + const RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + return racyRegisteredThread && racyRegisteredThread->IsBeingProfiled(); +} + +bool profiler_thread_is_sleeping() { + MOZ_RELEASE_ASSERT(profiler_is_main_thread()); + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + RacyRegisteredThread* racyRegisteredThread = + TLSRegisteredThread::RacyRegisteredThread(); + if (!racyRegisteredThread) { + return false; + } + return racyRegisteredThread->IsSleeping(); +} + +double profiler_time() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + TimeDuration delta = TimeStamp::Now() - CorePS::ProcessStartTime(); + return delta.ToMilliseconds(); +} + +bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer, + StackCaptureOptions aCaptureOptions) { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + PSAutoLock lock; + + if (!ActivePS::Exists(lock) || + aCaptureOptions == StackCaptureOptions::NoStack) { + return false; + } + + RegisteredThread* registeredThread = + TLSRegisteredThread::RegisteredThread(lock); + if (!registeredThread) { + MOZ_ASSERT(registeredThread); + return false; + } + + ProfileBuffer profileBuffer(aChunkedBuffer); + + Registers regs; +#if defined(HAVE_NATIVE_UNWIND) + REGISTERS_SYNC_POPULATE(regs); +#else + regs.Clear(); +#endif + + DoSyncSample(lock, *registeredThread, TimeStamp::Now(), regs, profileBuffer, + aCaptureOptions); + + return true; +} + +UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() { + MOZ_RELEASE_ASSERT(CorePS::Exists()); + AUTO_BASE_PROFILER_LABEL("baseprofiler::profiler_capture_backtrace", + PROFILER); + + // Quick is-active check before allocating a buffer. + // If NoMarkerStacks is set, we don't want to capture a backtrace. + if (!profiler_active_without_feature(ProfilerFeature::NoMarkerStacks)) { + return nullptr; + } + + auto buffer = MakeUnique<ProfileChunkedBuffer>( + ProfileChunkedBuffer::ThreadSafety::WithoutMutex, + MakeUnique<ProfileBufferChunkManagerSingle>( + ProfileBufferChunkManager::scExpectedMaximumStackSize)); + + if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) { + return nullptr; + } + + return buffer; +} + +UniqueProfilerBacktrace profiler_get_backtrace() { + UniquePtr<ProfileChunkedBuffer> buffer = profiler_capture_backtrace(); + + if (!buffer) { + return nullptr; + } + + return UniqueProfilerBacktrace( + new ProfilerBacktrace("SyncProfile", std::move(buffer))); +} + +void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) { + delete aBacktrace; +} + +bool profiler_is_locked_on_current_thread() { + // This function is used to help users avoid calling `profiler_...` functions + // when the profiler may already have a lock in place, which would prevent a + // 2nd recursive lock (resulting in a crash or a never-ending wait). + // So we must return `true` for any of: + // - The main profiler mutex, used by most functions, and/or + // - The buffer mutex, used directly in some functions without locking the + // main mutex, e.g., marker-related functions. + return PSAutoLock::IsLockedOnCurrentThread() || + profiler_get_core_buffer().IsThreadSafeAndLockedOnCurrentThread(); +} + +// This is a simplified version of profiler_add_marker that can be easily passed +// into the JS engine. +void profiler_add_js_marker(const char* aMarkerName, const char* aMarkerText) { + BASE_PROFILER_MARKER_TEXT( + ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, {}, + ProfilerString8View::WrapNullTerminatedString(aMarkerText)); +} + +// NOTE: aCollector's methods will be called while the target thread is paused. +// Doing things in those methods like allocating -- which may try to claim +// locks -- is a surefire way to deadlock. +void profiler_suspend_and_sample_thread(BaseProfilerThreadId aThreadId, + uint32_t aFeatures, + ProfilerStackCollector& aCollector, + bool aSampleNative /* = true */) { + const bool isSynchronous = [&aThreadId]() { + const BaseProfilerThreadId currentThreadId = profiler_current_thread_id(); + if (!aThreadId.IsSpecified()) { + aThreadId = currentThreadId; + return true; + } + return aThreadId == currentThreadId; + }(); + + // Lock the profiler mutex + PSAutoLock lock; + + const Vector<UniquePtr<RegisteredThread>>& registeredThreads = + CorePS::RegisteredThreads(lock); + for (auto& thread : registeredThreads) { + RefPtr<ThreadInfo> info = thread->Info(); + RegisteredThread& registeredThread = *thread.get(); + + if (info->ThreadId() == aThreadId) { + if (info->IsMainThread()) { + aCollector.SetIsMainThread(); + } + + // Allocate the space for the native stack + NativeStack nativeStack; + + auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) { + // The target thread is now suspended. Collect a native + // backtrace, and call the callback. +#if defined(HAVE_FASTINIT_NATIVE_UNWIND) + if (aSampleNative) { + // We can only use FramePointerStackWalk or MozStackWalk from + // suspend_and_sample_thread as other stackwalking methods may not be + // initialized. +# if defined(USE_FRAME_POINTER_STACK_WALK) + DoFramePointerBacktrace(lock, registeredThread, aRegs, nativeStack); +# elif defined(USE_MOZ_STACK_WALK) + DoMozStackWalkBacktrace(lock, registeredThread, aRegs, nativeStack); +# else +# error "Invalid configuration" +# endif + + MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs, + nativeStack, aCollector); + } else +#endif + { + MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs, + nativeStack, aCollector); + + aCollector.CollectNativeLeafAddr((void*)aRegs.mPC); + } + }; + + if (isSynchronous) { + // Sampling the current thread, do NOT suspend it! + Registers regs; +#if defined(HAVE_NATIVE_UNWIND) + REGISTERS_SYNC_POPULATE(regs); +#else + regs.Clear(); +#endif + collectStack(regs, TimeStamp::Now()); + } else { + // Suspend, sample, and then resume the target thread. + Sampler sampler(lock); + TimeStamp now = TimeStamp::Now(); + sampler.SuspendAndSampleAndResumeThread(lock, registeredThread, now, + collectStack); + + // NOTE: Make sure to disable the sampler before it is destroyed, in + // case the profiler is running at the same time. + sampler.Disable(lock); + } + break; + } + } +} + +// END externally visible functions +//////////////////////////////////////////////////////////////////////// + +} // namespace baseprofiler +} // namespace mozilla |