From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- .../BHRTelemetryService.sys.mjs | 161 +++++ .../BackgroundHangMonitor.cpp | 766 +++++++++++++++++++++ .../backgroundhangmonitor/BackgroundHangMonitor.h | 198 ++++++ .../backgroundhangmonitor/HangAnnotations.cpp | 89 +++ .../backgroundhangmonitor/HangAnnotations.h | 71 ++ .../backgroundhangmonitor/HangDetails.cpp | 737 ++++++++++++++++++++ .../components/backgroundhangmonitor/HangDetails.h | 101 +++ .../backgroundhangmonitor/HangTypes.ipdlh | 95 +++ .../backgroundhangmonitor/ThreadStackHelper.cpp | 395 +++++++++++ .../backgroundhangmonitor/ThreadStackHelper.h | 111 +++ .../backgroundhangmonitor/components.conf | 16 + toolkit/components/backgroundhangmonitor/moz.build | 68 ++ .../backgroundhangmonitor/nsIHangDetails.idl | 77 +++ .../tests/child_cause_hang.js | 23 + .../tests/test_BHRObserver.js | 164 +++++ .../backgroundhangmonitor/tests/xpcshell.toml | 14 + 16 files changed, 3086 insertions(+) create mode 100644 toolkit/components/backgroundhangmonitor/BHRTelemetryService.sys.mjs create mode 100644 toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.cpp create mode 100644 toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.h create mode 100644 toolkit/components/backgroundhangmonitor/HangAnnotations.cpp create mode 100644 toolkit/components/backgroundhangmonitor/HangAnnotations.h create mode 100644 toolkit/components/backgroundhangmonitor/HangDetails.cpp create mode 100644 toolkit/components/backgroundhangmonitor/HangDetails.h create mode 100644 toolkit/components/backgroundhangmonitor/HangTypes.ipdlh create mode 100644 toolkit/components/backgroundhangmonitor/ThreadStackHelper.cpp create mode 100644 toolkit/components/backgroundhangmonitor/ThreadStackHelper.h create mode 100644 toolkit/components/backgroundhangmonitor/components.conf create 
mode 100644 toolkit/components/backgroundhangmonitor/moz.build create mode 100644 toolkit/components/backgroundhangmonitor/nsIHangDetails.idl create mode 100644 toolkit/components/backgroundhangmonitor/tests/child_cause_hang.js create mode 100644 toolkit/components/backgroundhangmonitor/tests/test_BHRObserver.js create mode 100644 toolkit/components/backgroundhangmonitor/tests/xpcshell.toml (limited to 'toolkit/components/backgroundhangmonitor') diff --git a/toolkit/components/backgroundhangmonitor/BHRTelemetryService.sys.mjs b/toolkit/components/backgroundhangmonitor/BHRTelemetryService.sys.mjs new file mode 100644 index 0000000000..98c1274b5c --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/BHRTelemetryService.sys.mjs @@ -0,0 +1,161 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + TelemetryController: "resource://gre/modules/TelemetryController.sys.mjs", +}); + +export function BHRTelemetryService() { + // Allow tests to get access to this object to verify it works correctly. 
+ this.wrappedJSObject = this; + + Services.obs.addObserver(this, "profile-before-change"); + Services.obs.addObserver(this, "bhr-thread-hang"); + Services.obs.addObserver(this, "idle-daily"); + + this.resetPayload(); +} + +BHRTelemetryService.prototype = Object.freeze({ + classID: Components.ID("{117c8cdf-69e6-4f31-a439-b8a654c67127}"), + QueryInterface: ChromeUtils.generateQI(["nsIObserver"]), + + TRANSMIT_HANG_COUNT: 50, + + resetPayload() { + this.startTime = +new Date(); + this.payload = { + modules: [], + hangs: [], + }; + this.clearPermahangFile = false; + }, + + recordHang({ + duration, + thread, + runnableName, + process, + stack, + remoteType, + modules, + annotations, + wasPersisted, + }) { + if (!Services.telemetry.canRecordExtended) { + return; + } + + // Create a mapping from module indices in the original nsIHangDetails + // object to this.payload.modules indices. + let moduleIdxs = modules.map(module => { + let idx = this.payload.modules.findIndex(m => { + return m[0] === module[0] && m[1] === module[1]; + }); + if (idx === -1) { + idx = this.payload.modules.length; + this.payload.modules.push(module); + } + return idx; + }); + + // Native stack frames are [modIdx, offset] arrays. If we have a valid + // module index, we want to map it to the this.payload.modules array. + for (let i = 0; i < stack.length; ++i) { + if (Array.isArray(stack[i]) && stack[i][0] !== -1) { + stack[i][0] = moduleIdxs[stack[i][0]]; + } else if (typeof stack[i] == "string") { + // This is just a precaution - we don't currently know of sensitive + // URLs being included in label frames' dynamic strings which we + // include here, but this is just an added guard. Here we strip any + // string with a :// in it that isn't a chrome:// or resource:// + // URL. This is not completely robust, but we are already trying to + // protect against this by only including dynamic strings from the + // opt-in AUTO_PROFILER_..._NONSENSITIVE macros. 
+ let match = /[^\s]+:\/\/.*/.exec(stack[i]); + if ( + match && + !match[0].startsWith("chrome://") && + !match[0].startsWith("resource://") + ) { + stack[i] = stack[i].replace(match[0], "(excluded)"); + } + } + } + + // Create the hang object to record in the payload. + this.payload.hangs.push({ + duration, + thread, + runnableName, + process, + remoteType, + annotations, + stack, + }); + + if (wasPersisted) { + this.clearPermahangFile = true; + } + + // If we have collected enough hangs, we can submit the hangs we have + // collected to telemetry. + if (this.payload.hangs.length > this.TRANSMIT_HANG_COUNT) { + this.submit(); + } + }, + + submit() { + if (this.clearPermahangFile) { + // NB: This is async but it is called from an Observer callback. + IOUtils.remove( + PathUtils.join(PathUtils.profileDir, "last_permahang.bin") + ); + } + + if (!Services.telemetry.canRecordExtended) { + return; + } + + // NOTE: We check a separate bhrPing.enabled pref here. This pref is unset + // when running tests so that we run as much of BHR as possible (to catch + // errors) while avoiding timeouts caused by invoking `pingsender` during + // testing. 
+ if ( + Services.prefs.getBoolPref("toolkit.telemetry.bhrPing.enabled", false) + ) { + this.payload.timeSinceLastPing = new Date() - this.startTime; + lazy.TelemetryController.submitExternalPing("bhr", this.payload, { + addEnvironment: true, + }); + } + this.resetPayload(); + }, + + shutdown() { + Services.obs.removeObserver(this, "profile-before-change"); + Services.obs.removeObserver(this, "bhr-thread-hang"); + Services.obs.removeObserver(this, "idle-daily"); + this.submit(); + }, + + observe(aSubject, aTopic, aData) { + switch (aTopic) { + case "profile-after-change": + this.resetPayload(); + break; + case "bhr-thread-hang": + this.recordHang(aSubject.QueryInterface(Ci.nsIHangDetails)); + break; + case "profile-before-change": + this.shutdown(); + break; + case "idle-daily": + this.submit(); + break; + } + }, +}); diff --git a/toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.cpp b/toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.cpp new file mode 100644 index 0000000000..3989495ab3 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.cpp @@ -0,0 +1,766 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/BackgroundHangMonitor.h" + +#include +#include + +#include "GeckoProfiler.h" +#include "HangDetails.h" +#include "ThreadStackHelper.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/CPUUsageWatcher.h" +#include "mozilla/LinkedList.h" +#include "mozilla/Monitor.h" +#include "mozilla/Preferences.h" +#include "mozilla/StaticPrefs_toolkit.h" +#include "mozilla/Services.h" +#include "mozilla/StaticPtr.h" +#include "mozilla/Telemetry.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/Unused.h" +#if defined(XP_WIN) +# include "mozilla/WindowsStackWalkInitialization.h" +#endif // XP_WIN +#include "mozilla/dom/RemoteType.h" +#include "nsAppDirectoryServiceDefs.h" +#include "nsIObserver.h" +#include "nsIObserverService.h" +#include "nsIThread.h" +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" +#include "prinrval.h" +#include "prthread.h" + +#include + +#if defined(XP_WIN) +# include "mozilla/NativeNt.h" +#endif + +// Activate BHR only for one in every BHR_BETA_MOD users. +// We're doing experimentation with collecting a lot more data from BHR, and +// don't want to enable it for beta users at the moment. We can scale this up in +// the future. +#define BHR_BETA_MOD INT32_MAX + +// Interval at which we check the global and per-process CPU usage in order to +// determine if there is high external CPU usage. +static const int32_t kCheckCPUIntervalMilliseconds = 2000; + +// A utility comparator function used by std::unique to collapse "(* script)" +// entries in a vector representing a call stack. +bool StackScriptEntriesCollapser(const char* aStackEntry, + const char* aAnotherStackEntry) { + return !strcmp(aStackEntry, aAnotherStackEntry) && + (!strcmp(aStackEntry, "(chrome script)") || + !strcmp(aStackEntry, "(content script)")); +} + +namespace mozilla { + +/** + * BackgroundHangManager is the global object that + * manages all instances of BackgroundHangThread. 
+ */ +class BackgroundHangManager : public nsIObserver { + private: + // Stop hang monitoring + bool mShutdown; + + BackgroundHangManager(const BackgroundHangManager&); + BackgroundHangManager& operator=(const BackgroundHangManager&); + + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIOBSERVER + static StaticRefPtr sInstance; + static bool sDisabled; + + // Lock for access to members of this class + Monitor mLock MOZ_UNANNOTATED; + // List of BackgroundHangThread instances associated with each thread + LinkedList mHangThreads; + + // Unwinding and reporting of hangs is despatched to this thread. + nsCOMPtr mHangProcessingThread; + + // Hang monitor thread + nsCOMPtr mHangMonitorThread; + + ProfilerThreadId mHangMonitorProfilerThreadId; + + void InitMonitorThread() { + mHangMonitorProfilerThreadId = profiler_current_thread_id(); +#if defined(MOZ_GECKO_PROFILER) && defined(XP_WIN) && defined(_M_X64) + // Pre-commit 5 more pages of stack to guarantee enough committed stack + // space on this thread upon hang detection, when we will need to run + // profiler_suspend_and_sample_thread (bug 1840164). + mozilla::nt::CheckStack(5 * 0x1000); +#endif + } + + // Used for recording a permahang in case we don't ever make it back to + // the main thread to record/send it. + nsCOMPtr mPermahangFile; + + // Allows us to watch CPU usage and annotate hangs when the system is + // under high external load. 
+ CPUUsageWatcher mCPUUsageWatcher; + + TimeStamp mLastCheckedCPUUsage; + + void CollectCPUUsage(TimeStamp aNow, bool aForce = false) { + if (aForce || + aNow - mLastCheckedCPUUsage > + TimeDuration::FromMilliseconds(kCheckCPUIntervalMilliseconds)) { + Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr()); + mLastCheckedCPUUsage = aNow; + } + } + + void Shutdown() { + MonitorAutoLock autoLock(mLock); + mShutdown = true; + } + + BackgroundHangManager(); + + private: + virtual ~BackgroundHangManager(); +}; + +NS_IMPL_ISUPPORTS(BackgroundHangManager, nsIObserver) + +NS_IMETHODIMP +BackgroundHangManager::Observe(nsISupports* aSubject, const char* aTopic, + const char16_t* aData) { + if (!strcmp(aTopic, "browser-delayed-startup-finished")) { + MonitorAutoLock autoLock(mLock); + nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR, + getter_AddRefs(mPermahangFile)); + if (NS_SUCCEEDED(rv)) { + mPermahangFile->AppendNative("last_permahang.bin"_ns); + } else { + mPermahangFile = nullptr; + } + + if (mHangProcessingThread && mPermahangFile) { + nsCOMPtr submitRunnable = + new SubmitPersistedPermahangRunnable(mPermahangFile); + mHangProcessingThread->Dispatch(submitRunnable.forget()); + } + nsCOMPtr observerService = + mozilla::services::GetObserverService(); + MOZ_ASSERT(observerService); + observerService->RemoveObserver(BackgroundHangManager::sInstance, + "browser-delayed-startup-finished"); + } else if (!strcmp(aTopic, "profile-after-change")) { + BackgroundHangMonitor::DisableOnBeta(); + nsCOMPtr observerService = + mozilla::services::GetObserverService(); + MOZ_ASSERT(observerService); + observerService->RemoveObserver(BackgroundHangManager::sInstance, + "profile-after-change"); + } else { + return NS_ERROR_UNEXPECTED; + } + + return NS_OK; +} + +/** + * BackgroundHangThread is a per-thread object that is used + * by all instances of BackgroundHangMonitor to monitor hangs. 
+ */ +class BackgroundHangThread final + : public LinkedListElement, + public nsITimerCallback, + public nsINamed { + private: + static MOZ_THREAD_LOCAL(BackgroundHangThread*) sTlsKey; + static bool sTlsKeyInitialized; + + BackgroundHangThread(const BackgroundHangThread&); + BackgroundHangThread& operator=(const BackgroundHangThread&); + ~BackgroundHangThread(); + + /* Keep a reference to the manager, so we can keep going even + after BackgroundHangManager::Shutdown is called. */ + const RefPtr mManager; + // Unique thread ID for identification + const PRThread* mThreadID; + RefPtr mTimer; + TimeStamp mExpectedTimerNotification; + + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSITIMERCALLBACK + NS_DECL_NSINAMED + + /** + * Returns the BackgroundHangThread associated with the + * running thread. Note that this will not find private + * BackgroundHangThread threads. + * + * @return BackgroundHangThread*, or nullptr if no thread + * is found. + */ + static BackgroundHangThread* FindThread(); + + static void Startup() { + /* We can tolerate init() failing. 
*/ + sTlsKeyInitialized = sTlsKey.init(); + } + + // Hang timeout + const TimeDuration mTimeout; + // PermaHang timeout + const TimeDuration mMaxTimeout; + // Time at last activity + TimeStamp mLastActivity; + // Time when a hang started + TimeStamp mHangStart; + // Is the thread in a hang + bool mHanging; + // Is the thread in a waiting state + bool mWaiting; + // Is the thread dedicated to a single BackgroundHangMonitor + BackgroundHangMonitor::ThreadType mThreadType; +#ifdef MOZ_GECKO_PROFILER + // Platform-specific helper to get hang stacks + ThreadStackHelper mStackHelper; +#endif + // Stack of current hang + HangStack mHangStack; + // Annotations for the current hang + BackgroundHangAnnotations mAnnotations; + // Annotators registered for this thread + BackgroundHangAnnotators mAnnotators; + // The name of the runnable which is hanging the current process + nsCString mRunnableName; + // The name of the thread which is being monitored + nsCString mThreadName; + + BackgroundHangThread(const char* aName, uint32_t aTimeoutMs, + uint32_t aMaxTimeoutMs, + BackgroundHangMonitor::ThreadType aThreadType = + BackgroundHangMonitor::THREAD_SHARED); + + // Report a hang; aManager->mLock IS locked. The hang will be processed + // off-main-thread, and will then be submitted back. + void ReportHang(TimeDuration aHangTime, + PersistedToDisk aPersistedToDisk = PersistedToDisk::No); + // Report a permanent hang; aManager->mLock IS locked + void ReportPermaHang(); + // Called by BackgroundHangMonitor::NotifyActivity + void NotifyActivity() { + if (MOZ_UNLIKELY(!mTimer)) { + return; + } + + MonitorAutoLock autoLock(mManager->mLock); + PROFILER_MARKER_UNTYPED( + "NotifyActivity", OTHER, + MarkerThreadId(mManager->mHangMonitorProfilerThreadId)); + + TimeStamp now = TimeStamp::Now(); + if (mWaiting) { + mWaiting = false; + } else if (mHanging) { + // A hang ended. 
+ ReportHang(now - mHangStart); + mHanging = false; + } + mLastActivity = now; + BackgroundHangManager::sInstance->CollectCPUUsage(now); + + // Set or reset the timer. + mExpectedTimerNotification = now + mTimeout; + if (mTimeout != TimeDuration::Forever()) { + mTimer->InitHighResolutionWithCallback(this, mTimeout, + nsITimer::TYPE_ONE_SHOT); + } + } + // Called by BackgroundHangMonitor::NotifyWait + void NotifyWait() { + if (MOZ_UNLIKELY(!mTimer)) { + return; + } + + MonitorAutoLock autoLock(mManager->mLock); + PROFILER_MARKER_UNTYPED( + "NotifyWait", OTHER, + MarkerThreadId(mManager->mHangMonitorProfilerThreadId)); + + if (mWaiting) { + return; + } + + mTimer->Cancel(); + + mLastActivity = TimeStamp::Now(); + + if (mHanging) { + // We were hanging! We're done with that now, so let's report it. + // ReportHang() doesn't do much work on the current thread, and is + // safe to call from any thread as long as we're holding the lock. + ReportHang(mLastActivity - mHangStart); + mHanging = false; + } + mWaiting = true; + } + + // Returns true if this thread is (or might be) shared between other + // BackgroundHangMonitors for the monitored thread. + bool IsShared() { + return mThreadType == BackgroundHangMonitor::THREAD_SHARED; + } +}; + +NS_IMPL_ISUPPORTS(BackgroundHangThread, nsITimerCallback, nsINamed) + +NS_IMETHODIMP +BackgroundHangThread::GetName(nsACString& aName) { + aName.AssignLiteral("BackgroundHangThread_timer"); + return NS_OK; +} + +StaticRefPtr BackgroundHangManager::sInstance; +bool BackgroundHangManager::sDisabled = false; + +MOZ_THREAD_LOCAL(BackgroundHangThread*) BackgroundHangThread::sTlsKey; +bool BackgroundHangThread::sTlsKeyInitialized; + +BackgroundHangManager::BackgroundHangManager() + : mShutdown(false), mLock("BackgroundHangManager") { + // Save a reference to sInstance now so that the destructor is not triggered + // if the InitMonitorThread RunnableMethod is released before we are done. 
+ sInstance = this; + + DebugOnly rv = + NS_NewNamedThread("BHMgr Monitor", getter_AddRefs(mHangMonitorThread), + mozilla::NewRunnableMethod( + "BackgroundHangManager::InitMonitorThread", this, + &BackgroundHangManager::InitMonitorThread)); + + MOZ_ASSERT(NS_SUCCEEDED(rv) && mHangMonitorThread, + "Failed to create BHR monitor thread"); + + rv = NS_NewNamedThread("BHMgr Processor", + getter_AddRefs(mHangProcessingThread)); + MOZ_ASSERT(NS_SUCCEEDED(rv) && mHangProcessingThread, + "Failed to create BHR processing thread"); +} + +BackgroundHangManager::~BackgroundHangManager() { + MOZ_ASSERT(mShutdown, "Destruction without Shutdown call"); + MOZ_ASSERT(mHangThreads.isEmpty(), "Destruction with outstanding monitors"); + MOZ_ASSERT(mHangMonitorThread, "No monitor thread"); + MOZ_ASSERT(mHangProcessingThread, "No processing thread"); + + // NS_NewNamedThread could have failed above due to resource limitation + if (mHangMonitorThread) { + // The monitor thread can only live as long as the instance lives + mHangMonitorThread->Shutdown(); + } + + // Similarly, NS_NewNamedThread above could have failed. + if (mHangProcessingThread) { + mHangProcessingThread->Shutdown(); + } +} + +BackgroundHangThread::BackgroundHangThread( + const char* aName, uint32_t aTimeoutMs, uint32_t aMaxTimeoutMs, + BackgroundHangMonitor::ThreadType aThreadType) + : mManager(BackgroundHangManager::sInstance), + mThreadID(PR_GetCurrentThread()), + mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout + ? TimeDuration::Forever() + : TimeDuration::FromMilliseconds(aTimeoutMs)), + mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout + ? 
TimeDuration::Forever() + : TimeDuration::FromMilliseconds(aMaxTimeoutMs)), + mHanging(false), + mWaiting(true), + mThreadType(aThreadType), + mThreadName(aName) { + if (sTlsKeyInitialized && IsShared()) { + sTlsKey.set(this); + } + if (mManager->mHangMonitorThread) { + mTimer = NS_NewTimer(mManager->mHangMonitorThread); + } + // Lock here because LinkedList is not thread-safe + MonitorAutoLock autoLock(mManager->mLock); + // Add to thread list + mManager->mHangThreads.insertBack(this); +} + +BackgroundHangThread::~BackgroundHangThread() { + // Lock here because LinkedList is not thread-safe + MonitorAutoLock autoLock(mManager->mLock); + // Remove from thread list + remove(); + + // We no longer have a thread + if (sTlsKeyInitialized && IsShared()) { + sTlsKey.set(nullptr); + } +} + +void BackgroundHangThread::ReportHang(TimeDuration aHangTime, + PersistedToDisk aPersistedToDisk) { + // Recovered from a hang; called on the monitor thread + // mManager->mLock IS locked + + HangDetails hangDetails(aHangTime, + nsDependentCString(XRE_GetProcessTypeString()), + NOT_REMOTE_TYPE, mThreadName, mRunnableName, + std::move(mHangStack), std::move(mAnnotations)); + + PersistedToDisk persistedToDisk = aPersistedToDisk; + if (aPersistedToDisk == PersistedToDisk::Yes && XRE_IsParentProcess() && + mManager->mPermahangFile) { + auto res = WriteHangDetailsToFile(hangDetails, mManager->mPermahangFile); + persistedToDisk = res.isOk() ? PersistedToDisk::Yes : PersistedToDisk::No; + } + + // If the hang processing thread exists, we can process the native stack + // on it. Otherwise, we are unable to report a native stack, so we just + // report without one. 
+ if (mManager->mHangProcessingThread) { + nsCOMPtr processHangStackRunnable = + new ProcessHangStackRunnable(std::move(hangDetails), persistedToDisk); + mManager->mHangProcessingThread->Dispatch( + processHangStackRunnable.forget()); + } else { + NS_WARNING("Unable to report native stack without a BHR processing thread"); + RefPtr hd = + new nsHangDetails(std::move(hangDetails), persistedToDisk); + hd->Submit(); + } + + // If the profiler is enabled, add a marker. +#ifdef MOZ_GECKO_PROFILER + if (profiler_thread_is_being_profiled_for_markers( + mStackHelper.GetThreadId())) { + struct HangMarker { + static constexpr Span MarkerTypeName() { + return MakeStringSpan("BHR-detected hang"); + } + static void StreamJSONMarkerData( + baseprofiler::SpliceableJSONWriter& aWriter) {} + static MarkerSchema MarkerTypeDisplay() { + using MS = MarkerSchema; + MS schema{MS::Location::MarkerChart, MS::Location::MarkerTable}; + return schema; + } + }; + + const TimeStamp endTime = TimeStamp::Now(); + const TimeStamp startTime = endTime - aHangTime; + profiler_add_marker("BHR-detected hang", geckoprofiler::category::OTHER, + {MarkerThreadId(mStackHelper.GetThreadId()), + MarkerTiming::Interval(startTime, endTime)}, + HangMarker{}); + } +#endif +} + +void BackgroundHangThread::ReportPermaHang() { + // Permanently hanged; called on the monitor thread + // mManager->mLock IS locked + + // The significance of a permahang is that it's likely that we won't ever + // recover and be allowed to submit this hang. 
On the parent thread, we + // compensate for this by writing the hang details to disk on this thread, + // and in our next session we'll try to read those details + ReportHang(mMaxTimeout, PersistedToDisk::Yes); +} + +NS_IMETHODIMP BackgroundHangThread::Notify(nsITimer* aTimer) { + MOZ_ASSERT(profiler_current_thread_id() == + mManager->mHangMonitorProfilerThreadId); + + MonitorAutoLock autoLock(mManager->mLock); + PROFILER_MARKER_UNTYPED("TimerNotify", OTHER, {}); + + TimeStamp now = TimeStamp::Now(); + if (MOZ_UNLIKELY((now - mExpectedTimerNotification) * 2 > mTimeout)) { + // If the timer notification has been delayed by more than half the timeout + // time, assume the machine is not scheduling tasks correctly and ignore + // this hang. + mWaiting = true; + mHanging = false; + return NS_OK; + } + + TimeDuration hangTime = now - mLastActivity; + if (MOZ_UNLIKELY(hangTime >= mMaxTimeout)) { + // A permahang started. No point in trying to find its exact + // duration, so avoid restarting the timer until there is new + // activity. + mWaiting = true; + mHanging = false; + ReportPermaHang(); + return NS_OK; + } + + if (MOZ_LIKELY(!mHanging && hangTime >= mTimeout)) { +#ifdef MOZ_GECKO_PROFILER + // A hang started, collect a stack + mStackHelper.GetStack(mHangStack, mRunnableName, true); +#endif + + // If we hang immediately on waking, then the most recently collected + // CPU usage is going to be an average across the whole time we were + // sleeping. Accordingly, we want to make sure that when we hang, we + // collect a fresh value. 
+ BackgroundHangManager::sInstance->CollectCPUUsage(now, true); + + mHangStart = mLastActivity; + mHanging = true; + mAnnotations = mAnnotators.GatherAnnotations(); + } + + TimeDuration nextRecheck = mMaxTimeout - hangTime; + mExpectedTimerNotification = now + nextRecheck; + return mTimer->InitHighResolutionWithCallback(this, nextRecheck, + nsITimer::TYPE_ONE_SHOT); +} + +BackgroundHangThread* BackgroundHangThread::FindThread() { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + if (BackgroundHangManager::sInstance == nullptr) { + MOZ_ASSERT(BackgroundHangManager::sDisabled, + "BackgroundHandleManager is not initialized"); + return nullptr; + } + + if (sTlsKeyInitialized) { + // Use TLS if available + return sTlsKey.get(); + } + // If TLS is unavailable, we can search through the thread list + RefPtr manager(BackgroundHangManager::sInstance); + MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown"); + + PRThread* threadID = PR_GetCurrentThread(); + // Lock thread list for traversal + MonitorAutoLock autoLock(manager->mLock); + for (BackgroundHangThread* thread = manager->mHangThreads.getFirst(); thread; + thread = thread->getNext()) { + if (thread->mThreadID == threadID && thread->IsShared()) { + return thread; + } + } +#endif + // Current thread is not initialized + return nullptr; +} + +bool BackgroundHangMonitor::ShouldDisableOnBeta(const nsCString& clientID) { + MOZ_ASSERT(clientID.Length() == 36, "clientID is invalid"); + const char* suffix = clientID.get() + clientID.Length() - 4; + return strtol(suffix, NULL, 16) % BHR_BETA_MOD; +} + +bool BackgroundHangMonitor::DisableOnBeta() { + nsAutoCString clientID; + nsresult rv = + Preferences::GetCString("toolkit.telemetry.cachedClientID", clientID); + bool telemetryEnabled = Telemetry::CanRecordPrereleaseData(); + + if (!telemetryEnabled || NS_FAILED(rv) || + BackgroundHangMonitor::ShouldDisableOnBeta(clientID)) { + if (XRE_IsParentProcess()) { + BackgroundHangMonitor::Shutdown(); + } else { + 
BackgroundHangManager::sDisabled = true; + } + return true; + } + + return false; +} + +void BackgroundHangMonitor::Startup() { + MOZ_RELEASE_ASSERT(NS_IsMainThread()); +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized"); + + if (XRE_IsContentProcess() && + StaticPrefs::toolkit_content_background_hang_monitor_disabled()) { + BackgroundHangManager::sDisabled = true; + return; + } + +# if defined(MOZ_GECKO_PROFILER) && defined(XP_WIN) +# if defined(_M_AMD64) || defined(_M_ARM64) + mozilla::WindowsStackWalkInitialization(); +# endif // _M_AMD64 || _M_ARM64 +# endif // MOZ_GECKO_PROFILER && XP_WIN + + nsCOMPtr observerService = + mozilla::services::GetObserverService(); + MOZ_ASSERT(observerService); + +# ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunreachable-code" +# endif + if constexpr (std::string_view(MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL)) == "beta") { + if (XRE_IsParentProcess()) { // cached ClientID hasn't been read yet + BackgroundHangThread::Startup(); + new BackgroundHangManager(); + Unused << NS_WARN_IF( + BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr()); + observerService->AddObserver(BackgroundHangManager::sInstance, + "profile-after-change", false); + return; + } else if (DisableOnBeta()) { + return; + } + } +# ifdef __clang__ +# pragma clang diagnostic pop +# endif + + BackgroundHangThread::Startup(); + new BackgroundHangManager(); + Unused << NS_WARN_IF( + BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr()); + if (XRE_IsParentProcess()) { + observerService->AddObserver(BackgroundHangManager::sInstance, + "browser-delayed-startup-finished", false); + } +#endif +} + +void BackgroundHangMonitor::Shutdown() { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + if (BackgroundHangManager::sDisabled) { + MOZ_ASSERT(!BackgroundHangManager::sInstance, "Initialized"); + return; + } + + MOZ_ASSERT(BackgroundHangManager::sInstance, "Not 
initialized"); + BackgroundHangManager::sInstance->mCPUUsageWatcher.Uninit(); + /* Scope our lock inside Shutdown() because the sInstance object can + be destroyed as soon as we set sInstance to nullptr below, and + we don't want to hold the lock when it's being destroyed. */ + BackgroundHangManager::sInstance->Shutdown(); + BackgroundHangManager::sInstance = nullptr; + BackgroundHangManager::sDisabled = true; +#endif +} + +BackgroundHangMonitor::BackgroundHangMonitor(const char* aName, + uint32_t aTimeoutMs, + uint32_t aMaxTimeoutMs, + ThreadType aThreadType) + : mThread(aThreadType == THREAD_SHARED ? BackgroundHangThread::FindThread() + : nullptr) { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR +# ifdef MOZ_VALGRIND + // If we're running on Valgrind, we'll be making forward progress at a + // rate of somewhere between 1/25th and 1/50th of normal. This causes the + // BHR to capture a lot of stacks, which slows us down even more. As an + // attempt to avoid the worst of this, scale up all presented timeouts by + // a factor of thirty, and add six seconds so as to impose a six second + // floor on all timeouts. For a non-Valgrind-enabled build, or for an + // enabled build which isn't running on Valgrind, the timeouts are + // unchanged. 
+ if (RUNNING_ON_VALGRIND) { + const uint32_t scaleUp = 30; + const uint32_t extraMs = 6000; + if (aTimeoutMs != BackgroundHangMonitor::kNoTimeout) { + aTimeoutMs *= scaleUp; + aTimeoutMs += extraMs; + } + if (aMaxTimeoutMs != BackgroundHangMonitor::kNoTimeout) { + aMaxTimeoutMs *= scaleUp; + aMaxTimeoutMs += extraMs; + } + } +# endif + + if (!BackgroundHangManager::sDisabled && !mThread) { + mThread = + new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs, aThreadType); + } +#endif +} + +BackgroundHangMonitor::BackgroundHangMonitor() + : mThread(BackgroundHangThread::FindThread()) { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + if (BackgroundHangManager::sDisabled) { + return; + } +#endif +} + +BackgroundHangMonitor::~BackgroundHangMonitor() = default; + +void BackgroundHangMonitor::NotifyActivity() { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + if (mThread == nullptr) { + MOZ_ASSERT(BackgroundHangManager::sDisabled, + "This thread is not initialized for hang monitoring"); + return; + } + + if (Telemetry::CanRecordExtended()) { + mThread->NotifyActivity(); + } +#endif +} + +void BackgroundHangMonitor::NotifyWait() { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + if (mThread == nullptr) { + MOZ_ASSERT(BackgroundHangManager::sDisabled, + "This thread is not initialized for hang monitoring"); + return; + } + + if (Telemetry::CanRecordExtended()) { + mThread->NotifyWait(); + } +#endif +} + +bool BackgroundHangMonitor::RegisterAnnotator( + BackgroundHangAnnotator& aAnnotator) { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + BackgroundHangThread* thisThread = BackgroundHangThread::FindThread(); + if (!thisThread) { + return false; + } + return thisThread->mAnnotators.Register(aAnnotator); +#else + return false; +#endif +} + +bool BackgroundHangMonitor::UnregisterAnnotator( + BackgroundHangAnnotator& aAnnotator) { +#ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR + BackgroundHangThread* thisThread = BackgroundHangThread::FindThread(); + if (!thisThread) { + return false; + } + 
return thisThread->mAnnotators.Unregister(aAnnotator); +#else + return false; +#endif +} + +} // namespace mozilla diff --git a/toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.h b/toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.h new file mode 100644 index 0000000000..037ea5e52d --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/BackgroundHangMonitor.h @@ -0,0 +1,198 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_BackgroundHangMonitor_h +#define mozilla_BackgroundHangMonitor_h + +#include "mozilla/CPUUsageWatcher.h" +#include "mozilla/HangAnnotations.h" +#include "mozilla/Monitor.h" +#include "mozilla/RefPtr.h" + +#include "nsString.h" + +#include + +namespace mozilla { + +class BackgroundHangThread; +class BackgroundHangManager; + +/** + * The background hang monitor is responsible for detecting and reporting + * hangs in main and background threads. A thread registers itself using + * the BackgroundHangMonitor object and periodically calls its methods to + * inform the hang monitor of the thread's activity. Each thread is given + * a thread name, a timeout, and a maximum timeout. If one of the thread's + * tasks runs for longer than the timeout duration but shorter than the + * maximum timeout, a (transient) hang is reported. On the other hand, if + * a task runs for longer than the maximum timeout duration or never + * finishes (e.g. in a deadlock), a permahang is reported. + * + * Tasks are defined arbitrarily, but are typically represented by events + * in an event loop -- processing one event is equivalent to running one + * task. To ensure responsiveness, tasks in a thread often have a target + * running time. 
This is a good starting point for determining the timeout + * and maximum timeout values. For example, the Compositor thread has a + * responsiveness goal of 60Hz or 17ms, so a starting timeout could be + * 100ms. Considering some platforms (e.g. Android) can terminate the app + * when a critical thread hangs for longer than a few seconds, a good + * starting maximum timeout is 4 or 5 seconds. + * + * A thread registers itself through the BackgroundHangMonitor constructor. + * Multiple BackgroundHangMonitor objects can be used in one thread. The + * constructor without arguments can be used when it is known that the thread + * already has a BackgroundHangMonitor registered. When all instances of + * BackgroundHangMonitor are destroyed, the thread is unregistered. + * + * The thread then uses two methods to inform BackgroundHangMonitor of the + * thread's activity: + * + * > BackgroundHangMonitor::NotifyActivity should be called *before* + * starting a task. The task run time is determined by the interval + * between this call and the next NotifyActivity call. + * + * > BackgroundHangMonitor::NotifyWait should be called *before* the + * thread enters a wait state (e.g. to wait for a new event). This + * prevents a waiting thread from being detected as hanging. The wait + * state is automatically cleared at the next NotifyActivity call. 
+ * + * The following example shows hang monitoring in a simple event loop: + * + * void thread_main() + * { + * mozilla::BackgroundHangMonitor hangMonitor("example1", 100, 1000); + * while (!exiting) { + * hangMonitor.NotifyActivity(); + * process_next_event(); + * hangMonitor.NotifyWait(); + * wait_for_next_event(); + * } + * } + * + * The following example shows reentrancy in nested event loops: + * + * void thread_main() + * { + * mozilla::BackgroundHangMonitor hangMonitor("example2", 100, 1000); + * while (!exiting) { + * hangMonitor.NotifyActivity(); + * process_next_event(); + * hangMonitor.NotifyWait(); + * wait_for_next_event(); + * } + * } + * + * void process_next_event() + * { + * mozilla::BackgroundHangMonitor hangMonitor; + * if (is_sync_event) { + * while (!finished_event) { + * hangMonitor.NotifyActivity(); + * process_next_event(); + * hangMonitor.NotifyWait(); + * wait_for_next_event(); + * } + * } else { + * process_nonsync_event(); + * } + * } + */ +class BackgroundHangMonitor { + private: + friend BackgroundHangManager; + + RefPtr mThread; + + static bool ShouldDisableOnBeta(const nsCString&); + static bool DisableOnBeta(); + + public: + static const uint32_t kNoTimeout = 0; + enum ThreadType { + // For a new BackgroundHangMonitor for thread T, only create a new + // monitoring thread for T if one doesn't already exist. If one does, + // share that pre-existing monitoring thread. + THREAD_SHARED, + // For a new BackgroundHangMonitor for thread T, create a new + // monitoring thread for T even if there are other, pre-existing + // monitoring threads for T. + THREAD_PRIVATE + }; + + /** + * Enable hang monitoring. + * Must return before using BackgroundHangMonitor. + */ + static void Startup(); + + /** + * Disable hang monitoring. + * Can be called without destroying all BackgroundHangMonitors first. + */ + static void Shutdown(); + + /** + * Start monitoring hangs for the current thread.
+ * + * @param aName Name to identify the thread with + * @param aTimeoutMs Amount of time in milliseconds without + * activity before registering a hang + * @param aMaxTimeoutMs Amount of time in milliseconds without + * activity before registering a permanent hang + * @param aThreadType + * The ThreadType type of monitoring thread that should be created + * for this monitor. See the documentation for ThreadType. + */ + BackgroundHangMonitor(const char* aName, uint32_t aTimeoutMs, + uint32_t aMaxTimeoutMs, + ThreadType aThreadType = THREAD_SHARED); + + /** + * Monitor hangs using an existing monitor + * associated with the current thread. + */ + BackgroundHangMonitor(); + + /** + * Destroys the hang monitor; hang monitoring for a thread stops + * when all monitors associated with the thread are destroyed. + */ + ~BackgroundHangMonitor(); + + /** + * Notify the hang monitor of pending current thread activity. + * Call this method before starting an "activity" or after + * exiting from a wait state. + */ + void NotifyActivity(); + + /** + * Notify the hang monitor of current thread wait. + * Call this method before entering a wait state; call + * NotifyActivity when subsequently exiting the wait state. + */ + void NotifyWait(); + + /** + * Register an annotator with BHR for the current thread. + * @param aAnnotator annotator to register + * @return true if the annotator was registered, otherwise false. + */ + static bool RegisterAnnotator(BackgroundHangAnnotator& aAnnotator); + + /** + * Unregister an annotator that was previously registered via + * RegisterAnnotator. 
+ * @param aAnnotator annotator to unregister + * @return true if no annotators remain registered after this call + */ + static bool UnregisterAnnotator(BackgroundHangAnnotator& aAnnotator); +}; + +} // namespace mozilla + +#endif // mozilla_BackgroundHangMonitor_h diff --git a/toolkit/components/backgroundhangmonitor/HangAnnotations.cpp b/toolkit/components/backgroundhangmonitor/HangAnnotations.cpp new file mode 100644 index 0000000000..a8093c4781 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/HangAnnotations.cpp @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/HangAnnotations.h" + +#include + +#include "MainThreadUtils.h" +#include "mozilla/DebugOnly.h" +#include "nsXULAppAPI.h" +#include "mozilla/BackgroundHangMonitor.h" + +namespace mozilla { + +void BackgroundHangAnnotations::AddAnnotation(const nsString& aName, + const int32_t aData) { + nsAutoString dataString; + dataString.AppendInt(aData); + AppendElement(HangAnnotation(aName, dataString)); +} + +void BackgroundHangAnnotations::AddAnnotation(const nsString& aName, + const double aData) { + nsAutoString dataString; + dataString.AppendFloat(aData); + AppendElement(HangAnnotation(aName, dataString)); +} + +void BackgroundHangAnnotations::AddAnnotation(const nsString& aName, + const nsString& aData) { + AppendElement(HangAnnotation(aName, aData)); +} + +void BackgroundHangAnnotations::AddAnnotation(const nsString& aName, + const nsCString& aData) { + NS_ConvertUTF8toUTF16 dataString(aData); + AppendElement(HangAnnotation(aName, dataString)); +} + +void BackgroundHangAnnotations::AddAnnotation(const nsString& aName, + const bool aData) { + if (aData) { +
AppendElement(HangAnnotation(aName, u"true"_ns)); + } else { + AppendElement(HangAnnotation(aName, u"false"_ns)); + } +} + +BackgroundHangAnnotators::BackgroundHangAnnotators() + : mMutex("BackgroundHangAnnotators::mMutex") { + MOZ_COUNT_CTOR(BackgroundHangAnnotators); +} + +BackgroundHangAnnotators::~BackgroundHangAnnotators() { + MOZ_ASSERT(mAnnotators.empty()); + MOZ_COUNT_DTOR(BackgroundHangAnnotators); +} + +bool BackgroundHangAnnotators::Register(BackgroundHangAnnotator& aAnnotator) { + MutexAutoLock lock(mMutex); + auto result = mAnnotators.insert(&aAnnotator); + return result.second; +} + +bool BackgroundHangAnnotators::Unregister(BackgroundHangAnnotator& aAnnotator) { + MutexAutoLock lock(mMutex); + DebugOnly::size_type> numErased; + numErased = mAnnotators.erase(&aAnnotator); + MOZ_ASSERT(numErased == 1); + return mAnnotators.empty(); +} + +BackgroundHangAnnotations BackgroundHangAnnotators::GatherAnnotations() { + BackgroundHangAnnotations annotations; + { // Scope for lock + MutexAutoLock lock(mMutex); + for (std::set::iterator i = mAnnotators.begin(), + e = mAnnotators.end(); + i != e; ++i) { + (*i)->AnnotateHang(annotations); + } + } + return annotations; +} + +} // namespace mozilla diff --git a/toolkit/components/backgroundhangmonitor/HangAnnotations.h b/toolkit/components/backgroundhangmonitor/HangAnnotations.h new file mode 100644 index 0000000000..f6667efa42 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/HangAnnotations.h @@ -0,0 +1,71 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_HangAnnotations_h +#define mozilla_HangAnnotations_h + +#include + +#include "mozilla/HangTypes.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/Mutex.h" +#include "mozilla/Vector.h" +#include "nsString.h" +#include "nsTArray.h" +#include "mozilla/ipc/IPDLParamTraits.h" + +namespace mozilla { + +/** + * This class extends nsTArray with some methods for adding + * annotations being reported by a registered hang Annotator. + */ +class BackgroundHangAnnotations : public nsTArray { + public: + void AddAnnotation(const nsString& aName, const int32_t aData); + void AddAnnotation(const nsString& aName, const double aData); + void AddAnnotation(const nsString& aName, const nsString& aData); + void AddAnnotation(const nsString& aName, const nsCString& aData); + void AddAnnotation(const nsString& aName, const bool aData); +}; + +class BackgroundHangAnnotator { + public: + /** + * NB: This function is always called by the BackgroundHangMonitor thread. + * Plan accordingly. 
+ */ + virtual void AnnotateHang(BackgroundHangAnnotations& aAnnotations) = 0; +}; + +class BackgroundHangAnnotators { + public: + BackgroundHangAnnotators(); + ~BackgroundHangAnnotators(); + + bool Register(BackgroundHangAnnotator& aAnnotator); + bool Unregister(BackgroundHangAnnotator& aAnnotator); + + BackgroundHangAnnotations GatherAnnotations(); + + private: + Mutex mMutex MOZ_UNANNOTATED; + std::set mAnnotators; +}; + +namespace ipc { + +template <> +struct IPDLParamTraits + : public IPDLParamTraits> { + typedef mozilla::BackgroundHangAnnotations paramType; +}; + +} // namespace ipc + +} // namespace mozilla + +#endif // mozilla_HangAnnotations_h diff --git a/toolkit/components/backgroundhangmonitor/HangDetails.cpp b/toolkit/components/backgroundhangmonitor/HangDetails.cpp new file mode 100644 index 0000000000..de6ee056f6 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/HangDetails.cpp @@ -0,0 +1,737 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "HangDetails.h" + +#include "nsIHangDetails.h" +#include "nsPrintfCString.h" +#include "js/Array.h" // JS::NewArrayObject +#include "js/PropertyAndElement.h" // JS_DefineElement +#include "mozilla/FileUtils.h" +#include "mozilla/gfx/GPUParent.h" +#include "mozilla/dom/ContentChild.h" +#include "mozilla/dom/ContentParent.h" // For RemoteTypePrefix +#include "mozilla/FileUtils.h" +#include "mozilla/SchedulerGroup.h" +#include "mozilla/Unused.h" +#include "mozilla/GfxMessageUtils.h" // For ParamTraits +#include "mozilla/ResultExtensions.h" +#include "mozilla/Try.h" +#include "shared-libraries.h" + +static const char MAGIC[] = "permahangsavev1"; + +namespace mozilla { + +NS_IMETHODIMP +nsHangDetails::GetWasPersisted(bool* aWasPersisted) { + *aWasPersisted = mPersistedToDisk == PersistedToDisk::Yes; + return NS_OK; +} + +NS_IMETHODIMP +nsHangDetails::GetDuration(double* aDuration) { + *aDuration = mDetails.duration().ToMilliseconds(); + return NS_OK; +} + +NS_IMETHODIMP +nsHangDetails::GetThread(nsACString& aName) { + aName.Assign(mDetails.threadName()); + return NS_OK; +} + +NS_IMETHODIMP +nsHangDetails::GetRunnableName(nsACString& aRunnableName) { + aRunnableName.Assign(mDetails.runnableName()); + return NS_OK; +} + +NS_IMETHODIMP +nsHangDetails::GetProcess(nsACString& aName) { + aName.Assign(mDetails.process()); + return NS_OK; +} + +NS_IMETHODIMP +nsHangDetails::GetRemoteType(nsACString& aName) { + aName.Assign(mDetails.remoteType()); + return NS_OK; +} + +NS_IMETHODIMP +nsHangDetails::GetAnnotations(JSContext* aCx, + JS::MutableHandle aVal) { + // We create an Array with ["key", "value"] string pair entries for each item + // in our annotations object. 
+ auto& annotations = mDetails.annotations(); + size_t length = annotations.Length(); + JS::Rooted retObj(aCx, JS::NewArrayObject(aCx, length)); + if (!retObj) { + return NS_ERROR_OUT_OF_MEMORY; + } + + for (size_t i = 0; i < length; ++i) { + const auto& annotation = annotations[i]; + JS::Rooted annotationPair(aCx, JS::NewArrayObject(aCx, 2)); + if (!annotationPair) { + return NS_ERROR_OUT_OF_MEMORY; + } + + JS::Rooted key(aCx, + JS_NewUCStringCopyN(aCx, annotation.name().get(), + annotation.name().Length())); + if (!key) { + return NS_ERROR_OUT_OF_MEMORY; + } + + JS::Rooted value( + aCx, JS_NewUCStringCopyN(aCx, annotation.value().get(), + annotation.value().Length())); + if (!value) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!JS_DefineElement(aCx, annotationPair, 0, key, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!JS_DefineElement(aCx, annotationPair, 1, value, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!JS_DefineElement(aCx, retObj, i, annotationPair, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + } + + aVal.setObject(*retObj); + return NS_OK; +} + +namespace { + +nsresult StringFrame(JSContext* aCx, JS::RootedObject& aTarget, size_t aIndex, + const char* aString) { + JSString* jsString = JS_NewStringCopyZ(aCx, aString); + if (!jsString) { + return NS_ERROR_OUT_OF_MEMORY; + } + JS::Rooted string(aCx, jsString); + if (!string) { + return NS_ERROR_OUT_OF_MEMORY; + } + if (!JS_DefineElement(aCx, aTarget, aIndex, string, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + return NS_OK; +} + +} // anonymous namespace + +NS_IMETHODIMP +nsHangDetails::GetStack(JSContext* aCx, JS::MutableHandle aStack) { + auto& stack = mDetails.stack(); + uint32_t length = stack.stack().Length(); + JS::Rooted ret(aCx, JS::NewArrayObject(aCx, length)); + if (!ret) { + return NS_ERROR_OUT_OF_MEMORY; + } + + for (uint32_t i = 0; i < length; ++i) { + auto& entry = stack.stack()[i]; + switch (entry.type()) { + case 
HangEntry::TnsCString: { + nsresult rv = StringFrame(aCx, ret, i, entry.get_nsCString().get()); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + case HangEntry::THangEntryBufOffset: { + uint32_t offset = entry.get_HangEntryBufOffset().index(); + + // NOTE: We can't trust the offset we got, as we might have gotten it + // from a compromised content process. Validate that it is in bounds. + if (NS_WARN_IF(stack.strbuffer().IsEmpty() || + offset >= stack.strbuffer().Length())) { + MOZ_ASSERT_UNREACHABLE("Corrupted offset data"); + return NS_ERROR_FAILURE; + } + + // NOTE: If our content process is compromised, it could send us back a + // strbuffer() which didn't have a null terminator. If the last byte in + // the buffer is not '\0', we abort, to make sure we don't read out of + // bounds. + if (stack.strbuffer().LastElement() != '\0') { + MOZ_ASSERT_UNREACHABLE("Corrupted strbuffer data"); + return NS_ERROR_FAILURE; + } + + // We know this offset is safe because of the previous checks. + const int8_t* start = stack.strbuffer().Elements() + offset; + nsresult rv = + StringFrame(aCx, ret, i, reinterpret_cast(start)); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + case HangEntry::THangEntryModOffset: { + const HangEntryModOffset& mo = entry.get_HangEntryModOffset(); + + JS::Rooted jsFrame(aCx, JS::NewArrayObject(aCx, 2)); + if (!jsFrame) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!JS_DefineElement(aCx, jsFrame, 0, mo.module(), JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + nsPrintfCString hexString("%" PRIxPTR, (uintptr_t)mo.offset()); + JS::Rooted hex(aCx, JS_NewStringCopyZ(aCx, hexString.get())); + if (!hex || !JS_DefineElement(aCx, jsFrame, 1, hex, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!JS_DefineElement(aCx, ret, i, jsFrame, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + break; + } + case HangEntry::THangEntryProgCounter: { + // Don't bother recording fixed program counters to JS + nsresult rv = StringFrame(aCx, 
ret, i, "(unresolved)"); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + case HangEntry::THangEntryContent: { + nsresult rv = StringFrame(aCx, ret, i, "(content script)"); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + case HangEntry::THangEntryJit: { + nsresult rv = StringFrame(aCx, ret, i, "(jit frame)"); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + case HangEntry::THangEntryWasm: { + nsresult rv = StringFrame(aCx, ret, i, "(wasm)"); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + case HangEntry::THangEntryChromeScript: { + nsresult rv = StringFrame(aCx, ret, i, "(chrome script)"); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + case HangEntry::THangEntrySuppressed: { + nsresult rv = StringFrame(aCx, ret, i, "(profiling suppressed)"); + NS_ENSURE_SUCCESS(rv, rv); + break; + } + default: + MOZ_CRASH("Unsupported HangEntry type?"); + } + } + + aStack.setObject(*ret); + return NS_OK; +} + +NS_IMETHODIMP +nsHangDetails::GetModules(JSContext* aCx, JS::MutableHandle aVal) { + auto& modules = mDetails.stack().modules(); + size_t length = modules.Length(); + JS::Rooted retObj(aCx, JS::NewArrayObject(aCx, length)); + if (!retObj) { + return NS_ERROR_OUT_OF_MEMORY; + } + + for (size_t i = 0; i < length; ++i) { + const HangModule& module = modules[i]; + JS::Rooted jsModule(aCx, JS::NewArrayObject(aCx, 2)); + if (!jsModule) { + return NS_ERROR_OUT_OF_MEMORY; + } + + JS::Rooted name( + aCx, JS_NewUCStringCopyN(aCx, module.name().BeginReading(), + module.name().Length())); + if (!JS_DefineElement(aCx, jsModule, 0, name, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + JS::Rooted breakpadId( + aCx, JS_NewStringCopyN(aCx, module.breakpadId().BeginReading(), + module.breakpadId().Length())); + if (!JS_DefineElement(aCx, jsModule, 1, breakpadId, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + if (!JS_DefineElement(aCx, retObj, i, jsModule, JSPROP_ENUMERATE)) { + return NS_ERROR_OUT_OF_MEMORY; + } + } + + aVal.setObject(*retObj); + return NS_OK; +} + +// Processing and 
submitting the stack as an observer notification. + +void nsHangDetails::Submit() { + RefPtr hangDetails = this; + nsCOMPtr notifyObservers = + NS_NewRunnableFunction("NotifyBHRHangObservers", [hangDetails] { + // The place we need to report the hang to varies depending on process. + // + // In child processes, we report the hang to our parent process, while + // if we're in the parent process, we report a bhr-thread-hang observer + // notification. + switch (XRE_GetProcessType()) { + case GeckoProcessType_Content: { + auto cc = dom::ContentChild::GetSingleton(); + if (cc) { + // Use the prefix so we don't get URIs from Fission isolated + // processes. + hangDetails->mDetails.remoteType().Assign( + dom::RemoteTypePrefix(cc->GetRemoteType())); + Unused << cc->SendBHRThreadHang(hangDetails->mDetails); + } + break; + } + case GeckoProcessType_GPU: { + auto gp = gfx::GPUParent::GetSingleton(); + if (gp) { + Unused << gp->SendBHRThreadHang(hangDetails->mDetails); + } + break; + } + case GeckoProcessType_Default: { + nsCOMPtr os = + mozilla::services::GetObserverService(); + if (os) { + os->NotifyObservers(hangDetails, "bhr-thread-hang", nullptr); + } + break; + } + default: + // XXX: Consider handling GeckoProcessType_GMPlugin and + // GeckoProcessType_Plugin? + NS_WARNING("Unsupported BHR process type - discarding hang."); + break; + } + }); + + nsresult rv = SchedulerGroup::Dispatch(notifyObservers.forget()); + MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv)); +} + +NS_IMPL_ISUPPORTS(nsHangDetails, nsIHangDetails) + +namespace { + +// Sorting comparator used by ReadModuleInformation. Sorts PC Frames by their +// PC. 
+struct PCFrameComparator { + bool LessThan(HangEntry* const& a, HangEntry* const& b) const { + return a->get_HangEntryProgCounter().pc() < + b->get_HangEntryProgCounter().pc(); + } + bool Equals(HangEntry* const& a, HangEntry* const& b) const { + return a->get_HangEntryProgCounter().pc() == + b->get_HangEntryProgCounter().pc(); + } +}; + +} // anonymous namespace + +void ReadModuleInformation(HangStack& stack) { + // modules() should be empty when we start filling it. + stack.modules().Clear(); + +#ifdef MOZ_GECKO_PROFILER + // Create a sorted list of the PCs in the current stack. + AutoTArray frames; + for (auto& frame : stack.stack()) { + if (frame.type() == HangEntry::THangEntryProgCounter) { + frames.AppendElement(&frame); + } + } + PCFrameComparator comparator; + frames.Sort(comparator); + + SharedLibraryInfo rawModules = SharedLibraryInfo::GetInfoForSelf(); + rawModules.SortByAddress(); + + size_t frameIdx = 0; + for (size_t i = 0; i < rawModules.GetSize(); ++i) { + const SharedLibrary& info = rawModules.GetEntry(i); + uintptr_t moduleStart = info.GetStart(); + uintptr_t moduleEnd = info.GetEnd() - 1; + // the interval is [moduleStart, moduleEnd) + + bool moduleReferenced = false; + for (; frameIdx < frames.Length(); ++frameIdx) { + auto& frame = frames[frameIdx]; + uint64_t pc = frame->get_HangEntryProgCounter().pc(); + // We've moved past this frame, let's go to the next one. + if (pc >= moduleEnd) { + break; + } + if (pc >= moduleStart) { + uint64_t offset = pc - moduleStart; + if (NS_WARN_IF(offset > UINT32_MAX)) { + continue; // module/offset can only hold 32-bit offsets into shared + // libraries. + } + + // If we found the module, rewrite the Frame entry to instead be a + // ModOffset one. mModules.Length() will be the index of the module when + // we append it below, and we set moduleReferenced to true to ensure + // that we do. 
+ moduleReferenced = true; + uint32_t module = stack.modules().Length(); + HangEntryModOffset modOffset(module, static_cast(offset)); + *frame = modOffset; + } + } + + if (moduleReferenced) { + HangModule module(info.GetDebugName(), info.GetBreakpadId()); + stack.modules().AppendElement(module); + } + } +#endif +} + +Result ReadData(PRFileDesc* aFile, void* aPtr, size_t aLength) { + int32_t readResult = PR_Read(aFile, aPtr, aLength); + if (readResult < 0 || size_t(readResult) != aLength) { + return Err(NS_ERROR_FAILURE); + } + return Ok(); +} + +Result WriteData(PRFileDesc* aFile, void* aPtr, size_t aLength) { + int32_t writeResult = PR_Write(aFile, aPtr, aLength); + if (writeResult < 0 || size_t(writeResult) != aLength) { + return Err(NS_ERROR_FAILURE); + } + return Ok(); +} + +Result WriteUint(PRFileDesc* aFile, const CheckedUint32& aInt) { + if (!aInt.isValid()) { + MOZ_ASSERT_UNREACHABLE("Integer value out of bounds."); + return Err(NS_ERROR_UNEXPECTED); + } + int32_t value = aInt.value(); + MOZ_TRY(WriteData(aFile, (void*)&value, sizeof(value))); + return Ok(); +} + +Result ReadUint(PRFileDesc* aFile) { + int32_t value; + MOZ_TRY(ReadData(aFile, (void*)&value, sizeof(value))); + return value; +} + +Result WriteCString(PRFileDesc* aFile, const char* aString) { + size_t length = strlen(aString); + MOZ_TRY(WriteUint(aFile, CheckedUint32(length))); + MOZ_TRY(WriteData(aFile, (void*)aString, length)); + return Ok(); +} + +template +Result WriteTString(PRFileDesc* aFile, + const nsTString& aString) { + MOZ_TRY(WriteUint(aFile, CheckedUint32(aString.Length()))); + size_t size = aString.Length() * sizeof(CharT); + MOZ_TRY(WriteData(aFile, (void*)aString.get(), size)); + return Ok(); +} + +template +Result, nsresult> ReadTString(PRFileDesc* aFile) { + uint32_t length; + MOZ_TRY_VAR(length, ReadUint(aFile)); + nsTString result; + CharT buffer[512]; + size_t bufferLength = sizeof(buffer) / sizeof(CharT); + while (length != 0) { + size_t toRead = std::min(bufferLength, 
size_t(length)); + size_t toReadSize = toRead * sizeof(CharT); + MOZ_TRY(ReadData(aFile, (void*)buffer, toReadSize)); + + if (!result.Append(buffer, toRead, mozilla::fallible)) { + return Err(NS_ERROR_FAILURE); + } + + if (length > bufferLength) { + length -= bufferLength; + } else { + length = 0; + } + } + return result; +} + +Result WriteEntry(PRFileDesc* aFile, const HangStack& aStack, + const HangEntry& aEntry) { + MOZ_TRY(WriteUint(aFile, uint32_t(aEntry.type()))); + switch (aEntry.type()) { + case HangEntry::TnsCString: { + MOZ_TRY(WriteTString(aFile, aEntry.get_nsCString())); + break; + } + case HangEntry::THangEntryBufOffset: { + uint32_t offset = aEntry.get_HangEntryBufOffset().index(); + + if (NS_WARN_IF(aStack.strbuffer().IsEmpty() || + offset >= aStack.strbuffer().Length())) { + MOZ_ASSERT_UNREACHABLE("Corrupted offset data"); + return Err(NS_ERROR_FAILURE); + } + + if (aStack.strbuffer().LastElement() != '\0') { + MOZ_ASSERT_UNREACHABLE("Corrupted strbuffer data"); + return Err(NS_ERROR_FAILURE); + } + + const char* start = (const char*)aStack.strbuffer().Elements() + offset; + MOZ_TRY(WriteCString(aFile, start)); + break; + } + case HangEntry::THangEntryModOffset: { + const HangEntryModOffset& mo = aEntry.get_HangEntryModOffset(); + + MOZ_TRY(WriteUint(aFile, CheckedUint32(mo.module()))); + MOZ_TRY(WriteUint(aFile, CheckedUint32(mo.offset()))); + break; + } + case HangEntry::THangEntryProgCounter: + case HangEntry::THangEntryContent: + case HangEntry::THangEntryJit: + case HangEntry::THangEntryWasm: + case HangEntry::THangEntryChromeScript: + case HangEntry::THangEntrySuppressed: { + break; + } + default: + MOZ_CRASH("Unsupported HangEntry type?"); + } + return Ok(); +} + +Result ReadEntry(PRFileDesc* aFile, HangStack& aStack) { + uint32_t type; + MOZ_TRY_VAR(type, ReadUint(aFile)); + HangEntry::Type entryType = HangEntry::Type(type); + switch (entryType) { + case HangEntry::TnsCString: + case HangEntry::THangEntryBufOffset: { + nsCString str; + 
MOZ_TRY_VAR(str, ReadTString(aFile)); + aStack.stack().AppendElement(std::move(str)); + break; + } + case HangEntry::THangEntryModOffset: { + uint32_t module; + MOZ_TRY_VAR(module, ReadUint(aFile)); + uint32_t offset; + MOZ_TRY_VAR(offset, ReadUint(aFile)); + aStack.stack().AppendElement(HangEntryModOffset(module, offset)); + break; + } + case HangEntry::THangEntryProgCounter: { + aStack.stack().AppendElement(HangEntryProgCounter()); + break; + } + case HangEntry::THangEntryContent: { + aStack.stack().AppendElement(HangEntryContent()); + break; + } + case HangEntry::THangEntryJit: { + aStack.stack().AppendElement(HangEntryJit()); + break; + } + case HangEntry::THangEntryWasm: { + aStack.stack().AppendElement(HangEntryWasm()); + break; + } + case HangEntry::THangEntryChromeScript: { + aStack.stack().AppendElement(HangEntryChromeScript()); + break; + } + case HangEntry::THangEntrySuppressed: { + aStack.stack().AppendElement(HangEntrySuppressed()); + break; + } + default: + return Err(NS_ERROR_UNEXPECTED); + } + return Ok(); +} + +Result ReadHangDetailsFromFile(nsIFile* aFile) { + AutoFDClose raiiFd; + nsresult rv = + aFile->OpenNSPRFileDesc(PR_RDONLY, 0644, getter_Transfers(raiiFd)); + const auto fd = raiiFd.get(); + if (NS_FAILED(rv)) { + return Err(rv); + } + + uint8_t magicBuffer[sizeof(MAGIC)]; + MOZ_TRY(ReadData(fd, (void*)magicBuffer, sizeof(MAGIC))); + + if (memcmp(magicBuffer, MAGIC, sizeof(MAGIC)) != 0) { + return Err(NS_ERROR_FAILURE); + } + + HangDetails result; + uint32_t duration; + MOZ_TRY_VAR(duration, ReadUint(fd)); + result.duration() = TimeDuration::FromMilliseconds(double(duration)); + MOZ_TRY_VAR(result.threadName(), ReadTString(fd)); + MOZ_TRY_VAR(result.runnableName(), ReadTString(fd)); + MOZ_TRY_VAR(result.process(), ReadTString(fd)); + MOZ_TRY_VAR(result.remoteType(), ReadTString(fd)); + + uint32_t numAnnotations; + MOZ_TRY_VAR(numAnnotations, ReadUint(fd)); + auto& annotations = result.annotations(); + + // Add a "Unrecovered" annotation so 
we can know when processing this that + // the hang persisted until the process was closed. + if (!annotations.SetCapacity(numAnnotations + 1, mozilla::fallible)) { + return Err(NS_ERROR_FAILURE); + } + annotations.AppendElement(HangAnnotation(u"Unrecovered"_ns, u"true"_ns)); + + for (size_t i = 0; i < numAnnotations; ++i) { + HangAnnotation annot; + MOZ_TRY_VAR(annot.name(), ReadTString(fd)); + MOZ_TRY_VAR(annot.value(), ReadTString(fd)); + annotations.AppendElement(std::move(annot)); + } + + auto& stack = result.stack(); + uint32_t numFrames; + MOZ_TRY_VAR(numFrames, ReadUint(fd)); + if (!stack.stack().SetCapacity(numFrames, mozilla::fallible)) { + return Err(NS_ERROR_FAILURE); + } + + for (size_t i = 0; i < numFrames; ++i) { + MOZ_TRY(ReadEntry(fd, stack)); + } + + uint32_t numModules; + MOZ_TRY_VAR(numModules, ReadUint(fd)); + auto& modules = stack.modules(); + if (!modules.SetCapacity(numModules, mozilla::fallible)) { + return Err(NS_ERROR_FAILURE); + } + + for (size_t i = 0; i < numModules; ++i) { + HangModule module; + MOZ_TRY_VAR(module.name(), ReadTString(fd)); + MOZ_TRY_VAR(module.breakpadId(), ReadTString(fd)); + modules.AppendElement(std::move(module)); + } + + return result; +} + +Result WriteHangDetailsToFile(HangDetails& aDetails, + nsIFile* aFile) { + if (NS_WARN_IF(!aFile)) { + return Err(NS_ERROR_INVALID_POINTER); + } + + AutoFDClose raiiFd; + nsresult rv = aFile->OpenNSPRFileDesc( + PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0644, getter_Transfers(raiiFd)); + const auto fd = raiiFd.get(); + + if (NS_FAILED(rv)) { + return Err(rv); + } + + MOZ_TRY(WriteData(fd, (void*)MAGIC, sizeof(MAGIC))); + + double duration = aDetails.duration().ToMilliseconds(); + if (duration > double(std::numeric_limits::max())) { + // Something has gone terribly wrong if we've hung for more than 2^32 ms.
+ return Err(NS_ERROR_FAILURE); + } + + MOZ_TRY(WriteUint(fd, uint32_t(duration))); + MOZ_TRY(WriteTString(fd, aDetails.threadName())); + MOZ_TRY(WriteTString(fd, aDetails.runnableName())); + MOZ_TRY(WriteTString(fd, aDetails.process())); + MOZ_TRY(WriteTString(fd, aDetails.remoteType())); + MOZ_TRY(WriteUint(fd, CheckedUint32(aDetails.annotations().Length()))); + + for (auto& annot : aDetails.annotations()) { + MOZ_TRY(WriteTString(fd, annot.name())); + MOZ_TRY(WriteTString(fd, annot.value())); + } + + auto& stack = aDetails.stack(); + ReadModuleInformation(stack); + + MOZ_TRY(WriteUint(fd, CheckedUint32(stack.stack().Length()))); + for (auto& entry : stack.stack()) { + MOZ_TRY(WriteEntry(fd, stack, entry)); + } + + auto& modules = stack.modules(); + MOZ_TRY(WriteUint(fd, CheckedUint32(modules.Length()))); + + for (auto& module : modules) { + MOZ_TRY(WriteTString(fd, module.name())); + MOZ_TRY(WriteTString(fd, module.breakpadId())); + } + + return Ok(); +} + +NS_IMETHODIMP +ProcessHangStackRunnable::Run() { + // NOTE: Reading module information can take a long time, which is why we do + // it off-main-thread. + if (mHangDetails.stack().modules().IsEmpty()) { + ReadModuleInformation(mHangDetails.stack()); + } + + RefPtr hangDetails = + new nsHangDetails(std::move(mHangDetails), mPersistedToDisk); + hangDetails->Submit(); + + return NS_OK; +} + +NS_IMETHODIMP +SubmitPersistedPermahangRunnable::Run() { + auto hangDetailsResult = ReadHangDetailsFromFile(mPermahangFile); + if (hangDetailsResult.isErr()) { + // If we somehow failed in trying to deserialize the hang file, go ahead + // and delete it to prevent future runs from having to go through the + // same thing. If we succeeded, however, the file should be cleaned up + // once the hang is submitted. 
+ Unused << mPermahangFile->Remove(false); + return hangDetailsResult.unwrapErr(); + } + RefPtr hangDetails = + new nsHangDetails(hangDetailsResult.unwrap(), PersistedToDisk::Yes); + hangDetails->Submit(); + + return NS_OK; +} + +} // namespace mozilla diff --git a/toolkit/components/backgroundhangmonitor/HangDetails.h b/toolkit/components/backgroundhangmonitor/HangDetails.h new file mode 100644 index 0000000000..8641bfcb71 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/HangDetails.h @@ -0,0 +1,101 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_HangDetails_h +#define mozilla_HangDetails_h + +#include + +#include "ipc/IPCMessageUtils.h" +#include "mozilla/HangAnnotations.h" +#include "mozilla/HangTypes.h" +#include "mozilla/ProcessedStack.h" +#include "mozilla/RefPtr.h" +#include "mozilla/Result.h" +#include "mozilla/TimeStamp.h" +#include "nsIFile.h" +#include "nsIHangDetails.h" +#include "nsTArray.h" + +namespace mozilla { + +enum class PersistedToDisk { + No, + Yes, +}; + +/** + * HangDetails is the concrete implementation of nsIHangDetails, and contains the + * information which we want to expose to observers of the bhr-thread-hang + * observer notification. + */ +class nsHangDetails : public nsIHangDetails { + public: + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSIHANGDETAILS + + explicit nsHangDetails(HangDetails&& aDetails, + PersistedToDisk aPersistedToDisk) + : mDetails(std::move(aDetails)), mPersistedToDisk(aPersistedToDisk) {} + + // Submit these HangDetails to the main thread. This will dispatch a runnable + // to the main thread which will fire off the bhr-thread-hang observer + // notification with this HangDetails as the subject.
+ void Submit(); + + private: + virtual ~nsHangDetails() = default; + + HangDetails mDetails; + PersistedToDisk mPersistedToDisk; +}; + +Result WriteHangDetailsToFile(HangDetails& aDetails, + nsIFile* aFile); + +/** + * This runnable is run on the StreamTransportService threadpool in order to + * process the stack off main thread before submitting it to the main thread as + * an observer notification. + * + * This object should have the only remaining reference to aHangDetails, as it + * will access its fields without synchronization. + */ +class ProcessHangStackRunnable final : public Runnable { + public: + explicit ProcessHangStackRunnable(HangDetails&& aHangDetails, + PersistedToDisk aPersistedToDisk) + : Runnable("ProcessHangStackRunnable"), + mHangDetails(std::move(aHangDetails)), + mPersistedToDisk(aPersistedToDisk) {} + + NS_IMETHOD Run() override; + + private: + HangDetails mHangDetails; + PersistedToDisk mPersistedToDisk; +}; + +/** + * This runnable handles checking whether our last session wrote a permahang to + * disk which we were unable to submit through telemetry. If so, we read the + * permahang out and try again to submit it. 
+ */ +class SubmitPersistedPermahangRunnable final : public Runnable { + public: + explicit SubmitPersistedPermahangRunnable(nsIFile* aPermahangFile) + : Runnable("SubmitPersistedPermahangRunnable"), + mPermahangFile(aPermahangFile) {} + + NS_IMETHOD Run() override; + + private: + nsCOMPtr mPermahangFile; +}; + +} // namespace mozilla + +#endif // mozilla_HangDetails_h diff --git a/toolkit/components/backgroundhangmonitor/HangTypes.ipdlh b/toolkit/components/backgroundhangmonitor/HangTypes.ipdlh new file mode 100644 index 0000000000..e791bd6f44 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/HangTypes.ipdlh @@ -0,0 +1,95 @@ +/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 8 -*- */ +/* vim: set sw=4 ts=8 et tw=80 ft=cpp : */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +using mozilla::TimeDuration from "mozilla/TimeStamp.h"; + +namespace mozilla { + +// The different kinds of hang entries which we're going to need to handle in +// our HangStacks. + +struct HangEntryBufOffset +{ + // NOTE: Don't trust this index without checking it is a valid index into + // the strbuffer, and that the buffer's last byte is a '\0'. + uint32_t index; +}; + +struct HangEntryModOffset +{ + uint32_t module; + uint32_t offset; +}; + +struct HangEntryProgCounter +{ + uintptr_t pc; +}; + +// Singleton structs for the union type. +struct HangEntryContent {}; +struct HangEntryJit {}; +struct HangEntryWasm {}; +struct HangEntryChromeScript {}; +struct HangEntrySuppressed {}; + +union HangEntry +{ + // String representing a pseudostack or chrome JS stack. + nsCString; + // The index of the start of a string in the associated buffer. + HangEntryBufOffset; + // A module index and offset into that module. + HangEntryModOffset; + // A raw program counter which has not been mapped into a module. 
// A captured stack together with the side tables its entries refer to.
struct HangStack
{
  // The collected frames, in the order they were appended.
  HangEntry[] stack;
  // Backing storage for HangEntryBufOffset entries: each such entry's `index`
  // is the offset at which a '\0'-terminated string starts in this buffer.
  // Indices received over IPC must be validated before use.
  int8_t[] strbuffer;
  // Module table; presumably what HangEntryModOffset::module indexes into --
  // TODO confirm against the code which resolves program counters.
  HangModule[] modules;
};
// A character which we append to any string which gets truncated as a
// result of trying to write it into a statically allocated buffer. This just
// makes it a little easier to know that the buffer was truncated during
// analysis.
+const char kTruncationIndicator = '$'; + +ThreadStackHelper::ThreadStackHelper() + : mStackToFill(nullptr), + mMaxStackSize(16), + mMaxBufferSize(512), + mDesiredStackSize(0), + mDesiredBufferSize(0) { + mThreadId = profiler_current_thread_id(); +} + +bool ThreadStackHelper::PrepareStackBuffer(HangStack& aStack) { + // If we need to grow because we used more than we could store last time, + // increase our maximum sizes for this time. + if (mDesiredBufferSize > mMaxBufferSize) { + mMaxBufferSize = mDesiredBufferSize; + } + if (mDesiredStackSize > mMaxStackSize) { + mMaxStackSize = mDesiredStackSize; + } + mDesiredBufferSize = 0; + mDesiredStackSize = 0; + + // Clear all of the stack entries. + aStack.stack().ClearAndRetainStorage(); + aStack.strbuffer().ClearAndRetainStorage(); + aStack.modules().Clear(); + +#ifdef MOZ_THREADSTACKHELPER_PROFILING_STACK + // Ensure we have enough space in our stack and string buffers for the data we + // want to collect. + if (!aStack.stack().SetCapacity(mMaxStackSize, fallible) || + !aStack.strbuffer().SetCapacity(mMaxBufferSize, fallible)) { + return false; + } + return true; +#else + return false; +#endif +} + +namespace { +template +class ScopedSetPtr { + private: + T*& mPtr; + + public: + ScopedSetPtr(T*& p, T* val) : mPtr(p) { mPtr = val; } + ~ScopedSetPtr() { mPtr = nullptr; } +}; +} // namespace + +void ThreadStackHelper::GetStack(HangStack& aStack, nsACString& aRunnableName, + bool aStackWalk) { + aRunnableName.AssignLiteral("???"); + + if (!PrepareStackBuffer(aStack)) { + return; + } + + Array runnableName; + runnableName[0] = '\0'; + + ScopedSetPtr _stackGuard(mStackToFill, &aStack); + ScopedSetPtr> _runnableGuard( + mRunnableNameBuffer, &runnableName); + + // XXX: We don't need to pass in ProfilerFeature::StackWalk to trigger + // stackwalking, as that is instead controlled by the last argument. 
+ profiler_suspend_and_sample_thread(mThreadId, 0, *this, aStackWalk); + + // Copy the name buffer allocation into the output string. We explicitly set + // the last byte to null in case we read in some corrupted data without a null + // terminator. + runnableName[nsThread::kRunnableNameBufSize - 1] = '\0'; + aRunnableName.AssignASCII(runnableName.cbegin()); +} + +void ThreadStackHelper::SetIsMainThread() { + MOZ_RELEASE_ASSERT(mRunnableNameBuffer); + + // NOTE: We cannot allocate any memory in this callback, as the target + // thread is suspended, so we first copy it into a stack-allocated buffer, + // and then once the target thread is resumed, we can copy it into a real + // nsCString. + // + // Currently we only store the names of runnables which are running on the + // main thread, so we only want to read sMainThreadRunnableName and copy its + // value in the case that we are currently suspending the main thread. + *mRunnableNameBuffer = nsThread::sMainThreadRunnableName; +} + +void ThreadStackHelper::TryAppendFrame(HangEntry aFrame) { + MOZ_RELEASE_ASSERT(mStackToFill); + + // We deduplicate identical Content, Jit, Wasm, ChromeScript and Suppressed + // frames. + switch (aFrame.type()) { + case HangEntry::THangEntryContent: + case HangEntry::THangEntryJit: + case HangEntry::THangEntryWasm: + case HangEntry::THangEntryChromeScript: + case HangEntry::THangEntrySuppressed: + if (!mStackToFill->stack().IsEmpty() && + mStackToFill->stack().LastElement().type() == aFrame.type()) { + return; + } + break; + default: + break; + } + + // Record that we _want_ to use another frame entry. If this exceeds + // mMaxStackSize, we'll allocate more room on the next hang. + mDesiredStackSize += 1; + + // Perform the append if we have enough space to do so. 
// Get the full path after the URI scheme, if the URI matches the scheme.
// For example, GetFullPathForScheme("a://b/c/d/e", "a://") returns "b/c/d/e".
//
// `scheme` must be a string literal (or other char array) so that its length
// LEN, including the terminating '\0', is deduced at compile time. Returns a
// pointer into `filename` just past the scheme, or nullptr when `filename`
// does not start with `scheme`.
template <size_t LEN>
const char* GetFullPathForScheme(const char* filename,
                                 const char (&scheme)[LEN]) {
  // Account for the null terminator included in LEN.
  if (!strncmp(filename, scheme, LEN - 1)) {
    return filename + LEN - 1;
  }
  return nullptr;
}
+ // 'LEN - 1' accounts for the null terminator included in LEN, + found = next + LEN - 1; + // Resume searching before the separator '/'. + next = strstr(found - 1, component); + } + return found; +} + +} // namespace + +bool ThreadStackHelper::MaybeAppendDynamicStackFrame(Span aBuf) { + mDesiredBufferSize += aBuf.Length() + 1; + + if (mStackToFill->stack().Capacity() > mStackToFill->stack().Length() && + (mStackToFill->strbuffer().Capacity() - + mStackToFill->strbuffer().Length()) > aBuf.Length() + 1) { + // NOTE: We only increment this if we're going to successfully append. + mDesiredStackSize += 1; + uint32_t start = mStackToFill->strbuffer().Length(); + mStackToFill->strbuffer().AppendElements(aBuf.Elements(), aBuf.Length()); + mStackToFill->strbuffer().AppendElement('\0'); + mStackToFill->stack().AppendElement(HangEntryBufOffset(start)); + return true; + } + return false; +} + +void ThreadStackHelper::CollectProfilingStackFrame( + const js::ProfilingStackFrame& aFrame) { + // For non-js frames, first try to get the dynamic string and fit it in, + // otherwise just get the label. + if (!aFrame.isJsFrame()) { + const char* frameLabel = aFrame.label(); + if (aFrame.isNonsensitive() && aFrame.dynamicString()) { + const char* dynamicString = aFrame.dynamicString(); + char buffer[128]; + size_t len = SprintfLiteral(buffer, "%s %s", frameLabel, dynamicString); + if (len > sizeof(buffer)) { + buffer[sizeof(buffer) - 1] = kTruncationIndicator; + len = sizeof(buffer); + } + if (MaybeAppendDynamicStackFrame(Span(buffer, len))) { + return; + } + } + + // frameLabel is a statically allocated string, so we want to store a + // reference to it without performing any allocations. This is important, as + // we aren't allowed to allocate within this function. + // + // The variant for this kind of label in our HangStack object is a + // `nsCString`, which normally contains heap allocated string data. 
However, + // `nsCString` has an optimization for literal strings which causes the + // backing data to not be copied when being copied between nsCString + // objects. + // + // We take advantage of that optimization by creating a nsCString object + // which has the LITERAL flag set. Without this optimization, this code + // would be incorrect. + nsCString label; + label.AssignLiteral(frameLabel, strlen(frameLabel)); + + // Let's make sure we don't deadlock here, by asserting that `label`'s + // backing data matches. + MOZ_RELEASE_ASSERT(label.BeginReading() == frameLabel, + "String copy performed during " + "ThreadStackHelper::CollectProfilingStackFrame"); + TryAppendFrame(label); + return; + } + + if (!aFrame.script()) { + TryAppendFrame(HangEntrySuppressed()); + return; + } + + if (!IsChromeJSScript(aFrame.script())) { + TryAppendFrame(HangEntryContent()); + return; + } + + // Rather than using the profiler's dynamic string, we compute our own string. + // This is because we want to do some size-saving strategies, and throw out + // information which won't help us as much. + const char* filename = JS_GetScriptFilename(aFrame.script()); + + char buffer[256]; // Should be enough to fit our longest js function and file + // names. + size_t len = 0; + if (JSFunction* func = aFrame.function()) { + if (JSString* str = JS_GetMaybePartialFunctionDisplayId(func)) { + JSLinearString* linear = JS_ASSERT_STRING_IS_LINEAR(str); + len = JS::GetLinearStringLength(linear); + JS::LossyCopyLinearStringChars(buffer, linear, + std::min(len, sizeof(buffer))); + // NOTE: >= so that we account for the trailing space that we'd want to + // otherwise append. + if (len >= sizeof(buffer)) { + len = sizeof(buffer); + buffer[sizeof(buffer) - 1] = kTruncationIndicator; + } else { + buffer[len++] = ' '; + } + } + } + + unsigned lineno = JS_PCToLineNumber(aFrame.script(), aFrame.pc()); + + // Some script names are in the form "foo -> bar -> baz". 
+ // Here we find the origin of these redirected scripts. + const char* basename = GetPathAfterComponent(filename, " -> "); + if (basename) { + filename = basename; + } + + // Strip chrome:// or resource:// off of the filename if present. + basename = GetFullPathForScheme(filename, "chrome://"); + if (!basename) { + basename = GetFullPathForScheme(filename, "resource://"); + } + if (!basename) { + // If we're in an add-on script, under the {profile}/extensions + // directory, extract the path after the /extensions/ part. + basename = GetPathAfterComponent(filename, "/extensions/"); + } + if (!basename) { + // Only keep the file base name for paths outside the above formats. + basename = strrchr(filename, '/'); + basename = basename ? basename + 1 : filename; + // Look for Windows path separator as well. + filename = strrchr(basename, '\\'); + if (filename) { + basename = filename + 1; + } + } + + len += + SprintfBuf(buffer + len, sizeof(buffer) - len, "%s:%u", basename, lineno); + if (len > sizeof(buffer)) { + buffer[sizeof(buffer) - 1] = kTruncationIndicator; + len = sizeof(buffer); + } + if (MaybeAppendDynamicStackFrame(Span(buffer, len))) { + return; + } + + TryAppendFrame(HangEntryChromeScript()); +} + +} // namespace mozilla diff --git a/toolkit/components/backgroundhangmonitor/ThreadStackHelper.h b/toolkit/components/backgroundhangmonitor/ThreadStackHelper.h new file mode 100644 index 0000000000..e54078d2dd --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/ThreadStackHelper.h @@ -0,0 +1,111 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_ThreadStackHelper_h +#define mozilla_ThreadStackHelper_h + +#ifdef MOZ_GECKO_PROFILER + +# include "js/ProfilingStack.h" +# include "GeckoProfiler.h" +# include "HangDetails.h" +# include "mozilla/Span.h" +# include "nsThread.h" + +# include + +# if defined(XP_LINUX) +# include +# include +# include +# elif defined(XP_WIN) +# include +# elif defined(XP_MACOSX) +# include +# endif + +// Support profiling stack and native stack on these platforms. +# if defined(XP_LINUX) || defined(XP_WIN) || defined(XP_MACOSX) +# define MOZ_THREADSTACKHELPER_PROFILING_STACK +# define MOZ_THREADSTACKHELPER_NATIVE_STACK +# endif + +// Android x86 builds consistently crash in the Background Hang Reporter. bug +// 1368520. +# if defined(__ANDROID__) +# undef MOZ_THREADSTACKHELPER_PROFILING_STACK +# undef MOZ_THREADSTACKHELPER_NATIVE_STACK +# endif + +namespace mozilla { + +/** + * ThreadStackHelper is used to retrieve the profiler's "profiling stack" of a + * thread, as an alternative of using the profiler to take a profile. + * The target thread first declares an ThreadStackHelper instance; + * then another thread can call ThreadStackHelper::GetStack to retrieve + * the profiling stack of the target thread at that instant. + * + * Only non-copying labels are included in the stack, which means labels + * with custom text and markers are not included. + */ +class ThreadStackHelper : public ProfilerStackCollector { + private: + HangStack* mStackToFill; + Array* mRunnableNameBuffer; + size_t mMaxStackSize; + size_t mMaxBufferSize; + size_t mDesiredStackSize; + size_t mDesiredBufferSize; + + bool PrepareStackBuffer(HangStack& aStack); + + public: + /** + * Create a ThreadStackHelper instance targeting the current thread. + */ + ThreadStackHelper(); + + /** + * Retrieve the current interleaved stack of the thread associated with this + * ThreadStackHelper. + * + * @param aStack HangStack instance to be filled. 
+ * @param aRunnableName The name of the current runnable on the target thread. + * @param aStackWalk If true, native stack frames will be collected + * along with profiling stack frames. + */ + void GetStack(HangStack& aStack, nsACString& aRunnableName, bool aStackWalk); + + /** + * Retrieve the thread's profiler thread ID. + */ + ProfilerThreadId GetThreadId() const { return mThreadId; } + + protected: + /** + * ProfilerStackCollector + */ + virtual void SetIsMainThread() override; + virtual void CollectNativeLeafAddr(void* aAddr) override; + virtual void CollectJitReturnAddr(void* aAddr) override; + virtual void CollectWasmFrame(const char* aLabel) override; + virtual void CollectProfilingStackFrame( + const js::ProfilingStackFrame& aEntry) override; + + private: + bool MaybeAppendDynamicStackFrame(mozilla::Span aBuf); + void TryAppendFrame(mozilla::HangEntry aFrame); + + // The profiler's unique thread identifier for the target thread. + ProfilerThreadId mThreadId; +}; + +} // namespace mozilla + +#endif // MOZ_GECKO_PROFILER + +#endif // mozilla_ThreadStackHelper_h diff --git a/toolkit/components/backgroundhangmonitor/components.conf b/toolkit/components/backgroundhangmonitor/components.conf new file mode 100644 index 0000000000..ba052e29f3 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/components.conf @@ -0,0 +1,16 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +Classes = [ + { + 'cid': '{117c8cdf-69e6-4f31-a439-b8a654c67127}', + 'contract_ids': ['@mozilla.org/bhr-telemetry-service;1'], + 'esModule': 'resource://gre/modules/BHRTelemetryService.sys.mjs', + 'constructor': 'BHRTelemetryService', + 'categories': {'profile-after-change': 'BHRTelemetryService'}, + 'processes': ProcessSelector.MAIN_PROCESS_ONLY, + }, +] diff --git a/toolkit/components/backgroundhangmonitor/moz.build b/toolkit/components/backgroundhangmonitor/moz.build new file mode 100644 index 0000000000..78cc8065b6 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/moz.build @@ -0,0 +1,68 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# NOTE: These config options must match the ones in both android/'s and +# browser/'s package-manifest.in. + +# BHR disabled outside of Nightly builds due to expected high ping frequency. +# BHR disabled for Release builds because of bug 965392. +# BHR disabled for debug builds because of bug 979069. +# BHR disabled for TSan builds because of bug 1121216. +# BHR disabled for ASan builds because of bug 1445441. +# When changing these conditions, please also change the matching conditions in +# tools/profiler/public/ProfilerLabels.h and xpcom/threads/moz.build. 
+if ( + CONFIG["NIGHTLY_BUILD"] + and not CONFIG["MOZ_DEBUG"] + and not CONFIG["MOZ_TSAN"] + and not CONFIG["MOZ_ASAN"] +): + DEFINES["MOZ_ENABLE_BACKGROUND_HANG_MONITOR"] = 1 + + EXTRA_JS_MODULES += [ + "BHRTelemetryService.sys.mjs", + ] + + XPCOM_MANIFESTS += [ + "components.conf", + ] + + XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell.toml"] + +XPIDL_SOURCES += [ + "nsIHangDetails.idl", +] + +XPIDL_MODULE = "backgroundhangmonitor" + +EXPORTS.mozilla += [ + "BackgroundHangMonitor.h", + "HangAnnotations.h", + "HangDetails.h", +] + +UNIFIED_SOURCES += [ + "BackgroundHangMonitor.cpp", + "HangAnnotations.cpp", + "HangDetails.cpp", +] + +IPDL_SOURCES += [ + "HangTypes.ipdlh", +] + +if CONFIG["MOZ_GECKO_PROFILER"]: + UNIFIED_SOURCES += [ + "ThreadStackHelper.cpp", + ] + +LOCAL_INCLUDES += [ + "/caps", # For nsScriptSecurityManager.h +] + +FINAL_LIBRARY = "xul" + +include("/ipc/chromium/chromium-config.mozbuild") diff --git a/toolkit/components/backgroundhangmonitor/nsIHangDetails.idl b/toolkit/components/backgroundhangmonitor/nsIHangDetails.idl new file mode 100644 index 0000000000..f9c6ba2de0 --- /dev/null +++ b/toolkit/components/backgroundhangmonitor/nsIHangDetails.idl @@ -0,0 +1,77 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +%{ C++ +namespace mozilla { +class HangDetails; +} +%} + +[ref] native HangDetailsRef(mozilla::HangDetails); + +/** + * A scriptable interface for getting information about a BHR detected hang. + * This is the type of the subject of the "bhr-thread-hang" observer topic. 
[scriptable, uuid(23d63fff-38d6-4003-9c57-2c90aca1180a)]
interface nsIHangDetails : nsISupports
{
    /**
     * The hang was persisted to disk as a permahang, so we can clear the
     * permahang file once we submit this.
     */
    readonly attribute bool wasPersisted;

    /**
     * The detected duration of the hang in milliseconds.
     */
    readonly attribute double duration;

    /**
     * The name of the thread which hung.
     */
    readonly attribute ACString thread;

    /**
     * The name of the runnable which hung if it hung on the main thread.
     */
    readonly attribute ACString runnableName;

    /**
     * The type of process which produced the hang. This should be either:
     * "default", "content", or "gpu".
     */
    readonly attribute ACString process;

    /**
     * The remote type of the content process which produced the hang.
     */
    readonly attribute AUTF8String remoteType;

    /**
     * Returns the stack which was captured by BHR, as an array of frames.
     *
     * A native frame is a two-element array holding the module index (a
     * number) and the offset into that module. The offset is encoded as a hex
     * string, as it can contain numbers larger than JS can hold losslessly.
     * A pseudostack frame is a bare string.
     *
     * This value takes the following form:
     *   [ [moduleIndex, "offset"], "pseudostack label", ... ]
     */
    [implicit_jscontext] readonly attribute jsval stack;

    /**
     * Returns the modules which were captured by BHR.
     *
     * This value takes the following form:
     *   [ ["fileName", "BreakpadId"], ... ]
     */
    [implicit_jscontext] readonly attribute jsval modules;

    /**
     * The hang annotations which were captured when the hang occurred.
     *
     * This value takes the following form:
     *   [ ["name", "value"], ... ]
     */
    [implicit_jscontext] readonly attribute jsval annotations;
};
/**
 * Makes sure the profiler's stackwalking machinery is ready before any hang
 * is provoked.
 */
function ensureProfilerInitialized() {
  // A start/stop cycle with the "stackwalk" feature initializes the profiler's
  // stackwalking support synchronously, so BHR is not left waiting for it.
  const { profiler } = Services;
  profiler.StartProfiler(1000, 10, ["stackwalk"]);
  profiler.StopProfiler();
}
/**
 * Provokes hangs in this process and in a child process, then checks both the
 * nsIHangDetails objects delivered through the "bhr-thread-hang" observer
 * topic and the payload accumulated by the BHR telemetry service.
 */
add_task(async function test_BHRObserver() {
  if (!Services.telemetry.canRecordExtended) {
    // ok() takes the condition first; pass the message as the second argument
    // so that it actually shows up in the log.
    ok(true, "Hang reporting not enabled.");
    return;
  }

  ensureProfilerInitialized();

  let telSvc =
    Cc["@mozilla.org/bhr-telemetry-service;1"].getService().wrappedJSObject;
  ok(telSvc, "Should have BHRTelemetryService");
  let beforeLen = telSvc.payload.hangs.length;

  if (Services.appinfo.OS === "Linux" || Services.appinfo.OS === "Android") {
    // We use the rt_tgsigqueueinfo syscall on Linux which requires a
    // certain kernel version. It's not an error if the system running
    // the test is older than that.
    let kernel =
      Services.sysinfo.get("kernel_version") || Services.sysinfo.get("version");
    if (Services.vc.compare(kernel, "2.6.31") < 0) {
      ok(true, "Hang reporting not supported for old kernel.");
      return;
    }
  }

  // Asserts that `pairs` is an array of [string, string] arrays. This is the
  // shape shared by module lists and nsIHangDetails annotations.
  const checkStringPairs = pairs => {
    ok(Array.isArray(pairs));
    pairs.forEach(pair => {
      ok(Array.isArray(pair));
      equal(pair.length, 2);
      equal(typeof pair[0], "string");
      equal(typeof pair[1], "string");
    });
  };

  // Asserts that `stack` is a non-empty array of valid frames. Each stack
  // frame entry is either a native or pseudostack entry. A native stack entry
  // is an array with module index (number), and offset (hex string), while the
  // pseudostack entry is a bare string. When `moduleCount` is given, module
  // indices must also be within range.
  const checkStack = (stack, moduleCount) => {
    ok(Array.isArray(stack));
    ok(!!stack.length);
    stack.forEach(entry => {
      if (Array.isArray(entry)) {
        equal(entry.length, 2);
        equal(typeof entry[0], "number");
        if (moduleCount !== undefined) {
          Assert.less(entry[0], moduleCount);
        }
        equal(typeof entry[1], "string");
      } else {
        equal(typeof entry, "string");
      }
    });
  };

  // Fields which look the same on nsIHangDetails and on telemetry hangs.
  const checkCommonFields = hang => {
    Assert.greater(hang.duration, 0);
    ok(hang.thread === "Gecko" || hang.thread === "Gecko_Child");
    equal(typeof hang.runnableName, "string");
  };

  let hangsPromise = new Promise(resolve => {
    let hangs = [];
    const onThreadHang = subject => {
      let hang = subject.QueryInterface(Ci.nsIHangDetails);
      if (hang.thread.startsWith("Gecko")) {
        hangs.push(hang);
        if (hangs.length >= 3) {
          Services.obs.removeObserver(onThreadHang, "bhr-thread-hang");
          resolve(hangs);
        }
      }
    };
    Services.obs.addObserver(onThreadHang, "bhr-thread-hang");
  });

  // We're going to trigger two hangs of different lengths in this process (a
  // long one and a short one), plus a third one in a child process. We'll wait
  // for all three hangs to be recorded.

  executeSoon(() => {
    let startTime = Date.now();
    // eslint-disable-next-line no-empty
    while (Date.now() - startTime < 10000) {}
  });

  executeSoon(() => {
    let startTime = Date.now();
    // eslint-disable-next-line no-empty
    while (Date.now() - startTime < 1000) {}
  });

  Services.prefs.setBoolPref(
    TelemetryUtils.Preferences.OverridePreRelease,
    true
  );
  let childDone = run_test_in_child("child_cause_hang.js");

  // Now we wait for the hangs to have their bhr-thread-hang message fired for
  // them, collect them, and analyze the response.
  let hangs = await hangsPromise;
  equal(hangs.length, 3);
  hangs.forEach(hang => {
    checkCommonFields(hang);
    checkStack(hang.stack);
    checkStringPairs(hang.modules);
    checkStringPairs(hang.annotations);
  });

  // Check that the telemetry service collected pings which make sense
  Assert.greaterOrEqual(telSvc.payload.hangs.length - beforeLen, 3);
  checkStringPairs(telSvc.payload.modules);

  telSvc.payload.hangs.forEach(hang => {
    checkCommonFields(hang);
    checkStack(hang.stack, telSvc.payload.modules.length);

    // In the telemetry payload the annotations are an object of string values.
    equal(typeof hang.annotations, "object");
    Object.keys(hang.annotations).forEach(key => {
      equal(typeof hang.annotations[key], "string");
    });
  });

  do_send_remote_message("bhr_hangs_detected");
  await childDone;
});