summaryrefslogtreecommitdiffstats
path: root/toolkit/components/terminator
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/components/terminator')
-rw-r--r--toolkit/components/terminator/TerminatorTelemetry.sys.mjs119
-rw-r--r--toolkit/components/terminator/components.conf15
-rw-r--r--toolkit/components/terminator/moz.build32
-rw-r--r--toolkit/components/terminator/nsTerminator.cpp614
-rw-r--r--toolkit/components/terminator/nsTerminator.h49
-rw-r--r--toolkit/components/terminator/terminator.manifest1
-rw-r--r--toolkit/components/terminator/tests/xpcshell/test_terminator_record.js171
-rw-r--r--toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js88
-rw-r--r--toolkit/components/terminator/tests/xpcshell/xpcshell.toml15
9 files changed, 1104 insertions, 0 deletions
diff --git a/toolkit/components/terminator/TerminatorTelemetry.sys.mjs b/toolkit/components/terminator/TerminatorTelemetry.sys.mjs
new file mode 100644
index 0000000000..9903027436
--- /dev/null
+++ b/toolkit/components/terminator/TerminatorTelemetry.sys.mjs
@@ -0,0 +1,119 @@
+/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
+/* vim: set ts=2 et sw=2 tw=80 filetype=javascript: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * Read the data saved by nsTerminator during shutdown and feed it to the
+ * relevant telemetry histograms.
+ */
+
+const lazy = {};
+
+ChromeUtils.defineESModuleGetters(lazy, {
+ setTimeout: "resource://gre/modules/Timer.sys.mjs",
+});
+
+export function nsTerminatorTelemetry() {
+ this._wasNotified = false;
+ this._deferred = Promise.withResolvers();
+
+ IOUtils.sendTelemetry.addBlocker(
+ "TerminatoryTelemetry: Waiting to submit telemetry",
+ this._deferred.promise,
+ () => ({
+ wasNotified: this._wasNotified,
+ })
+ );
+}
+
+var HISTOGRAMS = {
+ "quit-application": "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION",
+ "profile-change-net-teardown":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_NET_TEARDOWN",
+ "profile-change-teardown":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN",
+ "profile-before-change":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE",
+ "profile-before-change-qm":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE_QM",
+ "xpcom-will-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN",
+ "xpcom-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_SHUTDOWN",
+
+ // The following keys appear in the JSON, but do not have associated
+ // histograms.
+ "xpcom-shutdown-threads": null,
+ XPCOMShutdownFinal: null,
+ CCPostLastCycleCollection: null,
+};
+
+nsTerminatorTelemetry.prototype = {
+ classID: Components.ID("{3f78ada1-cba2-442a-82dd-d5fb300ddea7}"),
+
+ // nsISupports
+
+ QueryInterface: ChromeUtils.generateQI(["nsIObserver"]),
+
+ // nsIObserver
+
+ observe: function DS_observe(aSubject, aTopic, aData) {
+ this._wasNotified = true;
+
+ (async () => {
+ try {
+ //
+ // This data is hardly critical, reading it can wait for a few seconds.
+ //
+ await new Promise(resolve => lazy.setTimeout(resolve, 3000));
+
+ let PATH = PathUtils.join(
+ Services.dirsvc.get("ProfLD", Ci.nsIFile).path,
+ "ShutdownDuration.json"
+ );
+ let data;
+ try {
+ data = await IOUtils.readJSON(PATH);
+ } catch (ex) {
+ if (DOMException.isInstance(ex) && ex.name == "NotFoundError") {
+ return;
+ }
+ // Let other errors be reported by Promise's error-reporting.
+ throw ex;
+ }
+
+ // Clean up
+ await IOUtils.remove(PATH);
+ await IOUtils.remove(PATH + ".tmp");
+
+ for (let k of Object.keys(data)) {
+ let id = HISTOGRAMS[k];
+ if (id === null) {
+ // No histogram associated with this entry.
+ continue;
+ }
+
+ try {
+ let histogram = Services.telemetry.getHistogramById(id);
+ histogram.add(Number.parseInt(data[k]));
+ } catch (ex) {
+ // Make sure that the error is reported and causes test failures,
+ // but otherwise, ignore it.
+ Promise.reject(ex);
+ continue;
+ }
+ }
+
+ // Inform observers that we are done.
+ Services.obs.notifyObservers(
+ null,
+ "shutdown-terminator-telemetry-updated"
+ );
+ } finally {
+ this._deferred.resolve();
+ }
+ })();
+ },
+};
+
+// Module
diff --git a/toolkit/components/terminator/components.conf b/toolkit/components/terminator/components.conf
new file mode 100644
index 0000000000..14c4c2c562
--- /dev/null
+++ b/toolkit/components/terminator/components.conf
@@ -0,0 +1,15 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Classes = [
+ {
+ 'cid': '{3f78ada1-cba2-442a-82dd-d5fb300ddea7}',
+ 'contract_ids': ['@mozilla.org/toolkit/shutdown-terminator-telemetry;1'],
+ 'esModule': 'resource://gre/modules/TerminatorTelemetry.sys.mjs',
+ 'constructor': 'nsTerminatorTelemetry',
+ 'categories': {'profile-after-change': 'nsTerminatorTelemetry'},
+ },
+]
diff --git a/toolkit/components/terminator/moz.build b/toolkit/components/terminator/moz.build
new file mode 100644
index 0000000000..f869b86d95
--- /dev/null
+++ b/toolkit/components/terminator/moz.build
@@ -0,0 +1,32 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+with Files("**"):
+ BUG_COMPONENT = ("Toolkit", "Async Tooling")
+
+XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell/xpcshell.toml"]
+
+SOURCES += [
+ "nsTerminator.cpp",
+]
+
+EXPORTS += [
+ "nsTerminator.h",
+]
+
+EXTRA_COMPONENTS += [
+ "terminator.manifest",
+]
+
+EXTRA_JS_MODULES += [
+ "TerminatorTelemetry.sys.mjs",
+]
+
+XPCOM_MANIFESTS += [
+ "components.conf",
+]
+
+FINAL_LIBRARY = "xul"
diff --git a/toolkit/components/terminator/nsTerminator.cpp b/toolkit/components/terminator/nsTerminator.cpp
new file mode 100644
index 0000000000..3a8f9f4ba6
--- /dev/null
+++ b/toolkit/components/terminator/nsTerminator.cpp
@@ -0,0 +1,614 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * A watchdog designed to terminate shutdown if it lasts too long.
+ *
+ * This watchdog is designed as a worst-case problem container for the
+ * common case in which Firefox just won't shutdown.
+ *
+ * We spawn a thread during quit-application. If any of the shutdown
+ * steps takes more than n milliseconds (63000 by default), kill the
+ * process as fast as possible, without any cleanup.
+ */
+
+#include "mozilla/ShutdownPhase.h"
+#include "nsTerminator.h"
+
+#include "prthread.h"
+#include "prmon.h"
+#include "prio.h"
+
+#include "nsString.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsAppDirectoryServiceDefs.h"
+
+#include "nsExceptionHandler.h"
+#include "GeckoProfiler.h"
+#include "nsThreadUtils.h"
+#include "nsXULAppAPI.h"
+
+#if defined(XP_WIN)
+# include <windows.h>
+#else
+# include <unistd.h>
+#endif
+
+#include "mozilla/AppShutdown.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/IntentionalCrash.h"
+#include "mozilla/MemoryChecking.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/SpinEventLoopUntil.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+#include "mozilla/Telemetry.h"
+
+#include "mozilla/dom/IOUtils.h"
+#include "mozilla/dom/workerinternals/RuntimeService.h"
+
+// Normally, the number of milliseconds that AsyncShutdown waits until
+// it decides to crash is specified as a preference. We use the
+// following value as a fallback if for some reason the preference is
+// absent.
+#define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
+
+// Additional number of milliseconds to wait until we decide to exit
+// forcefully.
+#define ADDITIONAL_WAIT_BEFORE_CRASH_MS 3000
+
+#define HEARTBEAT_INTERVAL_MS 100
+
+namespace mozilla {
+
+namespace {
+
+/**
+ * A step during shutdown.
+ *
+ * Shutdown is divided in steps, which all map to an observer
+ * notification. The duration of a step is defined as the number of
+ * ticks between the time we receive a notification and the next one.
+ */
+struct ShutdownStep {
+ mozilla::ShutdownPhase mPhase;
+ Atomic<int> mTicks;
+
+ constexpr explicit ShutdownStep(mozilla::ShutdownPhase aPhase)
+ : mPhase(aPhase), mTicks(-1) {}
+};
+
+static ShutdownStep sShutdownSteps[] = {
+ ShutdownStep(mozilla::ShutdownPhase::AppShutdownConfirmed),
+ ShutdownStep(mozilla::ShutdownPhase::AppShutdownNetTeardown),
+ ShutdownStep(mozilla::ShutdownPhase::AppShutdownTeardown),
+ ShutdownStep(mozilla::ShutdownPhase::AppShutdown),
+ ShutdownStep(mozilla::ShutdownPhase::AppShutdownQM),
+ ShutdownStep(mozilla::ShutdownPhase::XPCOMWillShutdown),
+ ShutdownStep(mozilla::ShutdownPhase::XPCOMShutdown),
+ ShutdownStep(mozilla::ShutdownPhase::XPCOMShutdownThreads),
+ ShutdownStep(mozilla::ShutdownPhase::XPCOMShutdownFinal),
+ ShutdownStep(mozilla::ShutdownPhase::CCPostLastCycleCollection),
+};
+
+int GetStepForPhase(mozilla::ShutdownPhase aPhase) {
+ for (size_t i = 0; i < std::size(sShutdownSteps); i++) {
+ if (sShutdownSteps[i].mPhase >= aPhase) {
+ return (int)i;
+ }
+ }
+ return -1;
+}
+
+// Utility function: create a thread that is non-joinable,
+// does not prevent the process from terminating, is never
+// cooperatively scheduled, and uses a default stack size.
+PRThread* CreateSystemThread(void (*start)(void* arg), void* arg) {
+ PRThread* thread =
+ PR_CreateThread(PR_SYSTEM_THREAD, /* This thread will not prevent the
+ process from terminating */
+ start, arg, PR_PRIORITY_LOW,
+ PR_GLOBAL_THREAD, /* Make sure that the thread is never
+ cooperatively scheduled */
+ PR_UNJOINABLE_THREAD, 0 /* Use default stack size */
+ );
+ MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(
+ thread); // This pointer will never be deallocated.
+ return thread;
+}
+
+////////////////////////////////////////////
+//
+// The watchdog
+//
+// This nspr thread is in charge of crashing the process if any stage of
+// shutdown lasts more than some predefined duration. As a side-effect, it
+// measures the duration of each stage of shutdown.
+//
+
+// The heartbeat of the operation.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// swaps gHeartbeat to 0 to mark that the shutdown process is still
+// progressing. The value swapped away indicates the number of ticks
+// it took for the shutdown step to advance.
+//
+// Watchdog thread:
+//
+// * Every tick, the watchdog thread increments gHearbeat atomically.
+//
+// A note about precision:
+// Since gHeartbeat is generally reset to 0 between two ticks, this means
+// that gHeartbeat stays at 0 less than one tick. Consequently, values
+// extracted from gHeartbeat must be considered rounded up.
+Atomic<uint32_t> gHeartbeat(0);
+
+struct Options {
+ /**
+ * How many ticks before we should crash the process.
+ */
+ uint32_t crashAfterTicks;
+};
+
+/**
+ * Entry point for the watchdog thread
+ */
+void RunWatchdog(void* arg) {
+ NS_SetCurrentThreadName("Shutdown Hang Terminator");
+
+ // Let's copy and deallocate options, that's one less leak to worry
+ // about.
+ UniquePtr<Options> options((Options*)arg);
+ uint32_t crashAfterTicks = options->crashAfterTicks;
+ options = nullptr;
+
+ const uint32_t timeToLive = crashAfterTicks;
+ while (true) {
+ //
+ // We do not want to sleep for the entire duration,
+ // as putting the computer to sleep would suddenly
+ // cause us to timeout on wakeup.
+ //
+ // Rather, we prefer sleeping for at most 1 second
+ // at a time. If the computer sleeps then wakes up,
+ // we have lost at most one second, which is much
+ // more reasonable.
+ //
+#if defined(XP_WIN)
+ Sleep(HEARTBEAT_INTERVAL_MS /* ms */);
+#else
+ usleep(HEARTBEAT_INTERVAL_MS * 1000 /* usec */);
+#endif
+
+ if (gHeartbeat++ < timeToLive) {
+ continue;
+ }
+
+ // Arrived here we know we will crash in a way or another.
+ NoteIntentionalCrash(XRE_GetProcessTypeString());
+
+ // Until we have general log output for crash annotations in treeherder
+ // (bug 1728721) we manually spit out our nested event loop stack.
+ // XXX: Remove once bug 1728721 is fixed.
+ nsCString stack;
+ AutoNestedEventLoopAnnotation::CopyCurrentStack(stack);
+ printf_stderr(
+ "RunWatchdog: Mainthread nested event loops during hang: \n --- %s\n",
+ stack.get());
+
+ // Let's find the last known shutdown phase.
+ mozilla::ShutdownPhase lastPhase = mozilla::ShutdownPhase::NotInShutdown;
+ // Looping inverse here to make the search more robust in case
+ // the observer that triggers UpdateHeartbeat was not called
+ // at all or in the expected order on some step. This should
+ // give us always the last known ShutdownStep.
+ for (int i = ArrayLength(sShutdownSteps) - 1; i >= 0; --i) {
+ if (sShutdownSteps[i].mTicks > -1) {
+ lastPhase = sShutdownSteps[i].mPhase;
+ break;
+ }
+ }
+
+ if (lastPhase == mozilla::ShutdownPhase::NotInShutdown) {
+ // This is not something we expect to ever happen, but still.
+ CrashReporter::SetMinidumpAnalysisAllThreads();
+ MOZ_CRASH("Shutdown hanging before starting any known phase.");
+ }
+
+ // First check if worker shutdown started and is incomplete, in case
+ // report running workers.
+ mozilla::dom::workerinternals::RuntimeService* runtimeService =
+ mozilla::dom::workerinternals::RuntimeService::GetService();
+ if (runtimeService) {
+ // CrashIfHanging will check if we actually ever asked for worker
+ // shutdown, so calling it before is a no-op.
+ runtimeService->CrashIfHanging();
+ }
+
+ // Otherwise just report our shutdown phase.
+ // This string will be leaked.
+ nsCString msg;
+ msg.AppendPrintf(
+ "Shutdown hanging at step %s. "
+ "Something is blocking the main-thread.",
+ mozilla::AppShutdown::GetShutdownPhaseName(lastPhase));
+
+ CrashReporter::SetMinidumpAnalysisAllThreads();
+ MOZ_CRASH_UNSAFE(strdup(msg.BeginReading()));
+ }
+}
+
+////////////////////////////////////////////
+//
+// Writer thread
+//
+// This nspr thread is in charge of writing to disk statistics produced by the
+// watchdog thread and collected by the main thread. Note that we use a nspr
+// thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
+// threads.
+//
+
+//
+// Communication between the main thread and the writer thread.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// obtains the number of ticks from the watchdog threads, builds
+// a string representing all the data gathered so far, places
+// this string in `gWriteData`, and wakes up the writer thread
+// using `gWriteReady`. If `gWriteData` already contained a non-null
+// pointer, this means that the writer thread is lagging behind the
+// main thread, and the main thread cleans up the memory.
+//
+// Writer thread:
+//
+// * When awake, the writer thread swaps `gWriteData` to nullptr. If
+// `gWriteData` contained data to write, the . If so, the writer
+// thread writes the data to a file named "ShutdownDuration.json.tmp",
+// then moves that file to "ShutdownDuration.json" and cleans up the
+// data. If `gWriteData` contains a nullptr, the writer goes to sleep
+// until it is awkened using `gWriteReady`.
+//
+//
+// The data written by the writer thread will be read by another
+// module upon the next restart and fed to Telemetry.
+//
+Atomic<nsCString*> gWriteData(nullptr);
+PRMonitor* gWriteReady = nullptr;
+
+void RunWriter(void* arg) {
+ AUTO_PROFILER_REGISTER_THREAD("Shutdown Statistics Writer");
+ NS_SetCurrentThreadName("Shutdown Statistics Writer");
+
+ MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
+ // Shutdown will generally complete before we have a chance to
+ // deallocate. This is not a leak.
+
+ // Setup destinationPath and tmpFilePath
+
+ nsCString destinationPath;
+ destinationPath.Adopt(static_cast<char*>(arg));
+ nsAutoCString tmpFilePath;
+ tmpFilePath.Append(destinationPath);
+ tmpFilePath.AppendLiteral(".tmp");
+
+ // Cleanup any file leftover from a previous run
+ Unused << PR_Delete(tmpFilePath.get());
+ Unused << PR_Delete(destinationPath.get());
+
+ while (true) {
+ //
+ // Check whether we have received data from the main thread.
+ //
+ // We perform the check before waiting on `gWriteReady` as we may
+ // have received data while we were busy writing.
+ //
+ // Also note that gWriteData may have been modified several times
+ // since we last checked. That's ok, we are not losing any important
+ // data (since we keep adding data), and we are not leaking memory
+ // (since the main thread deallocates any data that hasn't been
+ // consumed by the writer thread).
+ //
+ UniquePtr<nsCString> data(gWriteData.exchange(nullptr));
+ if (!data) {
+ // Data is not available yet.
+ // Wait until the main thread provides it.
+ PR_EnterMonitor(gWriteReady);
+ PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
+ PR_ExitMonitor(gWriteReady);
+ continue;
+ }
+
+ MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
+ // Shutdown may complete before we have a chance to deallocate.
+ // This is not a leak.
+
+ //
+ // Write to a temporary file
+ //
+ // In case of any error, we simply give up. Since the data is
+ // hardly critical, we don't want to spend too much effort
+ // salvaging it.
+ //
+ UniquePtr<PRFileDesc, PR_CloseDelete> tmpFileDesc(PR_Open(
+ tmpFilePath.get(), PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE, 00600));
+
+ // Shutdown may complete before we have a chance to close the file.
+ // This is not a leak.
+ MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
+
+ if (tmpFileDesc == nullptr) {
+ break;
+ }
+ if (PR_Write(tmpFileDesc.get(), data->get(), data->Length()) == -1) {
+ break;
+ }
+ tmpFileDesc.reset();
+
+ //
+ // Rename on top of destination file.
+ //
+ // This is not sufficient to guarantee that the destination file
+ // will be written correctly, but, again, we don't care enough
+ // about the data to make more efforts.
+ //
+ Unused << PR_Delete(destinationPath.get());
+ if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
+ break;
+ }
+ }
+}
+
+} // namespace
+
+NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
+
+nsTerminator::nsTerminator() : mInitialized(false), mCurrentStep(-1) {}
+
+// Actually launch these threads. This takes place at the first sign of
+// shutdown.
+void nsTerminator::Start() {
+ MOZ_ASSERT(!mInitialized);
+
+ StartWatchdog();
+#if !defined(NS_FREE_PERMANENT_DATA)
+ // Only allow nsTerminator to write on non-leak-checked builds so we don't
+ // get leak warnings on shutdown for intentional leaks (see bug 1242084).
+ // This will be enabled again by bug 1255484 when 1255478 lands.
+ StartWriter();
+#endif // !defined(NS_FREE_PERMANENT_DATA)
+ mInitialized = true;
+}
+
+NS_IMETHODIMP
+nsTerminator::Observe(nsISupports*, const char* aTopic, const char16_t*) {
+ // This Observe is now only used for testing purposes.
+ // XXX: Check if we should change our testing strategy.
+ if (strcmp(aTopic, "terminator-test-quit-application") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::AppShutdownConfirmed);
+ } else if (strcmp(aTopic, "terminator-test-profile-change-net-teardown") ==
+ 0) {
+ AdvancePhase(mozilla::ShutdownPhase::AppShutdownNetTeardown);
+ } else if (strcmp(aTopic, "terminator-test-profile-change-teardown") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::AppShutdownTeardown);
+ } else if (strcmp(aTopic, "terminator-test-profile-before-change") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::AppShutdown);
+ } else if (strcmp(aTopic, "terminator-test-profile-before-change-qm") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::AppShutdownQM);
+ } else if (strcmp(aTopic,
+ "terminator-test-profile-before-change-telemetry") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::AppShutdownTelemetry);
+ } else if (strcmp(aTopic, "terminator-test-xpcom-will-shutdown") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::XPCOMWillShutdown);
+ } else if (strcmp(aTopic, "terminator-test-xpcom-shutdown") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::XPCOMShutdown);
+ } else if (strcmp(aTopic, "terminator-test-xpcom-shutdown-threads") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::XPCOMShutdownThreads);
+ } else if (strcmp(aTopic, "terminator-test-XPCOMShutdownFinal") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::XPCOMShutdownFinal);
+ } else if (strcmp(aTopic, "terminator-test-CCPostLastCycleCollection") == 0) {
+ AdvancePhase(mozilla::ShutdownPhase::CCPostLastCycleCollection);
+ }
+
+ return NS_OK;
+}
+
+// Prepare, allocate and start the watchdog thread.
+// By design, it will never finish, nor be deallocated.
+void nsTerminator::StartWatchdog() {
+ int32_t crashAfterMS =
+ Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
+ FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
+ // Ignore negative values
+ if (crashAfterMS <= 0) {
+ crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
+ }
+
+ // Add a little padding, to ensure that we do not crash before
+ // AsyncShutdown.
+ if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
+ // Defend against overflow
+ crashAfterMS = INT32_MAX;
+ } else {
+ crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
+ }
+
+#ifdef MOZ_VALGRIND
+ // If we're running on Valgrind, we'll be making forward progress at a
+ // rate of somewhere between 1/25th and 1/50th of normal. This can cause
+ // timeouts frequently enough to be a problem for the Valgrind runs on
+ // automation: see bug 1296819. As an attempt to avoid the worst of this,
+ // scale up the presented timeout by a factor of three. For a
+ // non-Valgrind-enabled build, or for an enabled build which isn't running
+ // on Valgrind, the timeout is unchanged.
+ if (RUNNING_ON_VALGRIND) {
+ const int32_t scaleUp = 3;
+ if (crashAfterMS >= (INT32_MAX / scaleUp) - 1) {
+ // Defend against overflow
+ crashAfterMS = INT32_MAX;
+ } else {
+ crashAfterMS *= scaleUp;
+ }
+ }
+#endif
+
+ UniquePtr<Options> options(new Options());
+ // crashAfterTicks is guaranteed to be > 0 as
+ // crashAfterMS >= ADDITIONAL_WAIT_BEFORE_CRASH_MS >> HEARTBEAT_INTERVAL_MS
+ options->crashAfterTicks = crashAfterMS / HEARTBEAT_INTERVAL_MS;
+
+ DebugOnly<PRThread*> watchdogThread =
+ CreateSystemThread(RunWatchdog, options.release());
+ MOZ_ASSERT(watchdogThread);
+}
+
+// Prepare, allocate and start the writer thread. By design, it will never
+// finish, nor be deallocated. In case of error, we degrade
+// gracefully to not writing Telemetry data.
+void nsTerminator::StartWriter() {
+ if (!Telemetry::CanRecordExtended()) {
+ return;
+ }
+ nsCOMPtr<nsIFile> profLD;
+ nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
+ getter_AddRefs(profLD));
+ if (NS_FAILED(rv)) {
+ return;
+ }
+
+ rv = profLD->Append(u"ShutdownDuration.json"_ns);
+ if (NS_FAILED(rv)) {
+ return;
+ }
+
+ nsAutoString path;
+ rv = profLD->GetPath(path);
+ if (NS_FAILED(rv)) {
+ return;
+ }
+
+ gWriteReady = PR_NewMonitor();
+ MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(
+ gWriteReady); // We will never deallocate this object
+ PRThread* writerThread = CreateSystemThread(RunWriter, ToNewUTF8String(path));
+
+ if (!writerThread) {
+ return;
+ }
+}
+
+// This helper is here to preserve the existing crash reporting behavior
+// based on observer topic names, using the shutdown phase name only for
+// phases without associated topic.
+const char* GetReadableNameForPhase(mozilla::ShutdownPhase aPhase) {
+ const char* readableName = mozilla::AppShutdown::GetObserverKey(aPhase);
+ if (!readableName) {
+ readableName = mozilla::AppShutdown::GetShutdownPhaseName(aPhase);
+ }
+ return readableName;
+}
+
+void nsTerminator::AdvancePhase(mozilla::ShutdownPhase aPhase) {
+ // If the phase is unknown, just ignore it.
+ auto step = GetStepForPhase(aPhase);
+ if (step < 0) {
+ return;
+ }
+
+ // As we have seen examples in the wild of shutdown notifications
+ // not being sent (or not being sent in the expected order), we do
+ // not assume a specific order.
+ if (!mInitialized) {
+ Start();
+ }
+
+ UpdateHeartbeat(step);
+#if !defined(NS_FREE_PERMANENT_DATA)
+ // Only allow nsTerminator to write on non-leak checked builds so we don't get
+ // leak warnings on shutdown for intentional leaks (see bug 1242084). This
+ // will be enabled again by bug 1255484 when 1255478 lands.
+ UpdateTelemetry();
+#endif // !defined(NS_FREE_PERMANENT_DATA)
+ UpdateCrashReport(GetReadableNameForPhase(aPhase));
+}
+
+void nsTerminator::UpdateHeartbeat(int32_t aStep) {
+ MOZ_ASSERT(aStep >= mCurrentStep);
+
+ if (aStep > mCurrentStep) {
+ // Reset the clock, find out how long the current phase has lasted.
+ uint32_t ticks = gHeartbeat.exchange(0);
+ if (mCurrentStep >= 0) {
+ sShutdownSteps[mCurrentStep].mTicks = ticks;
+ }
+ sShutdownSteps[aStep].mTicks = 0;
+
+ mCurrentStep = aStep;
+ }
+}
+
+void nsTerminator::UpdateTelemetry() {
+ if (!Telemetry::CanRecordExtended() || !gWriteReady) {
+ return;
+ }
+
+ //
+ // We need Telemetry data on the effective duration of each step,
+ // to be able to tune the time-to-crash of each of both the
+ // Terminator and AsyncShutdown. However, at this stage, it is too
+ // late to record such data into Telemetry, so we write it to disk
+ // and read it upon the next startup.
+ //
+
+ // Build JSON.
+ UniquePtr<nsCString> telemetryData(new nsCString());
+ telemetryData->AppendLiteral("{");
+ size_t fields = 0;
+ for (auto& shutdownStep : sShutdownSteps) {
+ if (shutdownStep.mTicks < 0) {
+ // Ignore this field.
+ continue;
+ }
+ if (fields++ > 0) {
+ telemetryData->AppendLiteral(", ");
+ }
+ telemetryData->AppendLiteral(R"(")");
+ telemetryData->Append(GetReadableNameForPhase(shutdownStep.mPhase));
+ telemetryData->AppendLiteral(R"(": )");
+ telemetryData->AppendInt(shutdownStep.mTicks);
+ }
+ telemetryData->AppendLiteral("}");
+
+ if (fields == 0) {
+ // Nothing to write
+ return;
+ }
+
+ //
+ // Send data to the worker thread.
+ //
+ delete gWriteData.exchange(
+ telemetryData.release()); // Clear any data that hasn't been written yet
+
+ // In case the worker thread was sleeping, wake it up.
+ PR_EnterMonitor(gWriteReady);
+ PR_Notify(gWriteReady);
+ PR_ExitMonitor(gWriteReady);
+}
+
+void nsTerminator::UpdateCrashReport(const char* aTopic) {
+ // In case of crash, we wish to know where in shutdown we are
+ nsAutoCString report(aTopic);
+
+ Unused << CrashReporter::AnnotateCrashReport(
+ CrashReporter::Annotation::ShutdownProgress, report);
+}
+} // namespace mozilla
diff --git a/toolkit/components/terminator/nsTerminator.h b/toolkit/components/terminator/nsTerminator.h
new file mode 100644
index 0000000000..46c3e17b92
--- /dev/null
+++ b/toolkit/components/terminator/nsTerminator.h
@@ -0,0 +1,49 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsTerminator_h__
+#define nsTerminator_h__
+
+#include "nsISupports.h"
+#include "nsIObserver.h"
+
+namespace mozilla {
+
+class nsTerminator final : public nsIObserver {
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIOBSERVER
+
+ nsTerminator();
+ void AdvancePhase(mozilla::ShutdownPhase aPhase);
+
+ private:
+ void Start();
+ void StartWatchdog();
+ void StartWriter();
+
+ void UpdateHeartbeat(int aStep);
+ void UpdateTelemetry();
+ void UpdateCrashReport(const char* aTopic);
+
+ ~nsTerminator() = default;
+
+ bool mInitialized;
+ int mCurrentStep;
+};
+
+} // namespace mozilla
+
+#define NS_TOOLKIT_TERMINATOR_CID \
+ { \
+ 0x2e59cc70, 0xf83a, 0x412f, { \
+ 0x89, 0xd4, 0x45, 0x38, 0x85, 0x83, 0x72, 0x17 \
+ } \
+ }
+#define NS_TOOLKIT_TERMINATOR_CONTRACTID \
+ "@mozilla.org/toolkit/shutdown-terminator;1"
+
+#endif // nsTerminator_h__
diff --git a/toolkit/components/terminator/terminator.manifest b/toolkit/components/terminator/terminator.manifest
new file mode 100644
index 0000000000..33cb143de9
--- /dev/null
+++ b/toolkit/components/terminator/terminator.manifest
@@ -0,0 +1 @@
+category profile-after-change nsTerminator @mozilla.org/toolkit/shutdown-terminator;1
diff --git a/toolkit/components/terminator/tests/xpcshell/test_terminator_record.js b/toolkit/components/terminator/tests/xpcshell/test_terminator_record.js
new file mode 100644
index 0000000000..62cee636f3
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/test_terminator_record.js
@@ -0,0 +1,171 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+/* eslint-disable mozilla/no-arbitrary-setTimeout */
+
+"use strict";
+
+// Test that the Shutdown Terminator records durations correctly
+
+const { setTimeout } = ChromeUtils.importESModule(
+ "resource://gre/modules/Timer.sys.mjs"
+);
+
+var PATH;
+var PATH_TMP;
+var terminator;
+
+var HEARTBEAT_MS = 100;
+
+let KEYS = [
+ "quit-application",
+ "profile-change-net-teardown",
+ "profile-change-teardown",
+ "profile-before-change",
+ "profile-before-change-qm",
+ "xpcom-will-shutdown",
+ "xpcom-shutdown",
+ "xpcom-shutdown-threads",
+ "XPCOMShutdownFinal",
+ "CCPostLastCycleCollection",
+];
+
+let DATA = [];
+let MeasuredDurations = [];
+
+add_task(async function init() {
+ do_get_profile();
+ PATH = PathUtils.join(PathUtils.localProfileDir, "ShutdownDuration.json");
+ PATH_TMP = PATH + ".tmp";
+
+ // Initialize the terminator
+ // (normally, this is done through the manifest file, but xpcshell
+ // doesn't take them into account).
+ info("Initializing the Terminator");
+ terminator = Cc["@mozilla.org/toolkit/shutdown-terminator;1"].createInstance(
+ Ci.nsIObserver
+ );
+});
+
+var promiseShutdownDurationData = async function () {
+ // Wait until PATH exists.
+ // Timeout if it is never created.
+ while (true) {
+ if (await IOUtils.exists(PATH)) {
+ break;
+ }
+
+ // Wait just a very short period to not increase measured values.
+ // Usually the file should appear almost immediately.
+ await new Promise(resolve => setTimeout(resolve, 50));
+ }
+
+ return IOUtils.readJSON(PATH);
+};
+
+var currentPhase = 0;
+
+var advancePhase = async function () {
+ let key = "terminator-test-" + KEYS[currentPhase];
+ let msDuration = 200 + HEARTBEAT_MS * currentPhase;
+
+ info("Advancing shutdown phase to " + KEYS[currentPhase]);
+ terminator.observe(null, key, null);
+ await new Promise(resolve => setTimeout(resolve, msDuration));
+
+ let data = await promiseShutdownDurationData();
+
+ Assert.ok(KEYS[currentPhase] in data, "The file contains the expected key");
+ Assert.equal(
+ Object.keys(data).length,
+ currentPhase + 1,
+ "File does not contain more durations than expected"
+ );
+
+ DATA[currentPhase] = data;
+ currentPhase++;
+ if (currentPhase < KEYS.length) {
+ return true;
+ }
+ return false;
+};
+
+// This is a timing affected test, as we want to check if the time measurements
+// from the terminator are reasonable. Bug 1768795 assumes that they tend to
+// be lower than wall-clock, in particular on MacOS, confirmed by the logs on
+// intermittent bug 1760094. This is not a big deal for the terminator's
+// general functionality (timeouts might just come a little later than
+// expected nominally), but it makes testing harder and the transferred
+// telemetry data slightly less reliable (shutdowns might appear shorter than
+// they really were). So this test is just happy if there is any data that
+// is not too long wrt what we expect. If we ever want to fix bug 1768795,
+// we can check for a more reasonable lower boundary, too.
+add_task(async function test_record() {
+ info("Collecting duration data for all known phases");
+
+ let morePhases = true;
+ while (morePhases) {
+ let beforeWait = Date.now();
+
+ morePhases = await advancePhase();
+
+ await IOUtils.remove(PATH);
+ await IOUtils.remove(PATH_TMP);
+
+ // We measure the effective time that passed as wall-clock and include all
+ // file IO overhead as the terminator will do so in its measurement, too.
+ MeasuredDurations[currentPhase - 1] = Math.floor(
+ (Date.now() - beforeWait) / HEARTBEAT_MS
+ );
+ }
+
+ Assert.equal(DATA.length, KEYS.length, "We have data for each phase");
+
+ for (let i = 0; i < KEYS.length; i++) {
+ let lastDuration = DATA[KEYS.length - 1][KEYS[i]];
+ Assert.equal(
+ typeof lastDuration,
+ "number",
+ "Duration of phase " + i + ":" + KEYS[i] + " is a number"
+ );
+
+ // The durations are only meaningful after we advanced to the next phase.
+ if (i < KEYS.length - 1) {
+ // So we read it from the data written for the following phase.
+ let ticksDuration = DATA[i + 1][KEYS[i]];
+ let measuredDuration = MeasuredDurations[i];
+ info(
+ "measuredDuration:" + measuredDuration + " - " + typeof measuredDuration
+ );
+ Assert.lessOrEqual(
+ ticksDuration,
+ measuredDuration + 2,
+ "Duration of phase " + i + ":" + KEYS[i] + " is not too long"
+ );
+ Assert.greaterOrEqual(
+ ticksDuration,
+ 0, // TODO: Raise the lower boundary after bug 1768795.
+ "Duration of phase " + i + ":" + KEYS[i] + " is not too short"
+ );
+ }
+ // This check is done only for phases <= xpcom-shutdown-threads
+ // where we have two data points.
+ if (i < KEYS.length - 2) {
+ let ticksDuration = DATA[i + 1][KEYS[i]];
+ Assert.equal(
+ ticksDuration,
+ DATA[KEYS.length - 1][KEYS[i]],
+ "Duration of phase " + i + ":" + KEYS[i] + " hasn't changed"
+ );
+ }
+ }
+
+ // Note that after this check the KEYS array remains sorted, so this
+ // must be the last check to not get confused.
+ Assert.equal(
+ Object.keys(DATA[KEYS.length - 1])
+ .sort()
+ .join(", "),
+ KEYS.sort().join(", "),
+ "The last file contains all expected keys"
+ );
+});
diff --git a/toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js b/toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js
new file mode 100644
index 0000000000..79aecd818b
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js
@@ -0,0 +1,88 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+// Test that the Shutdown Terminator reloads durations correctly
+
+const HISTOGRAMS = {
+ "quit-application": "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION",
+ "profile-change-net-teardown":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_NET_TEARDOWN",
+ "profile-change-teardown":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN",
+ "profile-before-change":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE",
+ "profile-before-change-qm":
+ "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE_QM",
+ "xpcom-will-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN",
+ "xpcom-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_SHUTDOWN",
+};
+
+let PATH;
+
+add_setup(async function init() {
+ do_get_profile();
+ PATH = PathUtils.join(PathUtils.localProfileDir, "ShutdownDuration.json");
+});
+
+add_task(async function test_reload() {
+ info("Forging data");
+ let data = {};
+ let telemetrySnapshots = Services.telemetry.getSnapshotForHistograms(
+ "main",
+ false /* clear */
+ ).parent;
+ let i = 0;
+ for (let k of Object.keys(HISTOGRAMS)) {
+ let id = HISTOGRAMS[k];
+ data[k] = i++;
+ Assert.equal(
+ telemetrySnapshots[id] || undefined,
+ undefined,
+ "Histogram " + id + " is empty"
+ );
+ }
+
+ // Extra fields that nsTerminator reports that we do not have histograms for.
+ data["xpcom-shutdown-threads"] = 123;
+ data.XPCOMShutdownFinal = 456;
+ data.CCPostLastCycleCollection = 789;
+
+ await IOUtils.writeJSON(PATH, data);
+
+ const TOPIC = "shutdown-terminator-telemetry-updated";
+
+ let wait = new Promise(resolve =>
+ Services.obs.addObserver(function observer() {
+ info("Telemetry has been updated");
+ Services.obs.removeObserver(observer, TOPIC);
+ resolve();
+ }, TOPIC)
+ );
+
+ info("Starting nsTerminatorTelemetry");
+ let tt = Cc[
+ "@mozilla.org/toolkit/shutdown-terminator-telemetry;1"
+ ].createInstance(Ci.nsIObserver);
+ tt.observe(null, "profile-after-change", "");
+
+ info("Waiting until telemetry is updated");
+ // Now wait until Telemetry is updated
+ await wait;
+
+ telemetrySnapshots = Services.telemetry.getSnapshotForHistograms(
+ "main",
+ false /* clear */
+ ).parent;
+ for (let k of Object.keys(HISTOGRAMS)) {
+ let id = HISTOGRAMS[k];
+ info("Testing histogram " + id);
+ let snapshot = telemetrySnapshots[id];
+ let count = 0;
+ for (let x of Object.values(snapshot.values)) {
+ count += x;
+ }
+ Assert.equal(count, 1, "We have added one item");
+ }
+});
diff --git a/toolkit/components/terminator/tests/xpcshell/xpcshell.toml b/toolkit/components/terminator/tests/xpcshell/xpcshell.toml
new file mode 100644
index 0000000000..cb328ca2c8
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/xpcshell.toml
@@ -0,0 +1,15 @@
+[DEFAULT]
+head = ""
+
+["test_terminator_record.js"]
+skip-if = [
+ "debug",
+ "asan", # Disabled by bug 1242084, bug 1255484 will enable it again
+ "ccov", # Bug 1607583 tracks the ccov failure
+ "tsan", # Bug 1683730 made this timeout for tsan.
+]
+run-sequentially = "very high failure rate in parallel"
+
+["test_terminator_reload.js"]
+skip-if = ["os == 'android'"]
+run-sequentially = "very high failure rate in parallel"