summaryrefslogtreecommitdiffstats
path: root/toolkit/components/telemetry/pings/HealthPing.sys.mjs
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/components/telemetry/pings/HealthPing.sys.mjs')
-rw-r--r--toolkit/components/telemetry/pings/HealthPing.sys.mjs271
1 files changed, 271 insertions, 0 deletions
diff --git a/toolkit/components/telemetry/pings/HealthPing.sys.mjs b/toolkit/components/telemetry/pings/HealthPing.sys.mjs
new file mode 100644
index 0000000000..3450f65a3e
--- /dev/null
+++ b/toolkit/components/telemetry/pings/HealthPing.sys.mjs
@@ -0,0 +1,271 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This module collects data on send failures and other critical issues with Telemetry submissions.
+ */
+
+import { TelemetryUtils } from "resource://gre/modules/TelemetryUtils.sys.mjs";
+
+import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
+
+const lazy = {};
+
+ChromeUtils.defineESModuleGetters(lazy, {
+ Log: "resource://gre/modules/Log.sys.mjs",
+ TelemetryController: "resource://gre/modules/TelemetryController.sys.mjs",
+ clearTimeout: "resource://gre/modules/Timer.sys.mjs",
+ setTimeout: "resource://gre/modules/Timer.sys.mjs",
+});
+
+const Utils = TelemetryUtils;
+
+const MS_IN_A_MINUTE = 60 * 1000;
+// Send health ping every hour
+const SEND_TICK_DELAY = 60 * MS_IN_A_MINUTE;
+
+// Send top 10 discarded pings only to minimize health ping size
+const MAX_SEND_DISCARDED_PINGS = 10;
+
+const LOGGER_NAME = "Toolkit.Telemetry";
+const LOGGER_PREFIX = "TelemetryHealthPing::";
+
+export var Policy = {
+ setSchedulerTickTimeout: (callback, delayMs) =>
+ lazy.setTimeout(callback, delayMs),
+ clearSchedulerTickTimeout: id => lazy.clearTimeout(id),
+};
+
+export var TelemetryHealthPing = {
+ Reason: Object.freeze({
+ IMMEDIATE: "immediate", // Ping was sent immediately after recording with no delay.
+ DELAYED: "delayed", // Recorded data was sent after a delay.
+ SHUT_DOWN: "shutdown", // Recorded data was sent on shutdown.
+ }),
+
+ FailureType: Object.freeze({
+ DISCARDED_FOR_SIZE: "pingDiscardedForSize",
+ SEND_FAILURE: "sendFailure",
+ }),
+
+ OsInfo: Object.freeze({
+ name: Services.appinfo.OS,
+ version:
+ Services.sysinfo.get("kernel_version") || Services.sysinfo.get("version"),
+ }),
+
+ HEALTH_PING_TYPE: "health",
+
+ _logger: null,
+
+ // The health ping is sent every every SEND_TICK_DELAY.
+ // Initialize this so that first failures are sent immediately.
+ _lastSendTime: -SEND_TICK_DELAY,
+
+ /**
+ * This stores reported send failures with the following structure:
+ * {
+ * type1: {
+ * subtype1: value,
+ * ...
+ * subtypeN: value
+ * },
+ * ...
+ * }
+ */
+ _failures: {},
+ _timeoutId: null,
+
+ /**
+ * Record a failure to send a ping out.
+ * @param {String} failureType The type of failure (e.g. "timeout", ...).
+ * @returns {Promise} Test-only, resolved when the ping is stored or sent.
+ */
+ recordSendFailure(failureType) {
+ return this._addToFailure(this.FailureType.SEND_FAILURE, failureType);
+ },
+
+ /**
+ * Record that a ping was discarded and its type.
+ * @param {String} pingType The type of discarded ping (e.g. "main", ...).
+ * @returns {Promise} Test-only, resolved when the ping is stored or sent.
+ */
+ recordDiscardedPing(pingType) {
+ return this._addToFailure(this.FailureType.DISCARDED_FOR_SIZE, pingType);
+ },
+
+ /**
+ * Assemble payload.
+ * @param {String} reason A string indicating the triggering reason (e.g. "immediate", "delayed", "shutdown").
+ * @returns {Object} The assembled payload.
+ */
+ _assemblePayload(reason) {
+ this._log.trace("_assemblePayload()");
+ let payload = {
+ os: this.OsInfo,
+ reason,
+ };
+
+ for (let key of Object.keys(this._failures)) {
+ if (key === this.FailureType.DISCARDED_FOR_SIZE) {
+ payload[key] = this._getTopDiscardFailures(this._failures[key]);
+ } else {
+ payload[key] = this._failures[key];
+ }
+ }
+
+ return payload;
+ },
+
+ /**
+ * Sort input dictionary descending by value.
+ * @param {Object} failures A dictionary of failures subtype and count.
+ * @returns {Object} Sorted failures by value.
+ */
+ _getTopDiscardFailures(failures) {
+ this._log.trace("_getTopDiscardFailures()");
+ let sortedItems = Object.entries(failures).sort((first, second) => {
+ return second[1] - first[1];
+ });
+
+ let result = {};
+ sortedItems.slice(0, MAX_SEND_DISCARDED_PINGS).forEach(([key, value]) => {
+ result[key] = value;
+ });
+
+ return result;
+ },
+
+ /**
+ * Assemble the failure information and submit it.
+ * @param {String} reason A string indicating the triggering reason (e.g. "immediate", "delayed", "shutdown").
+ * @returns {Promise} Test-only promise that resolves when the ping was stored or sent (if any).
+ */
+ _submitPing(reason) {
+ if (!TelemetryHealthPing.enabled || !this._hasDataToSend()) {
+ return Promise.resolve();
+ }
+
+ this._log.trace("_submitPing(" + reason + ")");
+ let payload = this._assemblePayload(reason);
+ this._clearData();
+ this._lastSendTime = Utils.monotonicNow();
+
+ let options = {
+ addClientId: true,
+ usePingSender: reason === this.Reason.SHUT_DOWN,
+ };
+
+ return new Promise(r =>
+ // If we submit the health ping immediately, the send task would be triggered again
+ // before discarding oversized pings from the queue.
+ // To work around this, we send the ping on the next tick.
+ Services.tm.dispatchToMainThread(() =>
+ r(
+ lazy.TelemetryController.submitExternalPing(
+ this.HEALTH_PING_TYPE,
+ payload,
+ options
+ )
+ )
+ )
+ );
+ },
+
+ /**
+ * Accumulate failure information and trigger a ping immediately or on timeout.
+ * @param {String} failureType The type of failure (e.g. "timeout", ...).
+ * @param {String} failureSubType The subtype of failure (e.g. ping type, ...).
+ * @returns {Promise} Test-only, resolved when the ping is stored or sent.
+ */
+ _addToFailure(failureType, failureSubType) {
+ this._log.trace(
+ "_addToFailure() - with type and subtype: " +
+ failureType +
+ " : " +
+ failureSubType
+ );
+
+ if (!(failureType in this._failures)) {
+ this._failures[failureType] = {};
+ }
+
+ let current = this._failures[failureType][failureSubType] || 0;
+ this._failures[failureType][failureSubType] = current + 1;
+
+ const now = Utils.monotonicNow();
+ if (now - this._lastSendTime >= SEND_TICK_DELAY) {
+ return this._submitPing(this.Reason.IMMEDIATE);
+ }
+
+ let submissionDelay = SEND_TICK_DELAY - now - this._lastSendTime;
+ this._timeoutId = Policy.setSchedulerTickTimeout(
+ () => TelemetryHealthPing._submitPing(this.Reason.DELAYED),
+ submissionDelay
+ );
+ return Promise.resolve();
+ },
+
+ /**
+ * @returns {boolean} Check the availability of recorded failures data.
+ */
+ _hasDataToSend() {
+ return Object.keys(this._failures).length !== 0;
+ },
+
+ /**
+ * Clear recorded failures data.
+ */
+ _clearData() {
+ this._log.trace("_clearData()");
+ this._failures = {};
+ },
+
+ /**
+ * Clear and reset timeout.
+ */
+ _resetTimeout() {
+ if (this._timeoutId) {
+ Policy.clearSchedulerTickTimeout(this._timeoutId);
+ this._timeoutId = null;
+ }
+ },
+
+ /**
+ * Submit latest ping on shutdown.
+ * @returns {Promise} Test-only, resolved when the ping is stored or sent.
+ */
+ shutdown() {
+ this._log.trace("shutdown()");
+ this._resetTimeout();
+ return this._submitPing(this.Reason.SHUT_DOWN);
+ },
+
+ /**
+ * Test-only, restore to initial state.
+ */
+ testReset() {
+ this._lastSendTime = -SEND_TICK_DELAY;
+ this._clearData();
+ this._resetTimeout();
+ },
+
+ get _log() {
+ if (!this._logger) {
+ this._logger = lazy.Log.repository.getLoggerWithMessagePrefix(
+ LOGGER_NAME,
+ LOGGER_PREFIX + "::"
+ );
+ }
+
+ return this._logger;
+ },
+};
+
+XPCOMUtils.defineLazyPreferenceGetter(
+ TelemetryHealthPing,
+ "enabled",
+ TelemetryUtils.Preferences.HealthPingEnabled,
+ true
+);