firefox/services/sync/modules/telemetry.sys.mjs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

// Support for Sync-and-FxA-related telemetry, which is submitted in a special-purpose
// telemetry ping called the "sync ping", documented here:
//
//  ../../../toolkit/components/telemetry/docs/data/sync-ping.rst
//
// The sync ping contains identifiers that are linked to the user's Firefox Account
// and are separate from the main telemetry client_id, so this file is also responsible
// for ensuring that we can delete those pings upon user request, by plumbing its
// identifiers into the "deletion-request" ping.

import { Log } from "resource://gre/modules/Log.sys.mjs";

const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  Async: "resource://services-common/async.sys.mjs",
  AuthenticationError: "resource://services-sync/sync_auth.sys.mjs",
  FxAccounts: "resource://gre/modules/FxAccounts.sys.mjs",
  ObjectUtils: "resource://gre/modules/ObjectUtils.sys.mjs",
  Observers: "resource://services-common/observers.sys.mjs",
  Resource: "resource://services-sync/resource.sys.mjs",
  Status: "resource://services-sync/status.sys.mjs",
  Svc: "resource://services-sync/util.sys.mjs",
  TelemetryController: "resource://gre/modules/TelemetryController.sys.mjs",
  TelemetryEnvironment: "resource://gre/modules/TelemetryEnvironment.sys.mjs",
  TelemetryUtils: "resource://gre/modules/TelemetryUtils.sys.mjs",
  Weave: "resource://services-sync/main.sys.mjs",
});

ChromeUtils.defineLazyGetter(lazy, "fxAccounts", () => {
  return ChromeUtils.importESModule(
    "resource://gre/modules/FxAccounts.sys.mjs"
  ).getFxAccountsSingleton();
});

import * as constants from "resource://services-sync/constants.sys.mjs";

ChromeUtils.defineLazyGetter(
  lazy,
  "WeaveService",
  () => Cc["@mozilla.org/weave/service;1"].getService().wrappedJSObject
);
const log = Log.repository.getLogger("Sync.Telemetry");

const TOPICS = [
  // For tracking change to account/device identifiers.
  "fxaccounts:new_device_id",
  "fxaccounts:onlogout",
  "weave:service:ready",
  "weave:service:login:got-hashed-id",

  // For whole-of-sync metrics.
  "weave:service:sync:start",
  "weave:service:sync:finish",
  "weave:service:sync:error",

  // For individual engine metrics.
  "weave:engine:sync:start",
  "weave:engine:sync:finish",
  "weave:engine:sync:error",
  "weave:engine:sync:applied",
  "weave:engine:sync:step",
  "weave:engine:sync:uploaded",
  "weave:engine:validate:finish",
  "weave:engine:validate:error",

  // For ad-hoc telemetry events.
  "weave:telemetry:event",
  "weave:telemetry:histogram",
  "fxa:telemetry:event",

  "weave:telemetry:migration",
];

const PING_FORMAT_VERSION = 1;

const EMPTY_UID = "0".repeat(32);

// The set of engines we record telemetry for - any other engines are ignored.
const ENGINES = new Set([
  "addons",
  "bookmarks",
  "clients",
  "forms",
  "history",
  "passwords",
  "prefs",
  "tabs",
  "extension-storage",
  "addresses",
  "creditcards",
]);

function tryGetMonotonicTimestamp() {
  try {
    return Services.telemetry.msSinceProcessStart();
  } catch (e) {
    log.warn("Unable to get a monotonic timestamp!");
    return -1;
  }
}

function timeDeltaFrom(monotonicStartTime) {
  let now = tryGetMonotonicTimestamp();
  if (monotonicStartTime !== -1 && now !== -1) {
    return Math.round(now - monotonicStartTime);
  }
  return -1;
}

const NS_ERROR_MODULE_BASE_OFFSET = 0x45;
const NS_ERROR_MODULE_NETWORK = 6;

// A reimplementation of NS_ERROR_GET_MODULE, which surprisingly doesn't seem
// to exist anywhere in .js code in a way that can be reused.
// This is taken from DownloadCore.sys.mjs.
function NS_ERROR_GET_MODULE(code) {
  return ((code & 0x7fff0000) >> 16) - NS_ERROR_MODULE_BASE_OFFSET;
}

// Converts extra integer fields to strings, rounds floats to three
// decimal places (nanosecond precision for timings), and removes profile
// directory paths and URLs from potential error messages.
function normalizeExtraTelemetryFields(extra) {
  let result = {};
  for (let key in extra) {
    let value = extra[key];
    let type = typeof value;
    if (type == "string") {
      result[key] = ErrorSanitizer.cleanErrorMessage(value);
    } else if (type == "number") {
      result[key] = Number.isInteger(value)
        ? value.toString(10)
        : value.toFixed(3);
    } else if (type != "undefined") {
      throw new TypeError(
        `Invalid type ${type} for extra telemetry field ${key}`
      );
    }
  }
  return lazy.ObjectUtils.isEmpty(result) ? undefined : result;
}

// Keps track of the counts of individual records fate during a sync cycle
// The main reason this is a class is to keep track of reasons individual records
// failure reasons without huge memory overhead.
export class SyncedRecordsTelemetry {
  // applied    => number of items that should be applied.
  // failed     => number of items that failed in this sync.
  // newFailed  => number of items that failed for the first time in this sync.
  // reconciled => number of items that were reconciled.
  // failedReasons => {name, count} of reasons a record failed
  incomingCounts = {
    applied: 0,
    failed: 0,
    newFailed: 0,
    reconciled: 0,
    failedReasons: null,
  };
  outgoingCounts = { failed: 0, sent: 0, failedReasons: null };

  addIncomingFailedReason(reason) {
    if (!this.incomingCounts.failedReasons) {
      this.incomingCounts.failedReasons = [];
    }
    let transformedReason = SyncTelemetry.transformError(reason);
    // Some errors like http/nss errors don't have an error object
    // those will be caught by the higher level telemetry
    if (!transformedReason.error) {
      return;
    }

    let index = this.incomingCounts.failedReasons.findIndex(
      reasons => reasons.name === transformedReason.error
    );

    if (index >= 0) {
      this.incomingCounts.failedReasons[index].count += 1;
    } else {
      this.incomingCounts.failedReasons.push({
        name: transformedReason.error,
        count: 1,
      });
    }
  }

  addOutgoingFailedReason(reason) {
    if (!this.outgoingCounts.failedReasons) {
      this.outgoingCounts.failedReasons = [];
    }
    let transformedReason = SyncTelemetry.transformError(reason);
    // Some errors like http/nss errors don't have an error object
    // those will be caught by the higher level telemetry
    if (!transformedReason.error) {
      return;
    }
    let index = this.outgoingCounts.failedReasons.findIndex(
      reasons => reasons.name === transformedReason.error
    );
    if (index >= 0) {
      this.outgoingCounts.failedReasons[index].count += 1;
    } else {
      this.outgoingCounts.failedReasons.push({
        name: transformedReason.error,
        count: 1,
      });
    }
  }
}

// The `ErrorSanitizer` has 2 main jobs:
// * Remove PII from errors, such as the profile dir or URLs the user might
//   have visited.
// * Normalize errors so different locales or operating systems etc don't
//   generate different messages for the same underlying error.
// * [TODO] Normalize errors so environmental factors don't influence message.
//   For example, timestamps or GUIDs should be replaced with something static.
export class ErrorSanitizer {
  // Things we normalize - this isn't exhaustive, but covers the common error messages we see.
  // Eg:
  // > Win error 112 during operation write on file [profileDir]\weave\addonsreconciler.json (Espacio en disco insuficiente. )
  // > Win error 112 during operation write on file [profileDir]\weave\addonsreconciler.json (Diskte yeterli yer yok. )
  // > <snip many other translations of the error>
  // > Unix error 28 during operation write on file [profileDir]/weave/addonsreconciler.json (No space left on device)
  // These tend to crowd out other errors we might care about (eg, top 5 errors for some engines are
  // variations of the "no space left on device")

  // Note that only errors that have same-but-different errors on Windows and Unix are here - we
  // still sanitize ones that aren't in these maps to remove the translations etc - eg,
  // `ERROR_SHARING_VIOLATION` doesn't really have a unix equivalent, so doesn't appear here, but
  // we still strip the translations to avoid the variants.
  static E_PERMISSION_DENIED = "OS error [Permission denied]";
  static E_NO_FILE_OR_DIR = "OS error [File/Path not found]";

  static DOMErrorSubstitutions = {
    NotFoundError: this.E_NO_FILE_OR_DIR,
    NotAllowedError: this.E_PERMISSION_DENIED,
  };

  // IOUtils error messages include the specific nsresult error code that caused them.
  static NS_ERROR_RE = new RegExp(/ \(NS_ERROR_.*\)$/);

  static #cleanOSErrorMessage(message, error = undefined) {
    if (DOMException.isInstance(error)) {
      const sub = this.DOMErrorSubstitutions[error.name];
      message = message.replaceAll("\\", "/");
      message = message.replace(this.NS_ERROR_RE, "");
      if (sub) {
        return `${sub} ${message}`;
      }
    }

    return message;
  }

  // A regex we can use to replace the profile dir in error messages. We use a
  // regexp so we can simply replace all case-insensitive occurences.
  // This escaping function is from:
  // https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions
  static reProfileDir = new RegExp(
    PathUtils.profileDir.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"),
    "gi"
  );

  /**
   * Clean an error message, removing PII and normalizing OS-specific messages.
   *
   * @param {string} message The error message
   * @param {Error?} error The error class instance, if any.
   */
  static cleanErrorMessage(message, error = undefined) {
    // There's a chance the profiledir is in the error string which is PII we
    // want to avoid including in the ping.
    message = message.replace(this.reProfileDir, "[profileDir]");
    // MSG_INVALID_URL from /dom/bindings/Errors.msg -- no way to access this
    // directly from JS.
    if (message.endsWith("is not a valid URL.")) {
      message = "<URL> is not a valid URL.";
    }
    // Try to filter things that look somewhat like a URL (in that they contain a
    // colon in the middle of non-whitespace), in case anything else is including
    // these in error messages. Note that JSON.stringified stuff comes through
    // here, so we explicitly ignore double-quotes as well.
    message = message.replace(/[^\s"]+:[^\s"]+/g, "<URL>");

    // Anywhere that's normalized the guid in errors we can easily filter
    // to make it easier to aggregate these types of errors
    message = message.replace(/<guid: ([^>]+)>/g, "<GUID>");

    return this.#cleanOSErrorMessage(message, error);
  }
}

// This function validates the payload of a telemetry "event" - this can be
// removed once there are APIs available for the telemetry modules to collect
// these events (bug 1329530) - but for now we simulate that planned API as
// best we can.
function validateTelemetryEvent(eventDetails) {
  let { object, method, value, extra } = eventDetails;
  // Do do basic validation of the params - everything except "extra" must
  // be a string. method and object are required.
  if (
    typeof method != "string" ||
    typeof object != "string" ||
    (value && typeof value != "string") ||
    (extra && typeof extra != "object")
  ) {
    log.warn("Invalid event parameters - wrong types", eventDetails);
    return false;
  }
  // length checks.
  if (
    method.length > 20 ||
    object.length > 20 ||
    (value && value.length > 80)
  ) {
    log.warn("Invalid event parameters - wrong lengths", eventDetails);
    return false;
  }

  // extra can be falsey, or an object with string names and values.
  if (extra) {
    if (Object.keys(extra).length > 10) {
      log.warn("Invalid event parameters - too many extra keys", eventDetails);
      return false;
    }
    for (let [ename, evalue] of Object.entries(extra)) {
      if (
        typeof ename != "string" ||
        ename.length > 15 ||
        typeof evalue != "string" ||
        evalue.length > 85
      ) {
        log.warn(
          `Invalid event parameters: extra item "${ename} is invalid`,
          eventDetails
        );
        return false;
      }
    }
  }
  return true;
}

class EngineRecord {
  constructor(name) {
    // startTime is in ms from process start, but is monotonic (unlike Date.now())
    // so we need to keep both it and when.
    this.startTime = tryGetMonotonicTimestamp();
    this.name = name;

    // This allows cases like bookmarks-buffered to have a separate name from
    // the bookmarks engine.
    let engineImpl = lazy.Weave.Service.engineManager.get(name);
    if (engineImpl && engineImpl.overrideTelemetryName) {
      this.overrideTelemetryName = engineImpl.overrideTelemetryName;
    }
  }

  toJSON() {
    let result = { name: this.overrideTelemetryName || this.name };
    let properties = [
      "took",
      "status",
      "failureReason",
      "incoming",
      "outgoing",
      "validation",
      "steps",
    ];
    for (let property of properties) {
      result[property] = this[property];
    }
    return result;
  }

  finished(error) {
    let took = timeDeltaFrom(this.startTime);
    if (took > 0) {
      this.took = took;
    }
    if (error) {
      this.failureReason = SyncTelemetry.transformError(error);
    }
  }

  recordApplied(counts) {
    if (this.incoming) {
      log.error(
        `Incoming records applied multiple times for engine ${this.name}!`
      );
      return;
    }
    if (this.name === "clients" && !counts.failed) {
      // ignore successful application of client records
      // since otherwise they show up every time and are meaningless.
      return;
    }

    let incomingData = {};

    if (counts.failedReasons) {
      // sort the failed reasons in desc by count, then take top 10
      counts.failedReasons = counts.failedReasons
        .sort((a, b) => b.count - a.count)
        .slice(0, 10);
    }
    // Counts has extra stuff used for logging, but we only care about a few
    let properties = ["applied", "failed", "failedReasons"];
    // Only record non-zero properties and only record incoming at all if
    // there's at least one property we care about.
    for (let property of properties) {
      if (counts[property]) {
        incomingData[property] = counts[property];
        this.incoming = incomingData;
      }
    }
  }

  recordStep(stepResult) {
    let step = {
      name: stepResult.name,
    };
    if (stepResult.took > 0) {
      step.took = Math.round(stepResult.took);
    }
    if (stepResult.counts) {
      let counts = stepResult.counts.filter(({ count }) => count > 0);
      if (counts.length) {
        step.counts = counts;
      }
    }
    if (this.steps) {
      this.steps.push(step);
    } else {
      this.steps = [step];
    }
  }

  recordValidation(validationResult) {
    if (this.validation) {
      log.error(`Multiple validations occurred for engine ${this.name}!`);
      return;
    }
    let { problems, version, took, checked } = validationResult;
    let validation = {
      version: version || 0,
      checked: checked || 0,
    };
    if (took > 0) {
      validation.took = Math.round(took);
    }
    let summarized = problems.filter(({ count }) => count > 0);
    if (summarized.length) {
      validation.problems = summarized;
    }
    this.validation = validation;
  }

  recordValidationError(e) {
    if (this.validation) {
      log.error(`Multiple validations occurred for engine ${this.name}!`);
      return;
    }

    this.validation = {
      failureReason: SyncTelemetry.transformError(e),
    };
  }

  recordUploaded(counts) {
    if (counts.sent || counts.failed) {
      if (!this.outgoing) {
        this.outgoing = [];
      }
      if (counts.failedReasons) {
        // sort the failed reasons in desc by count, then take top 10
        counts.failedReasons = counts.failedReasons
          .sort((a, b) => b.count - a.count)
          .slice(0, 10);
      }
      this.outgoing.push({
        sent: counts.sent || undefined,
        failed: counts.failed || undefined,
        failedReasons: counts.failedReasons || undefined,
      });
    }
  }
}

// The record of a single "sync" - typically many of these are submitted in
// a single ping (ie, as a 'syncs' array)
export class SyncRecord {
  constructor(allowedEngines, why) {
    this.allowedEngines = allowedEngines;
    // Our failure reason. This property only exists in the generated ping if an
    // error actually occurred.
    this.failureReason = undefined;
    this.syncNodeType = null;
    this.when = Date.now();
    this.startTime = tryGetMonotonicTimestamp();
    this.took = 0; // will be set later.
    this.why = why;

    // All engines that have finished (ie, does not include the "current" one)
    // We omit this from the ping if it's empty.
    this.engines = [];
    // The engine that has started but not yet stopped.
    this.currentEngine = null;
  }

  toJSON() {
    let result = {
      when: this.when,
      took: this.took,
      failureReason: this.failureReason,
      status: this.status,
    };
    if (this.why) {
      result.why = this.why;
    }
    let engines = [];
    for (let engine of this.engines) {
      engines.push(engine.toJSON());
    }
    if (engines.length) {
      result.engines = engines;
    }
    return result;
  }

  finished(error) {
    this.took = timeDeltaFrom(this.startTime);
    if (this.currentEngine != null) {
      log.error(
        "Finished called for the sync before the current engine finished"
      );
      this.currentEngine.finished(null);
      this.onEngineStop(this.currentEngine.name);
    }
    if (error) {
      this.failureReason = SyncTelemetry.transformError(error);
    }

    this.syncNodeType = lazy.Weave.Service.identity.telemetryNodeType;

    // Check for engine statuses. -- We do this now, and not in engine.finished
    // to make sure any statuses that get set "late" are recorded
    for (let engine of this.engines) {
      let status = lazy.Status.engines[engine.name];
      if (status && status !== constants.ENGINE_SUCCEEDED) {
        engine.status = status;
      }
    }

    let statusObject = {};

    let serviceStatus = lazy.Status.service;
    if (serviceStatus && serviceStatus !== constants.STATUS_OK) {
      statusObject.service = serviceStatus;
      this.status = statusObject;
    }
    let syncStatus = lazy.Status.sync;
    if (syncStatus && syncStatus !== constants.SYNC_SUCCEEDED) {
      statusObject.sync = syncStatus;
      this.status = statusObject;
    }
  }

  onEngineStart(engineName) {
    if (this._shouldIgnoreEngine(engineName, false)) {
      return;
    }

    if (this.currentEngine) {
      log.error(
        `Being told that engine ${engineName} has started, but current engine ${this.currentEngine.name} hasn't stopped`
      );
      // Just discard the current engine rather than making up data for it.
    }
    this.currentEngine = new EngineRecord(engineName);
  }

  onEngineStop(engineName, error) {
    // We only care if it's the current engine if we have a current engine.
    if (this._shouldIgnoreEngine(engineName, !!this.currentEngine)) {
      return;
    }
    if (!this.currentEngine) {
      // It's possible for us to get an error before the start message of an engine
      // (somehow), in which case we still want to record that error.
      if (!error) {
        return;
      }
      log.error(
        `Error triggered on ${engineName} when no current engine exists: ${error}`
      );
      this.currentEngine = new EngineRecord(engineName);
    }
    this.currentEngine.finished(error);
    this.engines.push(this.currentEngine);
    this.currentEngine = null;
  }

  onEngineApplied(engineName, counts) {
    if (this._shouldIgnoreEngine(engineName)) {
      return;
    }
    this.currentEngine.recordApplied(counts);
  }

  onEngineStep(engineName, step) {
    if (this._shouldIgnoreEngine(engineName)) {
      return;
    }
    this.currentEngine.recordStep(step);
  }

  onEngineValidated(engineName, validationData) {
    if (this._shouldIgnoreEngine(engineName, false)) {
      return;
    }
    let engine = this.engines.find(e => e.name === engineName);
    if (
      !engine &&
      this.currentEngine &&
      engineName === this.currentEngine.name
    ) {
      engine = this.currentEngine;
    }
    if (engine) {
      engine.recordValidation(validationData);
    } else {
      log.warn(
        `Validation event triggered for engine ${engineName}, which hasn't been synced!`
      );
    }
  }

  onEngineValidateError(engineName, error) {
    if (this._shouldIgnoreEngine(engineName, false)) {
      return;
    }
    let engine = this.engines.find(e => e.name === engineName);
    if (
      !engine &&
      this.currentEngine &&
      engineName === this.currentEngine.name
    ) {
      engine = this.currentEngine;
    }
    if (engine) {
      engine.recordValidationError(error);
    } else {
      log.warn(
        `Validation failure event triggered for engine ${engineName}, which hasn't been synced!`
      );
    }
  }

  onEngineUploaded(engineName, counts) {
    if (this._shouldIgnoreEngine(engineName)) {
      return;
    }
    this.currentEngine.recordUploaded(counts);
  }

  _shouldIgnoreEngine(engineName, shouldBeCurrent = true) {
    if (!this.allowedEngines.has(engineName)) {
      log.info(
        `Notification for engine ${engineName}, but we aren't recording telemetry for it`
      );
      return true;
    }
    if (shouldBeCurrent) {
      if (!this.currentEngine || engineName != this.currentEngine.name) {
        log.info(`Notification for engine ${engineName} but it isn't current`);
        return true;
      }
    }
    return false;
  }
}

// The entire "sync ping" - it includes all the syncs, events etc recorded in
// the ping.
class SyncTelemetryImpl {
  constructor(allowedEngines) {
    log.manageLevelFromPref("services.sync.log.logger.telemetry");
    // This is accessible so we can enable custom engines during tests.
    this.allowedEngines = allowedEngines;
    this.current = null;
    this.setupObservers();

    this.payloads = [];
    this.discarded = 0;
    this.events = [];
    this.histograms = {};
    this.migrations = [];
    this.maxEventsCount = lazy.Svc.PrefBranch.getIntPref(
      "telemetry.maxEventsCount",
      1000
    );
    this.maxPayloadCount = lazy.Svc.PrefBranch.getIntPref(
      "telemetry.maxPayloadCount"
    );
    this.submissionInterval =
      lazy.Svc.PrefBranch.getIntPref("telemetry.submissionInterval") * 1000;
    this.lastSubmissionTime = Services.telemetry.msSinceProcessStart();
    this.lastUID = EMPTY_UID;
    this.lastSyncNodeType = null;
    this.currentSyncNodeType = null;
    // Note that the sessionStartDate is somewhat arbitrary - the telemetry
    // modules themselves just use `new Date()`. This means that our startDate
    // isn't going to be the same as the sessionStartDate in the main pings,
    // but that's OK for now - if it's a problem we'd need to change the
    // telemetry modules to expose what it thinks the sessionStartDate is.
    let sessionStartDate = new Date();
    this.sessionStartDate = lazy.TelemetryUtils.toLocalTimeISOString(
      lazy.TelemetryUtils.truncateToHours(sessionStartDate)
    );
    lazy.TelemetryController.registerSyncPingShutdown(() => this.shutdown());
  }

  sanitizeFxaDeviceId(deviceId) {
    return lazy.fxAccounts.telemetry.sanitizeDeviceId(deviceId);
  }

  prepareFxaDevices(devices) {
    // For non-sync users, the data per device is limited -- just an id and a
    // type (and not even the id yet). For sync users, if we can correctly map
    // the fxaDevice to a sync device, then we can get os and version info,
    // which would be quite unfortunate to lose.
    let extraInfoMap = new Map();
    if (this.syncIsEnabled()) {
      for (let client of this.getClientsEngineRecords()) {
        if (client.fxaDeviceId) {
          extraInfoMap.set(client.fxaDeviceId, {
            os: client.os,
            version: client.version,
            syncID: this.sanitizeFxaDeviceId(client.id),
          });
        }
      }
    }
    // Finally, sanitize and convert to the proper format.
    return devices.map(d => {
      let { os, version, syncID } = extraInfoMap.get(d.id) || {
        os: undefined,
        version: undefined,
        syncID: undefined,
      };
      return {
        id: this.sanitizeFxaDeviceId(d.id) || EMPTY_UID,
        type: d.type,
        os,
        version,
        syncID,
      };
    });
  }

  syncIsEnabled() {
    return lazy.WeaveService.enabled && lazy.WeaveService.ready;
  }

  // Separate for testing.
  getClientsEngineRecords() {
    if (!this.syncIsEnabled()) {
      throw new Error("Bug: syncIsEnabled() must be true, check it first");
    }
    return lazy.Weave.Service.clientsEngine.remoteClients;
  }

  updateFxaDevices(devices) {
    if (!devices) {
      return {};
    }
    let me = devices.find(d => d.isCurrentDevice);
    let id = me ? this.sanitizeFxaDeviceId(me.id) : undefined;
    let cleanDevices = this.prepareFxaDevices(devices);
    return { deviceID: id, devices: cleanDevices };
  }

  getFxaDevices() {
    return lazy.fxAccounts.device.recentDeviceList;
  }

  getPingJSON(reason) {
    let { devices, deviceID } = this.updateFxaDevices(this.getFxaDevices());
    return {
      os: lazy.TelemetryEnvironment.currentEnvironment.system.os,
      why: reason,
      devices,
      discarded: this.discarded || undefined,
      version: PING_FORMAT_VERSION,
      syncs: this.payloads.slice(),
      uid: this.lastUID,
      syncNodeType: this.lastSyncNodeType || undefined,
      deviceID,
      sessionStartDate: this.sessionStartDate,
      events: !this.events.length ? undefined : this.events,
      migrations: !this.migrations.length ? undefined : this.migrations,
      histograms: !Object.keys(this.histograms).length
        ? undefined
        : this.histograms,
    };
  }

  _addMigrationRecord(type, info) {
    log.debug("Saw telemetry migration info", type, info);
    // Updates to this need to be documented in `sync-ping.rst`
    switch (type) {
      case "webext-storage":
        this.migrations.push({
          type: "webext-storage",
          entries: +info.entries,
          entriesSuccessful: +info.entries_successful,
          extensions: +info.extensions,
          extensionsSuccessful: +info.extensions_successful,
          openFailure: !!info.open_failure,
        });
        break;
      default:
        throw new Error("Bug: Unknown migration record type " + type);
    }
  }

  finish(reason) {
    // Note that we might be in the middle of a sync right now, and so we don't
    // want to touch this.current.
    let result = this.getPingJSON(reason);
    this.payloads = [];
    this.discarded = 0;
    this.events = [];
    this.migrations = [];
    this.histograms = {};
    this.submit(result);
  }

  setupObservers() {
    for (let topic of TOPICS) {
      lazy.Observers.add(topic, this, this);
    }
  }

  shutdown() {
    this.finish("shutdown");
    for (let topic of TOPICS) {
      lazy.Observers.remove(topic, this, this);
    }
  }

  submit(record) {
    if (!this.isProductionSyncUser()) {
      return false;
    }
    // We still call submit() with possibly illegal payloads so that tests can
    // know that the ping was built. We don't end up submitting them, however.
    let numEvents = record.events ? record.events.length : 0;
    let numMigrations = record.migrations ? record.migrations.length : 0;
    if (record.syncs.length || numEvents || numMigrations) {
      log.trace(
        `submitting ${record.syncs.length} sync record(s) and ` +
          `${numEvents} event(s) to telemetry`
      );
      lazy.TelemetryController.submitExternalPing("sync", record, {
        usePingSender: true,
      }).catch(err => {
        log.error("failed to submit ping", err);
      });
      return true;
    }
    return false;
  }

  isProductionSyncUser() {
    // If FxA isn't production then we treat sync as not being production.
    // Further, there's the deprecated "services.sync.tokenServerURI" pref we
    // need to consider - fxa doesn't consider that as if that's the only
    // pref set, they *are* running a production fxa, just not production sync.
    if (
      !lazy.FxAccounts.config.isProductionConfig() ||
      Services.prefs.prefHasUserValue("services.sync.tokenServerURI")
    ) {
      log.trace(`Not sending telemetry ping for self-hosted Sync user`);
      return false;
    }
    return true;
  }

  onSyncStarted(data) {
    const why = data && JSON.parse(data).why;
    if (this.current) {
      log.warn(
        "Observed weave:service:sync:start, but we're already recording a sync!"
      );
      // Just discard the old record, consistent with our handling of engines, above.
      this.current = null;
    }
    this.current = new SyncRecord(this.allowedEngines, why);
  }

  // We need to ensure that the telemetry `deletion-request` ping always contains the user's
  // current sync device ID, because if the user opts out of telemetry then the deletion ping
  // will be immediately triggered for sending, and we won't have a chance to fill it in later.
  // This keeps the `deletion-ping` up-to-date when the user's account state changes.
  onAccountInitOrChange() {
    // We don't submit sync pings for self-hosters, so don't need to collect their device ids either.
    if (!this.isProductionSyncUser()) {
      return;
    }
    // Awkwardly async, but no need to await. If the user's account state changes while
    // this promise is in flight, it will reject and we won't record any data in the ping.
    // (And a new notification will trigger us to try again with the new state).
    lazy.fxAccounts.device
      .getLocalId()
      .then(deviceId => {
        let sanitizedDeviceId =
          lazy.fxAccounts.telemetry.sanitizeDeviceId(deviceId);
        // In the past we did not persist the FxA metrics identifiers to disk,
        // so this might be missing until we can fetch it from the server for the
        // first time. There will be a fresh notification tirggered when it's available.
        if (sanitizedDeviceId) {
          // Sanitized device ids are 64 characters long, but telemetry limits scalar strings to 50.
          // The first 32 chars are sufficient to uniquely identify the device, so just send those.
          // It's hard to change the sync ping itself to only send 32 chars, to b/w compat reasons.
          sanitizedDeviceId = sanitizedDeviceId.substr(0, 32);
          Glean.deletionRequest.syncDeviceId.set(sanitizedDeviceId);
        }
      })
      .catch(err => {
        log.warn(
          `Failed to set sync identifiers in the deletion-request ping: ${err}`
        );
      });
  }

  // This keeps the `deletion-request` ping up-to-date when the user signs out,
  // clearing the now-nonexistent sync device id.
  onAccountLogout() {
    Glean.deletionRequest.syncDeviceId.set("");
  }

  _checkCurrent(topic) {
    if (!this.current) {
      // This is only `info` because it happens when we do a tabs "quick-write"
      log.info(
        `Observed notification ${topic} but no current sync is being recorded.`
      );
      return false;
    }
    return true;
  }

  _shouldSubmitForDataChange() {
    let newID = lazy.fxAccounts.telemetry.getSanitizedUID() || EMPTY_UID;
    let oldID = this.lastUID;
    if (
      newID != EMPTY_UID &&
      oldID != EMPTY_UID &&
      // Both are "real" uids, so we care if they've changed.
      newID != oldID
    ) {
      log.trace(
        `shouldSubmitForDataChange - uid from '${oldID}' -> '${newID}'`
      );
      return true;
    }
    // We've gone from knowing one of the ids to not knowing it (which we
    // ignore) or we've gone from not knowing it to knowing it (which is fine),
    // Now check the node type because a change there also means we should
    // submit.
    if (
      this.lastSyncNodeType &&
      this.currentSyncNodeType != this.lastSyncNodeType
    ) {
      log.trace(
        `shouldSubmitForDataChange - nodeType from '${this.lastSyncNodeType}' -> '${this.currentSyncNodeType}'`
      );
      return true;
    }
    log.trace("shouldSubmitForDataChange - no need to submit");
    return false;
  }

  maybeSubmitForDataChange() {
    if (this._shouldSubmitForDataChange()) {
      log.info(
        "Early submission of sync telemetry due to changed IDs/NodeType"
      );
      this.finish("idchange"); // this actually submits.
      this.lastSubmissionTime = Services.telemetry.msSinceProcessStart();
    }

    // Only update the last UIDs if we actually know them.
    let current_uid = lazy.fxAccounts.telemetry.getSanitizedUID();
    if (current_uid) {
      this.lastUID = current_uid;
    }
    if (this.currentSyncNodeType) {
      this.lastSyncNodeType = this.currentSyncNodeType;
    }
  }

  maybeSubmitForInterval() {
    // We want to submit the ping every `this.submissionInterval` but only when
    // there's no current sync in progress, otherwise we may end up submitting
    // the sync and the events caused by it in different pings.
    if (
      this.current == null &&
      Services.telemetry.msSinceProcessStart() - this.lastSubmissionTime >
        this.submissionInterval
    ) {
      this.finish("schedule");
      this.lastSubmissionTime = Services.telemetry.msSinceProcessStart();
    }
  }

  onSyncFinished(error) {
    if (!this.current) {
      log.warn("onSyncFinished but we aren't recording");
      return;
    }
    this.current.finished(error);
    this.currentSyncNodeType = this.current.syncNodeType;
    let current = this.current;
    this.current = null;
    this.takeTelemetryRecord(current);
  }

  takeTelemetryRecord(record) {
    // We check for "data change" before appending the current sync to payloads,
    // as it is the current sync which has the data with the new data, and thus
    // must go in the *next* submission.
    this.maybeSubmitForDataChange();
    if (this.payloads.length < this.maxPayloadCount) {
      this.payloads.push(record.toJSON());
    } else {
      ++this.discarded;
    }
    // If we are submitting due to timing, it's desirable that the most recent
    // sync is included, so we check after appending the record.
    this.maybeSubmitForInterval();
  }

  _addHistogram(hist) {
    let histogram = Services.telemetry.getHistogramById(hist);
    let s = histogram.snapshot();
    this.histograms[hist] = s;
  }

  _recordEvent(eventDetails) {
    this.maybeSubmitForDataChange();

    if (this.events.length >= this.maxEventsCount) {
      log.warn("discarding event - already queued our maximum", eventDetails);
      return;
    }

    let { object, method, value, extra } = eventDetails;
    if (extra) {
      extra = normalizeExtraTelemetryFields(extra);
      eventDetails = { object, method, value, extra };
    }

    if (!validateTelemetryEvent(eventDetails)) {
      // we've already logged what the problem is...
      return;
    }
    log.debug("recording event", eventDetails);

    if (extra && lazy.Resource.serverTime && !extra.serverTime) {
      extra.serverTime = String(lazy.Resource.serverTime);
    }
    let category = "sync";
    let ts = Math.floor(tryGetMonotonicTimestamp());

    // An event record is a simple array with at least 4 items.
    let event = [ts, category, method, object];
    // It may have up to 6 elements if |extra| is defined
    if (value) {
      event.push(value);
      if (extra) {
        event.push(extra);
      }
    } else if (extra) {
      event.push(null); // a null for the empty value.
      event.push(extra);
    }
    this.events.push(event);
    this.maybeSubmitForInterval();
  }

  observe(subject, topic, data) {
    log.trace(`observed ${topic} ${data}`);

    switch (topic) {
      case "weave:service:ready":
      case "weave:service:login:got-hashed-id":
      case "fxaccounts:new_device_id":
        this.onAccountInitOrChange();
        break;

      case "fxaccounts:onlogout":
        this.onAccountLogout();
        break;

      /* sync itself state changes */
      case "weave:service:sync:start":
        this.onSyncStarted(data);
        break;

      case "weave:service:sync:finish":
        if (this._checkCurrent(topic)) {
          this.onSyncFinished(null);
        }
        break;

      case "weave:service:sync:error":
        // argument needs to be truthy (this should always be the case)
        this.onSyncFinished(subject || "Unknown");
        break;

      /* engine sync state changes */
      case "weave:engine:sync:start":
        if (this._checkCurrent(topic)) {
          this.current.onEngineStart(data);
        }
        break;
      case "weave:engine:sync:finish":
        if (this._checkCurrent(topic)) {
          this.current.onEngineStop(data, null);
        }
        break;

      case "weave:engine:sync:error":
        if (this._checkCurrent(topic)) {
          // argument needs to be truthy (this should always be the case)
          this.current.onEngineStop(data, subject || "Unknown");
        }
        break;

      /* engine counts */
      case "weave:engine:sync:applied":
        if (this._checkCurrent(topic)) {
          this.current.onEngineApplied(data, subject);
        }
        break;

      case "weave:engine:sync:step":
        if (this._checkCurrent(topic)) {
          this.current.onEngineStep(data, subject);
        }
        break;

      case "weave:engine:sync:uploaded":
        if (this._checkCurrent(topic)) {
          this.current.onEngineUploaded(data, subject);
        }
        break;

      case "weave:engine:validate:finish":
        if (this._checkCurrent(topic)) {
          this.current.onEngineValidated(data, subject);
        }
        break;

      case "weave:engine:validate:error":
        if (this._checkCurrent(topic)) {
          this.current.onEngineValidateError(data, subject || "Unknown");
        }
        break;

      case "weave:telemetry:event":
      case "fxa:telemetry:event":
        this._recordEvent(subject);
        break;

      case "weave:telemetry:histogram":
        this._addHistogram(data);
        break;

      case "weave:telemetry:migration":
        this._addMigrationRecord(data, subject);
        break;

      default:
        log.warn(`unexpected observer topic ${topic}`);
        break;
    }
  }

  // Transform an exception into a standard description. Exposed here for when
  // this module isn't directly responsible for knowing the transform should
  // happen (for example, when including an error in the |extra| field of
  // event telemetry)
  transformError(error) {
    // Certain parts of sync will use this pattern as a way to communicate to
    // processIncoming to abort the processing. However, there's no guarantee
    // this can only happen then.
    if (typeof error == "object" && error.code && error.cause) {
      error = error.cause;
    }
    if (lazy.Async.isShutdownException(error)) {
      return { name: "shutdownerror" };
    }

    if (typeof error === "string") {
      if (error.startsWith("error.")) {
        // This is hacky, but I can't imagine that it's not also accurate.
        return { name: "othererror", error };
      }
      error = ErrorSanitizer.cleanErrorMessage(error);
      return { name: "unexpectederror", error };
    }

    if (error instanceof lazy.AuthenticationError) {
      return { name: "autherror", from: error.source };
    }

    if (DOMException.isInstance(error)) {
      return {
        name: "unexpectederror",
        error: ErrorSanitizer.cleanErrorMessage(error.message, error),
      };
    }

    let httpCode =
      error.status || (error.response && error.response.status) || error.code;

    if (httpCode) {
      return { name: "httperror", code: httpCode };
    }

    if (error.failureCode) {
      return { name: "othererror", error: error.failureCode };
    }

    if (error.result) {
      // many "nsresult" errors are actually network errors - if they are
      // associated with the "network" module we assume that's true.
      // We also assume NS_ERROR_ABORT is such an error - for almost everything
      // we care about, it acually is (eg, if the connection fails early enough
      // or if we have a captive portal etc) - we don't lose anything by this
      // assumption, it's just that the error will no longer be in the "nserror"
      // category, so our analysis can still find them.
      if (
        error.result == Cr.NS_ERROR_ABORT ||
        NS_ERROR_GET_MODULE(error.result) == NS_ERROR_MODULE_NETWORK
      ) {
        return { name: "httperror", code: error.result };
      }
      return { name: "nserror", code: error.result };
    }
    // It's probably an Error object, but it also could be some
    // other object that may or may not override toString to do
    // something useful.
    let msg = String(error);
    if (msg.startsWith("[object")) {
      // Nothing useful in the default, check for a string "message" property.
      if (typeof error.message == "string") {
        msg = String(error.message);
      } else {
        // Hopefully it won't come to this...
        msg = JSON.stringify(error);
      }
    }
    return {
      name: "unexpectederror",
      error: ErrorSanitizer.cleanErrorMessage(msg),
    };
  }
}

export var SyncTelemetry = new SyncTelemetryImpl(ENGINES);