diff options
Diffstat (limited to 'toolkit/components/crashes')
17 files changed, 4516 insertions, 0 deletions
diff --git a/toolkit/components/crashes/CrashManager.in.sys.mjs b/toolkit/components/crashes/CrashManager.in.sys.mjs new file mode 100644 index 0000000000..253f70d07e --- /dev/null +++ b/toolkit/components/crashes/CrashManager.in.sys.mjs @@ -0,0 +1,1616 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +import { AppConstants } from "resource://gre/modules/AppConstants.sys.mjs"; +import { setTimeout } from "resource://gre/modules/Timer.sys.mjs"; + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + Log: "resource://gre/modules/Log.sys.mjs", + TelemetryController: "resource://gre/modules/TelemetryController.sys.mjs", +}); + +/** + * How long to wait after application startup before crash event files are + * automatically aggregated. + * + * We defer aggregation for performance reasons, as we don't want too many + * services competing for I/O immediately after startup. + */ +const AGGREGATE_STARTUP_DELAY_MS = 57000; + +const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000; + +// Converts Date to days since UNIX epoch. +// This was copied from /services/metrics.storage.jsm. The implementation +// does not account for leap seconds. +export function dateToDays(date) { + return Math.floor(date.getTime() / MILLISECONDS_IN_DAY); +} + +/** + * Get a field from the specified object and remove it. + * + * @param obj {Object} The object holding the field + * @param field {String} The name of the field to be parsed and removed + * + * @returns {String} the field contents as a string, null if none was found + */ +function getAndRemoveField(obj, field) { + let value = null; + + if (field in obj) { + value = obj[field]; + delete obj[field]; + } + + return value; +} + +/** + * Parse the string stored in the specified field as JSON and then remove the + * field from the object. 
 *
 * @param obj {Object} The object holding the field
 * @param field {String} The name of the field to be parsed and removed
 *
 * @returns {Object} the parsed object, null if none was found
 */
function parseAndRemoveField(obj, field) {
  let value = null;

  if (field in obj) {
    try {
      value = JSON.parse(obj[field]);
    } catch (e) {
      // A field that is present but does not hold valid JSON is reported and
      // treated as absent (null); it is still removed from the object below.
      console.error(e);
    }

    delete obj[field];
  }

  return value;
}

/**
 * A gateway to crash-related data.
 *
 * This type is generic and can be instantiated any number of times.
 * However, most applications will typically only have one instance
 * instantiated and that instance will point to profile and user appdata
 * directories.
 *
 * Instances are created by passing an object with properties.
 * Recognized properties are:
 *
 *   pendingDumpsDir (string) (required)
 *     Where dump files that haven't been uploaded are located.
 *
 *   submittedDumpsDir (string) (required)
 *     Where records of uploaded dumps are located.
 *
 *   eventsDirs (array)
 *     Directories (defined as strings) where events files are written. This
 *     instance will collect events from files in the directories specified.
 *
 *   storeDir (string)
 *     Directory we will use for our data store. This instance will write
 *     data files into the directory specified.
 *
 *   telemetryStoreSizeKey (string)
 *     Telemetry histogram to report store size under.
 *
 * Any other property name causes the constructor to throw.
 *
 * NOTE(review): the constructor does not enforce the properties marked
 * "(required)"; it only copies the ones that are supplied.
 */
export var CrashManager = function (options) {
  for (let k in options) {
    let value = options[k];

    switch (k) {
      case "pendingDumpsDir":
      case "submittedDumpsDir":
      case "eventsDirs":
      case "storeDir":
        // Store the value as a fixed own property named "_<option>". Any
        // existing own property of that name is removed first.
        let key = "_" + k;
        delete this[key];
        Object.defineProperty(this, key, { value });
        break;
      case "telemetryStoreSizeKey":
        this._telemetryStoreSizeKey = value;
        break;

      default:
        throw new Error("Unknown property in options: " + k);
    }
  }

  // Promise for in-progress aggregation operation. We store it on the
  // object so it can be returned for in-progress operations.
  this._aggregatePromise = null;

  // Map of crash ID / promise tuples used to track adding new crashes.
  this._crashPromises = new Map();

  // Promise for the crash ping used only for testing.
  this._pingPromise = null;

  // The CrashStore currently attached to this object.
  this._store = null;

  // A Task to retrieve the store. This is needed to avoid races when
  // _getStore() is called multiple times in a short interval.
  this._getStoreTask = null;

  // The timer controlling the expiration of the CrashStore instance.
  this._storeTimer = null;

  // This is a semaphore that prevents the store from being freed by our
  // timer-based resource freeing mechanism.
  this._storeProtectedCount = 0;
};

CrashManager.prototype = Object.freeze({
  // gen_CrashManager.py will insert the proper process map information.
  /* SUBST: CRASH_MANAGER_PROCESS_MAP */

  // A real crash.
  CRASH_TYPE_CRASH: "crash",

  // A hang.
  CRASH_TYPE_HANG: "hang",

  // Submission result values.
  SUBMISSION_RESULT_OK: "ok",
  SUBMISSION_RESULT_FAILED: "failed",

  // File name patterns; capture group 1 is the crash UUID (for ALL_REGEX it
  // is the whole file name).
  DUMP_REGEX:
    /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
  SUBMITTED_REGEX:
    /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
  ALL_REGEX: /^(.*)$/,

  // How long the store object should persist in memory before being
  // automatically garbage collected.
  STORE_EXPIRATION_MS: 60 * 1000,

  // Number of days after which a crash with no activity will get purged.
  PURGE_OLDER_THAN_DAYS: 180,

  // The following are return codes for individual event file processing.
  // File processed OK.
  EVENT_FILE_SUCCESS: "ok",
  // The event appears to be malformed.
  EVENT_FILE_ERROR_MALFORMED: "malformed",
  // The event is obsolete.
  EVENT_FILE_ERROR_OBSOLETE: "obsolete",
  // The type of event is unknown.
  EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",

  /**
   * Compute a directory path and cache it on this instance.
   *
   * Joins `path` and `leaf`, then defines the result as an own property
   * named `field` so that later reads no longer go through the prototype
   * getter of the same name.
   *
   * @param field (string) Name of the property to define on this instance.
   * @param path (string) Parent directory.
   * @param leaf (string) Name of the child entry.
   *
   * @return string The joined path.
   */
  _lazyGetDir(field, path, leaf) {
    delete this[field];
    let value = PathUtils.join(path, leaf);
    Object.defineProperty(this, field, { value });
    return value;
  },

  // Default location of the crash reports directory ("Crash Reports" under
  // the directory service's "UAppData" entry), computed on first access.
  get _crDir() {
    return this._lazyGetDir(
      "_crDir",
      Services.dirsvc.get("UAppData", Ci.nsIFile).path,
      "Crash Reports"
    );
  },

  // Default store directory ("crashes" under the directory service's "ProfD"
  // entry), used when no storeDir option was passed to the constructor.
  get _storeDir() {
    return this._lazyGetDir(
      "_storeDir",
      Services.dirsvc.get("ProfD", Ci.nsIFile).path,
      "crashes"
    );
  },

  // Default pending dumps directory, used when no pendingDumpsDir option was
  // passed to the constructor.
  get _pendingDumpsDir() {
    return this._lazyGetDir("_pendingDumpsDir", this._crDir, "pending");
  },

  // Default submitted dumps directory, used when no submittedDumpsDir option
  // was passed to the constructor.
  get _submittedDumpsDir() {
    return this._lazyGetDir("_submittedDumpsDir", this._crDir, "submitted");
  },

  // Default events directories: one under the crash reports directory and
  // one under the store directory. Cached on the instance on first access.
  get _eventsDirs() {
    delete this._eventsDirs;
    let value = [
      PathUtils.join(this._crDir, "events"),
      PathUtils.join(this._storeDir, "events"),
    ];
    Object.defineProperty(this, "_eventsDirs", { value });
    return value;
  },

  /**
   * Obtain a list of all dumps pending upload.
   *
   * The returned value is a promise that resolves to an array of objects
   * on success. Each element in the array has the following properties:
   *
   *   id (string)
   *      The ID of the crash (a UUID).
   *
   *   path (string)
   *      The filename of the crash (<UUID.dmp>)
   *
   *   date (Date)
   *      When this dump was created
   *      NOTE(review): _getDirectoryEntries() stores stat.lastModified here;
   *      confirm whether that is a Date or a millisecond timestamp.
   *
   * The returned array is sorted by the modified time of the file backing
   * the entry, oldest to newest.
   *
   * @return Promise<Array>
   */
  pendingDumps() {
    return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
  },

  /**
   * Obtain a list of all dump files corresponding to submitted crashes.
   *
   * The returned value is a promise that resolves to an Array of
   * objects. Each object has the following properties:
   *
   *   path (string)
   *      The path of the file this entry comes from.
   *
   *   id (string)
   *      The crash UUID.
   *
   *   date (Date)
   *      The (estimated) date this crash was submitted.
+ * + * The returned array is sorted by the modified time of the file backing + * the entry, oldest to newest. + * + * @return Promise<Array> + */ + submittedDumps() { + return this._getDirectoryEntries( + this._submittedDumpsDir, + this.SUBMITTED_REGEX + ); + }, + + /** + * Aggregates "loose" events files into the unified "database." + * + * This function should be called periodically to collect metadata from + * all events files into the central data store maintained by this manager. + * + * Once events have been stored in the backing store the corresponding + * source files are deleted. + * + * Only one aggregation operation is allowed to occur at a time. If this + * is called when an existing aggregation is in progress, the promise for + * the original call will be returned. + * + * @return promise<int> The number of event files that were examined. + */ + aggregateEventsFiles() { + if (this._aggregatePromise) { + return this._aggregatePromise; + } + + return (this._aggregatePromise = (async () => { + if (this._aggregatePromise) { + return this._aggregatePromise; + } + + try { + let unprocessedFiles = await this._getUnprocessedEventsFiles(); + + let deletePaths = []; + let needsSave = false; + + this._storeProtectedCount++; + for (let entry of unprocessedFiles) { + try { + let result = await this._processEventFile(entry); + + switch (result) { + case this.EVENT_FILE_SUCCESS: + needsSave = true; + // Fall through. + + case this.EVENT_FILE_ERROR_MALFORMED: + case this.EVENT_FILE_ERROR_OBSOLETE: + deletePaths.push(entry.path); + break; + + case this.EVENT_FILE_ERROR_UNKNOWN_EVENT: + break; + + default: + console.error( + "Unhandled crash event file return code. Please " + + "file a bug: ", + result + ); + } + } catch (ex) { + if (DOMException.isInstance(ex)) { + this._log.warn("I/O error reading " + entry.path, ex); + } else { + // We should never encounter an exception. 
This likely represents + // a coding error because all errors should be detected and + // converted to return codes. + // + // If we get here, report the error and delete the source file + // so we don't see it again. + console.error( + "Exception when processing crash event file: " + + lazy.Log.exceptionStr(ex) + ); + deletePaths.push(entry.path); + } + } + } + + if (needsSave) { + let store = await this._getStore(); + await store.save(); + } + + for (let path of deletePaths) { + try { + await IOUtils.remove(path); + } catch (ex) { + this._log.warn("Error removing event file (" + path + ")", ex); + } + } + + return unprocessedFiles.length; + } finally { + this._aggregatePromise = false; + this._storeProtectedCount--; + } + })()); + }, + + /** + * Prune old crash data. + * + * @param date + * (Date) The cutoff point for pruning. Crashes without data newer + * than this will be pruned. + */ + pruneOldCrashes(date) { + return (async () => { + let store = await this._getStore(); + store.pruneOldCrashes(date); + await store.save(); + })(); + }, + + /** + * Run tasks that should be periodically performed. + */ + runMaintenanceTasks() { + return (async () => { + await this.aggregateEventsFiles(); + + let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY; + await this.pruneOldCrashes(new Date(Date.now() - offset)); + })(); + }, + + /** + * Schedule maintenance tasks for some point in the future. + * + * @param delay + * (integer) Delay in milliseconds when maintenance should occur. + */ + scheduleMaintenance(delay) { + let deferred = Promise.withResolvers(); + + setTimeout(() => { + this.runMaintenanceTasks().then(deferred.resolve, deferred.reject); + }, delay); + + return deferred.promise; + }, + + /** + * Record the occurrence of a crash. + * + * This method skips event files altogether and writes directly and + * immediately to the manager's data store. + * + * @param processType (string) One of the PROCESS_TYPE constants. 
+ * @param crashType (string) One of the CRASH_TYPE constants. + * @param id (string) Crash ID. Likely a UUID. + * @param date (Date) When the crash occurred. + * @param metadata (dictionary) Crash metadata, may be empty. + * + * @return promise<null> Resolved when the store has been saved. + */ + addCrash(processType, crashType, id, date, metadata) { + let promise = (async () => { + if (!this.isValidProcessType(processType)) { + console.error( + "Unhandled process type. Please file a bug: '", + processType, + "'. Ignore in the context of " + + "test_crash_manager.js:test_addCrashWrong()." + ); + return; + } + + let store = await this._getStore(); + if (store.addCrash(processType, crashType, id, date, metadata)) { + await store.save(); + } + + let deferred = this._crashPromises.get(id); + + if (deferred) { + this._crashPromises.delete(id); + deferred.resolve(); + } + + if (this.isPingAllowed(processType)) { + this._sendCrashPing("crash", id, processType, date, metadata); + } + })(); + + return promise; + }, + + /** + * Check that the processType parameter is a valid one: + * - it is a string + * - it is listed in this.processTypes + * + * @param processType (string) Process type to evaluate + * + * @return boolean True or false depending whether it is a legit one + */ + isValidProcessType(processType) { + if (typeof processType !== "string") { + return false; + } + + for (const pt of Object.values(this.processTypes)) { + if (pt === processType) { + return true; + } + } + + return false; + }, + + /** + * Check that processType is allowed to send a ping + * + * @param processType (string) Process type to check for + * + * @return boolean True or False depending on whether ping is allowed + **/ + isPingAllowed(processType) { + // gen_CrashManager.py will input the proper process pings informations. 
+ + let processPings = { + /* SUBST: CRASH_MANAGER_PROCESS_PINGS */ + }; + + // Should not even reach this because of isValidProcessType() but just in + // case we try to be cautious + if (!(processType in processPings)) { + return false; + } + + return processPings[processType]; + }, + + /** + * Returns a promise that is resolved only the crash with the specified id + * has been fully recorded. + * + * @param id (string) Crash ID. Likely a UUID. + * + * @return promise<null> Resolved when the crash is present. + */ + async ensureCrashIsPresent(id) { + let store = await this._getStore(); + let crash = store.getCrash(id); + + if (crash) { + return Promise.resolve(); + } + + let deferred = Promise.withResolvers(); + + this._crashPromises.set(id, deferred); + return deferred.promise; + }, + + /** + * Record the remote ID for a crash. + * + * @param crashID (string) Crash ID. Likely a UUID. + * @param remoteID (Date) Server/Breakpad ID. + * + * @return boolean True if the remote ID was recorded. + */ + async setRemoteCrashID(crashID, remoteID) { + let store = await this._getStore(); + if (store.setRemoteCrashID(crashID, remoteID)) { + await store.save(); + } + }, + + /** + * Generate a submission ID for use with addSubmission{Attempt,Result}. + */ + generateSubmissionID() { + return "sub-" + Services.uuid.generateUUID().toString().slice(1, -1); + }, + + /** + * Record the occurrence of a submission attempt for a crash. + * + * @param crashID (string) Crash ID. Likely a UUID. + * @param submissionID (string) Submission ID. Likely a UUID. + * @param date (Date) When the attempt occurred. + * + * @return boolean True if the attempt was recorded and false if not. + */ + async addSubmissionAttempt(crashID, submissionID, date) { + let store = await this._getStore(); + if (store.addSubmissionAttempt(crashID, submissionID, date)) { + await store.save(); + } + }, + + /** + * Record the occurrence of a submission result for a crash. + * + * @param crashID (string) Crash ID. 
Likely a UUID.
   * @param submissionID (string) Submission ID. Likely a UUID.
   * @param date (Date) When the submission result was obtained.
   * @param result (string) One of the SUBMISSION_RESULT constants.
   *
   * @return Promise Resolved once the store has been updated; the store is
   *         saved only if it reports that the result was recorded.
   */
  async addSubmissionResult(crashID, submissionID, date, result) {
    let store = await this._getStore();
    if (store.addSubmissionResult(crashID, submissionID, date, result)) {
      await store.save();
    }
  },

  /**
   * Set the classification of a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param classifications (array) Crash classifications.
   *
   * @return Promise Resolved once the store has been updated; the store is
   *         saved only if it reports that the data was recorded.
   */
  async setCrashClassifications(crashID, classifications) {
    let store = await this._getStore();
    if (store.setCrashClassifications(crashID, classifications)) {
      await store.save();
    }
  },

  /**
   * Obtain the paths of all unprocessed events files.
   *
   * Every file in every events directory is included. The promise-resolved
   * array is sorted by file mtime, oldest to newest.
   *
   * Errors are reported to the console and result in an empty array, so the
   * returned promise does not reject because of a failed directory listing.
   */
  _getUnprocessedEventsFiles() {
    return (async () => {
      try {
        let entries = [];

        for (let dir of this._eventsDirs) {
          for (let e of await this._getDirectoryEntries(dir, this.ALL_REGEX)) {
            entries.push(e);
          }
        }

        // Each directory listing is already sorted; sort again so entries
        // from different directories are interleaved by date.
        entries.sort((a, b) => {
          return a.date - b.date;
        });

        return entries;
      } catch (e) {
        console.error(e);
        return [];
      }
    })();
  },

  // See docs/crash-events.rst for the file format specification.
+ _processEventFile(entry) { + return (async () => { + let data = await IOUtils.read(entry.path); + let store = await this._getStore(); + + let decoder = new TextDecoder(); + data = decoder.decode(data); + + let type, time; + let start = 0; + for (let i = 0; i < 2; i++) { + let index = data.indexOf("\n", start); + if (index == -1) { + return this.EVENT_FILE_ERROR_MALFORMED; + } + + let sub = data.substring(start, index); + switch (i) { + case 0: + type = sub; + break; + case 1: + time = sub; + try { + time = parseInt(time, 10); + } catch (ex) { + return this.EVENT_FILE_ERROR_MALFORMED; + } + } + + start = index + 1; + } + let date = new Date(time * 1000); + let payload = data.substring(start); + + return this._handleEventFilePayload(store, entry, type, date, payload); + })(); + }, + + _filterAnnotations(annotations) { + let filteredAnnotations = {}; + + for (let line in annotations) { + try { + if (Services.appinfo.isAnnotationAllowedForPing(line)) { + filteredAnnotations[line] = annotations[line]; + } + } catch (e) { + // Silently drop unknown annotations + } + } + + return filteredAnnotations; + }, + + /** + * Submit a Glean crash ping with the given parameters. + * + * @param {string} reason - the reason for the crash ping, one of: "crash", "event_found" + * @param {string} type - the process type (from {@link processTypes}) + * @param {DateTime} date - the time of the crash (or the closest time after it) + * @param {object} metadata - the object of Telemetry crash metadata + */ + _submitGleanCrashPing(reason, type, date, metadata) { + if ("UptimeTS" in metadata) { + Glean.crash.uptime.setRaw(parseFloat(metadata.UptimeTS) * 1e3); + } + Glean.crash.processType.set(type); + Glean.crash.time.set(date.getTime() * 1000); + Glean.crash.startup.set( + "StartupCrash" in metadata && parseInt(metadata.StartupCrash) === 1 + ); + GleanPings.crash.submit(reason); + }, + + /** + * Send a crash ping. 
   *
   * A Glean crash ping is submitted on every platform except Android. A
   * Telemetry "crash" ping is submitted as well, unless the metadata carries
   * a CrashPingUUID. The promise of the Telemetry submission is kept in
   * this._pingPromise.
   *
   * @param {string} reason - the reason for the crash ping, one of: "crash", "event_found"
   * @param {string} crashId - the crash identifier
   * @param {string} type - the process type (from {@link processTypes})
   * @param {DateTime} date - the time of the crash (or the closest time after it)
   * @param {object} metadata - Telemetry crash metadata
   */
  _sendCrashPing(reason, crashId, type, date, metadata = {}) {
    // If we have a saved environment, use it. Otherwise report
    // the current environment.
    // Work on a clone so that removing fields below does not modify the
    // caller's metadata object.
    let reportMeta = Cu.cloneInto(metadata, {});
    let crashEnvironment = parseAndRemoveField(
      reportMeta,
      "TelemetryEnvironment"
    );
    let sessionId = getAndRemoveField(reportMeta, "TelemetrySessionId");
    let stackTraces = getAndRemoveField(reportMeta, "StackTraces");
    let minidumpSha256Hash = getAndRemoveField(
      reportMeta,
      "MinidumpSha256Hash"
    );
    // If CrashPingUUID is present then a Telemetry ping was generated by the
    // crashreporter for this crash so we only need to send the Glean ping.
    let onlyGlean = getAndRemoveField(reportMeta, "CrashPingUUID");

    // Filter the remaining annotations to remove privacy-sensitive ones
    reportMeta = this._filterAnnotations(reportMeta);

    // Glean crash pings should not be sent on Android: they are handled
    // separately in lib-crash for Fenix (and potentially other GeckoView
    // users).
    if (AppConstants.platform !== "android") {
      this._submitGleanCrashPing(reason, type, date, reportMeta);
    }

    if (onlyGlean) {
      return;
    }

    this._pingPromise = lazy.TelemetryController.submitExternalPing(
      "crash",
      {
        version: 1,
        crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD
        crashTime: date.toISOString().slice(0, 13) + ":00:00.000Z", // per-hour resolution
        sessionId,
        crashId,
        minidumpSha256Hash,
        processType: type,
        stackTraces,
        metadata: reportMeta,
        hasCrashEnvironment: crashEnvironment !== null,
      },
      {
        addClientId: true,
        addEnvironment: true,
        overrideEnvironment: crashEnvironment,
      }
    );
  },

  /**
   * Apply the payload of one event file to the store.
   *
   * @param store (CrashStore) Store to record the event in. It is modified
   *        in memory only; saving is left to the caller.
   * @param entry (object) Directory entry of the event file (not read here).
   * @param type (string) Event type taken from the file header.
   * @param date (Date) Event time taken from the file header.
   * @param payload (string) Everything after the file header.
   *
   * @return string One of the EVENT_FILE_* return codes.
   */
  _handleEventFilePayload(store, entry, type, date, payload) {
    // The payload types and formats are documented in docs/crash-events.rst.
    // Do not change the format of an existing type. Instead, invent a new
    // type.
    // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
    let lines = payload.split("\n");

    switch (type) {
      case "crash.main.1":
      case "crash.main.2":
        return this.EVENT_FILE_ERROR_OBSOLETE;

      case "crash.main.3":
        // Line 0 is the crash ID, line 1 the crash metadata as JSON.
        let crashID = lines[0];
        let metadata = JSON.parse(lines[1]);
        store.addCrash(
          this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
          this.CRASH_TYPE_CRASH,
          crashID,
          date,
          metadata
        );

        this._sendCrashPing(
          "event_found",
          crashID,
          this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
          date,
          metadata
        );

        break;

      case "crash.submission.1":
        // The payload must split into exactly three pieces: crash ID,
        // submission result ("true" on success) and remote ID.
        if (lines.length == 3) {
          let [crashID, result, remoteID] = lines;
          store.addCrash(
            this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
            this.CRASH_TYPE_CRASH,
            crashID,
            date
          );

          let submissionID = this.generateSubmissionID();
          let succeeded = result === "true";
          store.addSubmissionAttempt(crashID, submissionID, date);
          store.addSubmissionResult(
            crashID,
            submissionID,
            date,
            succeeded
              ? this.SUBMISSION_RESULT_OK
              : this.SUBMISSION_RESULT_FAILED
          );
          if (succeeded) {
            store.setRemoteCrashID(crashID, remoteID);
          }
        } else {
          return this.EVENT_FILE_ERROR_MALFORMED;
        }
        break;

      default:
        return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
    }

    return this.EVENT_FILE_SUCCESS;
  },

  /**
   * List the files in a directory whose names match a regular expression.
   *
   * Directories are skipped. The result is sorted by `date`, ascending.
   *
   * The resolved promise is an array of objects with the properties:
   *
   *   path -- String filename
   *   id -- regexp.match()[1] (likely the crash ID)
   *   date -- mtime of the file, taken from stat.lastModified
   *           NOTE(review): confirm whether this is a Date or a millisecond
   *           timestamp; callers here only subtract two values.
   *
   * @param path (string) Directory to list.
   * @param re (RegExp) Pattern applied to each file name; capture group 1
   *        becomes the entry's id.
   */
  _getDirectoryEntries(path, re) {
    return (async function () {
      let children = await IOUtils.getChildren(path);
      let entries = [];

      for (const entry of children) {
        let stat = await IOUtils.stat(entry);
        if (stat.type == "directory") {
          continue;
        }

        let filename = PathUtils.filename(entry);
        let match = re.exec(filename);
        if (!match) {
          continue;
        }
        entries.push({
          path: entry,
          id: match[1],
          date: stat.lastModified,
        });
      }

      entries.sort((a, b) => {
        return a.date - b.date;
      });

      return entries;
    })();
  },

  /**
   * Obtain the CrashStore, loading it from disk if it is not in memory.
   *
   * Concurrent calls share one in-flight promise (this._getStoreTask). Every
   * call restarts the expiration timer, so the store is released
   * STORE_EXPIRATION_MS after the last access unless
   * this._storeProtectedCount is non-zero at that time.
   *
   * @return Promise<CrashStore>
   */
  _getStore() {
    if (this._getStoreTask) {
      return this._getStoreTask;
    }

    return (this._getStoreTask = (async () => {
      try {
        if (!this._store) {
          await IOUtils.makeDirectory(this._storeDir, {
            permissions: 0o700,
          });

          let store = new CrashStore(
            this._storeDir,
            this._telemetryStoreSizeKey
          );
          await store.load();

          this._store = store;
          this._storeTimer = Cc["@mozilla.org/timer;1"].createInstance(
            Ci.nsITimer
          );
        }

        // The application can go long periods without interacting with the
        // store. Since the store takes up resources, we automatically "free"
        // the store after inactivity so resources can be returned to the
        // system. We do this via a timer and a mechanism that tracks when the
        // store is being accessed.
        this._storeTimer.cancel();

        // This callback frees resources from the store unless the store
        // is protected from freeing by some other process.
        let timerCB = () => {
          if (this._storeProtectedCount) {
            // Still in use: check again after another expiration period.
            this._storeTimer.initWithCallback(
              timerCB,
              this.STORE_EXPIRATION_MS,
              this._storeTimer.TYPE_ONE_SHOT
            );
            return;
          }

          // We kill the reference that we hold. GC will kill it later. If
          // someone else holds a reference, that will prevent GC until that
          // reference is gone.
          this._store = null;
          this._storeTimer = null;
        };

        this._storeTimer.initWithCallback(
          timerCB,
          this.STORE_EXPIRATION_MS,
          this._storeTimer.TYPE_ONE_SHOT
        );

        return this._store;
      } finally {
        // Allow the next call to start a fresh task.
        this._getStoreTask = null;
      }
    })());
  },

  /**
   * Obtain information about all known crashes.
   *
   * Returns an array of CrashRecord instances. Instances are read-only.
   */
  getCrashes() {
    return (async () => {
      let store = await this._getStore();

      return store.crashes;
    })();
  },

  /**
   * Obtain the store's per-day crash counts.
   *
   * Resolves to the store's internal _countsByDay map (days since the UNIX
   * epoch mapped to a Map of event types to counts), not to a copy.
   */
  getCrashCountsByDay() {
    return (async () => {
      let store = await this._getStore();

      return store._countsByDay;
    })();
  },
});

var gCrashManager;

/**
 * Interface to storage of crash data.
 *
 * This type handles storage of crash metadata. It exists as a separate type
 * from the crash manager for performance reasons: since all crash metadata
 * needs to be loaded into memory for access, we wish to easily dispose of all
 * associated memory when this data is no longer needed. Having an isolated
 * object whose references can easily be lost facilitates that simple disposal.
 *
 * When metadata is updated, the caller must explicitly persist the changes
 * to disk. This prevents excessive I/O during updates.
 *
 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
 * is placed on the number of daily events that can occur for events that can
 * occur with relatively high frequency. If we've reached
 * the high water mark and new data arrives, it's silently dropped.
 * However, the count of actual events is always preserved.
This allows
 * us to report on the severity of problems beyond the storage threshold.
 *
 * Main process crashes are excluded from limits because they are both
 * important and should be rare.
 *
 * @param storeDir (string)
 *        Directory the store should be located in.
 * @param telemetrySizeKey (string)
 *        The telemetry histogram that should be used to store the size
 *        of the data file.
 */
export function CrashStore(storeDir, telemetrySizeKey) {
  this._storeDir = storeDir;
  this._telemetrySizeKey = telemetrySizeKey;

  // Full path of the compressed JSON file backing this store.
  this._storePath = PathUtils.join(storeDir, "store.json.mozlz4");

  // Holds the read data from disk.
  this._data = null;

  // Maps days since UNIX epoch to a Map of event types to counts.
  // This data structure is populated when the JSON file is loaded
  // and is also updated when new events are added.
  this._countsByDay = new Map();
}

CrashStore.prototype = Object.freeze({
  // Maximum number of events to store per day. This establishes a
  // ceiling on the per-type/per-day records that will be stored.
  HIGH_WATER_DAILY_THRESHOLD: 500,

  /**
   * Reset all data.
   *
   * Replaces the in-memory data with an empty store; nothing is written to
   * disk.
   */
  reset() {
    this._data = {
      v: 1,
      crashes: new Map(),
      corruptDate: null,
    };
    this._countsByDay = new Map();
  },

  /**
   * Load data from disk.
   *
   * Any in-memory data is discarded first. A missing file leaves the store
   * empty. Any other failure also leaves whatever was loaded so far in place
   * and records the current time as the corrupt date; the error itself is
   * swallowed, so the returned promise does not reject for load failures.
   *
   * @return Promise
   */
  load() {
    return (async () => {
      // Loading replaces data.
      this.reset();

      try {
        let decoder = new TextDecoder();
        let data = await IOUtils.read(this._storePath, { decompress: true });
        data = JSON.parse(decoder.decode(data));

        if (data.corruptDate) {
          this._data.corruptDate = new Date(data.corruptDate);
        }

        // actualCounts is used to validate that the derived counts by
        // days stored in the payload matches up to actual data.
        let actualCounts = new Map();

        // In the past, submissions were stored as separate crash records
        // with an id of e.g. "someID-submission". If we find IDs ending
        // with "-submission", we will need to convert the data to be stored
        // as actual submissions.
        //
        // The old way of storing submissions was used from FF33 - FF34. We
        // drop this old data on the floor.
        for (let id in data.crashes) {
          if (id.endsWith("-submission")) {
            continue;
          }

          let crash = data.crashes[id];
          let denormalized = this._denormalize(crash);

          // Submissions are kept as a Map in memory; each one has its own
          // "*Date" fields that need converting back to Date instances.
          denormalized.submissions = new Map();
          if (crash.submissions) {
            for (let submissionID in crash.submissions) {
              let submission = crash.submissions[submissionID];
              denormalized.submissions.set(
                submissionID,
                this._denormalize(submission)
              );
            }
          }

          this._data.crashes.set(id, denormalized);

          // Counts are keyed as "<days since epoch>-<crash type>".
          let key =
            dateToDays(denormalized.crashDate) + "-" + denormalized.type;
          actualCounts.set(key, (actualCounts.get(key) || 0) + 1);

          // If we have an OOM size, count the crash as an OOM in addition to
          // being a main process crash.
          if (
            denormalized.metadata &&
            denormalized.metadata.OOMAllocationSize
          ) {
            let oomKey = key + "-oom";
            actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1);
          }
        }

        // The validation in this loop is arguably not necessary. We perform
        // it as a defense against unknown bugs.
        for (let dayKey in data.countsByDay) {
          let day = parseInt(dayKey, 10);
          for (let type in data.countsByDay[day]) {
            this._ensureCountsForDay(day);

            let count = data.countsByDay[day][type];
            let key = day + "-" + type;

            // If the payload says we have data for a given day but we
            // don't, the payload is wrong. Ignore it.
            if (!actualCounts.has(key)) {
              continue;
            }

            // If we encountered more data in the payload than what the
            // data structure says, use the proper value.
            count = Math.max(count, actualCounts.get(key));

            this._countsByDay.get(day).set(type, count);
          }
        }
      } catch (ex) {
        // Missing files (first use) are allowed.
        if (!DOMException.isInstance(ex) || ex.name != "NotFoundError") {
          // If we can't load for any reason, mark a corrupt date in the instance
          // and swallow the error.
          //
          // The marking of a corrupted file is intentionally not persisted to
          // disk yet. Instead, we wait until the next save(). This is to give
          // non-permanent failures the opportunity to recover on their own.
          this._data.corruptDate = new Date();
        }
      }
    })();
  },

  /**
   * Save data to disk.
   *
   * Does nothing if no data has been loaded or reset yet. Otherwise the
   * crashes and per-day counts are serialized as JSON and written compressed
   * to the store path. When a telemetry size key was given, the value
   * returned by the write is added to that histogram.
   *
   * @return Promise<null>
   */
  save() {
    return (async () => {
      if (!this._data) {
        return;
      }

      let normalized = {
        // The version should be incremented whenever the format
        // changes.
        v: 1,
        // Maps crash IDs to objects defining the crash.
        crashes: {},
        // Maps days since UNIX epoch to objects mapping event types to
        // counts. This is a mirror of this._countsByDay. e.g.
        // {
        //    15000: {
        //        "main-crash": 2,
        //        "plugin-crash": 1
        //    }
        // }
        countsByDay: {},

        // When the store was last corrupted.
        corruptDate: null,
      };

      if (this._data.corruptDate) {
        normalized.corruptDate = this._data.corruptDate.getTime();
      }

      for (let [id, crash] of this._data.crashes) {
        let c = this._normalize(crash);

        // Replace the in-memory submissions Map with a plain object so it
        // survives JSON serialization.
        c.submissions = {};
        for (let [submissionID, submission] of crash.submissions) {
          c.submissions[submissionID] = this._normalize(submission);
        }

        normalized.crashes[id] = c;
      }

      for (let [day, m] of this._countsByDay) {
        normalized.countsByDay[day] = {};
        for (let [type, count] of m) {
          normalized.countsByDay[day][type] = count;
        }
      }

      let encoder = new TextEncoder();
      let data = encoder.encode(JSON.stringify(normalized));
      // NOTE(review): tmpPath presumably makes IOUtils write to a temporary
      // file before moving it into place - confirm against the IOUtils docs.
      let size = await IOUtils.write(this._storePath, data, {
        tmpPath: this._storePath + ".tmp",
        compress: true,
      });
      if (this._telemetrySizeKey) {
        Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
      }
    })();
  },

  /**
   * Normalize an object into one fit for serialization.
+ * + * This function along with _denormalize() serve to hack around the + * default handling of Date JSON serialization because Date serialization + * is undefined by JSON. + * + * Fields ending with "Date" are assumed to contain Date instances. + * We convert these to milliseconds since epoch on output and back to + * Date on input. + */ + _normalize(o) { + let normalized = {}; + + for (let k in o) { + let v = o[k]; + if (v && k.endsWith("Date")) { + normalized[k] = v.getTime(); + } else { + normalized[k] = v; + } + } + + return normalized; + }, + + /** + * Convert a serialized object back to its native form. + */ + _denormalize(o) { + let n = {}; + + for (let k in o) { + let v = o[k]; + if (v && k.endsWith("Date")) { + n[k] = new Date(parseInt(v, 10)); + } else { + n[k] = v; + } + } + + return n; + }, + + /** + * Prune old crash data. + * + * Crashes without recent activity are pruned from the store so the + * size of the store is not unbounded. If there is activity on a crash, + * that activity will keep the crash and all its data around for longer. + * + * @param date + * (Date) The cutoff at which data will be pruned. If an entry + * doesn't have data newer than this, it will be pruned. + */ + pruneOldCrashes(date) { + for (let crash of this.crashes) { + let newest = crash.newestDate; + if (!newest || newest.getTime() < date.getTime()) { + this._data.crashes.delete(crash.id); + } + } + }, + + /** + * Date the store was last corrupted and required a reset. + * + * May be null (no corruption has ever occurred) or a Date instance. + */ + get corruptDate() { + return this._data.corruptDate; + }, + + /** + * The number of distinct crashes tracked. + */ + get crashesCount() { + return this._data.crashes.size; + }, + + /** + * All crashes tracked. + * + * This is an array of CrashRecord. 
+ */ + get crashes() { + let crashes = []; + for (let [, crash] of this._data.crashes) { + crashes.push(new CrashRecord(crash)); + } + + return crashes; + }, + + /** + * Obtain a particular crash from its ID. + * + * A CrashRecord will be returned if the crash exists. null will be returned + * if the crash is unknown. + */ + getCrash(id) { + for (let crash of this.crashes) { + if (crash.id == id) { + return crash; + } + } + + return null; + }, + + _ensureCountsForDay(day) { + if (!this._countsByDay.has(day)) { + this._countsByDay.set(day, new Map()); + } + }, + + /** + * Ensure the crash record is present in storage. + * + * Returns the crash record if we're allowed to store it or null + * if we've hit the high water mark. + * + * @param processType + * (string) One of the PROCESS_TYPE constants. + * @param crashType + * (string) One of the CRASH_TYPE constants. + * @param id + * (string) The crash ID. + * @param date + * (Date) When this crash occurred. + * @param metadata + * (dictionary) Crash metadata, may be empty. + * + * @return null | object crash record + */ + _ensureCrashRecord(processType, crashType, id, date, metadata) { + if (!id) { + // Crashes are keyed on ID, so it's not really helpful to store crashes + // without IDs. + return null; + } + + let type = processType + "-" + crashType; + + if (!this._data.crashes.has(id)) { + let day = dateToDays(date); + this._ensureCountsForDay(day); + + let count = (this._countsByDay.get(day).get(type) || 0) + 1; + this._countsByDay.get(day).set(type, count); + + if ( + count > this.HIGH_WATER_DAILY_THRESHOLD && + processType != + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ] + ) { + return null; + } + + // If we have an OOM size, count the crash as an OOM in addition to + // being a main process crash. 
+ if (metadata && metadata.OOMAllocationSize) { + let oomType = type + "-oom"; + let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1; + this._countsByDay.get(day).set(oomType, oomCount); + } + + this._data.crashes.set(id, { + id, + remoteID: null, + type, + crashDate: date, + submissions: new Map(), + classifications: [], + metadata, + }); + } + + let crash = this._data.crashes.get(id); + crash.type = type; + crash.crashDate = date; + + return crash; + }, + + /** + * Record the occurrence of a crash. + * + * @param processType (string) One of the PROCESS_TYPE constants. + * @param crashType (string) One of the CRASH_TYPE constants. + * @param id (string) Crash ID. Likely a UUID. + * @param date (Date) When the crash occurred. + * @param metadata (dictionary) Crash metadata, may be empty. + * + * @return boolean True if the crash was recorded and false if not. + */ + addCrash(processType, crashType, id, date, metadata) { + return !!this._ensureCrashRecord( + processType, + crashType, + id, + date, + metadata + ); + }, + + /** + * @return boolean True if the remote ID was recorded and false if not. + */ + setRemoteCrashID(crashID, remoteID) { + let crash = this._data.crashes.get(crashID); + if (!crash || !remoteID) { + return false; + } + + crash.remoteID = remoteID; + return true; + }, + + /** + * @param processType (string) One of the PROCESS_TYPE constants. + * @param crashType (string) One of the CRASH_TYPE constants. + * + * @return array of crashes + */ + getCrashesOfType(processType, crashType) { + let crashes = []; + for (let crash of this.crashes) { + if (crash.isOfType(processType, crashType)) { + crashes.push(crash); + } + } + + return crashes; + }, + + /** + * Ensure the submission record is present in storage. 
+ * @returns [submission, crash] + */ + _ensureSubmissionRecord(crashID, submissionID) { + let crash = this._data.crashes.get(crashID); + if (!crash || !submissionID) { + return null; + } + + if (!crash.submissions.has(submissionID)) { + crash.submissions.set(submissionID, { + requestDate: null, + responseDate: null, + result: null, + }); + } + + return [crash.submissions.get(submissionID), crash]; + }, + + /** + * @return boolean True if the attempt was recorded. + */ + addSubmissionAttempt(crashID, submissionID, date) { + let [submission, crash] = this._ensureSubmissionRecord( + crashID, + submissionID + ); + if (!submission) { + return false; + } + + submission.requestDate = date; + Services.telemetry + .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT") + .add(crash.type, 1); + return true; + }, + + /** + * @return boolean True if the response was recorded. + */ + addSubmissionResult(crashID, submissionID, date, result) { + let crash = this._data.crashes.get(crashID); + if (!crash || !submissionID) { + return false; + } + let submission = crash.submissions.get(submissionID); + if (!submission) { + return false; + } + + submission.responseDate = date; + submission.result = result; + Services.telemetry + .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS") + .add(crash.type, result == "ok"); + return true; + }, + + /** + * @return boolean True if the classifications were set. + */ + setCrashClassifications(crashID, classifications) { + let crash = this._data.crashes.get(crashID); + if (!crash) { + return false; + } + + crash.classifications = classifications; + return true; + }, +}); + +/** + * Represents an individual crash with metadata. + * + * This is a wrapper around the low-level anonymous JS objects that define + * crashes. It exposes a consistent and helpful API. + * + * Instances of this type should only be constructured inside this module, + * not externally. The constructor is not considered a public API. 
+ * + * @param o (object) + * The crash's entry from the CrashStore. + */ +function CrashRecord(o) { + this._o = o; +} + +CrashRecord.prototype = Object.freeze({ + get id() { + return this._o.id; + }, + + get remoteID() { + return this._o.remoteID; + }, + + get crashDate() { + return this._o.crashDate; + }, + + /** + * Obtain the newest date in this record. + * + * This is a convenience getter. The returned value is used to determine when + * to expire a record. + */ + get newestDate() { + // We currently only have 1 date, so this is easy. + return this._o.crashDate; + }, + + get oldestDate() { + return this._o.crashDate; + }, + + get type() { + return this._o.type; + }, + + isOfType(processType, crashType) { + return processType + "-" + crashType == this.type; + }, + + get submissions() { + return this._o.submissions; + }, + + get classifications() { + return this._o.classifications; + }, + + get metadata() { + return this._o.metadata; + }, +}); + +ChromeUtils.defineLazyGetter(CrashManager, "_log", () => + lazy.Log.repository.getLogger("Crashes.CrashManager") +); + +/** + * Obtain the global CrashManager instance used by the running application. + * + * CrashManager is likely only ever instantiated once per application lifetime. + * The main reason it's implemented as a reusable type is to facilitate testing. + */ +ChromeUtils.defineLazyGetter(CrashManager, "Singleton", function () { + if (gCrashManager) { + return gCrashManager; + } + + gCrashManager = new CrashManager({ + telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES", + }); + + // Automatically aggregate event files shortly after startup. This + // ensures it happens with some frequency. + // + // There are performance considerations here. While this is doing + // work and could negatively impact performance, the amount of work + // is kept small per run by periodically aggregating event files. + // Furthermore, well-behaving installs should not have much work + // here to do. 
If there is a lot of work, that install has bigger + // issues beyond reduced performance near startup. + gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS); + + return gCrashManager; +}); + +export function getCrashManager() { + return CrashManager.Singleton; +} + +/** + * Used for tests to check the crash manager is created on profile creation. + * + * @returns {CrashManager} + */ +export function getCrashManagerNoCreate() { + return gCrashManager; +} diff --git a/toolkit/components/crashes/CrashManagerTest.sys.mjs b/toolkit/components/crashes/CrashManagerTest.sys.mjs new file mode 100644 index 0000000000..f131b67aa5 --- /dev/null +++ b/toolkit/components/crashes/CrashManagerTest.sys.mjs @@ -0,0 +1,185 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file provides common and shared functionality to facilitate + * testing of the Crashes component (CrashManager.sys.mjs). 
+ */ + +import { CrashManager } from "resource://gre/modules/CrashManager.sys.mjs"; + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + Log: "resource://gre/modules/Log.sys.mjs", + setTimeout: "resource://gre/modules/Timer.sys.mjs", +}); + +var loggingConfigured = false; + +export var configureLogging = function () { + if (loggingConfigured) { + return; + } + + let log = lazy.Log.repository.getLogger("Crashes.CrashManager"); + log.level = lazy.Log.Level.All; + let appender = new lazy.Log.DumpAppender(); + appender.level = lazy.Log.Level.All; + log.addAppender(appender); + loggingConfigured = true; +}; + +export var sleep = function (wait) { + return new Promise(resolve => { + lazy.setTimeout(() => { + resolve(); + }, wait); + }); +}; + +export var TestingCrashManager = function (options) { + CrashManager.call(this, options); +}; + +TestingCrashManager.prototype = { + createDummyDump(submitted = false, date = new Date(), hr = false) { + let uuid = Services.uuid.generateUUID().toString(); + uuid = uuid.substring(1, uuid.length - 1); + + let path; + let mode; + if (submitted) { + if (hr) { + path = PathUtils.join( + this._submittedDumpsDir, + "bp-hr-" + uuid + ".txt" + ); + } else { + path = PathUtils.join(this._submittedDumpsDir, "bp-" + uuid + ".txt"); + } + mode = 0o644; + } else { + path = PathUtils.join(this._pendingDumpsDir, uuid + ".dmp"); + mode = 0o600; + } + + return (async function () { + await IOUtils.writeUTF8(path, ""); + await IOUtils.setPermissions(path, mode); + await IOUtils.setModificationTime(path, date.valueOf()); + await IOUtils.setAccessTime(path, date.valueOf()); + dump(`Created fake crash: ${path}\n`); + + return uuid; + })(); + }, + + createIgnoredDumpFile(filename, submitted = false) { + let path; + if (submitted) { + path = PathUtils.join(this._submittedDumpsDir, filename); + } else { + path = PathUtils.join(this._pendingDumpsDir, filename); + } + + return (async function () { + let mode = 0o600; + await 
IOUtils.writeUTF8(path, ""); + await IOUtils.setPermissions(path, mode); + dump(`Create ignored dump file: ${path}\n`); + })(); + }, + + createEventsFile(filename, type, date, id, content, index = 0) { + let path = PathUtils.join(this._eventsDirs[index], filename); + let dateInSecs = Math.floor(date.getTime() / 1000); + let data = type + "\n" + dateInSecs + "\n" + id + "\n" + content; + + return (async function () { + await IOUtils.writeUTF8(path, data); + await IOUtils.setModificationTime(path, date.valueOf()); + await IOUtils.setAccessTime(path, date.valueOf()); + })(); + }, + + deleteEventsDirs() { + let dirs = this._eventsDirs; + + return (async function () { + for (let dir of dirs) { + await IOUtils.remove(dir, { recursive: true }); + } + })(); + }, + + /** + * Overwrite event file handling to process our test file type. + * + * We can probably delete this once we have actual events defined. + */ + _handleEventFilePayload(store, entry, type, date, payload) { + if (type == "test.1") { + if (payload == "malformed") { + return this.EVENT_FILE_ERROR_MALFORMED; + } else if (payload == "success") { + return this.EVENT_FILE_SUCCESS; + } + return this.EVENT_FILE_ERROR_UNKNOWN_EVENT; + } + + return CrashManager.prototype._handleEventFilePayload.call( + this, + store, + entry, + type, + date, + payload + ); + }, +}; +Object.setPrototypeOf(TestingCrashManager.prototype, CrashManager.prototype); + +var DUMMY_DIR_COUNT = 0; + +export var getManager = function () { + return (async function () { + const dirMode = 0o700; + let baseFile = PathUtils.profileDir; + + function makeDir(create = true) { + return (async function () { + let path = PathUtils.join(baseFile, "dummy-dir-" + DUMMY_DIR_COUNT++); + + if (!create) { + return path; + } + + dump("Creating directory: " + path + "\n"); + await IOUtils.makeDirectory(path, { permissions: dirMode }); + + return path; + })(); + } + + let pendingD = await makeDir(); + let submittedD = await makeDir(); + let eventsD1 = await makeDir(); 
+ let eventsD2 = await makeDir(); + + // Store directory is created at run-time if needed. Ensure those code + // paths are triggered. + let storeD = await makeDir(false); + + let m = new TestingCrashManager({ + pendingDumpsDir: pendingD, + submittedDumpsDir: submittedD, + eventsDirs: [eventsD1, eventsD2], + storeDir: storeD, + telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES", + }); + + return m; + })(); +}; diff --git a/toolkit/components/crashes/CrashService.sys.mjs b/toolkit/components/crashes/CrashService.sys.mjs new file mode 100644 index 0000000000..cedbca53b9 --- /dev/null +++ b/toolkit/components/crashes/CrashService.sys.mjs @@ -0,0 +1,234 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +import { AppConstants } from "resource://gre/modules/AppConstants.sys.mjs"; +import { AsyncShutdown } from "resource://gre/modules/AsyncShutdown.sys.mjs"; + +// Set to true if the application is quitting +var gQuitting = false; + +// Tracks all the running instances of the minidump-analyzer +var gRunningProcesses = new Set(); + +/** + * Run the minidump-analyzer with the given options unless we're already + * shutting down or the main process has been instructed to shut down in the + * case a content process crashes. Minidump analysis can take a while so we + * don't want to block shutdown waiting for it. + */ +async function maybeRunMinidumpAnalyzer(minidumpPath, allThreads) { + let shutdown = Services.env.exists("MOZ_CRASHREPORTER_SHUTDOWN"); + + if (gQuitting || shutdown) { + return; + } + + await runMinidumpAnalyzer(minidumpPath, allThreads).catch(e => + console.error(e) + ); +} + +function getMinidumpAnalyzerPath() { + const binSuffix = AppConstants.platform === "win" ? 
".exe" : ""; + const exeName = "minidump-analyzer" + binSuffix; + + let exe = Services.dirsvc.get("GreBinD", Ci.nsIFile); + exe.append(exeName); + + return exe; +} + +/** + * Run the minidump analyzer tool to gather stack traces from the minidump. The + * stack traces will be stored in the .extra file under the StackTraces= entry. + * + * @param minidumpPath {string} The path to the minidump file + * @param allThreads {bool} Gather stack traces for all threads, not just the + * crashing thread. + * + * @returns {Promise} A promise that gets resolved once minidump analysis has + * finished. + */ +function runMinidumpAnalyzer(minidumpPath, allThreads) { + return new Promise((resolve, reject) => { + try { + let exe = getMinidumpAnalyzerPath(); + let args = [minidumpPath]; + let process = Cc["@mozilla.org/process/util;1"].createInstance( + Ci.nsIProcess + ); + process.init(exe); + process.startHidden = true; + process.noShell = true; + + if (allThreads) { + args.unshift("--full"); + } + + process.runAsync(args, args.length, (subject, topic, data) => { + switch (topic) { + case "process-finished": + gRunningProcesses.delete(process); + resolve(); + break; + case "process-failed": + gRunningProcesses.delete(process); + resolve(); + break; + default: + reject(new Error("Unexpected topic received " + topic)); + break; + } + }); + + gRunningProcesses.add(process); + } catch (e) { + reject(e); + } + }); +} + +/** + * Computes the SHA256 hash of a minidump file + * + * @param minidumpPath {string} The path to the minidump file + * + * @returns {Promise} A promise that resolves to the hash value of the + * minidump. 
+ */ +function computeMinidumpHash(minidumpPath) { + return (async function () { + try { + let minidumpData = await IOUtils.read(minidumpPath); + let hasher = Cc["@mozilla.org/security/hash;1"].createInstance( + Ci.nsICryptoHash + ); + hasher.init(hasher.SHA256); + hasher.update(minidumpData, minidumpData.length); + + let hashBin = hasher.finish(false); + let hash = ""; + + for (let i = 0; i < hashBin.length; i++) { + // Every character in the hash string contains a byte of the hash data + hash += ("0" + hashBin.charCodeAt(i).toString(16)).slice(-2); + } + + return hash; + } catch (e) { + console.error(e); + return null; + } + })(); +} + +/** + * Process the given .extra file and return the annotations it contains in an + * object. + * + * @param extraPath {string} The path to the .extra file + * + * @return {Promise} A promise that resolves to an object holding the crash + * annotations. + */ +function processExtraFile(extraPath) { + return (async function () { + try { + let decoder = new TextDecoder(); + let extraData = await IOUtils.read(extraPath); + + return JSON.parse(decoder.decode(extraData)); + } catch (e) { + console.error(e); + return {}; + } + })(); +} + +/** + * This component makes crash data available throughout the application. + * + * It is a service because some background activity will eventually occur. 
+ */ +export function CrashService() { + Services.obs.addObserver(this, "quit-application"); +} + +CrashService.prototype = Object.freeze({ + classID: Components.ID("{92668367-1b17-4190-86b2-1061b2179744}"), + QueryInterface: ChromeUtils.generateQI(["nsICrashService", "nsIObserver"]), + + async addCrash(processType, crashType, id) { + if (processType === Ci.nsIXULRuntime.PROCESS_TYPE_IPDLUNITTEST) { + return; + } + + processType = Services.crashmanager.processTypes[processType]; + + let allThreads = false; + + switch (crashType) { + case Ci.nsICrashService.CRASH_TYPE_CRASH: + crashType = Services.crashmanager.CRASH_TYPE_CRASH; + break; + case Ci.nsICrashService.CRASH_TYPE_HANG: + crashType = Services.crashmanager.CRASH_TYPE_HANG; + allThreads = true; + break; + default: + throw new Error("Unrecognized CRASH_TYPE: " + crashType); + } + + let minidumpPath = Services.appinfo.getMinidumpForID(id).path; + let extraPath = Services.appinfo.getExtraFileForID(id).path; + let metadata = {}; + let hash = null; + + await maybeRunMinidumpAnalyzer(minidumpPath, allThreads); + metadata = await processExtraFile(extraPath); + hash = await computeMinidumpHash(minidumpPath); + + if (hash) { + metadata.MinidumpSha256Hash = hash; + } + + let blocker = Services.crashmanager.addCrash( + processType, + crashType, + id, + new Date(), + metadata + ); + + AsyncShutdown.profileBeforeChange.addBlocker( + "CrashService waiting for content crash ping to be sent", + blocker + ); + + blocker.then(AsyncShutdown.profileBeforeChange.removeBlocker(blocker)); + + await blocker; + }, + + observe(subject, topic, data) { + switch (topic) { + case "profile-after-change": + // Side-effect is the singleton is instantiated. 
+ Services.crashmanager; + break; + case "quit-application": + gQuitting = true; + gRunningProcesses.forEach(process => { + try { + process.kill(); + } catch (e) { + // If the process has already quit then kill() fails, but since + // this failure is benign it is safe to silently ignore it. + } + Services.obs.notifyObservers(null, "test-minidump-analyzer-killed"); + }); + break; + } + }, +}); diff --git a/toolkit/components/crashes/components.conf b/toolkit/components/crashes/components.conf new file mode 100644 index 0000000000..f3b80a36f2 --- /dev/null +++ b/toolkit/components/crashes/components.conf @@ -0,0 +1,22 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{92668367-1b17-4190-86b2-1061b2179744}', + 'contract_ids': ['@mozilla.org/crashservice;1'], + 'esModule': 'resource://gre/modules/CrashService.sys.mjs', + 'constructor': 'CrashService', + 'categories': {'profile-after-change': 'CrashService'}, + }, + { + 'js_name': 'crashmanager', + 'cid': '{c887b6a9-a5eb-4566-a440-bebaea3e54fd}', + 'contract_ids': ['@mozilla.org/crashmanager;1'], + 'esModule': 'resource://gre/modules/CrashManager.sys.mjs', + 'constructor': 'getCrashManager', + }, +] diff --git a/toolkit/components/crashes/docs/crash-events.rst b/toolkit/components/crashes/docs/crash-events.rst new file mode 100644 index 0000000000..20afdac482 --- /dev/null +++ b/toolkit/components/crashes/docs/crash-events.rst @@ -0,0 +1,191 @@ +============ +Crash Events +============ + +**Crash Events** refers to a special subsystem of Gecko that aims to capture +events of interest related to process crashing and hanging. 
+ +When an event worthy of recording occurs, a file containing that event's +information is written to a well-defined location on the filesystem. The Gecko +process periodically scans for produced files and consolidates information +into a more unified and efficient backend store. + +Crash Event Files +================= + +When a crash-related event occurs, a file describing that event is written +to a well-defined directory. That directory is likely in the directory of +the currently-active profile. However, if a profile is not yet active in +the Gecko process, that directory likely resides in the user's *app data* +directory (*UAppData* from the directory service). + +The filename of the event file is not relevant. However, producers need +to choose a filename intelligently to avoid name collisions and race +conditions. Since file locking is potentially dangerous at crash time, +the convention of generating a UUID and using it as a filename has been +adopted. + +File Format +----------- + +All crash event files share the same high-level file format. The format +consists of the following fields delimited by a UNIX newline (*\n*) +character: + +* String event name (valid UTF-8, but likely ASCII) +* String representation of integer seconds since UNIX epoch +* Payload + +The payload is event specific and may contain UNIX newline characters. +The recommended method for parsing is to split at most 3 times on UNIX +newline and then dispatch to an event-specific parser based on the +event name. + +If an unknown event type is encountered, the event can safely be ignored +until later. This helps ensure that application downgrades (potentially +due to elevated crash rate) don't result in data loss. + +The format and semantics of each event type are meant to be constant once +that event type is committed to the main Firefox repository. 
If new metadata +needs to be captured or the meaning of data captured in an event changes, +that change should be expressed through the invention of a new event type. +For this reason, event names are highly recommended to contain a version. +e.g. instead of a *Gecko process crashed* event, we prefer a *Gecko process +crashed v1* event. + +Event Types +----------- + +Each subsection documents the different types of crash events that may be +produced. Each section name corresponds to the first line of the crash +event file. + +Currently only main process crashes produce event files. Because crashes and +hangs in child processes can be easily recorded by the main process, we do not +foresee the need for writing event files for child processes, design +considerations below notwithstanding. + +crash.main.3 +^^^^^^^^^^^^ + +This event is produced when the main process crashes. + +The payload of this event is delimited by UNIX newlines (*\n*) and contains the +following fields: + +* The crash ID string, very likely a UUID +* One line holding the crash metadata serialized as a JSON string + +crash.main.2 +^^^^^^^^^^^^ + +This event is produced when the main process crashes. + +The payload of this event is delimited by UNIX newlines (*\n*) and contains the +following fields: + +* The crash ID string, very likely a UUID +* 0 or more lines of metadata, each containing one key=value pair of text + +This event is obsolete. + +crash.main.1 +^^^^^^^^^^^^ + +This event is produced when the main process crashes. + +The payload of this event is the string crash ID, very likely a UUID. +There should be ``UUID.dmp`` and ``UUID.extra`` files on disk, saved by +Breakpad. + +This event is obsolete. + +crash.submission.1 +^^^^^^^^^^^^^^^^^^ + +This event is produced when a crash is submitted. 
+ +The payload of this event is delimited by UNIX newlines (*\n*) and contains the +following fields: + +* The crash ID string +* "true" if the submission succeeded or "false" otherwise +* The remote crash ID string if the submission succeeded + +Aggregated Event Log +==================== + +Crash events are aggregated together into a unified event *log*. Currently, +this *log* is really a JSON file. However, this is an implementation detail +and it could change at any time. The interface to crash data provided by +the JavaScript API is the only supported interface. + +Design Considerations +===================== + +There are many considerations influencing the design of this subsystem. +We attempt to document them in this section. + +Decoupling of Event Files from Final Data Structure +--------------------------------------------------- + +While it is certainly possible for the Gecko process to write directly to +the final data structure on disk, there is an intentional decoupling between +the production of events and their transition into final storage. Along the +same vein, the choice to have events written to multiple files by producers +is deliberate. + +Some recorded events are written immediately after a process crash. This is +a very uncertain time for the host system. There is a high likelihood the +system is in an exceptional state, such as memory exhaustion. Therefore, any +action taken after crashing needs to be very deliberate about what it does. +Excessive memory allocation and certain system calls may cause the system +to crash again or the machine's condition to worsen. This means that the act +of recording a crash event must be very lightweight. Writing a new file from +nothing is very lightweight. This is one reason we write separate files. 
+ +Another reason we write separate files is because if the main Gecko process +itself crashes (as opposed to say a plugin process), the crash reporter (not +Gecko) is running and the crash reporter needs to handle the writing of the +event info. If this writing is involved (say loading, parsing, updating, and +reserializing back to disk), this logic would need to be implemented in both +Gecko and the crash reporter or would need to be implemented in such a way +that both could use. Neither of these is very practical from a software +lifecycle management perspective. It's much easier to have separate processes +write a simple file and to let a single implementation do all the complex +work. + +Idempotent Event Processing +=========================== + +Processing of event files has been designed such that the result is +idempotent regardless of what order those files are processed in. This is +not only a good design decision, but it is arguably necessary. While event +files are processed in order by file mtime, filesystem times may not have +the resolution required for proper sorting. Therefore, processing order is +merely an optimistic assumption. + +Aggregated Storage Format +========================= + +Crash events are aggregated into a unified data structure on disk. That data +structure is currently LZ4-compressed JSON and is represented by a single file. + +The choice of a single JSON file was initially driven by time and complexity +concerns. Before changing the format or adding significant amounts of new +data, some considerations must be taken into account. + +First, in well-behaving installs, crash data should be minimal. Crashes and +hangs will be rare and thus the size of the crash data should remain small +over time. + +The choice of a single JSON file has larger implications as the amount of +crash data grows. As new data is accumulated, we need to read and write +an entire file to make small updates. LZ4 compression helps reduce I/O. 
+But, there is a potential for unbounded file growth. We establish a +limit for the max age of records. Anything older than that limit is +pruned. We also establish a daily limit on the number of crashes we will +store. All crashes beyond the first N in a day have no payload and are +only recorded by the presence of a count. This count ensures we can +distinguish between ``N`` and ``100 * N``, which are very different +values! diff --git a/toolkit/components/crashes/docs/index.rst b/toolkit/components/crashes/docs/index.rst new file mode 100644 index 0000000000..1a24f2649a --- /dev/null +++ b/toolkit/components/crashes/docs/index.rst @@ -0,0 +1,43 @@ +.. _crashes_crashmanager: + +============= +Crash Manager +============= + +The **Crash Manager** is a service and interface for managing crash +data within the Gecko application. + +From JavaScript, the service can be accessed via:: + + let crashManager = Services.crashmanager; + +That will give you an instance of ``CrashManager`` from ``CrashManager.sys.mjs``. +From there, you can access and manipulate crash data. + +The crash manager stores statistical information about crashes as well as +detailed information for both browser and content crashes. The crash manager +automatically detects new browser crashes at startup by scanning for +:ref:`Crash Events`. Content process crash information on the other hand is +provided externally. + +Crash Pings +=========== + +The Crash Manager is responsible for sending crash pings when a crash occurs +or when a crash event is found. Crash pings are sent using +`Telemetry pings <../../telemetry/data/crash-ping.html>`__. + +Glean +----- +Crash pings are being migrated to use `Glean pings <../../glean/index.html>`__, +however until information parity is reached, the Telemetry pings will still be +sent. The Glean `crash` ping can be found `here <https://dictionary.telemetry.mozilla.org/apps/firefox_desktop/pings/crash>`__. + + +Other Documents +=============== + +.. 
toctree:: + :maxdepth: 1 + + crash-events diff --git a/toolkit/components/crashes/gen_CrashManager.py b/toolkit/components/crashes/gen_CrashManager.py new file mode 100644 index 0000000000..7c0ba8ab90 --- /dev/null +++ b/toolkit/components/crashes/gen_CrashManager.py @@ -0,0 +1,55 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from geckoprocesstypes import process_types + + +def process_name(string_name): + if string_name == "default": + string_name = "main" + if string_name == "tab": + string_name = "content" + return string_name + + +def gen_process_map(): + kIdentifier = "/* SUBST: CRASH_MANAGER_PROCESS_MAP */" + crashManagerMap = """ + processTypes: {""" + + for p in process_types: + crashManagerMap += """ + // A crash in the %(procname)s process. + %(proctype)d: "%(procname)s",""" % { + "proctype": p.enum_value, + "procname": process_name(p.string_name), + } + crashManagerMap += """ + },""" + + return (kIdentifier, crashManagerMap) + + +def gen_process_pings(): + kIdentifier = "/* SUBST: CRASH_MANAGER_PROCESS_PINGS */" + crashManagerPing = "" + + for p in process_types: + crashManagerPing += """ + "%(proctype)s": %(crashping)s,""" % { + "proctype": process_name(p.string_name), + "crashping": "true" if p.crash_ping else "false", + } + + return (kIdentifier, crashManagerPing) + + +def main(o, crashManager): + subst = [gen_process_map(), gen_process_pings()] + with open(crashManager, "r") as src: + for l in src.readlines(): + for id, value in subst: + if id in l: + l = l.replace(id, value) + o.write(l) diff --git a/toolkit/components/crashes/metrics.yaml b/toolkit/components/crashes/metrics.yaml new file mode 100644 index 0000000000..22d5f8a96c --- /dev/null +++ b/toolkit/components/crashes/metrics.yaml @@ -0,0 +1,86 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 
2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Adding a new metric? We have docs for that! +# https://firefox-source-docs.mozilla.org/toolkit/components/glean/user/new_definitions_file.html + +--- +$schema: moz://mozilla.org/schemas/glean/metrics/2-0-0 +$tags: + - "Toolkit :: Crash Reporting" + +crash: + uptime: + type: timespan + description: > + The application uptime. This is equivalent to the legacy crash ping's + `UptimeTS` field. + notification_emails: + - crash-reporting-wg@mozilla.org + - stability@mozilla.org + bugs: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569 + data_reviews: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569#c12 + data_sensitivity: + - technical + expires: never + send_in_pings: + - crash + + process_type: + type: string + # yamllint disable + description: > + The type of process that experienced a crash. See the full list of + options + [here](https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/data/crash-ping.html#process-types). + # yamllint enable + notification_emails: + - crash-reporting-wg@mozilla.org + - stability@mozilla.org + bugs: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569 + data_reviews: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569#c12 + data_sensitivity: + - technical + expires: never + send_in_pings: + - crash + + time: + type: datetime + time_unit: minute + description: > + The time at which the crash occurred. + notification_emails: + - crash-reporting-wg@mozilla.org + - stability@mozilla.org + bugs: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569 + data_reviews: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569#c12 + data_sensitivity: + - technical + expires: never + send_in_pings: + - crash + + startup: + type: boolean + description: > + If true, the crash occurred during process startup. 
+ notification_emails: + - crash-reporting-wg@mozilla.org + - stability@mozilla.org + bugs: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569 + data_reviews: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569#c12 + data_sensitivity: + - technical + expires: never + send_in_pings: + - crash diff --git a/toolkit/components/crashes/moz.build b/toolkit/components/crashes/moz.build new file mode 100644 index 0000000000..14bc79f75b --- /dev/null +++ b/toolkit/components/crashes/moz.build @@ -0,0 +1,44 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SPHINX_TREES["crash-manager"] = "docs" + +with Files("docs/**"): + SCHEDULES.exclusive = ["docs"] + +DEFINES["OBJDIR"] = OBJDIR + +XPIDL_MODULE = "toolkit_crashservice" + +XPIDL_SOURCES += [ + "nsICrashService.idl", +] + +if CONFIG["MOZ_CRASHREPORTER"]: + GeneratedFile( + "CrashManager.sys.mjs", + script="gen_CrashManager.py", + entry_point="main", + inputs=["CrashManager.in.sys.mjs"], + ) + + EXTRA_JS_MODULES += [ + "!CrashManager.sys.mjs", + "CrashService.sys.mjs", + ] + + XPCOM_MANIFESTS += [ + "components.conf", + ] + + TESTING_JS_MODULES += [ + "CrashManagerTest.sys.mjs", + ] + + XPCSHELL_TESTS_MANIFESTS += ["tests/xpcshell/xpcshell.toml"] + +with Files("**"): + BUG_COMPONENT = ("Toolkit", "Crash Reporting") diff --git a/toolkit/components/crashes/nsICrashService.idl b/toolkit/components/crashes/nsICrashService.idl new file mode 100644 index 0000000000..3d090bd503 --- /dev/null +++ b/toolkit/components/crashes/nsICrashService.idl @@ -0,0 +1,26 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +[scriptable, uuid(70bd93ff-88fa-4600-8af8-57c8d002dbac)] +interface nsICrashService : nsISupports +{ + /** + * Records the occurrence of a crash. + * + * @param processType + * One of the PROCESS_TYPE constants defined in nsIXULRuntime. + * @param crashType + * One of the CRASH_TYPE constants defined below. + * @param id + * Crash ID. Likely a UUID. + * + * @return A promise that resolves after the crash has been stored + */ + Promise addCrash(in long processType, in long crashType, in AString id); + + const long CRASH_TYPE_CRASH = 0; + const long CRASH_TYPE_HANG = 1; +}; diff --git a/toolkit/components/crashes/pings.yaml b/toolkit/components/crashes/pings.yaml new file mode 100644 index 0000000000..620e185872 --- /dev/null +++ b/toolkit/components/crashes/pings.yaml @@ -0,0 +1,28 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +--- +$schema: moz://mozilla.org/schemas/glean/pings/2-0-0 + +crash: + description: > + A ping to report crash information. This information is sent as soon as + possible after a crash occurs (whether the crash is a background/content + process or the main process). It is expected to be used for crash report + analysis and to reduce blind spots in crash reporting. + include_client_id: true + send_if_empty: false + notification_emails: + - crash-reporting-wg@mozilla.org + - stability@mozilla.org + bugs: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569 + data_reviews: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1790569#c12 + reasons: + crash: > + A process crashed and a ping was immediately sent. + event_found: > + A process crashed and produced a crash event, which was later found and + sent in a ping. 
diff --git a/toolkit/components/crashes/tests/xpcshell/crash.dmp b/toolkit/components/crashes/tests/xpcshell/crash.dmp Binary files differnew file mode 100755 index 0000000000..d94538de83 --- /dev/null +++ b/toolkit/components/crashes/tests/xpcshell/crash.dmp diff --git a/toolkit/components/crashes/tests/xpcshell/crash.extra b/toolkit/components/crashes/tests/xpcshell/crash.extra new file mode 100644 index 0000000000..7b3b7f871c --- /dev/null +++ b/toolkit/components/crashes/tests/xpcshell/crash.extra @@ -0,0 +1 @@ +{"ContentSandboxLevel":"2","TelemetryEnvironment":"{\"EscapedField\":\"EscapedData\\n\\nfoo\"}","EMCheckCompatibility":"true","ProductName":"Firefox","ContentSandboxCapabilities":"119","TelemetryClientId":"","Vendor":"Mozilla","InstallTime":"1000000000","Theme":"classic/1.0","ReleaseChannel":"default","ServerURL":"https://crash-reports.mozilla.com","SafeMode":"0","ContentSandboxCapable":"1","useragent_locale":"en-US","Version":"55.0a1","BuildID":"20170512114708","ProductID":"{ec8030f7-c20a-464f-9b0e-13a3a9e97384}","TelemetryServerURL":"","DOMIPCEnabled":"1","Add-ons":"","CrashTime":"1494582646","UptimeTS":"14.9179586","ContentSandboxEnabled":"1","ProcessType":"content","StartupTime":"1000000000","URL":"about:home"} diff --git a/toolkit/components/crashes/tests/xpcshell/test_crash_manager.js b/toolkit/components/crashes/tests/xpcshell/test_crash_manager.js new file mode 100644 index 0000000000..2f77ea5105 --- /dev/null +++ b/toolkit/components/crashes/tests/xpcshell/test_crash_manager.js @@ -0,0 +1,1070 @@ +/* Any copyright is dedicated to the Public Domain. 
+ * http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const { CrashManager } = ChromeUtils.importESModule( + "resource://gre/modules/CrashManager.sys.mjs" +); +const { TelemetryArchiveTesting } = ChromeUtils.importESModule( + "resource://testing-common/TelemetryArchiveTesting.sys.mjs" +); +const { configureLogging, getManager, sleep } = ChromeUtils.importESModule( + "resource://testing-common/CrashManagerTest.sys.mjs" +); +const { TelemetryEnvironment } = ChromeUtils.importESModule( + "resource://gre/modules/TelemetryEnvironment.sys.mjs" +); + +const DUMMY_DATE = new Date(Date.now() - 10 * 24 * 60 * 60 * 1000); +DUMMY_DATE.setMilliseconds(0); + +const DUMMY_DATE_2 = new Date(Date.now() - 20 * 24 * 60 * 60 * 1000); +DUMMY_DATE_2.setMilliseconds(0); + +function run_test() { + do_get_profile(); + configureLogging(); + TelemetryArchiveTesting.setup(); + // Initialize FOG for glean tests + Services.fog.initializeFOG(); + run_next_test(); +} + +add_task(async function test_constructor_ok() { + let m = new CrashManager({ + pendingDumpsDir: "/foo", + submittedDumpsDir: "/bar", + eventsDirs: [], + storeDir: "/baz", + }); + Assert.ok(m, "CrashManager can be created."); +}); + +add_task(async function test_constructor_invalid() { + Assert.throws(() => { + new CrashManager({ foo: true }); + }, /Unknown property in options/); +}); + +add_task(async function test_get_manager() { + let m = await getManager(); + Assert.ok(m, "CrashManager obtained."); + + await m.createDummyDump(true); + await m.createDummyDump(false); +}); + +add_task(async function test_valid_process() { + let m = await getManager(); + Assert.ok(m, "CrashManager obtained."); + + Assert.ok(!m.isValidProcessType(42)); + Assert.ok(!m.isValidProcessType(null)); + Assert.ok(!m.isValidProcessType("default")); + + Assert.ok(m.isValidProcessType("main")); +}); + +add_task(async function test_process_ping() { + let m = await getManager(); + Assert.ok(m, "CrashManager obtained."); + + 
Assert.ok(!m.isPingAllowed(42)); + Assert.ok(!m.isPingAllowed(null)); + Assert.ok(!m.isPingAllowed("default")); + Assert.ok(!m.isPingAllowed("ipdlunittest")); + Assert.ok(!m.isPingAllowed("tab")); + + Assert.ok(m.isPingAllowed("content")); + Assert.ok(m.isPingAllowed("forkserver")); + Assert.ok(m.isPingAllowed("gmplugin")); + Assert.ok(m.isPingAllowed("gpu")); + Assert.ok(m.isPingAllowed("main")); + Assert.ok(m.isPingAllowed("rdd")); + Assert.ok(m.isPingAllowed("sandboxbroker")); + Assert.ok(m.isPingAllowed("socket")); + Assert.ok(m.isPingAllowed("utility")); + Assert.ok(m.isPingAllowed("vr")); +}); + +// Unsubmitted dump files on disk are detected properly. +add_task(async function test_pending_dumps() { + let m = await getManager(); + let now = Date.now(); + let ids = []; + const COUNT = 5; + + for (let i = 0; i < COUNT; i++) { + ids.push(await m.createDummyDump(false, new Date(now - i * 86400000))); + } + await m.createIgnoredDumpFile("ignored", false); + + let entries = await m.pendingDumps(); + Assert.equal(entries.length, COUNT, "proper number detected."); + + for (let entry of entries) { + Assert.equal(typeof entry, "object", "entry is an object"); + Assert.ok("id" in entry, "id in entry"); + Assert.ok("path" in entry, "path in entry"); + Assert.ok("date" in entry, "date in entry"); + Assert.notEqual(ids.indexOf(entry.id), -1, "ID is known"); + } + + for (let i = 0; i < COUNT; i++) { + Assert.equal(entries[i].id, ids[COUNT - i - 1], "Entries sorted by mtime"); + } +}); + +// Submitted dump files on disk are detected properly. 
+add_task(async function test_submitted_dumps() { + let m = await getManager(); + let COUNT = 5; + + for (let i = 0; i < COUNT; i++) { + await m.createDummyDump(true); + } + await m.createIgnoredDumpFile("ignored", true); + + let entries = await m.submittedDumps(); + Assert.equal(entries.length, COUNT, "proper number detected."); + + let hrID = await m.createDummyDump(true, new Date(), true); + entries = await m.submittedDumps(); + Assert.equal(entries.length, COUNT + 1, "hr- in filename detected."); + + let gotIDs = new Set(entries.map(e => e.id)); + Assert.ok(gotIDs.has(hrID)); +}); + +// The store should expire after inactivity. +add_task(async function test_store_expires() { + let m = await getManager(); + + Object.defineProperty(m, "STORE_EXPIRATION_MS", { + value: 250, + }); + + let store = await m._getStore(); + Assert.ok(store); + Assert.equal(store, m._store); + + await sleep(300); + Assert.ok(!m._store, "Store has gone away."); +}); + +// Ensure errors are handled when the events dir is missing. +add_task(async function test_empty_events_dir() { + let m = await getManager(); + await m.deleteEventsDirs(); + + let paths = await m._getUnprocessedEventsFiles(); + Assert.equal(paths.length, 0); +}); + +// Ensure discovery of unprocessed events files works. +add_task(async function test_unprocessed_events_files() { + let m = await getManager(); + await m.createEventsFile("1", "test.1", new Date(), "foo", "{}", 0); + await m.createEventsFile("2", "test.1", new Date(), "bar", "{}", 0); + await m.createEventsFile("1", "test.1", new Date(), "baz", "{}", 1); + + let paths = await m._getUnprocessedEventsFiles(); + Assert.equal(paths.length, 3); +}); + +// Ensure only 1 aggregateEventsFiles() is allowed at a time. 
+add_task(async function test_aggregate_events_locking() { + let m = await getManager(); + + let p1 = m.aggregateEventsFiles(); + let p2 = m.aggregateEventsFiles(); + + Assert.strictEqual(p1, p2, "Same promise should be returned."); +}); + +// Malformed events files should be deleted. +add_task(async function test_malformed_files_deleted() { + let m = await getManager(); + + await m.createEventsFile("1", "crash.main.1", new Date(), "foo\nbar"); + + let count = await m.aggregateEventsFiles(); + Assert.equal(count, 1); + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); + + count = await m.aggregateEventsFiles(); + Assert.equal(count, 0); +}); + +// Unknown event types should be ignored. +add_task(async function test_aggregate_ignore_unknown_events() { + let m = await getManager(); + + await m.createEventsFile("1", "crash.main.3", DUMMY_DATE, "id1", "{}"); + await m.createEventsFile("2", "foobar.1", new Date(), "dummy"); + + let count = await m.aggregateEventsFiles(); + Assert.equal(count, 2); + + count = await m.aggregateEventsFiles(); + Assert.equal(count, 1); + + count = await m.aggregateEventsFiles(); + Assert.equal(count, 1); +}); + +add_task(async function test_prune_old() { + let m = await getManager(); + let oldDate = new Date(Date.now() - 86400000); + let newDate = new Date(Date.now() - 10000); + await m.createEventsFile("1", "crash.main.3", oldDate, "id1", "{}"); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_CRASH, + "id2", + newDate + ); + + await m.aggregateEventsFiles(); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 2); + + await m.pruneOldCrashes(new Date(oldDate.getTime() + 10000)); + + crashes = await m.getCrashes(); + Assert.equal(crashes.length, 1, "Old crash has been pruned."); + + let c = crashes[0]; + Assert.equal(c.id, "id2", "Proper crash was pruned."); + + // We can't test exact boundary conditions because dates from filesystem + // don't have same 
guarantees as JS dates. + await m.pruneOldCrashes(new Date(newDate.getTime() + 5000)); + crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); +}); + +add_task(async function test_schedule_maintenance() { + let m = await getManager(); + await m.createEventsFile("1", "crash.main.3", DUMMY_DATE, "id1", "{}"); + + let oldDate = new Date( + Date.now() - m.PURGE_OLDER_THAN_DAYS * 2 * 24 * 60 * 60 * 1000 + ); + await m.createEventsFile("2", "crash.main.3", oldDate, "id2", "{}"); + + await m.scheduleMaintenance(25); + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 1); + Assert.equal(crashes[0].id, "id1"); +}); + +const crashId = "3cb67eba-0dc7-6f78-6a569a0e-172287ec"; +const productName = "Firefox"; +const productId = "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}"; +const sha256Hash = + "f8410c3ac4496cfa9191a1240f0e365101aef40c7bf34fc5bcb8ec511832ed79"; +const stackTraces = { status: "OK" }; + +add_task(async function test_main_crash_event_file() { + let ac = new TelemetryArchiveTesting.Checker(); + await ac.promiseInit(); + let theEnvironment = TelemetryEnvironment.currentEnvironment; + const sessionId = "be66af2f-2ee5-4330-ae95-44462dfbdf0c"; + + // To test proper escaping, add data to the environment with an embedded + // double-quote + theEnvironment.testValue = 'MyValue"'; + + let m = await getManager(); + const metadata = JSON.stringify({ + ProductName: productName, + ProductID: productId, + TelemetryEnvironment: JSON.stringify(theEnvironment), + TelemetrySessionId: sessionId, + MinidumpSha256Hash: sha256Hash, + StackTraces: stackTraces, + ThisShouldNot: "end-up-in-the-ping", + }); + + await m.createEventsFile( + crashId, + "crash.main.3", + DUMMY_DATE, + crashId, + metadata + ); + let count = await m.aggregateEventsFiles(); + Assert.equal(count, 1); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 1); + Assert.equal(crashes[0].id, crashId); + Assert.equal(crashes[0].type, "main-crash"); + 
Assert.equal(crashes[0].metadata.ProductName, productName); + Assert.equal(crashes[0].metadata.ProductID, productId); + Assert.ok(crashes[0].metadata.TelemetryEnvironment); + Assert.equal(Object.getOwnPropertyNames(crashes[0].metadata).length, 7); + Assert.equal(crashes[0].metadata.TelemetrySessionId, sessionId); + Assert.ok(crashes[0].metadata.StackTraces); + Assert.deepEqual(crashes[0].crashDate, DUMMY_DATE); + + let found = await ac.promiseFindPing("crash", [ + [["payload", "hasCrashEnvironment"], true], + [["payload", "metadata", "ProductName"], productName], + [["payload", "metadata", "ProductID"], productId], + [["payload", "minidumpSha256Hash"], sha256Hash], + [["payload", "crashId"], crashId], + [["payload", "stackTraces", "status"], "OK"], + [["payload", "sessionId"], sessionId], + ]); + Assert.ok(found, "Telemetry ping submitted for found crash"); + Assert.deepEqual( + found.environment, + theEnvironment, + "The saved environment should be present" + ); + Assert.equal( + found.payload.metadata.ThisShouldNot, + undefined, + "Non-allowed fields should be filtered out" + ); + + count = await m.aggregateEventsFiles(); + Assert.equal(count, 0); +}); + +add_task(async function test_main_crash_event_file_noenv() { + let ac = new TelemetryArchiveTesting.Checker(); + await ac.promiseInit(); + const metadata = JSON.stringify({ + ProductName: productName, + ProductID: productId, + }); + + let m = await getManager(); + await m.createEventsFile( + crashId, + "crash.main.3", + DUMMY_DATE, + crashId, + metadata + ); + let count = await m.aggregateEventsFiles(); + Assert.equal(count, 1); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 1); + Assert.equal(crashes[0].id, crashId); + Assert.equal(crashes[0].type, "main-crash"); + Assert.deepEqual(crashes[0].metadata, { + ProductName: productName, + ProductID: productId, + }); + Assert.deepEqual(crashes[0].crashDate, DUMMY_DATE); + + let found = await ac.promiseFindPing("crash", [ + [["payload", 
"hasCrashEnvironment"], false], + [["payload", "metadata", "ProductName"], productName], + [["payload", "metadata", "ProductID"], productId], + ]); + Assert.ok(found, "Telemetry ping submitted for found crash"); + Assert.ok(found.environment, "There is an environment"); + + count = await m.aggregateEventsFiles(); + Assert.equal(count, 0); +}); + +add_task(async function test_crash_submission_event_file() { + let m = await getManager(); + await m.createEventsFile("1", "crash.main.3", DUMMY_DATE, "crash1", "{}"); + await m.createEventsFile( + "1-submission", + "crash.submission.1", + DUMMY_DATE_2, + "crash1", + "false\n" + ); + + // The line below has been intentionally commented out to make sure that + // the crash record is created when one does not exist. + // yield m.createEventsFile("2", "crash.main.1", DUMMY_DATE, "crash2"); + await m.createEventsFile( + "2-submission", + "crash.submission.1", + DUMMY_DATE_2, + "crash2", + "true\nbp-2" + ); + let count = await m.aggregateEventsFiles(); + Assert.equal(count, 3); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 2); + + let map = new Map(crashes.map(crash => [crash.id, crash])); + + let crash1 = map.get("crash1"); + Assert.ok(!!crash1); + Assert.equal(crash1.remoteID, null); + let crash2 = map.get("crash2"); + Assert.ok(!!crash2); + Assert.equal(crash2.remoteID, "bp-2"); + + Assert.equal(crash1.submissions.size, 1); + let submission = crash1.submissions.values().next().value; + Assert.equal(submission.result, m.SUBMISSION_RESULT_FAILED); + Assert.equal(submission.requestDate.getTime(), DUMMY_DATE_2.getTime()); + Assert.equal(submission.responseDate.getTime(), DUMMY_DATE_2.getTime()); + + Assert.equal(crash2.submissions.size, 1); + submission = crash2.submissions.values().next().value; + Assert.equal(submission.result, m.SUBMISSION_RESULT_OK); + Assert.equal(submission.requestDate.getTime(), DUMMY_DATE_2.getTime()); + Assert.equal(submission.responseDate.getTime(), DUMMY_DATE_2.getTime()); + + 
count = await m.aggregateEventsFiles(); + Assert.equal(count, 0); +}); + +add_task(async function test_multiline_crash_id_rejected() { + let m = await getManager(); + await m.createEventsFile("1", "crash.main.1", DUMMY_DATE, "id1\nid2"); + await m.aggregateEventsFiles(); + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); +}); + +// Main process crashes should be remembered beyond the high water mark. +add_task(async function test_high_water_mark() { + let m = await getManager(); + + let store = await m._getStore(); + + for (let i = 0; i < store.HIGH_WATER_DAILY_THRESHOLD + 1; i++) { + await m.createEventsFile( + "m" + i, + "crash.main.3", + DUMMY_DATE, + "m" + i, + "{}" + ); + } + + let count = await m.aggregateEventsFiles(); + Assert.equal(count, store.HIGH_WATER_DAILY_THRESHOLD + 1); + + // Need to fetch again in case the first one was garbage collected. + store = await m._getStore(); + + Assert.equal(store.crashesCount, store.HIGH_WATER_DAILY_THRESHOLD + 1); +}); + +add_task(async function test_addCrash() { + let m = await getManager(); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_CRASH, + "main-crash", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_HANG, + "main-hang", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_CRASH, + "content-crash", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_HANG, + "content-hang", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GMPLUGIN], + m.CRASH_TYPE_CRASH, + "gmplugin-crash", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GPU], + m.CRASH_TYPE_CRASH, + "gpu-crash", + DUMMY_DATE + ); + await m.addCrash( + 
m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_VR], + m.CRASH_TYPE_CRASH, + "vr-crash", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_RDD], + m.CRASH_TYPE_CRASH, + "rdd-crash", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_SOCKET], + m.CRASH_TYPE_CRASH, + "socket-crash", + DUMMY_DATE + ); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_CRASH, + "changing-item", + DUMMY_DATE + ); + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_HANG, + "changing-item", + DUMMY_DATE_2 + ); + + crashes = await m.getCrashes(); + Assert.equal(crashes.length, 10); + + let map = new Map(crashes.map(crash => [crash.id, crash])); + + let crash = map.get("main-crash"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT] + + "-" + + m.CRASH_TYPE_CRASH + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_CRASH + ) + ); + + crash = map.get("main-hang"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT] + + "-" + + m.CRASH_TYPE_HANG + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_HANG + ) + ); + + crash = map.get("content-crash"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT] + + "-" + + m.CRASH_TYPE_CRASH + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_CRASH + ) + ); + + crash = map.get("content-hang"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT] + 
+ "-" + + m.CRASH_TYPE_HANG + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_HANG + ) + ); + + crash = map.get("gmplugin-crash"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GMPLUGIN] + + "-" + + m.CRASH_TYPE_CRASH + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GMPLUGIN], + m.CRASH_TYPE_CRASH + ) + ); + + crash = map.get("gpu-crash"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GPU] + "-" + m.CRASH_TYPE_CRASH + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GPU], + m.CRASH_TYPE_CRASH + ) + ); + + crash = map.get("vr-crash"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_VR] + "-" + m.CRASH_TYPE_CRASH + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_VR], + m.CRASH_TYPE_CRASH + ) + ); + + crash = map.get("rdd-crash"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_RDD] + "-" + m.CRASH_TYPE_CRASH + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_RDD], + m.CRASH_TYPE_CRASH + ) + ); + + crash = map.get("socket-crash"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE); + Assert.equal( + crash.type, + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_SOCKET] + + "-" + + m.CRASH_TYPE_CRASH + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_SOCKET], + m.CRASH_TYPE_CRASH + ) + ); + + crash = map.get("changing-item"); + Assert.ok(!!crash); + Assert.equal(crash.crashDate, DUMMY_DATE_2); + Assert.equal( + crash.type, + 
m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT] + + "-" + + m.CRASH_TYPE_HANG + ); + Assert.ok( + crash.isOfType( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_HANG + ) + ); +}); + +add_task(async function test_child_process_crash_ping() { + let m = await getManager(); + const EXPECTED_PROCESSES = [ + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GMPLUGIN], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_GPU], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_VR], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_RDD], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_SOCKET], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_REMOTESANDBOXBROKER], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_FORKSERVER], + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_UTILITY], + ]; + + const UNEXPECTED_PROCESSES = [ + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_IPDLUNITTEST], + null, + 12, // non-string process type + ]; + + let ac = new TelemetryArchiveTesting.Checker(); + await ac.promiseInit(); + + // Add a child-process crash for each allowed process type. + for (let p of EXPECTED_PROCESSES) { + // Generate a ping. + const remoteType = + p === m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT] + ? 
"web" + : undefined; + let id = await m.createDummyDump(); + await m.addCrash(p, m.CRASH_TYPE_CRASH, id, DUMMY_DATE, { + RemoteType: remoteType, + StackTraces: stackTraces, + MinidumpSha256Hash: sha256Hash, + ipc_channel_error: "ShutDownKill", + ThisShouldNot: "end-up-in-the-ping", + }); + await m._pingPromise; + + let found = await ac.promiseFindPing("crash", [ + [["payload", "crashId"], id], + [["payload", "minidumpSha256Hash"], sha256Hash], + [["payload", "processType"], p], + [["payload", "stackTraces", "status"], "OK"], + ]); + Assert.ok(found, "Telemetry ping submitted for " + p + " crash"); + + let hoursOnly = new Date(DUMMY_DATE); + hoursOnly.setSeconds(0); + hoursOnly.setMinutes(0); + Assert.equal( + new Date(found.payload.crashTime).getTime(), + hoursOnly.getTime() + ); + + Assert.equal( + found.payload.metadata.ThisShouldNot, + undefined, + "Non-allowed fields should be filtered out" + ); + Assert.equal( + found.payload.metadata.RemoteType, + remoteType, + "RemoteType should be allowed for content crashes" + ); + Assert.equal( + found.payload.metadata.ipc_channel_error, + "ShutDownKill", + "ipc_channel_error should be allowed for content crashes" + ); + } + + // Check that we don't generate a crash ping for invalid/unexpected process + // types. + for (let p of UNEXPECTED_PROCESSES) { + let id = await m.createDummyDump(); + await m.addCrash(p, m.CRASH_TYPE_CRASH, id, DUMMY_DATE, { + StackTraces: stackTraces, + MinidumpSha256Hash: sha256Hash, + ThisShouldNot: "end-up-in-the-ping", + }); + await m._pingPromise; + + // Check that we didn't receive any new ping. 
+ let found = await ac.promiseFindPing("crash", [ + [["payload", "crashId"], id], + ]); + Assert.ok( + !found, + "No telemetry ping must be submitted for invalid process types" + ); + } +}); + +add_task(async function test_glean_crash_ping() { + let m = await getManager(); + + let id = await m.createDummyDump(); + + // Test bare minimum (with missing optional fields) + let submitted = false; + GleanPings.crash.testBeforeNextSubmit(_ => { + submitted = true; + const MINUTES = new Date(DUMMY_DATE); + MINUTES.setSeconds(0); + Assert.equal(Glean.crash.time.testGetValue().getTime(), MINUTES.getTime()); + Assert.equal( + Glean.crash.processType.testGetValue(), + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT] + ); + Assert.equal(Glean.crash.startup.testGetValue(), false); + }); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_CRASH, + id, + DUMMY_DATE, + {} + ); + + Assert.ok(submitted); + + // Test with additional fields + submitted = false; + GleanPings.crash.testBeforeNextSubmit(reason => { + submitted = true; + const MINUTES = new Date(DUMMY_DATE_2); + MINUTES.setSeconds(0); + Assert.equal(Glean.crash.uptime.testGetValue(), 600.1 * 1000); + Assert.equal(Glean.crash.time.testGetValue().getTime(), MINUTES.getTime()); + Assert.equal( + Glean.crash.processType.testGetValue(), + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT] + ); + Assert.equal(Glean.crash.startup.testGetValue(), true); + }); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_CRASH, + id, + DUMMY_DATE_2, + { + StackTraces: stackTraces, + MinidumpSha256Hash: sha256Hash, + UptimeTS: "600.1", + StartupCrash: "1", + } + ); + + Assert.ok(submitted); +}); + +add_task(async function test_generateSubmissionID() { + let m = await getManager(); + + const SUBMISSION_ID_REGEX = + /^(sub-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i; + let id = m.generateSubmissionID(); + 
Assert.ok(SUBMISSION_ID_REGEX.test(id)); +}); + +add_task(async function test_addSubmissionAttemptAndResult() { + let m = await getManager(); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_CRASH, + "main-crash", + DUMMY_DATE + ); + await m.addSubmissionAttempt("main-crash", "submission", DUMMY_DATE); + await m.addSubmissionResult( + "main-crash", + "submission", + DUMMY_DATE_2, + m.SUBMISSION_RESULT_OK + ); + + crashes = await m.getCrashes(); + Assert.equal(crashes.length, 1); + + let submissions = crashes[0].submissions; + Assert.ok(!!submissions); + + let submission = submissions.get("submission"); + Assert.ok(!!submission); + Assert.equal(submission.requestDate.getTime(), DUMMY_DATE.getTime()); + Assert.equal(submission.responseDate.getTime(), DUMMY_DATE_2.getTime()); + Assert.equal(submission.result, m.SUBMISSION_RESULT_OK); +}); + +add_task(async function test_addSubmissionAttemptEarlyCall() { + let m = await getManager(); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); + + let p = m + .ensureCrashIsPresent("main-crash") + .then(() => { + return m.addSubmissionAttempt("main-crash", "submission", DUMMY_DATE); + }) + .then(() => { + return m.addSubmissionResult( + "main-crash", + "submission", + DUMMY_DATE_2, + m.SUBMISSION_RESULT_OK + ); + }); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_CRASH, + "main-crash", + DUMMY_DATE + ); + + crashes = await m.getCrashes(); + Assert.equal(crashes.length, 1); + + await p; + let submissions = crashes[0].submissions; + Assert.ok(!!submissions); + + let submission = submissions.get("submission"); + Assert.ok(!!submission); + Assert.equal(submission.requestDate.getTime(), DUMMY_DATE.getTime()); + Assert.equal(submission.responseDate.getTime(), DUMMY_DATE_2.getTime()); + Assert.equal(submission.result, m.SUBMISSION_RESULT_OK); +}); + 
+add_task(async function test_setCrashClassifications() { + let m = await getManager(); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_CRASH, + "main-crash", + DUMMY_DATE + ); + await m.setCrashClassifications("main-crash", ["a"]); + let classifications = (await m.getCrashes())[0].classifications; + Assert.ok(classifications.includes("a")); +}); + +add_task(async function test_setRemoteCrashID() { + let m = await getManager(); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + m.CRASH_TYPE_CRASH, + "main-crash", + DUMMY_DATE + ); + await m.setRemoteCrashID("main-crash", "bp-1"); + Assert.equal((await m.getCrashes())[0].remoteID, "bp-1"); +}); + +add_task(async function test_addCrashWrong() { + let m = await getManager(); + + let crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); + + await m.addCrash( + m.processTypes[-1], // passing a wrong type to force 'undefined', it should + m.CRASH_TYPE_CRASH, // fail in the end and not record it + "wrong-content-crash", + DUMMY_DATE + ); + + crashes = await m.getCrashes(); + Assert.equal(crashes.length, 0); +}); + +add_task(async function test_telemetryHistogram() { + let Telemetry = Services.telemetry; + let h = Telemetry.getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT"); + h.clear(); + Telemetry.clearScalars(); + + let m = await getManager(); + let processTypes = []; + let crashTypes = []; + + // Gather all process types + for (let field in m.processTypes) { + if (m.isPingAllowed(m.processTypes[field])) { + processTypes.push(m.processTypes[field]); + } + } + + // Gather all crash types + for (let field in m) { + if (field.startsWith("CRASH_TYPE_")) { + crashTypes.push(m[field]); + } + } + + let keysCount = 0; + let keys = []; + + for (let processType of processTypes) { + for (let crashType of crashTypes) { + let key = processType + "-" + crashType; + + keys.push(key); + h.add(key, 1); + keysCount++; + } + } + + // Ensure that we 
have generated some crash, otherwise it could indicate + // something silently regressing + Assert.greater(keysCount, 2); + + // Check that we have the expected keys. + let snap = h.snapshot(); + Assert.equal( + Object.keys(snap).length, + keysCount, + "Some crash types have not been recorded, see the list in Histograms.json" + ); + Assert.deepEqual( + Object.keys(snap).sort(), + keys.sort(), + "Some crash types do not match" + ); +}); + +// Test that a ping with `CrashPingUUID` in the metadata (as set by the +// external crash reporter) is sent with Glean but not with Telemetry (because +// the crash reporter already sends it using Telemetry). +add_task(async function test_crash_reporter_ping_with_uuid() { + let m = await getManager(); + + let id = await m.createDummyDump(); + + // Realistically this case will only happen through + // `_handleEventFilePayload`, however the `_sendCrashPing` method will check + // for it regardless of where it is called. + let metadata = { CrashPingUUID: "bff6bde4-f96c-4859-8c56-6b3f40878c26" }; + + // Glean hooks + let glean_submitted = false; + GleanPings.crash.testBeforeNextSubmit(_ => { + glean_submitted = true; + }); + + await m.addCrash( + m.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + m.CRASH_TYPE_CRASH, + id, + DUMMY_DATE, + metadata + ); + + // Ping promise is only set if the Telemetry ping is submitted. + let telemetry_submitted = !!m._pingPromise; + + Assert.ok(glean_submitted); + Assert.ok(!telemetry_submitted); +}); diff --git a/toolkit/components/crashes/tests/xpcshell/test_crash_service.js b/toolkit/components/crashes/tests/xpcshell/test_crash_service.js new file mode 100644 index 0000000000..63ff3343d6 --- /dev/null +++ b/toolkit/components/crashes/tests/xpcshell/test_crash_service.js @@ -0,0 +1,215 @@ +/* Any copyright is dedicated to the Public Domain. 
+ * http://creativecommons.org/publicdomain/zero/1.0/ */ + +"use strict"; + +const { getCrashManagerNoCreate } = ChromeUtils.importESModule( + "resource://gre/modules/CrashManager.sys.mjs" +); +const { makeFakeAppDir } = ChromeUtils.importESModule( + "resource://testing-common/AppData.sys.mjs" +); + +add_task(async function test_instantiation() { + Assert.ok( + !getCrashManagerNoCreate(), + "CrashManager global instance not initially defined." + ); + + do_get_profile(); + await makeFakeAppDir(); + + // Fake profile creation. + Cc["@mozilla.org/crashservice;1"] + .getService(Ci.nsIObserver) + .observe(null, "profile-after-change", null); + + Assert.ok(getCrashManagerNoCreate(), "Profile creation makes it available."); + Assert.ok(Services.crashmanager, "CrashManager available via Services."); + Assert.strictEqual( + getCrashManagerNoCreate(), + Services.crashmanager, + "The objects are the same." + ); +}); + +var gMinidumpDir = do_get_tempdir(); + +// Ensure that the nsICrashReporter methods can find the dump +Services.appinfo.minidumpPath = gMinidumpDir; + +var gDumpFile; +var gExtraFile; + +// Sets up a fake crash dump and sets up the crashreporter so that it will be +// able to find it. 
+async function setup(crashId) { + const cwd = Services.dirsvc.get("CurWorkD", Ci.nsIFile).path; + const minidump = PathUtils.join(cwd, "crash.dmp"); + const extra = PathUtils.join(cwd, "crash.extra"); + + // Make a copy of the files because the .extra file will be modified + gDumpFile = PathUtils.join(gMinidumpDir.path, `${crashId}.dmp`); + await IOUtils.copy(minidump, gDumpFile); + gExtraFile = PathUtils.join(gMinidumpDir.path, `${crashId}.extra`); + await IOUtils.copy(extra, gExtraFile); +} + +// Removes the copied fake crash dump and its .extra file +async function teardown() { + await IOUtils.remove(gDumpFile); + await IOUtils.remove(gExtraFile); +} + +async function addCrash(id, type = Ci.nsICrashService.CRASH_TYPE_CRASH) { + let cs = Cc["@mozilla.org/crashservice;1"].getService(Ci.nsICrashService); + return cs.addCrash(Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT, type, id); +} + +async function getCrash(crashId) { + let crashes = await Services.crashmanager.getCrashes(); + return crashes.find(c => { + return c.id === crashId; + }); +} + +async function test_addCrashBase(crashId, allThreads) { + await setup(crashId); + + let crashType = Ci.nsICrashService.CRASH_TYPE_CRASH; + if (allThreads) { + crashType = Ci.nsICrashService.CRASH_TYPE_HANG; + } + await addCrash(crashId, crashType); + let crash = await getCrash(crashId); + Assert.ok(crash, "Crash " + crashId + " has been stored successfully."); + Assert.equal(crash.metadata.ProcessType, "content"); + Assert.equal( + crash.metadata.MinidumpSha256Hash, + "c8ad56a2096310f40c8a4b46c890625a740fdd72e409f412933011ff947c5a40" + ); + Assert.ok(crash.metadata.StackTraces, "The StackTraces field is present.\n"); + + try { + let stackTraces = crash.metadata.StackTraces; + Assert.equal(stackTraces.status, "OK"); + Assert.ok(stackTraces.crash_info, "The crash_info field is populated."); + Assert.ok( + stackTraces.modules && !!stackTraces.modules.length, + "The module list is populated."
+ ); + Assert.ok( + stackTraces.threads && !!stackTraces.threads.length, + "The thread list is populated." + ); + + if (allThreads) { + Assert.ok( + stackTraces.threads.length > 1, + "The stack trace contains more than one thread." + ); + } else { + Assert.ok( + stackTraces.threads.length == 1, + "The stack trace contains exactly one thread." + ); + } + + let frames = stackTraces.threads[0].frames; + Assert.ok(frames && !!frames.length, "The stack trace is present.\n"); + } catch (e) { + Assert.ok(false, "StackTraces does not contain valid JSON."); + } + + try { + let telemetryEnvironment = JSON.parse(crash.metadata.TelemetryEnvironment); + Assert.equal(telemetryEnvironment.EscapedField, "EscapedData\n\nfoo"); + } catch (e) { + Assert.ok( + false, + "TelemetryEnvironment contents were not properly escaped\n" + ); + } + + await teardown(); +} + +add_task(async function test_addCrash() { + await test_addCrashBase("56cd87bc-bb26-339b-3a8e-f00c0f11380e", false); +}); + +add_task(async function test_addCrashAllThreads() { + await test_addCrashBase("071843c4-da89-4447-af9f-965163e0b253", true); +}); + +add_task(async function test_addCrash_shutdownOnCrash() { + const crashId = "de7f63dd-7516-4525-a44b-6d2f2bd3934a"; + await setup(crashId); + + // Set the MOZ_CRASHREPORTER_SHUTDOWN environment variable + Services.env.set("MOZ_CRASHREPORTER_SHUTDOWN", "1"); + + await addCrash(crashId); + + let crash = await getCrash(crashId); + Assert.ok(crash, "Crash " + crashId + " has been stored successfully."); + Assert.ok( + crash.metadata.StackTraces === undefined, + "The StackTraces field is not present because the minidump " + + "analyzer did not start.\n" + ); + + Services.env.set("MOZ_CRASHREPORTER_SHUTDOWN", ""); // Unset the environment variable + await teardown(); +}); + +add_task(async function test_addCrash_quitting() { + const firstCrashId = "0e578a74-a887-48cb-b270-d4775d01e715"; + const secondCrashId = "208379e5-1979-430d-a066-f6e57a8130ce"; + + await 
setup(firstCrashId); + + let minidumpAnalyzerKilledPromise = new Promise((resolve, reject) => { + Services.obs.addObserver((subject, topic, data) => { + if (topic === "test-minidump-analyzer-killed") { + resolve(); + } + + reject(); + }, "test-minidump-analyzer-killed"); + }); + + let addCrashPromise = addCrash(firstCrashId); + + // Spin the event loop so that the minidump analyzer is launched + await new Promise(resolve => { + executeSoon(resolve); + }); + + // Pretend we're quitting + let cs = Cc["@mozilla.org/crashservice;1"].getService(Ci.nsICrashService); + let obs = cs.QueryInterface(Ci.nsIObserver); + obs.observe(null, "quit-application", null); + + // Wait for the minidump analyzer to be killed + await minidumpAnalyzerKilledPromise; + + // Now wait for the crash to be recorded + await addCrashPromise; + let crash = await getCrash(firstCrashId); + Assert.ok(crash, "Crash " + firstCrashId + " has been stored successfully."); + + // Cleanup the fake crash and generate a new one + await teardown(); + await setup(secondCrashId); + + await addCrash(secondCrashId); + crash = await getCrash(secondCrashId); + Assert.ok(crash, "Crash " + secondCrashId + " has been stored successfully."); + Assert.ok( + crash.metadata.StackTraces === undefined, + "The StackTraces field is not present because the minidump " + + "analyzer did not start.\n" + ); + await teardown(); +}); diff --git a/toolkit/components/crashes/tests/xpcshell/test_crash_store.js b/toolkit/components/crashes/tests/xpcshell/test_crash_store.js new file mode 100644 index 0000000000..515aec86a0 --- /dev/null +++ b/toolkit/components/crashes/tests/xpcshell/test_crash_store.js @@ -0,0 +1,686 @@ +/* Any copyright is dedicated to the Public Domain. + * http://creativecommons.org/publicdomain/zero/1.0/ */ + +/* + * This file tests the CrashStore type in CrashManager.sys.mjs.
+ */ + +"use strict"; + +const { CrashManager, CrashStore, dateToDays } = ChromeUtils.importESModule( + "resource://gre/modules/CrashManager.sys.mjs" +); + +const DUMMY_DATE = new Date(Date.now() - 10 * 24 * 60 * 60 * 1000); +DUMMY_DATE.setMilliseconds(0); + +const DUMMY_DATE_2 = new Date(Date.now() - 5 * 24 * 60 * 60 * 1000); +DUMMY_DATE_2.setMilliseconds(0); + +const { + CRASH_TYPE_CRASH, + CRASH_TYPE_HANG, + SUBMISSION_RESULT_OK, + SUBMISSION_RESULT_FAILED, +} = CrashManager.prototype; + +var STORE_DIR_COUNT = 0; + +function getStore() { + return (async function () { + let storeDir = do_get_tempdir().path; + storeDir = PathUtils.join(storeDir, "store-" + STORE_DIR_COUNT++); + + await IOUtils.makeDirectory(storeDir, { permissions: 0o700 }); + + let s = new CrashStore(storeDir); + await s.load(); + + return s; + })(); +} + +add_task(async function test_constructor() { + let s = new CrashStore(do_get_tempdir().path); + Assert.ok(s instanceof CrashStore); +}); + +add_task(async function test_add_crash() { + let s = await getStore(); + + Assert.equal(s.crashesCount, 0); + let d = new Date(Date.now() - 5000); + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "id1", + d + ) + ); + + Assert.equal(s.crashesCount, 1); + + let crashes = s.crashes; + Assert.equal(crashes.length, 1); + let c = crashes[0]; + + Assert.equal(c.id, "id1", "ID set properly."); + Assert.equal(c.crashDate.getTime(), d.getTime(), "Date set."); + + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "id2", + new Date() + ) + ); + Assert.equal(s.crashesCount, 2); +}); + +add_task(async function test_reset() { + let s = await getStore(); + + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "id1", + DUMMY_DATE + ) + ); + Assert.equal(s.crashes.length, 1); + 
s.reset(); + Assert.equal(s.crashes.length, 0); +}); + +add_task(async function test_save_load() { + let s = await getStore(); + + await s.save(); + + let d1 = new Date(); + let d2 = new Date(d1.getTime() - 10000); + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "id1", + d1 + ) + ); + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "id2", + d2 + ) + ); + Assert.ok(s.addSubmissionAttempt("id1", "sub1", d1)); + Assert.ok(s.addSubmissionResult("id1", "sub1", d2, SUBMISSION_RESULT_OK)); + Assert.ok(s.setRemoteCrashID("id1", "bp-1")); + + await s.save(); + + await s.load(); + Assert.ok(!s.corruptDate); + let crashes = s.crashes; + + Assert.equal(crashes.length, 2); + let c = s.getCrash("id1"); + Assert.equal(c.crashDate.getTime(), d1.getTime()); + Assert.equal(c.remoteID, "bp-1"); + + Assert.ok(!!c.submissions); + let submission = c.submissions.get("sub1"); + Assert.ok(!!submission); + Assert.equal(submission.requestDate.getTime(), d1.getTime()); + Assert.equal(submission.responseDate.getTime(), d2.getTime()); + Assert.equal(submission.result, SUBMISSION_RESULT_OK); +}); + +add_task(async function test_corrupt_json() { + let s = await getStore(); + + let buffer = new TextEncoder().encode("{bad: json-file"); + await IOUtils.write(s._storePath, buffer, { compress: true }); + + await s.load(); + Assert.ok(s.corruptDate, "Corrupt date is defined."); + + let date = s.corruptDate; + await s.save(); + s._data = null; + await s.load(); + Assert.ok(s.corruptDate); + Assert.equal(date.getTime(), s.corruptDate.getTime()); +}); + +async function test_add_process_crash(processType) { + let s = await getStore(); + + const ptName = CrashManager.prototype.processTypes[processType]; + + Assert.ok(s.addCrash(ptName, CRASH_TYPE_CRASH, "id1", new Date())); + Assert.equal(s.crashesCount, 1); + + let c = s.crashes[0]; + 
Assert.ok(c.crashDate); + Assert.equal(c.type, ptName + "-" + CRASH_TYPE_CRASH); + Assert.ok(c.isOfType(ptName, CRASH_TYPE_CRASH)); + + Assert.ok(s.addCrash(ptName, CRASH_TYPE_CRASH, "id2", new Date())); + Assert.equal(s.crashesCount, 2); + + // Duplicate. + Assert.ok(s.addCrash(ptName, CRASH_TYPE_CRASH, "id1", new Date())); + Assert.equal(s.crashesCount, 2); + + Assert.ok( + s.addCrash(ptName, CRASH_TYPE_CRASH, "id3", new Date(), { + OOMAllocationSize: 1048576, + }) + ); + Assert.equal(s.crashesCount, 3); + Assert.deepEqual(s.crashes[2].metadata, { OOMAllocationSize: 1048576 }); + + let crashes = s.getCrashesOfType(ptName, CRASH_TYPE_CRASH); + Assert.equal(crashes.length, 3); +} + +async function test_add_process_hang(processType) { + let s = await getStore(); + + const ptName = CrashManager.prototype.processTypes[processType]; + + Assert.ok(s.addCrash(ptName, CRASH_TYPE_HANG, "id1", new Date())); + Assert.equal(s.crashesCount, 1); + + let c = s.crashes[0]; + Assert.ok(c.crashDate); + Assert.equal(c.type, ptName + "-" + CRASH_TYPE_HANG); + Assert.ok(c.isOfType(ptName, CRASH_TYPE_HANG)); + + Assert.ok(s.addCrash(ptName, CRASH_TYPE_HANG, "id2", new Date())); + Assert.equal(s.crashesCount, 2); + + Assert.ok(s.addCrash(ptName, CRASH_TYPE_HANG, "id1", new Date())); + Assert.equal(s.crashesCount, 2); + + let crashes = s.getCrashesOfType(ptName, CRASH_TYPE_HANG); + Assert.equal(crashes.length, 2); +} + +function iterate_over_processTypes(fn1, fn2) { + for (const pt in CrashManager.prototype.processTypes) { + const ptName = CrashManager.prototype.processTypes[pt]; + if (pt !== Ci.nsIXULRuntime.PROCESS_TYPE_IPDLUNITTEST) { + fn1(pt, ptName); + if ( + pt === Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT || + pt === Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ) { + fn2(pt, ptName); + } + } + } +} + +iterate_over_processTypes( + (pt, _) => { + add_task(test_add_process_crash.bind(null, pt)); + }, + (pt, _) => { + add_task(test_add_process_hang.bind(null, pt)); + } +); + +add_task(async 
function test_add_mixed_types() { + let s = await getStore(); + let allAdd = true; + + iterate_over_processTypes( + (_, ptName) => { + allAdd = + allAdd && + s.addCrash(ptName, CRASH_TYPE_CRASH, ptName + "crash", new Date()); + }, + (_, ptName) => { + allAdd = + allAdd && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_HANG, + "mhang", + new Date() + ); + } + ); + + const expectedCrashes = Object.keys( + CrashManager.prototype.processTypes + ).length; + + Assert.ok(allAdd); + + Assert.equal(s.crashesCount, expectedCrashes); + + await s.save(); + + s._data.crashes.clear(); + Assert.equal(s.crashesCount, 0); + + await s.load(); + + Assert.equal(s.crashesCount, expectedCrashes); + + iterate_over_processTypes( + (_, ptName) => { + const crashes = s.getCrashesOfType(ptName, CRASH_TYPE_CRASH); + Assert.equal(crashes.length, 1); + }, + (_, ptName) => { + const hangs = s.getCrashesOfType(ptName, CRASH_TYPE_HANG); + Assert.equal(hangs.length, 1); + } + ); +}); + +// Crashes added beyond the high water mark behave properly. 
+add_task(async function test_high_water() { + let s = await getStore(); + + let d1 = new Date(2014, 0, 1, 0, 0, 0); + let d2 = new Date(2014, 0, 2, 0, 0, 0); + + let i = 0; + for (; i < s.HIGH_WATER_DAILY_THRESHOLD; i++) { + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "mc1" + i, + d1 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "mc2" + i, + d2 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_HANG, + "mh1" + i, + d1 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_HANG, + "mh2" + i, + d2 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_CRASH, + "cc1" + i, + d1 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_CRASH, + "cc2" + i, + d2 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_HANG, + "ch1" + i, + d1 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_HANG, + "ch2" + i, + d2 + ) + ); + } + + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "mc1" + i, + d1 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "mc2" + i, + d2 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_HANG, + "mh1" + i, + d1 + ) && + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_HANG, + "mh2" + i, + d2 + ) + ); + + Assert.ok( + !s.addCrash( 
+ CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_CRASH, + "cc1" + i, + d1 + ) + ); + Assert.ok( + !s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_CRASH, + "cc2" + i, + d2 + ) + ); + Assert.ok( + !s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_HANG, + "ch1" + i, + d1 + ) + ); + Assert.ok( + !s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ], + CRASH_TYPE_HANG, + "ch2" + i, + d2 + ) + ); + + // We preserve main process crashes and hangs. Content crashes and + // hangs beyond should be discarded. + Assert.equal(s.crashesCount, 8 * s.HIGH_WATER_DAILY_THRESHOLD + 4); + + let crashes = s.getCrashesOfType( + CrashManager.prototype.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + CRASH_TYPE_CRASH + ); + Assert.equal(crashes.length, 2 * s.HIGH_WATER_DAILY_THRESHOLD + 2); + crashes = s.getCrashesOfType( + CrashManager.prototype.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], + CRASH_TYPE_HANG + ); + Assert.equal(crashes.length, 2 * s.HIGH_WATER_DAILY_THRESHOLD + 2); + + crashes = s.getCrashesOfType( + CrashManager.prototype.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + CRASH_TYPE_CRASH + ); + Assert.equal(crashes.length, 2 * s.HIGH_WATER_DAILY_THRESHOLD); + crashes = s.getCrashesOfType( + CrashManager.prototype.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT], + CRASH_TYPE_HANG + ); + Assert.equal(crashes.length, 2 * s.HIGH_WATER_DAILY_THRESHOLD); + + // But raw counts should be preserved. 
+ let day1 = dateToDays(d1); + let day2 = dateToDays(d2); + Assert.ok(s._countsByDay.has(day1)); + Assert.ok(s._countsByDay.has(day2)); + + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ] + + "-" + + CRASH_TYPE_CRASH + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ] + + "-" + + CRASH_TYPE_HANG + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); + + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ] + + "-" + + CRASH_TYPE_CRASH + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ] + + "-" + + CRASH_TYPE_HANG + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); + + await s.save(); + await s.load(); + + Assert.ok(s._countsByDay.has(day1)); + Assert.ok(s._countsByDay.has(day2)); + + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ] + + "-" + + CRASH_TYPE_CRASH + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ] + + "-" + + CRASH_TYPE_HANG + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); + + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ] + + "-" + + CRASH_TYPE_CRASH + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); + Assert.equal( + s._countsByDay + .get(day1) + .get( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT + ] + + "-" + + CRASH_TYPE_HANG + ), + s.HIGH_WATER_DAILY_THRESHOLD + 1 + ); +}); + +add_task(async function test_addSubmission() 
{ + let s = await getStore(); + + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "crash1", + DUMMY_DATE + ) + ); + + Assert.ok(s.addSubmissionAttempt("crash1", "sub1", DUMMY_DATE)); + + let crash = s.getCrash("crash1"); + let submission = crash.submissions.get("sub1"); + Assert.ok(!!submission); + Assert.equal(submission.requestDate.getTime(), DUMMY_DATE.getTime()); + Assert.equal(submission.responseDate, null); + Assert.equal(submission.result, null); + + Assert.ok( + s.addSubmissionResult( + "crash1", + "sub1", + DUMMY_DATE_2, + SUBMISSION_RESULT_FAILED + ) + ); + + crash = s.getCrash("crash1"); + Assert.equal(crash.submissions.size, 1); + submission = crash.submissions.get("sub1"); + Assert.ok(!!submission); + Assert.equal(submission.requestDate.getTime(), DUMMY_DATE.getTime()); + Assert.equal(submission.responseDate.getTime(), DUMMY_DATE_2.getTime()); + Assert.equal(submission.result, SUBMISSION_RESULT_FAILED); + + Assert.ok(s.addSubmissionAttempt("crash1", "sub2", DUMMY_DATE)); + Assert.ok( + s.addSubmissionResult("crash1", "sub2", DUMMY_DATE_2, SUBMISSION_RESULT_OK) + ); + + Assert.equal(crash.submissions.size, 2); + submission = crash.submissions.get("sub2"); + Assert.ok(!!submission); + Assert.equal(submission.result, SUBMISSION_RESULT_OK); +}); + +add_task(async function test_setCrashClassification() { + let s = await getStore(); + + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "crash1", + new Date() + ) + ); + let classifications = s.crashes[0].classifications; + Assert.ok(!!classifications); + Assert.equal(classifications.length, 0); + + Assert.ok(s.setCrashClassifications("crash1", ["foo", "bar"])); + classifications = s.crashes[0].classifications; + Assert.equal(classifications.length, 2); + Assert.ok(classifications.includes("foo")); + Assert.ok(classifications.includes("bar")); +}); 
+ +add_task(async function test_setRemoteCrashID() { + let s = await getStore(); + + Assert.ok( + s.addCrash( + CrashManager.prototype.processTypes[ + Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT + ], + CRASH_TYPE_CRASH, + "crash1", + new Date() + ) + ); + Assert.equal(s.crashes[0].remoteID, null); + Assert.ok(s.setRemoteCrashID("crash1", "bp-1")); + Assert.equal(s.crashes[0].remoteID, "bp-1"); +}); diff --git a/toolkit/components/crashes/tests/xpcshell/xpcshell.toml b/toolkit/components/crashes/tests/xpcshell/xpcshell.toml new file mode 100644 index 0000000000..1898168705 --- /dev/null +++ b/toolkit/components/crashes/tests/xpcshell/xpcshell.toml @@ -0,0 +1,14 @@ +[DEFAULT] +head = "" +skip-if = ["os == 'android'"] +support-files = [ + "crash.dmp", + "crash.extra", +] + +["test_crash_manager.js"] + +["test_crash_service.js"] +run-sequentially = "very high failure rate in parallel" + +["test_crash_store.js"] |