diff options
Diffstat (limited to '')
-rw-r--r-- | browser/components/doh/DoHController.jsm | 648 |
1 files changed, 648 insertions, 0 deletions
diff --git a/browser/components/doh/DoHController.jsm b/browser/components/doh/DoHController.jsm new file mode 100644 index 0000000000..dd59deb64d --- /dev/null +++ b/browser/components/doh/DoHController.jsm @@ -0,0 +1,648 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +/* + * This module runs the automated heuristics to enable/disable DoH on different + * networks. Heuristics are run at startup and upon network changes. + * Heuristics are disabled if the user sets their DoH provider or mode manually. + */ +var EXPORTED_SYMBOLS = ["DoHController"]; + +const { XPCOMUtils } = ChromeUtils.importESModule( + "resource://gre/modules/XPCOMUtils.sys.mjs" +); + +const lazy = {}; + +ChromeUtils.defineESModuleGetters(lazy, { + AsyncShutdown: "resource://gre/modules/AsyncShutdown.sys.mjs", + ClientID: "resource://gre/modules/ClientID.sys.mjs", + Preferences: "resource://gre/modules/Preferences.sys.mjs", + clearTimeout: "resource://gre/modules/Timer.sys.mjs", + setTimeout: "resource://gre/modules/Timer.sys.mjs", +}); + +XPCOMUtils.defineLazyModuleGetters(lazy, { + DoHConfigController: "resource:///modules/DoHConfig.jsm", + Heuristics: "resource:///modules/DoHHeuristics.jsm", +}); + +// When this is set we suppress automatic TRR selection beyond dry-run as well +// as sending observer notifications during heuristics throttling. +XPCOMUtils.defineLazyPreferenceGetter( + lazy, + "kIsInAutomation", + "doh-rollout._testing", + false +); + +// We wait until the network has been stably up for this many milliseconds +// before triggering a heuristics run. 
// A negative value skips debouncing entirely: onConnectionChanged() then
// handles the change immediately (used by tests).
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "kNetworkDebounceTimeout",
  "doh-rollout.network-debounce-timeout",
  1000
);

// If consecutive heuristics runs are attempted within this period after a first,
// we suppress them for this duration, at the end of which point we decide whether
// to do one coalesced run or to extend the timer if the rate limit was exceeded.
// Note that the very first run is allowed, after which we start the timer.
// This throttling is necessary due to evidence of clients that experience
// network volatility leading to thousands of runs per hour. See bug 1626083.
// The value is in milliseconds; a negative value disables throttling
// altogether (used by tests).
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "kHeuristicsThrottleTimeout",
  "doh-rollout.heuristics-throttle-timeout",
  15000
);

// After the throttle timeout described above, if there are more than this many
// heuristics attempts during the timeout, we restart the timer without running
// heuristics. Thus, heuristics are suppressed completely as long as the rate
// exceeds this limit.
XPCOMUtils.defineLazyPreferenceGetter(
  lazy,
  "kHeuristicsRateLimit",
  "doh-rollout.heuristics-throttle-rate-limit",
  2
);

// Captive portal service: its `state` is consulted before and after a
// heuristics run to avoid acting while behind a locked portal.
XPCOMUtils.defineLazyServiceGetter(
  lazy,
  "gCaptivePortalService",
  "@mozilla.org/network/captive-portal-service;1",
  "nsICaptivePortalService"
);

// Network link service: provides `isLinkUp` and the `networkID` that is
// hashed for telemetry.
XPCOMUtils.defineLazyServiceGetter(
  lazy,
  "gNetworkLinkService",
  "@mozilla.org/network/network-link-service;1",
  "nsINetworkLinkService"
);

// Stores whether we've done first-run.
const FIRST_RUN_PREF = "doh-rollout.doneFirstRun";

// Set when we detect that the user set their DoH provider or mode manually.
// If set, we don't run heuristics.
const DISABLED_PREF = "doh-rollout.disable-heuristics";

// Set when we detect either a non-DoH enterprise policy, or a DoH policy that
// tells us to disable it. This pref's effect is to suppress the opt-out CFR.
const SKIP_HEURISTICS_PREF = "doh-rollout.skipHeuristicsCheck";

// Whether to clear doh-rollout.mode on shutdown. When false, the mode value
// that exists at shutdown will be used at startup until heuristics re-run.
const CLEAR_ON_SHUTDOWN_PREF = "doh-rollout.clearModeOnShutdown";

// Set to true whenever this module (or the UI) has enabled DoH itself; reset
// when DoH is disabled by policy, manually, or through the UI.
const BREADCRUMB_PREF = "doh-rollout.self-enabled";

// Necko TRR prefs to watch for user-set values.
const NETWORK_TRR_MODE_PREF = "network.trr.mode";
const NETWORK_TRR_URI_PREF = "network.trr.uri";

const ROLLOUT_MODE_PREF = "doh-rollout.mode";
const ROLLOUT_URI_PREF = "doh-rollout.uri";

const TRR_SELECT_DRY_RUN_RESULT_PREF =
  "doh-rollout.trr-selection.dry-run-result";

const HEURISTICS_TELEMETRY_CATEGORY = "doh";
const TRRSELECT_TELEMETRY_CATEGORY = "security.doh.trrPerformance";

const kLinkStatusChangedTopic = "network:link-status-changed";
const kConnectivityTopic = "network:captive-portal-connectivity-changed";
const kPrefChangedTopic = "nsPref:changed";

// Helper function to hash the network ID concatenated with telemetry client ID.
// This prevents us from being able to tell if 2 clients are on the same network.
//
// Resolves to "" when no network ID is available, otherwise to the SHA-256
// digest as returned by nsICryptoHash.finish(true).
//
// The client ID is awaited before it is concatenated: if getClientID() hands
// back a promise, concatenating it directly would hash the literal text
// "[object Promise]" and every client on a network would share one hash.
// Awaiting is harmless if the call returns a plain string.
async function getHashedNetworkID() {
  let currentNetworkID = lazy.gNetworkLinkService.networkID;
  if (!currentNetworkID) {
    return "";
  }

  let clientID = await lazy.ClientID.getClientID();

  let hasher = Cc["@mozilla.org/security/hash;1"].createInstance(
    Ci.nsICryptoHash
  );

  hasher.init(Ci.nsICryptoHash.SHA256);
  // Concat the client ID with the network ID before hashing.
  let clientNetworkID = clientID + currentNetworkID;
  hasher.update(
    clientNetworkID.split("").map(c => c.charCodeAt(0)),
    clientNetworkID.length
  );
  return hasher.finish(true);
}

const DoHController = {
  // True while the network observers are registered and heuristics may run.
  _heuristicsAreEnabled: false,

  // Enables telemetry recording, waits for the DoH config, registers the
  // config/pref observers, then either enables heuristics or rolls back
  // (when the config is disabled but a previous first run happened).
  // Finally registers the shutdown blocker and marks first-run as done.
  async init() {
    Services.telemetry.setEventRecordingEnabled(
      HEURISTICS_TELEMETRY_CATEGORY,
      true
    );
    Services.telemetry.setEventRecordingEnabled(
      TRRSELECT_TELEMETRY_CATEGORY,
      true
    );

    await lazy.DoHConfigController.initComplete;

    Services.obs.addObserver(this, lazy.DoHConfigController.kConfigUpdateTopic);
    lazy.Preferences.observe(NETWORK_TRR_MODE_PREF, this);
    lazy.Preferences.observe(NETWORK_TRR_URI_PREF, this);

    if (lazy.DoHConfigController.currentConfig.enabled) {
      await this.maybeEnableHeuristics();
    } else if (lazy.Preferences.get(FIRST_RUN_PREF, false)) {
      await this.rollback();
    }

    // Kept on `this` so that _uninit() can remove the very same function.
    this._asyncShutdownBlocker = async () => {
      await this.disableHeuristics("shutdown");
    };

    lazy.AsyncShutdown.profileBeforeChange.addBlocker(
      "DoHController: clear state and remove observers",
      this._asyncShutdownBlocker
    );

    lazy.Preferences.set(FIRST_RUN_PREF, true);
  },

  // Also used by tests to reset DoHController state (prefs are not cleared
  // here - tests do that when needed between _uninit and init).
  async _uninit() {
    Services.obs.removeObserver(
      this,
      lazy.DoHConfigController.kConfigUpdateTopic
    );
    lazy.Preferences.ignore(NETWORK_TRR_MODE_PREF, this);
    lazy.Preferences.ignore(NETWORK_TRR_URI_PREF, this);
    lazy.AsyncShutdown.profileBeforeChange.removeBlocker(
      this._asyncShutdownBlocker
    );
    await this.disableHeuristics("shutdown");
  },

  // Called to reset state when a new config is available.
  // Resets are serialized through resetPromise so that overlapping config
  // updates never interleave their _uninit()/init() calls. The returned
  // promise settles with the outcome of *this* reset.
  resetPromise: Promise.resolve(),
  async reset() {
    this.resetPromise = this.resetPromise
      // A failed earlier reset was already reported to whoever requested it.
      // Recover here so that it doesn't leave the chain rejected forever,
      // which would silently skip every later reset.
      .catch(() => {})
      .then(async () => {
        await this._uninit();
        await this.init();
        Services.obs.notifyObservers(null, "doh:controller-reloaded");
      });

    return this.resetPromise;
  },

  // The "maybe" is because there are two cases when we don't enable heuristics:
  // 1. If we detect that TRR mode or URI have user values, or we previously
  //    detected this (i.e. DISABLED_PREF is true)
  // 2. If there are any non-DoH enterprise policies active
  async maybeEnableHeuristics() {
    if (lazy.Preferences.get(DISABLED_PREF)) {
      return;
    }

    let policyResult = await lazy.Heuristics.checkEnterprisePolicy();

    if (["policy_without_doh", "disable_doh"].includes(policyResult)) {
      await this.setState("policyDisabled");
      lazy.Preferences.set(SKIP_HEURISTICS_PREF, true);
      return;
    }

    lazy.Preferences.reset(SKIP_HEURISTICS_PREF);

    if (
      lazy.Preferences.isSet(NETWORK_TRR_MODE_PREF) ||
      lazy.Preferences.isSet(NETWORK_TRR_URI_PREF)
    ) {
      await this.setState("manuallyDisabled");
      lazy.Preferences.set(DISABLED_PREF, true);
      return;
    }

    await this.runTRRSelection();
    // If we enter this branch it means that no automatic selection was possible.
    // In this case, we try to set a fallback (as defined by DoHConfigController).
    if (!lazy.Preferences.isSet(ROLLOUT_URI_PREF)) {
      lazy.Preferences.set(
        ROLLOUT_URI_PREF,
        lazy.DoHConfigController.currentConfig.fallbackProviderURI
      );
    }
    this.runHeuristicsThrottled("startup");
    Services.obs.addObserver(this, kLinkStatusChangedTopic);
    Services.obs.addObserver(this, kConnectivityTopic);

    this._heuristicsAreEnabled = true;
  },

  // Throttle bookkeeping: the pending timer (null when not throttling), the
  // number of runs suppressed since the timer started, and whether the current
  // throttle period is an extension of a previous one.
  _throttleTimer: null,
  _runsWhileThrottling: 0,
  _wasThrottleExtended: false,

  // Returns true if the caller must suppress its heuristics run because a
  // throttle period is in progress. Otherwise starts a throttle period and
  // returns false, meaning the caller may run now.
  _throttleHeuristics() {
    if (lazy.kHeuristicsThrottleTimeout < 0) {
      // Skip throttling in tests that set timeout to a negative value.
      return false;
    }

    if (this._throttleTimer) {
      // Already throttling - just count the suppressed attempt.
      this._runsWhileThrottling++;
      return true;
    }

    this._runsWhileThrottling = 0;

    this._throttleTimer = lazy.setTimeout(
      this._handleThrottleTimeout.bind(this),
      lazy.kHeuristicsThrottleTimeout
    );

    return false;
  },

  // Fired when a throttle period ends. Either extends the throttle (rate limit
  // exceeded) or performs one coalesced run for the suppressed attempts.
  _handleThrottleTimeout() {
    this._throttleTimer = null;
    if (this._runsWhileThrottling > lazy.kHeuristicsRateLimit) {
      // During the throttle period, we saw that the rate limit was exceeded.
      // We extend the throttle period, and don't bother running heuristics yet.
      this._wasThrottleExtended = true;
      // Restart the throttle timer.
      this._throttleHeuristics();
      if (lazy.kIsInAutomation) {
        Services.obs.notifyObservers(null, "doh:heuristics-throttle-extend");
      }
      return;
    }

    // If this was an extended throttle and there were no runs during the
    // extended period, we still want to run heuristics, since the extended
    // throttle implies we had a non-zero number of attempts before extension.
    if (this._runsWhileThrottling > 0 || this._wasThrottleExtended) {
      this.runHeuristicsThrottled("throttled");
    }

    this._wasThrottleExtended = false;

    if (lazy.kIsInAutomation) {
      Services.obs.notifyObservers(null, "doh:heuristics-throttle-done");
    }
  },

  // Runs heuristics unless a throttle period is in progress.
  // `evaluateReason` is forwarded to runHeuristics() for telemetry.
  runHeuristicsThrottled(evaluateReason) {
    // _throttleHeuristics returns true if we've already witnessed a run and the
    // timeout period hasn't lapsed yet. If it does so, we suppress this run.
    if (this._throttleHeuristics()) {
      return;
    }

    // _throttleHeuristics returned false - we're good to run heuristics.
    // At this point the timer has been started and subsequent calls will be
    // suppressed if it hasn't fired yet.
    this.runHeuristics(evaluateReason);
  },

  // Runs the heuristics, applies the resulting enabled/disabled state and
  // records an "evaluate_v2" telemetry event. Results are discarded if the
  // network changed while the heuristics were running.
  async runHeuristics(evaluateReason) {
    let start = Date.now();

    let results = await lazy.Heuristics.run();

    // NOTE: This might not yet be available after a network change. We mainly
    // care about the startup case though - we want to look at whether the
    // heuristics result is consistent for networkIDs often seen at startup.
    // TODO: Use this data to implement cached results to use early at startup.
    // It is resolved before the staleness check below so that no further
    // await separates that check from acting on the results.
    let networkID = await getHashedNetworkID();

    if (
      !lazy.gNetworkLinkService.isLinkUp ||
      this._lastDebounceTimestamp > start ||
      lazy.gCaptivePortalService.state ==
        lazy.gCaptivePortalService.LOCKED_PORTAL
    ) {
      // If the network is currently down or there was a debounce triggered
      // while we were running heuristics, it means the network fluctuated
      // during this heuristics run. We simply discard the results in this case.
      // Same thing if we have detected a locked captive portal while this run
      // was ongoing.
      return;
    }

    let decision = Object.values(results).includes(lazy.Heuristics.DISABLE_DOH)
      ? lazy.Heuristics.DISABLE_DOH
      : lazy.Heuristics.ENABLE_DOH;

    let getCaptiveStateString = () => {
      switch (lazy.gCaptivePortalService.state) {
        case lazy.gCaptivePortalService.NOT_CAPTIVE:
          return "not_captive";
        case lazy.gCaptivePortalService.UNLOCKED_PORTAL:
          return "unlocked";
        case lazy.gCaptivePortalService.LOCKED_PORTAL:
          return "locked";
        default:
          return "unknown";
      }
    };

    let resultsForTelemetry = {
      evaluateReason,
      steeredProvider: "",
      captiveState: getCaptiveStateString(),
      networkID,
    };

    if (results.steeredProvider) {
      Services.dns.setDetectedTrrURI(results.steeredProvider.uri);
      resultsForTelemetry.steeredProvider = results.steeredProvider.id;
    }

    if (decision === lazy.Heuristics.DISABLE_DOH) {
      await this.setState("disabled");
    } else {
      await this.setState("enabled");
    }

    // For telemetry, we group the heuristics results into four categories.
    // Only heuristics with a DISABLE_DOH result are included.
    // Each category is finally included in the event as a comma-separated list.
    let canaries = [];
    let filtering = [];
    let enterprise = [];
    let platform = [];

    for (let [heuristicName, result] of Object.entries(results)) {
      if (result !== lazy.Heuristics.DISABLE_DOH) {
        continue;
      }

      if (["canary", "zscalerCanary"].includes(heuristicName)) {
        canaries.push(heuristicName);
      } else if (
        ["browserParent", "google", "youtube"].includes(heuristicName)
      ) {
        filtering.push(heuristicName);
      } else if (
        ["policy", "modifiedRoots", "thirdPartyRoots"].includes(heuristicName)
      ) {
        enterprise.push(heuristicName);
      } else if (["vpn", "proxy", "nrpt"].includes(heuristicName)) {
        platform.push(heuristicName);
      }
    }

    resultsForTelemetry.canaries = canaries.join(",");
    resultsForTelemetry.filtering = filtering.join(",");
    resultsForTelemetry.enterprise = enterprise.join(",");
    resultsForTelemetry.platform = platform.join(",");

    Services.telemetry.recordEvent(
      HEURISTICS_TELEMETRY_CATEGORY,
      "evaluate_v2",
      "heuristics",
      decision,
      resultsForTelemetry
    );
  },

  // Applies the pref changes that correspond to `state` and records a "state"
  // telemetry event. Unknown states change no prefs but are still recorded.
  async setState(state) {
    switch (state) {
      case "disabled":
        lazy.Preferences.set(ROLLOUT_MODE_PREF, 0);
        break;
      case "UIOk":
        lazy.Preferences.set(BREADCRUMB_PREF, true);
        break;
      case "enabled":
        lazy.Preferences.set(ROLLOUT_MODE_PREF, 2);
        lazy.Preferences.set(BREADCRUMB_PREF, true);
        break;
      case "policyDisabled":
      case "manuallyDisabled":
      case "UIDisabled":
        lazy.Preferences.reset(BREADCRUMB_PREF);
      // Fall through.
      case "rollback":
        lazy.Preferences.reset(ROLLOUT_MODE_PREF);
        break;
      case "shutdown":
        if (lazy.Preferences.get(CLEAR_ON_SHUTDOWN_PREF, true)) {
          lazy.Preferences.reset(ROLLOUT_MODE_PREF);
        }
        break;
    }

    Services.telemetry.recordEvent(
      HEURISTICS_TELEMETRY_CATEGORY,
      "state",
      state,
      "null"
    );
  },

  // Applies `state` and, if heuristics are currently enabled, removes the
  // network observers and drops all pending debounce/throttle state.
  async disableHeuristics(state) {
    await this.setState(state);

    if (!this._heuristicsAreEnabled) {
      return;
    }

    Services.obs.removeObserver(this, kLinkStatusChangedTopic);
    Services.obs.removeObserver(this, kConnectivityTopic);
    this._cancelDebounce();
    if (this._throttleTimer) {
      lazy.clearTimeout(this._throttleTimer);
      this._throttleTimer = null;
    }
    // Forget throttle history too. Otherwise a stale _wasThrottleExtended
    // would cause a spurious "throttled" run after heuristics are re-enabled.
    this._runsWhileThrottling = 0;
    this._wasThrottleExtended = false;
    this._heuristicsAreEnabled = false;
  },

  // Disables heuristics and resets the rollout mode pref.
  async rollback() {
    await this.disableHeuristics("rollback");
  },

  // Picks the rollout TRR URI from the dry-run result, subject to the
  // trrSelection.enabled and trrSelection.commitResult config values.
  async runTRRSelection() {
    // If persisting the selection is disabled, clear the existing
    // selection.
    if (!lazy.DoHConfigController.currentConfig.trrSelection.commitResult) {
      lazy.Preferences.reset(ROLLOUT_URI_PREF);
    }

    if (!lazy.DoHConfigController.currentConfig.trrSelection.enabled) {
      return;
    }

    // Nothing to do if the committed URI already matches the dry-run result.
    if (
      lazy.Preferences.isSet(ROLLOUT_URI_PREF) &&
      lazy.Preferences.get(ROLLOUT_URI_PREF) ==
        lazy.Preferences.get(TRR_SELECT_DRY_RUN_RESULT_PREF)
    ) {
      return;
    }

    await this.runTRRSelectionDryRun();

    // If persisting the selection is disabled, don't commit the value.
    if (!lazy.DoHConfigController.currentConfig.trrSelection.commitResult) {
      return;
    }

    lazy.Preferences.set(
      ROLLOUT_URI_PREF,
      lazy.Preferences.get(TRR_SELECT_DRY_RUN_RESULT_PREF)
    );
  },

  // Ensures TRR_SELECT_DRY_RUN_RESULT_PREF holds a valid provider URI,
  // running the TRR race if the stored value is missing or no longer listed.
  async runTRRSelectionDryRun() {
    if (lazy.Preferences.isSet(TRR_SELECT_DRY_RUN_RESULT_PREF)) {
      // Check whether the existing dry-run-result is in the default
      // list of TRRs. If it is, all good. Else, run the dry run again.
      let dryRunResult = lazy.Preferences.get(TRR_SELECT_DRY_RUN_RESULT_PREF);
      let dryRunResultIsValid = lazy.DoHConfigController.currentConfig.providerList.some(
        trr => trr.uri == dryRunResult
      );
      if (dryRunResultIsValid) {
        return;
      }
    }

    let setDryRunResultAndRecordTelemetry = trrUri => {
      lazy.Preferences.set(TRR_SELECT_DRY_RUN_RESULT_PREF, trrUri);
      Services.telemetry.recordEvent(
        TRRSELECT_TELEMETRY_CATEGORY,
        "trrselect",
        "dryrunresult",
        trrUri.substring(0, 40) // Telemetry payload max length
      );
    };

    if (lazy.kIsInAutomation) {
      // For mochitests, just record telemetry with a dummy result.
      // TRRPerformance.jsm is tested in xpcshell.
      setDryRunResultAndRecordTelemetry("https://example.com/dns-query");
      return;
    }

    // Importing the module here saves us from having to do it at startup, and
    // ensures tests have time to set prefs before the module initializes.
    let { TRRRacer } = ChromeUtils.import(
      "resource:///modules/TRRPerformance.jsm"
    );
    await new Promise(resolve => {
      let trrList = lazy.DoHConfigController.currentConfig.trrSelection.providerList.map(
        trr => trr.uri
      );
      let racer = new TRRRacer(() => {
        setDryRunResultAndRecordTelemetry(racer.getFastestTRR(true));
        resolve();
      }, trrList);
      racer.run();
    });
  },

  // Observer entry point: dispatches observer-service and pref notifications
  // to the matching handler.
  observe(subject, topic, data) {
    switch (topic) {
      case kLinkStatusChangedTopic:
        this.onConnectionChanged();
        break;
      case kConnectivityTopic:
        this.onConnectivityAvailable();
        break;
      case kPrefChangedTopic:
        this.onPrefChanged(data);
        break;
      case lazy.DoHConfigController.kConfigUpdateTopic:
        this.reset();
        break;
    }
  },

  // A change to either watched TRR pref permanently disables heuristics
  // (via DISABLED_PREF).
  async onPrefChanged(pref) {
    switch (pref) {
      case NETWORK_TRR_URI_PREF:
      case NETWORK_TRR_MODE_PREF:
        lazy.Preferences.set(DISABLED_PREF, true);
        await this.disableHeuristics("manuallyDisabled");
        break;
    }
  },

  // Connection change events are debounced to allow the network to settle.
  // We wait for the network to be up for a period of kNetworkDebounceTimeout
  // before handling the change. The timer is canceled when the network goes
  // down and restarted the first time we learn that it went back up.
  _debounceTimer: null,
  _cancelDebounce() {
    if (!this._debounceTimer) {
      return;
    }

    lazy.clearTimeout(this._debounceTimer);
    this._debounceTimer = null;
  },

  // Time at which the most recent debounce period started. runHeuristics()
  // compares against it to detect network changes during a run.
  _lastDebounceTimestamp: 0,
  onConnectionChanged() {
    if (!lazy.gNetworkLinkService.isLinkUp) {
      // Network is down - reset debounce timer.
      this._cancelDebounce();
      return;
    }

    if (this._debounceTimer) {
      // Already debouncing - nothing to do.
      return;
    }

    if (lazy.kNetworkDebounceTimeout < 0) {
      // Skip debouncing in tests that set timeout to a negative value.
      this.onConnectionChangedDebounced();
      return;
    }

    this._lastDebounceTimestamp = Date.now();
    this._debounceTimer = lazy.setTimeout(() => {
      this._cancelDebounce();
      this.onConnectionChangedDebounced();
    }, lazy.kNetworkDebounceTimeout);
  },

  // Called once the link has been up for the whole debounce period.
  onConnectionChangedDebounced() {
    if (!lazy.gNetworkLinkService.isLinkUp) {
      return;
    }

    if (
      lazy.gCaptivePortalService.state ==
      lazy.gCaptivePortalService.LOCKED_PORTAL
    ) {
      return;
    }

    // The network is up and we don't know that we're in a locked portal.
    // Run heuristics. If we detect a portal later, we'll run heuristics again
    // when it's unlocked. In that case, this run will likely have failed.
    this.runHeuristicsThrottled("netchange");
  },

  // Called on captive-portal connectivity changes.
  onConnectivityAvailable() {
    if (this._debounceTimer) {
      // Already debouncing - nothing to do.
      return;
    }

    this.runHeuristicsThrottled("connectivity");
  },
};