diff options
Diffstat (limited to 'browser/components/places/InteractionsBlocklist.sys.mjs')
-rw-r--r-- | browser/components/places/InteractionsBlocklist.sys.mjs | 286 |
1 files changed, 286 insertions, 0 deletions
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";

const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  UrlbarUtils: "resource:///modules/UrlbarUtils.sys.mjs",
});

XPCOMUtils.defineLazyModuleGetters(lazy, {
  FilterAdult: "resource://activity-stream/lib/FilterAdult.jsm",
});

XPCOMUtils.defineLazyGetter(lazy, "logConsole", function () {
  return console.createInstance({
    prefix: "InteractionsBlocklist",
    maxLogLevel: Services.prefs.getBoolPref(
      "browser.places.interactions.log",
      false
    )
      ? "Debug"
      : "Warn",
  });
});

// Name of the string pref holding the JSON-serialized custom blocklist.
const CUSTOM_BLOCKLIST_PREF = "places.interactions.customBlocklist";

// A blocklist of regular expressions. Maps base hostnames to a list of regular
// expressions for URLs with that base hostname. In this context, "base
// hostname" means the hostname without any subdomains or a public suffix. For
// example, the base hostname for "https://www.maps.google.com/a/place" is
// "google". We do this mapping to improve performance; otherwise we'd have to
// check all URLs against a long list of regular expressions. The regexes are
// defined as escaped strings so that we build them lazily.
// The special key "*" (added at runtime) holds the custom regexes, which are
// checked against every URL regardless of its base hostname.
// We may want to migrate this list to Remote Settings in the future.
let HOST_BLOCKLIST = {
  auth0: [
    // Auth0 OAuth.
    // XXX: Used alone this could produce false positives where an auth0 URL
    // appears after another valid domain and TLD, but since we limit this to
    // the auth0 hostname those occurrences will be filtered out.
    "^https:\\/\\/.*\\.auth0\\.com\\/login",
  ],
  baidu: [
    // Baidu SERP
    "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
  ],
  bing: [
    // Bing SERP
    "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
  ],
  duckduckgo: [
    // DuckDuckGo SERP
    "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
  ],
  google: [
    // Google SERP
    "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
    // Google OAuth
    "^https:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/v2\\/auth",
    "^https:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/consent",
  ],
  microsoftonline: [
    // Microsoft OAuth
    "^https:\\/\\/login\\.microsoftonline\\.com\\/common\\/oauth2\\/v2\\.0\\/authorize",
  ],
  yandex: [
    // Yandex SERP
    "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
  ],
  zoom: [
    // Zoom meeting interstitial
    "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
  ],
};

// Wrap the blocklist so that the string patterns are compiled into RegExp
// objects the first time a host's list is read. The compiled objects are
// written back into the underlying array, so each pattern is compiled once.
// Reading any property that is not an array yields null.
HOST_BLOCKLIST = new Proxy(HOST_BLOCKLIST, {
  get(target, property) {
    let regexes = target[property];
    if (!Array.isArray(regexes)) {
      return null;
    }

    for (let i = 0; i < regexes.length; i++) {
      if (typeof regexes[i] !== "string") {
        // Already compiled (or added at runtime as a RegExp).
        continue;
      }
      try {
        regexes[i] = new RegExp(regexes[i], "i");
      } catch (ex) {
        // `new RegExp` never returns a falsy value; it throws on bad input.
        throw new Error("Blocklist contains invalid regex.", { cause: ex });
      }
    }
    return regexes;
  },
});

/**
 * Rebuilds a RegExp from an entry of the custom blocklist pref.
 *
 * Entries are written with `RegExp.prototype.toString()`, i.e. in the
 * "/source/flags" form, so passing them straight to `new RegExp()` would treat
 * the delimiting slashes and the flags as literal pattern text. Entries that
 * are not in that form (e.g. hand-edited prefs) are compiled as a plain
 * pattern without flags.
 *
 * @param {string} stored
 *   One entry of the persisted custom blocklist.
 * @returns {RegExp}
 *   The corresponding regular expression.
 * @throws {SyntaxError}
 *   If the entry cannot be compiled as a regular expression.
 */
function regexFromStoredString(stored) {
  // The final "/" is necessarily the closing delimiter, because flags consist
  // of letters only and "/" inside `source` is always escaped.
  let match = /^\/(.*)\/([a-z]*)$/s.exec(stored);
  if (match) {
    try {
      return new RegExp(match[1], match[2]);
    } catch (ex) {
      // Not a valid serialized regex after all (e.g. a plain pattern such as
      // "/path/name" with unknown flags); fall through to a plain pattern.
    }
  }
  return new RegExp(stored);
}

/**
 * Writes the current custom ("*") blocklist to the preferences as a JSON array
 * of "/source/flags" strings.
 */
function persistCustomBlocklist() {
  let customRegexes = HOST_BLOCKLIST["*"] || [];
  Services.prefs.setStringPref(
    CUSTOM_BLOCKLIST_PREF,
    JSON.stringify(customRegexes.map(reg => reg.toString()))
  );
}

/**
 * A class that maintains a blocklist of URLs. The class exposes a method to
 * check if a particular URL is contained on the blocklist.
 */
class _InteractionsBlocklist {
  constructor() {
    // Load custom blocklist items from pref. If the pref is not a JSON array
    // of valid regular expressions, the whole custom list is ignored.
    try {
      let customBlocklist = JSON.parse(
        Services.prefs.getStringPref(CUSTOM_BLOCKLIST_PREF, "[]")
      );
      if (!Array.isArray(customBlocklist)) {
        throw new Error();
      }
      HOST_BLOCKLIST["*"] = customBlocklist.map(regexFromStoredString);
    } catch (ex) {
      lazy.logConsole.warn("places.interactions.customBlocklist is corrupted.");
    }
  }

  /**
   * Only certain urls can be added as Interactions, either manually or
   * automatically.
   *
   * @returns {Map} A Map keyed by protocol, for each protocol an object may
   *   define stricter requirements, like extension.
   */
  get urlRequirements() {
    return new Map([
      ["http:", {}],
      ["https:", {}],
      ["file:", { extension: "pdf" }],
    ]);
  }

  /**
   * Whether to record interactions for a given URL.
   * The rules are defined in InteractionsBlocklist.urlRequirements.
   *
   * @param {string|URL|nsIURI} url The URL to check.
   * @returns {boolean} whether the url can be recorded.
   * @throws {TypeError} if `url` is a string that is not a valid URL.
   */
  canRecordUrl(url) {
    let protocol, pathname;
    if (typeof url === "string") {
      url = new URL(url);
    }
    if (url instanceof Ci.nsIURI) {
      protocol = url.scheme + ":";
      pathname = url.filePath;
    } else {
      protocol = url.protocol;
      pathname = url.pathname;
    }
    let requirements = InteractionsBlocklist.urlRequirements.get(protocol);
    if (!requirements) {
      // Unsupported protocol. Return a real boolean rather than `undefined`.
      return false;
    }
    return (
      !requirements.extension || pathname.endsWith(requirements.extension)
    );
  }

  /**
   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
   * should not record an interaction.
   *
   * @param {string} urlToCheck
   *   The URL we are looking for on the blocklist.
   * @returns {boolean}
   *   True if `url` is on a blocklist. False otherwise, including when
   *   `urlToCheck` cannot be parsed as a URL (a warning is logged).
   */
  isUrlBlocklisted(urlToCheck) {
    if (lazy.FilterAdult.isAdultUrl(urlToCheck)) {
      return true;
    }

    // Parse before any other check so that an invalid URL is reported here
    // instead of throwing from canRecordUrl().
    let url;
    try {
      url = new URL(urlToCheck);
    } catch (ex) {
      lazy.logConsole.warn(
        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
      );
      return false;
    }

    if (!this.canRecordUrl(url)) {
      return true;
    }

    if (url.protocol == "file:") {
      return false;
    }

    // Find the URL's base host: the hostname without any subdomains or a
    // public suffix.
    let hostWithoutSuffix = lazy.UrlbarUtils.stripPublicSuffixFromHost(
      url.host
    );
    let [hostWithSubdomains] = lazy.UrlbarUtils.stripPrefixAndTrim(
      hostWithoutSuffix,
      {
        stripWww: true,
        trimTrailingDot: true,
      }
    );
    let baseHost = hostWithSubdomains.substring(
      hostWithSubdomains.lastIndexOf(".") + 1
    );

    // Then fetch blocked regexes for that baseHost and compare them to the full
    // URL. Also check the URL against the custom blocklist. The two lists are
    // tested separately: the arrays returned by HOST_BLOCKLIST are the stored
    // ones, so appending the custom regexes to the host list would make it
    // grow on every call and keep removed custom regexes alive.
    let hostRegexes = HOST_BLOCKLIST[baseHost.toLowerCase()] || [];
    let customRegexes = HOST_BLOCKLIST["*"] || [];
    let matchesUrl = r => r.test(url.href);
    return hostRegexes.some(matchesUrl) || customRegexes.some(matchesUrl);
  }

  /**
   * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
   * the regex, we add it to a list of wildcard regexes. All URLs are checked
   * against these wildcard regexes. Currently only exposed for tests and use in
   * the console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToAdd
   *   The regular expression to add to our blocklist. It is always compiled
   *   with the case-insensitive flag.
   */
  addRegexToBlocklist(regexToAdd) {
    let regex;
    try {
      regex = new RegExp(regexToAdd, "i");
    } catch (ex) {
      lazy.logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
      return;
    }

    if (!HOST_BLOCKLIST["*"]) {
      HOST_BLOCKLIST["*"] = [];
    }
    HOST_BLOCKLIST["*"].push(regex);
    persistCustomBlocklist();
  }

  /**
   * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
   * blocklist, this is a no-op. Currently only exposed for tests and use in the
   * console. In the future we could hook this up to a UI component.
   *
   * @param {string|RegExp} regexToRemove
   *   The regular expression to remove from our blocklist. Entries are matched
   *   by their pattern source.
   */
  removeRegexFromBlocklist(regexToRemove) {
    let regex;
    try {
      regex = new RegExp(regexToRemove, "i");
    } catch (ex) {
      lazy.logConsole.warn("Invalid regex passed to removeRegexFromBlocklist.");
      return;
    }

    if (!HOST_BLOCKLIST["*"] || !Array.isArray(HOST_BLOCKLIST["*"])) {
      return;
    }
    HOST_BLOCKLIST["*"] = HOST_BLOCKLIST["*"].filter(
      curr => curr.source != regex.source
    );
    persistCustomBlocklist();
  }
}

export const InteractionsBlocklist = new _InteractionsBlocklist();