summary refs log tree commit diff stats
path: root/src/js/heuristicblocking.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/js/heuristicblocking.js')
-rw-r--r-- src/js/heuristicblocking.js 557
1 files changed, 557 insertions, 0 deletions
diff --git a/src/js/heuristicblocking.js b/src/js/heuristicblocking.js
new file mode 100644
index 0000000..b0d3bc9
--- /dev/null
+++ b/src/js/heuristicblocking.js
@@ -0,0 +1,557 @@
+/*
+ * This file is part of Privacy Badger <https://www.eff.org/privacybadger>
+ * Copyright (C) 2014 Electronic Frontier Foundation
+ *
+ * Privacy Badger is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 as
+ * published by the Free Software Foundation.
+ *
+ * Privacy Badger is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Privacy Badger. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* globals badger:false, log:false, URI:false */
+
+var constants = require("constants");
+var utils = require("utils");
+
+require.scopes.heuristicblocking = (function() {
+
+
+
+/*********************** heuristicblocking scope **/
+/**
+ * Heuristic blocker constructor: keeps a reference to the given storage
+ * object and starts out with empty per-tab lookup tables.
+ *
+ * @param {Object} pbStorage storage object, exposed as this.storage
+ */
+function HeuristicBlocker(pbStorage) {
+  const blocker = this;
+
+  blocker.storage = pbStorage;
+
+  // Both tables below are keyed by tab ID.
+  //
+  // TODO roll into tabData? -- 6/10/2019 not for now: tabData is populated by
+  // the synchronous listeners in webrequests.js while tabOrigins is used by
+  // the async listeners here, and there's no way to enforce ordering of
+  // requests among those two. Also, tabData is cleaned up every time a tab is
+  // closed, so dangling requests that don't trigger listeners until after the
+  // tab closes are impossible to attribute to a tab.
+  blocker.tabOrigins = {};
+  blocker.tabUrls = {};
+}
+
+HeuristicBlocker.prototype = {
+
+  /**
+   * Blocklists an FQDN/origin:
+   *
+   * - Blocks or cookieblocks the FQDN.
+   * - Blocks or cookieblocks its base domain.
+   * - Cookieblocks any yellowlisted subdomains that share the base domain with the FQDN.
+   *
+   * "Yellowlisted" means present in the "cookieblock_list" store.
+   *
+   * @param {String} base The base domain (etld+1) to blocklist
+   * @param {String} fqdn The FQDN
+   */
+  blocklistOrigin: function (base, fqdn) {
+    const self = this,
+      ylistStorage = self.storage.getStore("cookieblock_list");
+
+    // cookieblock or block the base domain
+    self.storage.setupHeuristicAction(
+      base,
+      ylistStorage.hasItem(base) ? constants.COOKIEBLOCK : constants.BLOCK
+    );
+
+    // cookieblock or block the fqdn
+    //
+    // cookieblock if a "parent" domain of the fqdn is on the yellowlist
+    //
+    // ignore base domains when exploding to work around PSL TLDs:
+    // still want to cookieblock somedomain.googleapis.com with only
+    // googleapis.com (and not somedomain.googleapis.com itself) on the ylist
+    const parentIsYellowlisted = utils.explodeSubdomains(fqdn, true)
+      .some(domain => ylistStorage.hasItem(domain));
+    self.storage.setupHeuristicAction(
+      fqdn,
+      parentIsYellowlisted ? constants.COOKIEBLOCK : constants.BLOCK
+    );
+
+    // cookieblock any yellowlisted subdomains with the same base domain
+    //
+    // for example, when google.com is blocked,
+    // books.google.com should be cookieblocked
+    const base_with_dot = '.' + base;
+    ylistStorage.keys().forEach(domain => {
+      if (base !== domain && domain.endsWith(base_with_dot)) {
+        self.storage.setupHeuristicAction(domain, constants.COOKIEBLOCK);
+      }
+    });
+  },
+
+  /**
+   * Wraps _recordPrevalence for use from webRequest listeners.
+   * Use updateTrackerPrevalence for non-webRequest initiated bookkeeping.
+   *
+   * Main frame requests only update the per-tab origin/URL tables.
+   * Any other request is recorded when it is third-party relative to the
+   * tab's origin, has not already been recorded for that origin, has no
+   * blocking decision yet, and hasCookieTracking() reports tracking.
+   *
+   * @param {Object} details request/response details
+   * @returns {Object} always an empty object
+   */
+  heuristicBlockingAccounting: function (details) {
+    // ignore requests that are outside a tabbed window
+    if (details.tabId < 0 || !badger.isLearningEnabled(details.tabId)) {
+      return {};
+    }
+
+    const self = this,
+      request_host = (new URI(details.url)).host,
+      request_origin = window.getBaseDomain(request_host);
+
+    // if this is a main window request, update tab data and quit
+    if (details.type === "main_frame") {
+      self.tabOrigins[details.tabId] = request_origin;
+      self.tabUrls[details.tabId] = details.url;
+      return {};
+    }
+
+    const tab_origin = self.tabOrigins[details.tabId];
+
+    // ignore first-party requests
+    // (and requests for tabs we never saw a main frame request for)
+    if (!tab_origin || !utils.isThirdPartyDomain(request_origin, tab_origin)) {
+      return {};
+    }
+
+    // short-circuit if we already observed this origin tracking on this site
+    const firstParties = self.storage.getStore('snitch_map').getItem(request_origin);
+    if (firstParties && firstParties.indexOf(tab_origin) > -1) {
+      return {};
+    }
+
+    // abort if we already made a decision for this FQDN
+    const action = self.storage.getAction(request_host);
+    if (action !== constants.NO_TRACKING && action !== constants.ALLOW) {
+      return {};
+    }
+
+    // check if there are tracking cookies
+    if (hasCookieTracking(details, request_origin)) {
+      self._recordPrevalence(request_host, request_origin, tab_origin);
+    }
+
+    // return the same value on every path, including "no tracking found"
+    return {};
+  },
+
+  /**
+   * Wraps _recordPrevalence for use outside of webRequest listeners.
+   *
+   * Does nothing when the stored action for the FQDN is anything other than
+   * constants.NO_TRACKING or constants.ALLOW.
+   *
+   * @param {String} tracker_fqdn The fully qualified domain name of the tracker
+   * @param {String} tracker_origin Base domain of the third party tracker
+   * @param {String} page_origin Base domain of page where tracking occurred
+   */
+  updateTrackerPrevalence: function (tracker_fqdn, tracker_origin, page_origin) {
+    // abort if we already made a decision for this fqdn
+    const action = this.storage.getAction(tracker_fqdn);
+    if (action !== constants.NO_TRACKING && action !== constants.ALLOW) {
+      return;
+    }
+
+    this._recordPrevalence(
+      tracker_fqdn,
+      tracker_origin,
+      page_origin
+    );
+  },
+
+  /**
+   * Record HTTP request prevalence. Blocklists a tracker once it has been
+   * seen on at least constants.TRACKING_THRESHOLD first party domains.
+   *
+   * NOTE: This is a private function and should never be called directly.
+   * All calls should be routed through heuristicBlockingAccounting for normal usage
+   * and updateTrackerPrevalence for manual modifications (e.g. importing
+   * tracker lists).
+   *
+   * @param {String} tracker_fqdn The FQDN of the third party tracker
+   * @param {String} tracker_origin Base domain of the third party tracker
+   * @param {String} page_origin Base domain of page where tracking occurred
+   */
+  _recordPrevalence: function (tracker_fqdn, tracker_origin, page_origin) {
+    // GDPR Consent Management Provider
+    // https://github.com/EFForg/privacybadger/pull/2245#issuecomment-545545717
+    //
+    // checked first so that exempt origins never touch the snitch map
+    if (tracker_origin === "consensu.org") {
+      return;
+    }
+
+    const snitchMap = this.storage.getStore('snitch_map');
+    let firstParties = [];
+    if (snitchMap.hasItem(tracker_origin)) {
+      firstParties = snitchMap.getItem(tracker_origin);
+    }
+
+    if (firstParties.indexOf(page_origin) !== -1) {
+      return; // We already know about the presence of this tracker on the given domain
+    }
+
+    // record that we've seen this tracker on this domain (in snitch map)
+    firstParties.push(page_origin);
+    snitchMap.setItem(tracker_origin, firstParties);
+
+    // ALLOW indicates this is a tracker still below TRACKING_THRESHOLD
+    // (vs. NO_TRACKING for resources we haven't seen perform tracking yet).
+    // see https://github.com/EFForg/privacybadger/pull/1145#discussion_r96676710
+    this.storage.setupHeuristicAction(tracker_fqdn, constants.ALLOW);
+    this.storage.setupHeuristicAction(tracker_origin, constants.ALLOW);
+
+    // Blocking based on outbound cookies
+    const httpRequestPrevalence = firstParties.length;
+
+    // block the origin if it has been seen on multiple first party domains
+    if (httpRequestPrevalence >= constants.TRACKING_THRESHOLD) {
+      log('blocklisting origin', tracker_fqdn);
+      this.blocklistOrigin(tracker_origin, tracker_fqdn);
+    }
+  }
+};
+
+
+// Lookup table from a cookie value to a rough estimate of how many bits of
+// identifying info we might be letting past by allowing that value.
+// (lower-case cookie values before looking them up)
+// TODO: We need a better heuristic
+var lowEntropyCookieValues = (function () {
+  var table = {};
+
+  // gives every value in the list the same bit estimate
+  function assign(bits, values) {
+    values.forEach(function (value) {
+      table[value] = bits;
+    });
+  }
+
+  assign(3, [
+    "", "nodata", "no_data", "yes", "no", "true", "false",
+    "dnt", "opt-out", "optout", "opt_out"
+  ]);
+
+  assign(4, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
+
+  // ISO 639-1 language codes
+  // NOTE(review): "by" and "of" do not look like ISO 639-1 codes -- confirm
+  // whether they are intentional; they are kept here unchanged.
+  assign(8, [
+    "aa", "ab", "ae", "af", "ak", "am", "an", "ar", "as", "av", "ay", "az",
+    "ba", "be", "bg", "bh", "bi", "bm", "bn", "bo", "br", "bs", "by",
+    "ca", "ce", "ch", "co", "cr", "cs", "cu", "cv", "cy",
+    "da", "de", "dv", "dz",
+    "ee", "el", "en", "eo", "es", "et", "eu",
+    "fa", "ff", "fi", "fj", "fo", "fr", "fy",
+    "ga", "gd", "gl", "gn", "gu", "gv",
+    "ha", "he", "hi", "ho", "hr", "ht", "hu", "hy", "hz",
+    "ia", "id", "ie", "ig", "ii", "ik", "in", "io", "is", "it", "iu",
+    "ja", "jv",
+    "ka", "kg", "ki", "kj", "kk", "kl", "km", "kn", "ko", "kr", "ks", "ku", "kv", "kw", "ky",
+    "la", "lb", "lg", "li", "ln", "lo", "lt", "lu", "lv",
+    "mg", "mh", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my",
+    "na", "nb", "nd", "ne", "ng", "nl", "nn", "nr", "nv", "ny",
+    "oc", "of", "oj", "om", "or", "os",
+    "pa", "pi", "pl", "ps", "pt",
+    "qu",
+    "rm", "rn", "ro", "ru", "rw",
+    "sa", "sc", "sd", "se", "sg", "si", "sk", "sl", "sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw",
+    "ta", "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tr", "ts", "tt", "tw", "ty",
+    "ug", "uk", "ur", "uz",
+    "ve", "vi", "vo",
+    "wa", "wo",
+    "xh",
+    "yi", "yo",
+    "za", "zh", "zu"
+  ]);
+
+  return table;
+})();
+
+/**
+ * Collects the raw values of all Cookie and Set-Cookie headers found on a
+ * webRequest details object.
+ *
+ * Reads details.requestHeaders when set (onBeforeSendHeaders), otherwise
+ * details.responseHeaders (onResponseStarted). Header names are matched
+ * case-insensitively.
+ *
+ * Headers without a string `value` are skipped: the webRequest API reports
+ * values that cannot be represented as UTF-8 through `binaryValue` instead,
+ * and callers expect to receive strings only.
+ *
+ * @param {Object} details webRequest event details
+ * @returns {Array<String>} the matching header values, in header order
+ */
+function _extractCookies(details) {
+  const headers = details.requestHeaders || details.responseHeaders || [];
+  const cookies = [];
+
+  for (const header of headers) {
+    if (typeof header.value !== "string") {
+      continue;
+    }
+
+    const name = header.name.toLowerCase();
+    if (name === "cookie" || name === "set-cookie") {
+      cookies.push(header.value);
+    }
+  }
+
+  return cookies;
+}
+
+/**
+ * Check if a request/response is doing cookie tracking, by estimating the
+ * entropy of its cookies.
+ *
+ * Every cookie name/value pair (except the ignored CloudFlare cookies) is
+ * looked up, lower-cased, in lowEntropyCookieValues:
+ * - a value that is not in the table counts as tracking right away;
+ * - otherwise the table's bit estimates are summed, and the cookies count as
+ *   tracking when the sum exceeds constants.MAX_COOKIE_ENTROPY.
+ *
+ * @param {Object} details webRequest details (request or response headers)
+ * @param {String} origin name of the requested origin, used in log messages only
+ * @returns {boolean} true if it has cookie tracking
+ */
+function hasCookieTracking(details, origin) {
+  const cookies = _extractCookies(details);
+  if (!cookies.length) {
+    return false;
+  }
+
+  // Cookie names and values are controlled by the remote server, so
+  // own-property checks go through Object.prototype: a cookie literally named
+  // "hasOwnProperty" must not shadow the method, and a value such as
+  // "constructor" must not match an inherited key of the lookup table.
+  const hasOwn = Object.prototype.hasOwnProperty;
+
+  let estimatedEntropy = 0;
+
+  // loop over every cookie
+  for (let i = 0; i < cookies.length; i++) {
+    const cookie = utils.parseCookie(cookies[i], {
+      noDecode: true,
+      skipAttributes: true,
+      skipNonValues: true
+    });
+
+    // loop over every name/value pair in every cookie
+    for (const name in cookie) {
+      if (!hasOwn.call(cookie, name)) {
+        continue;
+      }
+
+      // ignore CloudFlare
+      // https://support.cloudflare.com/hc/en-us/articles/200170156-Understanding-the-Cloudflare-Cookies
+      if (name === "__cfduid" || name === "__cf_bm") {
+        continue;
+      }
+
+      const value = cookie[name].toLowerCase();
+
+      if (!hasOwn.call(lowEntropyCookieValues, value)) {
+        return true;
+      }
+
+      estimatedEntropy += lowEntropyCookieValues[value];
+    }
+  }
+
+  log("All cookies for " + origin + " deemed low entropy...");
+  if (estimatedEntropy > constants.MAX_COOKIE_ENTROPY) {
+    log("But total estimated entropy is " + estimatedEntropy + " bits, so blocking");
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * Registers the webRequest listeners that feed
+ * badger.heuristicBlocking.heuristicBlockingAccounting:
+ *
+ * - onBeforeSendHeaders, for every request (request headers);
+ * - onResponseStarted, only for responses carrying a Set-Cookie header.
+ *
+ * 'extraHeaders' is added to the extra info spec whenever the browser
+ * advertises it through the corresponding *Options.EXTRA_HEADERS property.
+ */
+function startListeners() {
+  const urlFilter = {urls: ["<all_urls>"]};
+
+  /**
+   * Adds heuristicBlockingAccounting as listened to onBeforeSendHeaders request
+   */
+  const requestInfoSpec = ['requestHeaders'];
+  if (chrome.webRequest.OnBeforeSendHeadersOptions.hasOwnProperty('EXTRA_HEADERS')) {
+    requestInfoSpec.push('extraHeaders');
+  }
+  chrome.webRequest.onBeforeSendHeaders.addListener(function (details) {
+    return badger.heuristicBlocking.heuristicBlockingAccounting(details);
+  }, urlFilter, requestInfoSpec);
+
+  /**
+   * Adds onResponseStarted listener. Monitor for cookies
+   */
+  const responseInfoSpec = ['responseHeaders'];
+  if (chrome.webRequest.OnResponseStartedOptions.hasOwnProperty('EXTRA_HEADERS')) {
+    responseInfoSpec.push('extraHeaders');
+  }
+  chrome.webRequest.onResponseStarted.addListener(function (details) {
+    // responseHeaders is an optional field of the event details;
+    // treat a missing list as "no Set-Cookie header" instead of throwing
+    const responseHeaders = details.responseHeaders || [];
+    const hasSetCookie = responseHeaders.some(function (header) {
+      return header.name.toLowerCase() === "set-cookie";
+    });
+
+    if (hasSetCookie) {
+      return badger.heuristicBlocking.heuristicBlockingAccounting(details);
+    }
+  }, urlFilter, responseInfoSpec);
+}
+
+/************************************** exports */
+// public interface of the heuristicblocking scope
+return {
+  HeuristicBlocker: HeuristicBlocker,
+  startListeners: startListeners,
+  hasCookieTracking: hasCookieTracking
+};
+/************************************** exports */
+})();