diff options
Diffstat (limited to 'src/js/reverselookup-worker.js')
-rw-r--r-- | src/js/reverselookup-worker.js | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/src/js/reverselookup-worker.js b/src/js/reverselookup-worker.js new file mode 100644 index 0000000..37b8b65 --- /dev/null +++ b/src/js/reverselookup-worker.js @@ -0,0 +1,287 @@ +/******************************************************************************* + + uBlock Origin - a comprehensive, efficient content blocker + Copyright (C) 2015-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. + + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************/ + +let listEntries = Object.create(null); + +/******************************************************************************/ + +// https://github.com/uBlockOrigin/uBlock-issues/issues/2092 +// Order of ids matters + +const extractBlocks = function(content, ...ids) { + const out = []; + for ( const id of ids ) { + const pattern = `#block-start-${id}\n`; + let beg = content.indexOf(pattern); + if ( beg === -1 ) { continue; } + beg += pattern.length; + const end = content.indexOf(`#block-end-${id}`, beg); + out.push(content.slice(beg, end)); + } + return out.join('\n'); +}; + +/******************************************************************************/ + +// https://github.com/MajkiIT/polish-ads-filter/issues/14768#issuecomment-536006312 +// Avoid reporting badfilter-ed filters. + +const fromNetFilter = function(details) { + const lists = []; + const compiledFilter = details.compiledFilter; + + for ( const assetKey in listEntries ) { + const entry = listEntries[assetKey]; + if ( entry === undefined ) { continue; } + if ( entry.networkContent === undefined ) { + entry.networkContent = extractBlocks(entry.content, 'NETWORK_FILTERS:GOOD'); + } + const content = entry.networkContent; + let pos = 0; + for (;;) { + pos = content.indexOf(compiledFilter, pos); + if ( pos === -1 ) { break; } + // We need an exact match. + // https://github.com/gorhill/uBlock/issues/1392 + // https://github.com/gorhill/uBlock/issues/835 + const notFound = pos !== 0 && content.charCodeAt(pos - 1) !== 0x0A; + pos += compiledFilter.length; + if ( + notFound || + pos !== content.length && content.charCodeAt(pos) !== 0x0A + ) { + continue; + } + lists.push({ + assetKey: assetKey, + title: entry.title, + supportURL: entry.supportURL + }); + break; + } + } + + const response = {}; + response[details.rawFilter] = lists; + + self.postMessage({ id: details.id, response }); +}; + +/******************************************************************************/ + +// Looking up filter lists from a cosmetic filter is a bit more complicated +// than with network filters: +// +// The filter is its raw representation, not its compiled version. This is +// because the cosmetic filtering engine can't translate a live cosmetic +// filter into its compiled version. Reason is I do not want to burden +// cosmetic filtering with the resource overhead of being able to recompile +// live cosmetic filters. I want the cosmetic filtering code to be left +// completely unaffected by reverse lookup requirements. +// +// Mainly, given a CSS selector and a hostname as context, we will derive +// various versions of compiled filters and see if there are matches. This +// way the whole CPU cost is incurred by the reverse lookup code -- in a +// worker thread, and the cosmetic filtering engine incurs no cost at all. +// +// For this though, the reverse lookup code here needs some knowledge of +// the inners of the cosmetic filtering engine. +// FilterContainer.fromCompiledContent() is our reference code to create +// the various compiled versions. + +const fromExtendedFilter = function(details) { + const match = /^#@?#\^?/.exec(details.rawFilter); + const prefix = match[0]; + const exception = prefix.charAt(1) === '@'; + const selector = details.rawFilter.slice(prefix.length); + const isHtmlFilter = prefix.endsWith('^'); + const hostname = details.hostname; + + // The longer the needle, the lower the number of false positives. + // https://github.com/uBlockOrigin/uBlock-issues/issues/1139 + // Mind that there is no guarantee a selector has `\w` characters. + const needle = selector.match(/\w+|\*/g).reduce(function(a, b) { + return a.length > b.length ? a : b; + }); + + const regexFromLabels = (prefix, hn, suffix) => + new RegExp( + prefix + + hn.split('.').reduce((acc, item) => `(${acc}\\.)?${item}`) + + suffix + ); + + // https://github.com/uBlockOrigin/uBlock-issues/issues/803 + // Support looking up selectors of the form `*##...` + const reHostname = regexFromLabels('^', hostname, '$'); + let reEntity; + { + const domain = details.domain; + const pos = domain.indexOf('.'); + if ( pos !== -1 ) { + reEntity = regexFromLabels( + '^(', + hostname.slice(0, pos + hostname.length - domain.length), + '\\.)?\\*$' + ); + } + } + + const hostnameMatches = hn => { + if ( hn === '' ) { return true; } + if ( hn.charCodeAt(0) === 0x2F /* / */ ) { + return (new RegExp(hn.slice(1,-1))).test(hostname); + } + if ( reHostname.test(hn) ) { return true; } + if ( reEntity === undefined ) { return false; } + if ( reEntity.test(hn) ) { return true; } + return false; + }; + + const response = Object.create(null); + + for ( const assetKey in listEntries ) { + const entry = listEntries[assetKey]; + if ( entry === undefined ) { continue; } + if ( entry.extendedContent === undefined ) { + entry.extendedContent = extractBlocks( + entry.content, + 'COSMETIC_FILTERS:SPECIFIC', + 'COSMETIC_FILTERS:GENERIC', + 'SCRIPTLET_FILTERS', + 'HTML_FILTERS', + 'HTTPHEADER_FILTERS' + ); + } + const content = entry.extendedContent; + let found; + let pos = 0; + while ( (pos = content.indexOf(needle, pos)) !== -1 ) { + let beg = content.lastIndexOf('\n', pos); + if ( beg === -1 ) { beg = 0; } + let end = content.indexOf('\n', pos); + if ( end === -1 ) { end = content.length; } + pos = end; + const fargs = JSON.parse(content.slice(beg, end)); + const filterType = fargs[0]; + + // https://github.com/gorhill/uBlock/issues/2763 + if ( filterType === 0 && details.ignoreGeneric ) { continue; } + + // Do not confuse cosmetic filters with HTML ones. + if ( (filterType === 64) !== isHtmlFilter ) { continue; } + + switch ( filterType ) { + // Lowly generic cosmetic filters + case 0: + if ( exception ) { break; } + if ( fargs[2] !== selector ) { break; } + found = prefix + selector; + break; + // Highly generic cosmetic filters + case 4: // simple highly generic + case 5: // complex highly generic + if ( exception ) { break; } + if ( fargs[1] !== selector ) { break; } + found = prefix + selector; + break; + // Specific cosmetic filtering + // Generic exception + case 8: + // HTML filtering + // Response header filtering + case 64: { + if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; } + const isProcedural = (fargs[2] & 0b010) !== 0; + if ( + isProcedural === false && fargs[3] !== selector || + isProcedural && JSON.parse(fargs[3]).raw !== selector + ) { + break; + } + if ( hostnameMatches(fargs[1]) === false ) { break; } + // https://www.reddit.com/r/uBlockOrigin/comments/d6vxzj/ + // Ignore match if specific cosmetic filters are disabled + if ( + filterType === 8 && + exception === false && + details.ignoreSpecific + ) { + break; + } + found = fargs[1] + prefix + selector; + break; + } + // Scriptlet injection + case 32: + if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; } + if ( fargs[3] !== details.compiled ) { break; } + if ( hostnameMatches(fargs[1]) ) { + found = fargs[1] + prefix + selector; + } + break; + } + if ( found !== undefined ) { + if ( response[found] === undefined ) { + response[found] = []; + } + response[found].push({ + assetKey: assetKey, + title: entry.title, + supportURL: entry.supportURL + }); + break; + } + } + } + + self.postMessage({ id: details.id, response }); +}; + +/******************************************************************************/ + +self.onmessage = function(e) { + const msg = e.data; + + switch ( msg.what ) { + case 'resetLists': + listEntries = Object.create(null); + break; + + case 'setList': + listEntries[msg.details.assetKey] = msg.details; + break; + + case 'fromNetFilter': + fromNetFilter(msg); + break; + + case 'fromExtendedFilter': + fromExtendedFilter(msg); + break; + } +}; + +/******************************************************************************/ |