From 31d6ff6f931696850c348007241195ab3b2eddc7 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 07:47:55 +0200 Subject: Adding upstream version 1.55.0+dfsg. Signed-off-by: Daniel Baumann --- platform/nodejs/README.md | 158 ++++++++++++++++++++++++++ platform/nodejs/build.js | 34 ++++++ platform/nodejs/index.js | 281 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 473 insertions(+) create mode 100644 platform/nodejs/README.md create mode 100644 platform/nodejs/build.js create mode 100644 platform/nodejs/index.js (limited to 'platform/nodejs') diff --git a/platform/nodejs/README.md b/platform/nodejs/README.md new file mode 100644 index 0000000..0b3e3d8 --- /dev/null +++ b/platform/nodejs/README.md @@ -0,0 +1,158 @@ +# uBlock Origin Core + +This package provides the core filtering engines used in the uBlock Origin ("uBO") extension, and has +no external dependencies. + +## Installation + +Install: `npm install @gorhill/ubo-core` + +This is a very early version and the API is subject to change at any time. + +This package uses [native JavaScript modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules). + + +## Description + +The package contains uBO's static network filtering engine ("SNFE"), whose +purpose is to parse and enforce filter lists. The matching algorithm is highly +efficient, and _especially_ optimized to match against large sets of pure +hostnames. + +The SNFE can be fed filter lists from a variety of sources, such as [EasyList/EasyPrivacy](https://easylist.to/), +[uBlock filters](https://github.com/uBlockOrigin/uAssets/tree/master/filters), +and also lists of domain names or files in hosts file format (e.g. block lists from [The Block List Project](https://github.com/blocklistproject/Lists#the-block-list-project), +[Steven Black's HOSTS](https://github.com/StevenBlack/hosts#readme), etc). 
+ + +## Usage + +At the moment, there can be only one instance of the static network filtering +engine ("SNFE"), whose proxy API must be imported as follows: + +```js +import { StaticNetFilteringEngine } from '@gorhill/ubo-core'; +``` + +If you must import as a NodeJS module: + +```js +const { StaticNetFilteringEngine } = await import('@gorhill/ubo-core'); +``` + + +Create an instance of SNFE (`create()` is asynchronous and returns a promise): + +```js +const snfe = await StaticNetFilteringEngine.create(); +``` + +Feed the SNFE with filter lists -- `useLists()` accepts an array of +objects (or promises resolving to such objects) which expose the raw text of a list +through the `raw` property, and optionally the name of the list through the +`name` property (how you fetch the lists is up to you): + +```js +await snfe.useLists([ + fetch('easylist').then(raw => ({ name: 'easylist', raw })), + fetch('easyprivacy').then(raw => ({ name: 'easyprivacy', raw })), +]); +``` + +Now we are ready to match network requests: + +```js +// Not blocked +if ( snfe.matchRequest({ + originURL: 'https://www.bloomberg.com/', + url: 'https://www.bloomberg.com/tophat/assets/v2.6.1/that.css', + type: 'stylesheet' +}) !== 0 ) { + console.log(snfe.toLogData()); +} + +// Blocked +if ( snfe.matchRequest({ + originURL: 'https://www.bloomberg.com/', + url: 'https://securepubads.g.doubleclick.net/tag/js/gpt.js', + type: 'script' +}) !== 0 ) { + console.log(snfe.toLogData()); +} + +// Unblocked +if ( snfe.matchRequest({ + originURL: 'https://www.bloomberg.com/', + url: 'https://sourcepointcmp.bloomberg.com/ccpa.js', + type: 'script' +}) !== 0 ) { + console.log(snfe.toLogData()); +} +``` + +It is possible to pre-parse filter lists and save the intermediate results for +later use -- useful to speed up the loading of filter lists. This will be +documented eventually, but if you feel adventurous, you can look at the code +and use this capability now if you figure out the details. 
+ +--- + +## Extras + +You can directly use specific APIs exposed by this package; here are some of +them, which are used internally by uBO's SNFE. + +### HNTrieContainer + +A well optimised [compressed trie](https://en.wikipedia.org/wiki/Trie#Compressing_tries) +container specialized to store and look up hostnames. + +The matching algorithm is designed for hostnames, i.e. the hostname labels +making up a hostname are matched from right to left, such that `www.example.org` +will be a match if `example.org` is stored into the trie, while +`anotherexample.org` won't be a match. + +`HNTrieContainer` is designed to store a large number of hostnames with CPU and +memory efficiency as a main concern -- and is a key component of uBO. + +To create and use a standalone `HNTrieContainer` object: + +```js +import HNTrieContainer from '@gorhill/ubo-core/js/hntrie.js'; + +const trieContainer = new HNTrieContainer(); + +const aTrie = trieContainer.createOne(); +trieContainer.add(aTrie, 'example.org'); +trieContainer.add(aTrie, 'example.com'); + +const anotherTrie = trieContainer.createOne(); +trieContainer.add(anotherTrie, 'foo.invalid'); +trieContainer.add(anotherTrie, 'bar.invalid'); + +// matches() returns the position at which the match starts, or -1 when +// there is no match. 
+ +// Matches: return 4 +console.log("trieContainer.matches(aTrie, 'www.example.org')", trieContainer.matches(aTrie, 'www.example.org')); + +// Does not match: return -1 +console.log("trieContainer.matches(aTrie, 'www.foo.invalid')", trieContainer.matches(aTrie, 'www.foo.invalid')); + +// Does not match: return -1 +console.log("trieContainer.matches(anotherTrie, 'www.example.org')", trieContainer.matches(anotherTrie, 'www.example.org')); + +// Matches: return 0 +console.log("trieContainer.matches(anotherTrie, 'foo.invalid')", trieContainer.matches(anotherTrie, 'foo.invalid')); +``` + +The `reset()` method must be used to remove all the tries from a trie container, +you can't remove a single trie from the container. + +```js +trieContainer.reset(); +``` + +When you reset a trie container, you can't use the reference to prior instances +of trie, i.e. `aTrie` and `anotherTrie` are no longer valid and shouldn't be +used following a reset. diff --git a/platform/nodejs/build.js b/platform/nodejs/build.js new file mode 100644 index 0000000..dbc0843 --- /dev/null +++ b/platform/nodejs/build.js @@ -0,0 +1,34 @@ +/******************************************************************************* + + uBlock Origin - a comprehensive, efficient content blocker + Copyright (C) 2014-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
+ + Home: https://github.com/gorhill/uBlock +*/ + +'use strict'; + +/******************************************************************************/ + +import fs from 'fs'; + +import { pslInit } from './index.js'; + +/******************************************************************************/ +/* Build step: serialize ("selfie") the public suffix list returned by pslInit() and write it as JSON to ./build/publicsuffixlist.json, creating ./build if needed. Presumably this is the file pslInit() later loads from build/ -- assumes the script is run from the package directory; confirm. */ +fs.mkdirSync('./build', { recursive: true }); +fs.writeFileSync('./build/publicsuffixlist.json', + JSON.stringify(pslInit().toSelfie())); diff --git a/platform/nodejs/index.js b/platform/nodejs/index.js new file mode 100644 index 0000000..1d39a7d --- /dev/null +++ b/platform/nodejs/index.js @@ -0,0 +1,281 @@ +/******************************************************************************* + + uBlock Origin - a comprehensive, efficient content blocker + Copyright (C) 2014-present Raymond Hill + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see {http://www.gnu.org/licenses/}. 
/*
    Home: https://github.com/gorhill/uBlock
*/

/* globals WebAssembly */

'use strict';

/******************************************************************************/

import { createRequire } from 'module';

import { readFileSync } from 'fs';
import { dirname, resolve } from 'path';
import { domainToASCII, fileURLToPath } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));

import publicSuffixList from './lib/publicsuffixlist/publicsuffixlist.js';

import snfe from './js/static-net-filtering.js';
import { FilteringContext } from './js/filtering-context.js';
import { LineIterator } from './js/text-utils.js';
import * as sfp from './js/static-filtering-parser.js';

import {
    CompiledListReader,
    CompiledListWriter,
} from './js/static-filtering-io.js';

/******************************************************************************/

// Read and parse a JSON file. `path` is resolved relative to the directory
// of this module, not the current working directory. Throws if the file
// cannot be read or is not valid JSON.

function loadJSON(path) {
    return JSON.parse(readFileSync(resolve(__dirname, path), 'utf8'));
}

/******************************************************************************/

// Ask both the public suffix list and the static network filtering engine
// to enable their WebAssembly code paths.
//
// Resolves to `true` only when every component reported `true`. Any error
// is logged and reported as `false`; this function does not reject.

async function enableWASM() {
    // WASM binaries are loaded through require() from `<path>.wasm.json`
    // files, whose content is turned into a byte array before compilation.
    const wasmModuleFetcher = function(path) {
        const require = createRequire(import.meta.url); // jshint ignore:line
        const wasm = new Uint8Array(require(`${path}.wasm.json`));
        return WebAssembly.compile(wasm);
    };
    try {
        const results = await Promise.all([
            publicSuffixList.enableWASM(wasmModuleFetcher, './lib/publicsuffixlist/wasm/'),
            snfe.enableWASM(wasmModuleFetcher, './js/wasm/'),
        ]);
        return results.every(a => a === true);
    } catch(reason) {
        console.log(reason);
    }
    return false;
}

/******************************************************************************/

// Populate the public suffix list ("PSL"), trying these sources in order:
//   1. `raw`, when it is a non-blank string: parsed as-is;
//   2. the serialized list at build/publicsuffixlist.json, when loadable;
//   3. the bundled effective_tld_names.dat file.
//
// Returns the populated `publicSuffixList` object, or `undefined` when the
// bundled file turns out to be blank. Throws if the bundled file cannot be
// read.

function pslInit(raw) {
    if ( typeof raw === 'string' && raw.trim() !== '' ) {
        publicSuffixList.parse(raw, domainToASCII);
        return publicSuffixList;
    }

    // Use serialized version if available
    let serialized = null;
    try {
        // Use loadJSON() because require() would keep the string in memory.
        serialized = loadJSON('build/publicsuffixlist.json');
    } catch (error) {
        if ( process.env.npm_lifecycle_event !== 'build' ) {
            // This should never happen except during package building.
            console.error(error);
        }
    }
    if ( serialized !== null ) {
        publicSuffixList.fromSelfie(serialized);
        return publicSuffixList;
    }

    raw = readFileSync(
        resolve(__dirname, './assets/thirdparties/publicsuffix.org/list/effective_tld_names.dat'),
        'utf8'
    );
    if ( typeof raw !== 'string' || raw.trim() === '' ) {
        console.error('Unable to populate public suffix list');
        return;
    }
    publicSuffixList.parse(raw, domainToASCII);
    return publicSuffixList;
}

/******************************************************************************/

// Compile the network filters found in the raw text of a filter list.
//
//   { name, raw }   the list: `raw` is its text; `name`, when truthy, is
//                   stored as the 'name' property of `writer`
//   compiler        compiler object, as returned by snfe.createCompiler()
//   writer          CompiledListWriter receiving the compiled filters
//   options.events  optional array; `{ type: 'error', text }` entries are
//                   pushed onto it for filters the compiler rejected with an
//                   error
//
// Returns the compiled list as a string (`writer.toString()`). Lines which
// are not network filters are skipped.

function compileList({ name, raw }, compiler, writer, options = {}) {
    const lineIter = new LineIterator(raw);
    const events = Array.isArray(options.events) ? options.events : undefined;

    if ( name ) {
        writer.properties.set('name', name);
    }

    const parser = new sfp.AstFilterParser({
        maxTokenLength: snfe.MAX_TOKEN_LENGTH,
    });

    while ( lineIter.eot() === false ) {
        let line = lineIter.next();
        // Line continuation: a line ending with ' \' is joined with the
        // following line, but only when that line starts with a 4-space
        // indent. The literal must be as long as the peek() window -- a
        // shorter literal could never compare equal to four characters.
        // NOTE(review): presumably peek(4) returns the next four characters
        // without consuming them -- confirm against LineIterator.
        while ( line.endsWith(' \\') ) {
            if ( lineIter.peek(4) !== '    ' ) { break; }
            line = line.slice(0, -2).trim() + lineIter.next().trim();
        }
        parser.parse(line);
        if ( parser.isFilter() === false ) { continue; }
        if ( parser.isNetworkFilter() === false ) { continue; }
        if ( compiler.compile(parser, writer) ) { continue; }
        if ( compiler.error !== undefined && events !== undefined ) {
            events.push({
                type: 'error',
                text: compiler.error
            });
        }
    }

    return writer.toString();
}

/******************************************************************************/

// Replace the content of the filtering engine with the given filter lists.
//
//   lists    array of list objects, or of promises resolving to list objects.
//            A list with a non-empty string `compiled` property is loaded
//            as-is, otherwise its `raw` text is compiled first.
//   options  forwarded to compileList()
//
// The engine is always reset first; a missing or empty `lists` leaves it
// empty. Throws if another useLists() operation is still pending. If any
// list promise rejects, the rejection is propagated to the caller and the
// engine is left un-frozen, holding whatever lists were consumed so far.

async function useLists(lists, options = {}) {
    if ( useLists.promise !== null ) {
        throw new Error('Pending useLists() operation');
    }

    // Remove all filters
    snfe.reset();

    if ( Array.isArray(lists) === false || lists.length === 0 ) {
        return;
    }

    // Created lazily: not needed when all lists are already compiled.
    let compiler = null;

    const consumeList = list => {
        let { compiled } = list;
        if ( typeof compiled !== 'string' || compiled === '' ) {
            const writer = new CompiledListWriter();
            if ( compiler === null ) {
                compiler = snfe.createCompiler();
            }
            compiled = compileList(list, compiler, writer, options);
        }
        snfe.fromCompiled(new CompiledListReader(compiled));
    };

    // Populate filtering engine with resolved filter lists
    const promises = lists.map(list =>
        Promise.resolve(list).then(resolved => consumeList(resolved))
    );

    useLists.promise = Promise.all(promises);
    try {
        await useLists.promise;
    } finally {
        // Always clear the pending marker, otherwise a single failed list
        // would make every later call throw 'Pending useLists() operation'.
        useLists.promise = null; // eslint-disable-line require-atomic-updates
    }

    // Commit changes
    snfe.freeze();
    snfe.optimize();
}

useLists.promise = null;

/******************************************************************************/

const fctx = new FilteringContext();
let snfeProxyInstance = null;

// Proxy to the one and only static network filtering engine ("SNFE").
//
// Only a single instance may exist at any given time: use the static
// create() to obtain it, and the static release() to dispose of it.

class StaticNetFilteringEngine {
    constructor() {
        if ( snfeProxyInstance !== null ) {
            throw new Error('Only a single instance is supported.');
        }
        snfeProxyInstance = this;
    }

    // See module-level useLists(). `options` is forwarded so that callers
    // can collect compilation errors through `options.events`.
    useLists(lists, options = {}) {
        return useLists(lists, options);
    }

    // `details` is converted into the shared filtering context before
    // being matched by the engine.
    matchRequest(details) {
        return snfe.matchRequest(fctx.fromDetails(details));
    }

    matchAndFetchModifiers(details, modifier) {
        return snfe.matchAndFetchModifiers(fctx.fromDetails(details), modifier);
    }

    // NOTE(review): unlike the other matching methods, `details` is handed
    // to the engine as-is rather than through fctx.fromDetails() -- confirm
    // this is intended.
    hasQuery(details) {
        return snfe.hasQuery(details);
    }

    // Returns `undefined` when the engine yields no directives, otherwise
    // the directives along with the redirect URL from the filtering context.
    filterQuery(details) {
        const directives = snfe.filterQuery(fctx.fromDetails(details));
        if ( directives === undefined ) { return; }
        return { redirectURL: fctx.redirectURL, directives };
    }

    isBlockImportant() {
        return snfe.isBlockImportant();
    }

    toLogData() {
        return snfe.toLogData();
    }

    createCompiler(parser) {
        return snfe.createCompiler(parser);
    }

    compileList(...args) {
        return compileList(...args);
    }

    serialize() {
        return snfe.serialize();
    }

    deserialize(serialized) {
        return snfe.unserialize(serialized);
    }

    // Create the engine proxy and, unless `noPSL` is true, initialize the
    // public suffix list.
    //
    // Throws if an instance already exists, or if the public suffix list
    // cannot be initialized. In the latter case the singleton slot is freed
    // so that create() can be attempted again.
    static async create({ noPSL = false } = {}) {
        const instance = new StaticNetFilteringEngine();

        if ( noPSL === true ) { return instance; }

        let pslReady = false;
        try {
            pslReady = Boolean(pslInit());
        } finally {
            // Covers both pslInit() throwing and pslInit() returning nothing.
            if ( pslReady === false ) {
                snfeProxyInstance = null;
            }
        }
        if ( pslReady === false ) {
            throw new Error('Failed to initialize public suffix list.');
        }

        return instance;
    }

    // Dispose of the current instance, if any, and empty the engine.
    static async release() {
        if ( snfeProxyInstance === null ) { return; }
        snfeProxyInstance = null;
        await useLists([]);
    }
}

/******************************************************************************/

// rollup.js needs module.exports to be set back to the local exports object.
// This is because some of the code (e.g. publicsuffixlist.js) sets
// module.exports. Once all included files are written like ES modules, using
// export statements, this should no longer be necessary.
if ( typeof module !== 'undefined' && typeof exports !== 'undefined' ) {
    module.exports = exports;
}

export {
    enableWASM,
    pslInit,
    StaticNetFilteringEngine,
};