summaryrefslogtreecommitdiffstats
path: root/platform/nodejs
diff options
context:
space:
mode:
Diffstat (limited to 'platform/nodejs')
-rw-r--r--platform/nodejs/README.md158
-rw-r--r--platform/nodejs/build.js34
-rw-r--r--platform/nodejs/index.js281
3 files changed, 473 insertions, 0 deletions
diff --git a/platform/nodejs/README.md b/platform/nodejs/README.md
new file mode 100644
index 0000000..0b3e3d8
--- /dev/null
+++ b/platform/nodejs/README.md
@@ -0,0 +1,158 @@
+# uBlock Origin Core
+
+The core filtering engines used in the uBlock Origin ("uBO") extension. The
+package has no external dependencies.
+
+## Installation
+
+Install: `npm install @gorhill/ubo-core`
+
+This is a very early version and the API is subject to change at any time.
+
+This package uses [native JavaScript modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules).
+
+
+## Description
+
+The package contains uBO's static network filtering engine ("SNFE"), whose
+purpose is to parse and enforce filter lists. The matching algorithm is highly
+efficient, and _especially_ optimized to match against large sets of pure
+hostnames.
+
+The SNFE can be fed filter lists from a variety of sources, such as [EasyList/EasyPrivacy](https://easylist.to/),
+[uBlock filters](https://github.com/uBlockOrigin/uAssets/tree/master/filters),
+and also lists of domain names or lists in hosts file format (e.g. block lists from [The Block List Project](https://github.com/blocklistproject/Lists#the-block-list-project),
+[Steven Black's HOSTS](https://github.com/StevenBlack/hosts#readme), etc).
+
+
+## Usage
+
+At the moment, there can be only one instance of the static network filtering
+engine ("SNFE"), whose proxy API must be imported as follows:
+
+```js
+import { StaticNetFilteringEngine } from '@gorhill/ubo-core';
+```
+
+If you must import as a NodeJS module:
+
+```js
+const { StaticNetFilteringEngine } = await import('@gorhill/ubo-core');
+```
+
+
+Create an instance of SNFE:
+
+```js
+const snfe = await StaticNetFilteringEngine.create();
+```
+
+Feed the SNFE with filter lists -- `useLists()` accepts an array of
+objects (or promises resolving to such objects) which expose the raw text of a list
+through the `raw` property, and optionally the name of the list through the
+`name` property (how you fetch the lists is up to you):
+
+```js
+await snfe.useLists([
+ fetch('easylist').then(raw => ({ name: 'easylist', raw })),
+ fetch('easyprivacy').then(raw => ({ name: 'easyprivacy', raw })),
+]);
+```
+
+Now we are ready to match network requests:
+
+```js
+// Not blocked
+if ( snfe.matchRequest({
+ originURL: 'https://www.bloomberg.com/',
+ url: 'https://www.bloomberg.com/tophat/assets/v2.6.1/that.css',
+ type: 'stylesheet'
+}) !== 0 ) {
+ console.log(snfe.toLogData());
+}
+
+// Blocked
+if ( snfe.matchRequest({
+ originURL: 'https://www.bloomberg.com/',
+ url: 'https://securepubads.g.doubleclick.net/tag/js/gpt.js',
+ type: 'script'
+}) !== 0 ) {
+ console.log(snfe.toLogData());
+}
+
+// Unblocked
+if ( snfe.matchRequest({
+ originURL: 'https://www.bloomberg.com/',
+ url: 'https://sourcepointcmp.bloomberg.com/ccpa.js',
+ type: 'script'
+}) !== 0 ) {
+ console.log(snfe.toLogData());
+}
+```
+
+It is possible to pre-parse filter lists and save the intermediate results for
+later use -- useful to speed up the loading of filter lists. This will be
+documented eventually, but if you feel adventurous, you can look at the code
+and use this capability now if you figure out the details.
+
+---
+
+## Extras
+
+You can directly use specific APIs exposed by this package. Here are some of
+them, which are used internally by uBO's SNFE.
+
+### HNTrieContainer
+
+A well-optimised [compressed trie](https://en.wikipedia.org/wiki/Trie#Compressing_tries)
+container specialized to store and look up hostnames.
+
+The matching algorithm is designed for hostnames, i.e. the hostname labels
+making up a hostname are matched from right to left, such that `www.example.org`
+will be a match if `example.org` is stored into the trie, while
+`anotherexample.org` won't be a match.
+
+`HNTrieContainer` is designed to store a large number of hostnames with CPU and
+memory efficiency as a main concern -- and is a key component of uBO.
+
+To create and use a standalone `HNTrieContainer` object:
+
+```js
+import HNTrieContainer from '@gorhill/ubo-core/js/hntrie.js';
+
+const trieContainer = new HNTrieContainer();
+
+const aTrie = trieContainer.createOne();
+trieContainer.add(aTrie, 'example.org');
+trieContainer.add(aTrie, 'example.com');
+
+const anotherTrie = trieContainer.createOne();
+trieContainer.add(anotherTrie, 'foo.invalid');
+trieContainer.add(anotherTrie, 'bar.invalid');
+
+// matches() returns the position at which the match starts, or -1 when
+// there is no match.
+
+// Matches: return 4
+console.log("trieContainer.matches(aTrie, 'www.example.org')", trieContainer.matches(aTrie, 'www.example.org'));
+
+// Does not match: return -1
+console.log("trieContainer.matches(aTrie, 'www.foo.invalid')", trieContainer.matches(aTrie, 'www.foo.invalid'));
+
+// Does not match: return -1
+console.log("trieContainer.matches(anotherTrie, 'www.example.org')", trieContainer.matches(anotherTrie, 'www.example.org'));
+
+// Matches: return 0
+console.log("trieContainer.matches(anotherTrie, 'foo.invalid')", trieContainer.matches(anotherTrie, 'foo.invalid'));
+```
+
+The `reset()` method must be used to remove all the tries from a trie container;
+you can't remove a single trie from the container.
+
+```js
+trieContainer.reset();
+```
+
+When you reset a trie container, references to tries created before the reset
+become invalid, i.e. `aTrie` and `anotherTrie` are no longer valid and shouldn't be
+used following a reset.
diff --git a/platform/nodejs/build.js b/platform/nodejs/build.js
new file mode 100644
index 0000000..dbc0843
--- /dev/null
+++ b/platform/nodejs/build.js
@@ -0,0 +1,34 @@
+/*******************************************************************************
+
+ uBlock Origin - a comprehensive, efficient content blocker
+ Copyright (C) 2014-present Raymond Hill
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+ Home: https://github.com/gorhill/uBlock
+*/
+
+'use strict';
+
+/******************************************************************************/
+
+import fs from 'fs';
+
+import { pslInit } from './index.js';
+
+/******************************************************************************/
+
+// Pre-build a serialized copy ("selfie") of the public suffix list. pslInit()
+// tries to restore the list from 'build/publicsuffixlist.json' before falling
+// back to parsing the raw list.
+fs.mkdirSync('./build', { recursive: true });
+const selfie = pslInit().toSelfie();
+fs.writeFileSync('./build/publicsuffixlist.json', JSON.stringify(selfie));
diff --git a/platform/nodejs/index.js b/platform/nodejs/index.js
new file mode 100644
index 0000000..1d39a7d
--- /dev/null
+++ b/platform/nodejs/index.js
@@ -0,0 +1,281 @@
+/*******************************************************************************
+
+ uBlock Origin - a comprehensive, efficient content blocker
+ Copyright (C) 2014-present Raymond Hill
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+ Home: https://github.com/gorhill/uBlock
+*/
+
+/* globals WebAssembly */
+
+'use strict';
+
+/******************************************************************************/
+
+import { createRequire } from 'module';
+
+import { readFileSync } from 'fs';
+import { dirname, resolve } from 'path';
+import { domainToASCII, fileURLToPath } from 'url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+import publicSuffixList from './lib/publicsuffixlist/publicsuffixlist.js';
+
+import snfe from './js/static-net-filtering.js';
+import { FilteringContext } from './js/filtering-context.js';
+import { LineIterator } from './js/text-utils.js';
+import * as sfp from './js/static-filtering-parser.js';
+
+import {
+ CompiledListReader,
+ CompiledListWriter,
+} from './js/static-filtering-io.js';
+
+/******************************************************************************/
+
+// Read the UTF-8 file at `path`, resolved against this module's directory,
+// and return its content parsed as JSON. Read and parse errors are not caught
+// here.
+function loadJSON(path) {
+    const absolutePath = resolve(__dirname, path);
+    const text = readFileSync(absolutePath, 'utf8');
+    return JSON.parse(text);
+}
+
+/******************************************************************************/
+
+// Ask both the public suffix list and the static network filtering engine to
+// enable their WebAssembly code path.
+//
+// Resolves to true only when every enableWASM() call resolved to true. Any
+// error is logged to the console and reported as false.
+async function enableWASM() {
+    // The binary of a module is loaded from `${path}.wasm.json`, converted
+    // into bytes, then compiled.
+    const fetchWasmModule = path => {
+        const require = createRequire(import.meta.url); // jshint ignore:line
+        const bytes = new Uint8Array(require(`${path}.wasm.json`));
+        return WebAssembly.compile(bytes);
+    };
+    let outcomes;
+    try {
+        outcomes = await Promise.all([
+            publicSuffixList.enableWASM(fetchWasmModule, './lib/publicsuffixlist/wasm/'),
+            snfe.enableWASM(fetchWasmModule, './js/wasm/'),
+        ]);
+    } catch(reason) {
+        console.log(reason);
+        return false;
+    }
+    return outcomes.every(outcome => outcome === true);
+}
+
+/******************************************************************************/
+
+// Populate the public suffix list and return it.
+//
+// Sources are tried in this order:
+//   1. `raw`, when it is a non-blank string;
+//   2. the serialized version stored in 'build/publicsuffixlist.json';
+//   3. the raw list shipped under './assets/thirdparties/'.
+//
+// Returns the publicSuffixList object, or undefined when the shipped raw list
+// is blank.
+function pslInit(raw) {
+    const isUsableText = text => typeof text === 'string' && text.trim() !== '';
+
+    if ( isUsableText(raw) ) {
+        publicSuffixList.parse(raw, domainToASCII);
+        return publicSuffixList;
+    }
+
+    // Use serialized version if available
+    let selfie = null;
+    try {
+        // Use loadJSON() because require() would keep the string in memory.
+        selfie = loadJSON('build/publicsuffixlist.json');
+    } catch (error) {
+        // A missing file is expected while the package is being built, hence
+        // the error is reported only outside of the 'build' npm script.
+        const isBuilding = process.env.npm_lifecycle_event === 'build';
+        if ( isBuilding === false ) {
+            console.error(error);
+        }
+    }
+    if ( selfie !== null ) {
+        publicSuffixList.fromSelfie(selfie);
+        return publicSuffixList;
+    }
+
+    const listPath = resolve(
+        __dirname,
+        './assets/thirdparties/publicsuffix.org/list/effective_tld_names.dat'
+    );
+    const text = readFileSync(listPath, 'utf8');
+    if ( isUsableText(text) === false ) {
+        console.error('Unable to populate public suffix list');
+        return;
+    }
+    publicSuffixList.parse(text, domainToASCII);
+    return publicSuffixList;
+}
+
+/******************************************************************************/
+
+// Compile the raw text of a filter list into its compiled string form.
+//
+// Parameters:
+//   { name, raw }   The list: `raw` is the text to compile; `name`, when
+//                   truthy, is stored as the 'name' property of the writer.
+//   compiler        Object whose compile(parser, writer) returns a truthy
+//                   value on success, and which reports a failure through
+//                   its `error` property.
+//   writer          Receives the compiled filters.
+//   options.events  Optional array; a { type: 'error', text } entry is
+//                   appended for each filter the compiler rejects with an
+//                   error.
+//
+// Returns the result of writer.toString().
+function compileList({ name, raw }, compiler, writer, options = {}) {
+    const lineIter = new LineIterator(raw);
+    const events = Array.isArray(options.events) ? options.events : undefined;
+
+    if ( name ) {
+        writer.properties.set('name', name);
+    }
+
+    const parser = new sfp.AstFilterParser({
+        maxTokenLength: snfe.MAX_TOKEN_LENGTH,
+    });
+
+    while ( lineIter.eot() === false ) {
+        let line = lineIter.next();
+        // A line ending with ' \' is merged with the following line, but only
+        // when that following line starts with four spaces. The 4-character
+        // lookahead has to be compared against a 4-character string, or else
+        // the comparison never matches and continuation lines are never
+        // merged.
+        // NOTE(review): assumes peek(4) returns the next four characters --
+        // confirm against LineIterator.
+        while ( line.endsWith(' \\') ) {
+            if ( lineIter.peek(4) !== '    ' ) { break; }
+            line = line.slice(0, -2).trim() + lineIter.next().trim();
+        }
+        parser.parse(line);
+        // Only network filters are compiled, anything else is skipped.
+        if ( parser.isFilter() === false ) { continue; }
+        if ( parser.isNetworkFilter() === false ) { continue; }
+        if ( compiler.compile(parser, writer) ) { continue; }
+        if ( compiler.error !== undefined && events !== undefined ) {
+            events.push({
+                type: 'error',
+                text: compiler.error
+            });
+        }
+    }
+
+    return writer.toString();
+}
+
+/******************************************************************************/
+
+// Replace the content of the filtering engine with the given filter lists.
+//
+// Parameters:
+//   lists    Array of list objects, or of promises resolving to list
+//            objects. A list exposing a non-empty string through its
+//            `compiled` property is used as is, otherwise the list is
+//            compiled with compileList().
+//   options  Forwarded to compileList().
+//
+// All existing filters are removed first, including when `lists` is empty or
+// not an array, in which case nothing else is done.
+//
+// Rejects with an Error when a previous call is still pending. A rejection or
+// exception from any of the lists is propagated to the caller.
+async function useLists(lists, options = {}) {
+    if ( useLists.promise !== null ) {
+        throw new Error('Pending useLists() operation');
+    }
+
+    // Remove all filters
+    snfe.reset();
+
+    if ( Array.isArray(lists) === false || lists.length === 0 ) {
+        return;
+    }
+
+    // Created on demand: no compiler is needed when all the lists are
+    // already compiled.
+    let compiler = null;
+
+    const consumeList = list => {
+        let { compiled } = list;
+        if ( typeof compiled !== 'string' || compiled === '' ) {
+            const writer = new CompiledListWriter();
+            if ( compiler === null ) {
+                compiler = snfe.createCompiler();
+            }
+            compiled = compileList(list, compiler, writer, options);
+        }
+        snfe.fromCompiled(new CompiledListReader(compiled));
+    };
+
+    // Populate filtering engine with resolved filter lists
+    const promises = [];
+    for ( const list of lists ) {
+        const promise = list instanceof Promise ? list : Promise.resolve(list);
+        promises.push(promise.then(list => consumeList(list)));
+    }
+
+    useLists.promise = Promise.all(promises);
+    try {
+        await useLists.promise;
+    } finally {
+        // Clear the pending marker whatever the outcome, otherwise a single
+        // failed list would cause all subsequent calls to be rejected as
+        // "pending".
+        useLists.promise = null; // eslint-disable-line require-atomic-updates
+    }
+
+    // Commit changes
+    snfe.freeze();
+    snfe.optimize();
+}
+
+// Promise of the useLists() operation in progress, null when there is none.
+useLists.promise = null;
+
+/******************************************************************************/
+
+// Filtering context shared by all requests: it is re-populated from the
+// request details before each call into the engine.
+const fctx = new FilteringContext();
+
+// The currently existing StaticNetFilteringEngine instance, null when none.
+let snfeProxyInstance = null;
+
+// Proxy to the static network filtering engine ("SNFE"). Only one instance
+// can exist at any given time: obtain it with the static create() method, and
+// dispose of it with the static release() method.
+class StaticNetFilteringEngine {
+    // Throws when an instance already exists.
+    constructor() {
+        if ( snfeProxyInstance !== null ) {
+            throw new Error('Only a single instance is supported.');
+        }
+        snfeProxyInstance = this;
+    }
+
+    // Replace all the filters in the engine with those from `lists`.
+    // `options` is forwarded as is to the module-level useLists(), e.g. to
+    // collect compilation errors through `options.events`.
+    useLists(lists, options = {}) {
+        return useLists(lists, options);
+    }
+
+    // Match the request described by `details` against the filters, and
+    // return the result of the engine's matchRequest().
+    matchRequest(details) {
+        return snfe.matchRequest(fctx.fromDetails(details));
+    }
+
+    // Same as matchRequest(), but delegating to the engine's
+    // matchAndFetchModifiers() for the given `modifier`.
+    matchAndFetchModifiers(details, modifier) {
+        return snfe.matchAndFetchModifiers(fctx.fromDetails(details), modifier);
+    }
+
+    // NOTE(review): unlike the other request methods, `details` is passed as
+    // is to the engine rather than through the filtering context -- confirm
+    // this is intended.
+    hasQuery(details) {
+        return snfe.hasQuery(details);
+    }
+
+    // Returns undefined when the engine's filterQuery() returns undefined,
+    // otherwise an object with the resulting `redirectURL` and the matching
+    // `directives`.
+    filterQuery(details) {
+        const directives = snfe.filterQuery(fctx.fromDetails(details));
+        if ( directives === undefined ) { return; }
+        return { redirectURL: fctx.redirectURL, directives };
+    }
+
+    isBlockImportant() {
+        return snfe.isBlockImportant();
+    }
+
+    toLogData() {
+        return snfe.toLogData();
+    }
+
+    createCompiler(parser) {
+        return snfe.createCompiler(parser);
+    }
+
+    // Same arguments and return value as the module-level compileList().
+    compileList(...args) {
+        return compileList(...args);
+    }
+
+    serialize() {
+        return snfe.serialize();
+    }
+
+    // Counterpart of serialize(); delegates to the engine's unserialize().
+    deserialize(serialized) {
+        return snfe.unserialize(serialized);
+    }
+
+    // Create the one instance of the engine. The public suffix list is
+    // initialized unless `noPSL` is true.
+    //
+    // Rejects when an instance already exists, or when the public suffix
+    // list can't be initialized.
+    static async create({ noPSL = false } = {}) {
+        const instance = new StaticNetFilteringEngine();
+
+        try {
+            if ( noPSL !== true && !pslInit() ) {
+                throw new Error('Failed to initialize public suffix list.');
+            }
+        } catch (error) {
+            // Do not leave the failed instance registered, otherwise every
+            // later call to create() would fail with "Only a single instance
+            // is supported."
+            snfeProxyInstance = null;
+            throw error;
+        }
+
+        return instance;
+    }
+
+    // Forget the current instance, if any, and remove all the filters from
+    // the engine.
+    static async release() {
+        if ( snfeProxyInstance === null ) { return; }
+        snfeProxyInstance = null;
+        await useLists([]);
+    }
+}
+
+/******************************************************************************/
+
+// rollup.js needs module.exports to be set back to the local exports object.
+// This is because some of the code (e.g. publicsuffixlist.js) sets
+// module.exports. Once all included files are written like ES modules, using
+// export statements, this should no longer be necessary.
+// The typeof guards make this a no-op wherever `module` or `exports` is not
+// defined.
+if ( typeof module !== 'undefined' && typeof exports !== 'undefined' ) {
+ module.exports = exports;
+}
+
+export {
+ enableWASM,
+ pslInit,
+ StaticNetFilteringEngine,
+};