diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 05:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 05:47:55 +0000 |
commit | 31d6ff6f931696850c348007241195ab3b2eddc7 (patch) | |
tree | 615cb1c57ce9f6611bad93326b9105098f379609 /src/lib/publicsuffixlist/publicsuffixlist.js | |
parent | Initial commit. (diff) | |
download | ublock-origin-31d6ff6f931696850c348007241195ab3b2eddc7.tar.xz ublock-origin-31d6ff6f931696850c348007241195ab3b2eddc7.zip |
Adding upstream version 1.55.0+dfsg.upstream/1.55.0+dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/lib/publicsuffixlist/publicsuffixlist.js')
-rw-r--r-- | src/lib/publicsuffixlist/publicsuffixlist.js | 641 |
1 files changed, 641 insertions, 0 deletions
diff --git a/src/lib/publicsuffixlist/publicsuffixlist.js b/src/lib/publicsuffixlist/publicsuffixlist.js new file mode 100644 index 0000000..6483c89 --- /dev/null +++ b/src/lib/publicsuffixlist/publicsuffixlist.js @@ -0,0 +1,641 @@ +/******************************************************************************* + + publicsuffixlist.js - an efficient javascript implementation to deal with + Mozilla Foundation's Public Suffix List <http://publicsuffix.org/list/> + + Copyright (C) 2013-present Raymond Hill + + License: pick the one which suits you: + GPL v3 see <https://www.gnu.org/licenses/gpl.html> + APL v2 see <http://www.apache.org/licenses/LICENSE-2.0> + +*/ + +/*! Home: https://github.com/gorhill/publicsuffixlist.js -- GPLv3 APLv2 */ + +/* globals WebAssembly, exports:true, module */ + +'use strict'; + +/******************************************************************************* + + Reference: + https://publicsuffix.org/list/ + + Excerpt: + + > Algorithm + > + > 1. Match domain against all rules and take note of the matching ones. + > 2. If no rules match, the prevailing rule is "*". + > 3. If more than one rule matches, the prevailing rule is the one which + is an exception rule. + > 4. If there is no matching exception rule, the prevailing rule is the + one with the most labels. + > 5. If the prevailing rule is a exception rule, modify it by removing + the leftmost label. + > 6. The public suffix is the set of labels from the domain which match + the labels of the prevailing rule, using the matching algorithm above. + > 7. The registered or registrable domain is the public suffix plus one + additional label. + +*/ + +/******************************************************************************/ + +export default (function() { +// >>>>>>>> start of anonymous namespace + +/******************************************************************************* + + Tree encoding in array buffer: + + Node: + + u8: length of char data + + u8: flags => bit 0: is_publicsuffix, bit 1: is_exception + + u16: length of array of children + + u32: char data or offset to char data + + u32: offset to array of children + = 12 bytes + + More bits in flags could be used; for example: + - to distinguish private suffixes + +*/ + + // i32 / i8 +const HOSTNAME_SLOT = 0; // jshint ignore:line +const LABEL_INDICES_SLOT = 256; // -- / 256 (256/2 => 128 labels max) +const RULES_PTR_SLOT = 100; // 100 / 400 (400-256=144 => 144>128) +const SUFFIX_NOT_FOUND_SLOT = 399; // -- / 399 (safe, see above) +const CHARDATA_PTR_SLOT = 101; // 101 / 404 +const EMPTY_STRING = ''; +const SELFIE_MAGIC = 2; + +let wasmMemory; +let pslBuffer32; +let pslBuffer8; +let pslByteLength = 0; +let hostnameArg = EMPTY_STRING; + +/******************************************************************************/ + +const fireChangedEvent = function() { + if ( typeof window !== 'object' ) { return; } + if ( window instanceof Object === false ) { return; } + if ( window.dispatchEvent instanceof Function === false ) { return; } + if ( window.CustomEvent instanceof Function === false ) { return; } + window.dispatchEvent(new CustomEvent('publicSuffixListChanged')); +}; + +/******************************************************************************/ + +const allocateBuffers = function(byteLength) { + pslByteLength = byteLength + 3 & ~3; + if ( + pslBuffer32 !== undefined && + pslBuffer32.byteLength >= pslByteLength + ) { + return; + } + if ( wasmMemory !== undefined ) { + const newPageCount = pslByteLength + 0xFFFF >>> 16; + const curPageCount = wasmMemory.buffer.byteLength >>> 16; + const delta = newPageCount - curPageCount; + if ( delta > 0 ) { + wasmMemory.grow(delta); + pslBuffer32 = new Uint32Array(wasmMemory.buffer); + pslBuffer8 = new Uint8Array(wasmMemory.buffer); + } + } else { + pslBuffer8 = new Uint8Array(pslByteLength); + pslBuffer32 = new Uint32Array(pslBuffer8.buffer); + } + hostnameArg = EMPTY_STRING; + pslBuffer8[LABEL_INDICES_SLOT] = 0; +}; + +/******************************************************************************/ + +// Parse and set a UTF-8 text-based suffix list. Format is same as found at: +// http://publicsuffix.org/list/ +// +// `toAscii` is a converter from unicode to punycode. Required since the +// Public Suffix List contains unicode characters. +// Suggestion: use <https://github.com/bestiejs/punycode.js> + +const parse = function(text, toAscii) { + // Use short property names for better minifying results + const rootRule = { + l: EMPTY_STRING, // l => label + f: 0, // f => flags + c: undefined // c => children + }; + + // Tree building + { + const compareLabels = function(a, b) { + let n = a.length; + let d = n - b.length; + if ( d !== 0 ) { return d; } + for ( let i = 0; i < n; i++ ) { + d = a.charCodeAt(i) - b.charCodeAt(i); + if ( d !== 0 ) { return d; } + } + return 0; + }; + + const addToTree = function(rule, exception) { + let node = rootRule; + let end = rule.length; + while ( end > 0 ) { + const beg = rule.lastIndexOf('.', end - 1); + const label = rule.slice(beg + 1, end); + end = beg; + + if ( Array.isArray(node.c) === false ) { + const child = { l: label, f: 0, c: undefined }; + node.c = [ child ]; + node = child; + continue; + } + + let left = 0; + let right = node.c.length; + while ( left < right ) { + const i = left + right >>> 1; + const d = compareLabels(label, node.c[i].l); + if ( d < 0 ) { + right = i; + if ( right === left ) { + const child = { + l: label, + f: 0, + c: undefined + }; + node.c.splice(left, 0, child); + node = child; + break; + } + continue; + } + if ( d > 0 ) { + left = i + 1; + if ( left === right ) { + const child = { + l: label, + f: 0, + c: undefined + }; + node.c.splice(right, 0, child); + node = child; + break; + } + continue; + } + /* d === 0 */ + node = node.c[i]; + break; + } + } + node.f |= 0b01; + if ( exception ) { + node.f |= 0b10; + } + }; + + // 2. If no rules match, the prevailing rule is "*". + addToTree('*', false); + + const mustPunycode = /[^a-z0-9.-]/; + const textEnd = text.length; + let lineBeg = 0; + + while ( lineBeg < textEnd ) { + let lineEnd = text.indexOf('\n', lineBeg); + if ( lineEnd === -1 ) { + lineEnd = text.indexOf('\r', lineBeg); + if ( lineEnd === -1 ) { + lineEnd = textEnd; + } + } + let line = text.slice(lineBeg, lineEnd).trim(); + lineBeg = lineEnd + 1; + + // Ignore comments + const pos = line.indexOf('//'); + if ( pos !== -1 ) { + line = line.slice(0, pos); + } + + // Ignore surrounding whitespaces + line = line.trim(); + if ( line.length === 0 ) { continue; } + + const exception = line.charCodeAt(0) === 0x21 /* '!' */; + if ( exception ) { + line = line.slice(1); + } + + if ( mustPunycode.test(line) ) { + line = toAscii(line.toLowerCase()); + } + + addToTree(line, exception); + } + } + + { + const labelToOffsetMap = new Map(); + const treeData = []; + const charData = []; + + const allocate = function(n) { + const ibuf = treeData.length; + for ( let i = 0; i < n; i++ ) { + treeData.push(0); + } + return ibuf; + }; + + const storeNode = function(ibuf, node) { + const nChars = node.l.length; + const nChildren = node.c !== undefined + ? node.c.length + : 0; + treeData[ibuf+0] = nChildren << 16 | node.f << 8 | nChars; + // char data + if ( nChars <= 4 ) { + let v = 0; + if ( nChars > 0 ) { + v |= node.l.charCodeAt(0); + if ( nChars > 1 ) { + v |= node.l.charCodeAt(1) << 8; + if ( nChars > 2 ) { + v |= node.l.charCodeAt(2) << 16; + if ( nChars > 3 ) { + v |= node.l.charCodeAt(3) << 24; + } + } + } + } + treeData[ibuf+1] = v; + } else { + let offset = labelToOffsetMap.get(node.l); + if ( offset === undefined ) { + offset = charData.length; + for ( let i = 0; i < nChars; i++ ) { + charData.push(node.l.charCodeAt(i)); + } + labelToOffsetMap.set(node.l, offset); + } + treeData[ibuf+1] = offset; + } + // child nodes + if ( Array.isArray(node.c) === false ) { + treeData[ibuf+2] = 0; + return; + } + + const iarray = allocate(nChildren * 3); + treeData[ibuf+2] = iarray; + for ( let i = 0; i < nChildren; i++ ) { + storeNode(iarray + i * 3, node.c[i]); + } + }; + + // First 512 bytes are reserved for internal use + allocate(512 >> 2); + + const iRootRule = allocate(3); + storeNode(iRootRule, rootRule); + treeData[RULES_PTR_SLOT] = iRootRule; + + const iCharData = treeData.length << 2; + treeData[CHARDATA_PTR_SLOT] = iCharData; + + const byteLength = (treeData.length << 2) + (charData.length + 3 & ~3); + allocateBuffers(byteLength); + pslBuffer32.set(treeData); + pslBuffer8.set(charData, treeData.length << 2); + } + + fireChangedEvent(); +}; + +/******************************************************************************/ + +const setHostnameArg = function(hostname) { + const buf = pslBuffer8; + if ( hostname === hostnameArg ) { return buf[LABEL_INDICES_SLOT]; } + if ( hostname === null || hostname.length === 0 ) { + hostnameArg = EMPTY_STRING; + return (buf[LABEL_INDICES_SLOT] = 0); + } + hostname = hostname.toLowerCase(); + hostnameArg = hostname; + let n = hostname.length; + if ( n > 255 ) { n = 255; } + buf[LABEL_INDICES_SLOT] = n; + let i = n; + let j = LABEL_INDICES_SLOT + 1; + while ( i-- ) { + const c = hostname.charCodeAt(i); + if ( c === 0x2E /* '.' */ ) { + buf[j+0] = i + 1; + buf[j+1] = i; + j += 2; + } + buf[i] = c; + } + buf[j] = 0; + return n; +}; + +/******************************************************************************/ + +// Returns an offset to the start of the public suffix. +// +// WASM-able, because no information outside the buffer content is required. + +const getPublicSuffixPosJS = function() { + const buf8 = pslBuffer8; + const buf32 = pslBuffer32; + const iCharData = buf32[CHARDATA_PTR_SLOT]; + + let iNode = pslBuffer32[RULES_PTR_SLOT]; + let cursorPos = -1; + let iLabel = LABEL_INDICES_SLOT; + + // Label-lookup loop + for (;;) { + // Extract label indices + const labelBeg = buf8[iLabel+1]; + const labelLen = buf8[iLabel+0] - labelBeg; + // Match-lookup loop: binary search + let r = buf32[iNode+0] >>> 16; + if ( r === 0 ) { break; } + const iCandidates = buf32[iNode+2]; + let l = 0; + let iFound = 0; + while ( l < r ) { + const iCandidate = l + r >>> 1; + const iCandidateNode = iCandidates + iCandidate + (iCandidate << 1); + const candidateLen = buf32[iCandidateNode+0] & 0x000000FF; + let d = labelLen - candidateLen; + if ( d === 0 ) { + const iCandidateChar = candidateLen <= 4 + ? iCandidateNode + 1 << 2 + : iCharData + buf32[iCandidateNode+1]; + for ( let i = 0; i < labelLen; i++ ) { + d = buf8[labelBeg+i] - buf8[iCandidateChar+i]; + if ( d !== 0 ) { break; } + } + } + if ( d < 0 ) { + r = iCandidate; + } else if ( d > 0 ) { + l = iCandidate + 1; + } else /* if ( d === 0 ) */ { + iFound = iCandidateNode; + break; + } + } + // 2. If no rules match, the prevailing rule is "*". + if ( iFound === 0 ) { + if ( buf32[iCandidates + 1] !== 0x2A /* '*' */ ) { break; } + buf8[SUFFIX_NOT_FOUND_SLOT] = 1; + iFound = iCandidates; + } + iNode = iFound; + // 5. If the prevailing rule is a exception rule, modify it by + // removing the leftmost label. + if ( (buf32[iNode+0] & 0x00000200) !== 0 ) { + if ( iLabel > LABEL_INDICES_SLOT ) { + return iLabel - 2; + } + break; + } + if ( (buf32[iNode+0] & 0x00000100) !== 0 ) { + cursorPos = iLabel; + } + if ( labelBeg === 0 ) { break; } + iLabel += 2; + } + + return cursorPos; +}; + +let getPublicSuffixPosWASM; +let getPublicSuffixPos = getPublicSuffixPosJS; + +/******************************************************************************/ + +const getPublicSuffix = function(hostname) { + if ( pslBuffer32 === undefined ) { return EMPTY_STRING; } + + const hostnameLen = setHostnameArg(hostname); + const buf8 = pslBuffer8; + if ( hostnameLen === 0 || buf8[0] === 0x2E /* '.' */ ) { + return EMPTY_STRING; + } + + const cursorPos = getPublicSuffixPos(); + if ( cursorPos === -1 ) { + return EMPTY_STRING; + } + + const beg = buf8[cursorPos + 1]; + return beg === 0 ? hostnameArg : hostnameArg.slice(beg); +}; + +/******************************************************************************/ + +const getDomain = function(hostname) { + if ( pslBuffer32 === undefined ) { return EMPTY_STRING; } + + const hostnameLen = setHostnameArg(hostname); + const buf8 = pslBuffer8; + if ( hostnameLen === 0 || buf8[0] === 0x2E /* '.' */ ) { + return EMPTY_STRING; + } + + const cursorPos = getPublicSuffixPos(); + if ( cursorPos === -1 || buf8[cursorPos + 1] === 0 ) { + return EMPTY_STRING; + } + + // 7. The registered or registrable domain is the public suffix plus one + // additional label. + const beg = buf8[cursorPos + 3]; + return beg === 0 ? hostnameArg : hostnameArg.slice(beg); +}; + +/******************************************************************************/ + +const suffixInPSL = function(hostname) { + if ( pslBuffer32 === undefined ) { return false; } + + const hostnameLen = setHostnameArg(hostname); + const buf8 = pslBuffer8; + if ( hostnameLen === 0 || buf8[0] === 0x2E /* '.' */ ) { + return false; + } + + buf8[SUFFIX_NOT_FOUND_SLOT] = 0; + const cursorPos = getPublicSuffixPos(); + return cursorPos !== -1 && + buf8[cursorPos + 1] === 0 && + buf8[SUFFIX_NOT_FOUND_SLOT] !== 1; +}; + +/******************************************************************************/ + +const toSelfie = function(encoder) { + if ( pslBuffer8 === undefined ) { return ''; } + if ( encoder instanceof Object ) { + const bufferStr = encoder.encode(pslBuffer8.buffer, pslByteLength); + return `${SELFIE_MAGIC}\t${bufferStr}`; + } + return { + magic: SELFIE_MAGIC, + buf32: Array.from( + new Uint32Array(pslBuffer8.buffer, 0, pslByteLength >>> 2) + ), + }; +}; + +const fromSelfie = function(selfie, decoder) { + let byteLength = 0; + if ( + typeof selfie === 'string' && + selfie.length !== 0 && + decoder instanceof Object + ) { + const pos = selfie.indexOf('\t'); + if ( pos === -1 || selfie.slice(0, pos) !== `${SELFIE_MAGIC}` ) { + return false; + } + const bufferStr = selfie.slice(pos + 1); + byteLength = decoder.decodeSize(bufferStr); + if ( byteLength === 0 ) { return false; } + allocateBuffers(byteLength); + decoder.decode(bufferStr, pslBuffer8.buffer); + } else if ( + selfie instanceof Object && + selfie.magic === SELFIE_MAGIC && + Array.isArray(selfie.buf32) + ) { + byteLength = selfie.buf32.length << 2; + allocateBuffers(byteLength); + pslBuffer32.set(selfie.buf32); + } else { + return false; + } + + // Important! + hostnameArg = EMPTY_STRING; + pslBuffer8[LABEL_INDICES_SLOT] = 0; + + fireChangedEvent(); + + return true; +}; + +/******************************************************************************/ + +// The WASM module is entirely optional, the JS implementation will be +// used should the WASM module be unavailable for whatever reason. + +const enableWASM = (( ) => { + let wasmPromise; + + const getWasmInstance = async function(wasmModuleFetcher, path) { + if ( typeof WebAssembly !== 'object' ) { return false; } + // The wasm code will work only if CPU is natively little-endian, + // as we use native uint32 array in our js code. + const uint32s = new Uint32Array(1); + const uint8s = new Uint8Array(uint32s.buffer); + uint32s[0] = 1; + if ( uint8s[0] !== 1 ) { return false; } + + try { + const module = await wasmModuleFetcher(`${path}publicsuffixlist`); + if ( module instanceof WebAssembly.Module === false ) { + return false; + } + const pageCount = pslBuffer8 !== undefined + ? pslBuffer8.byteLength + 0xFFFF >>> 16 + : 1; + const memory = new WebAssembly.Memory({ initial: pageCount }); + const instance = await WebAssembly.instantiate(module, { + imports: { memory } + }); + if ( instance instanceof WebAssembly.Instance === false ) { + return false; + } + const curPageCount = memory.buffer.byteLength >>> 16; + const newPageCount = pslBuffer8 !== undefined + ? pslBuffer8.byteLength + 0xFFFF >>> 16 + : 0; + if ( newPageCount > curPageCount ) { + memory.grow(newPageCount - curPageCount); + } + if ( pslBuffer32 !== undefined ) { + const buf8 = new Uint8Array(memory.buffer); + const buf32 = new Uint32Array(memory.buffer); + buf32.set(pslBuffer32); + pslBuffer8 = buf8; + pslBuffer32 = buf32; + } + wasmMemory = memory; + getPublicSuffixPosWASM = instance.exports.getPublicSuffixPos; + getPublicSuffixPos = getPublicSuffixPosWASM; + return true; + } catch(reason) { + console.info(reason); + } + return false; + }; + + return async function(wasmModuleFetcher, path) { + if ( getPublicSuffixPosWASM instanceof Function ) { return true; } + if ( wasmPromise instanceof Promise === false ) { + wasmPromise = getWasmInstance(wasmModuleFetcher, path); + } + return wasmPromise; + }; +})(); + +const disableWASM = function() { + if ( getPublicSuffixPosWASM instanceof Function ) { + getPublicSuffixPos = getPublicSuffixPosJS; + getPublicSuffixPosWASM = undefined; + } + if ( wasmMemory === undefined ) { return; } + if ( pslBuffer32 !== undefined ) { + const buf8 = new Uint8Array(pslByteLength); + const buf32 = new Uint32Array(buf8.buffer); + buf32.set(pslBuffer32); + pslBuffer8 = buf8; + pslBuffer32 = buf32; + } + wasmMemory = undefined; +}; + +/******************************************************************************/ + +return ({ + version: '2.0', + parse, + getDomain, + suffixInPSL, + getPublicSuffix, + toSelfie, fromSelfie, + disableWASM, enableWASM, +}); + +/******************************************************************************/ + +// <<<<<<<< end of anonymous namespace +})(); |