summaryrefslogtreecommitdiffstats
path: root/src/js/biditrie.js
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 05:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 05:47:55 +0000
commit31d6ff6f931696850c348007241195ab3b2eddc7 (patch)
tree615cb1c57ce9f6611bad93326b9105098f379609 /src/js/biditrie.js
parentInitial commit. (diff)
downloadublock-origin-31d6ff6f931696850c348007241195ab3b2eddc7.tar.xz
ublock-origin-31d6ff6f931696850c348007241195ab3b2eddc7.zip
Adding upstream version 1.55.0+dfsg.upstream/1.55.0+dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/js/biditrie.js')
-rw-r--r--src/js/biditrie.js947
1 files changed, 947 insertions, 0 deletions
diff --git a/src/js/biditrie.js b/src/js/biditrie.js
new file mode 100644
index 0000000..d0f64ee
--- /dev/null
+++ b/src/js/biditrie.js
@@ -0,0 +1,947 @@
+/*******************************************************************************
+
+ uBlock Origin - a comprehensive, efficient content blocker
+ Copyright (C) 2019-present Raymond Hill
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+ Home: https://github.com/gorhill/uBlock
+*/
+
+/* globals WebAssembly, vAPI */
+
+'use strict';
+
+/*******************************************************************************
+
+ A BidiTrieContainer is mostly a large buffer in which distinct but related
+ tries are stored. The memory layout of the buffer is as follow:
+
+ 0-2047: haystack section
+ 2048-2051: number of significant characters in the haystack
+ 2052-2055: offset to start of trie data section (=> trie0)
+ 2056-2059: offset to end of trie data section (=> trie1)
+ 2060-2063: offset to start of character data section (=> char0)
+ 2064-2067: offset to end of character data section (=> char1)
+ 2068: start of trie data section
+
+ +--------------+
+ Normal cell: | And | If "Segment info" matches:
+ (aka CELL) +--------------+ Goto "And"
+ | Or | Else
+ +--------------+ Goto "Or"
+ | Segment info |
+ +--------------+
+
+ +--------------+
+ Boundary cell: | Right And | "Right And" and/or "Left And"
+ (aka BCELL) +--------------+ can be 0 in last-segment condition.
+ | Left And |
+ +--------------+
+ | 0 |
+ +--------------+
+
+ Given following filters and assuming token is "ad" for all of them:
+
+ -images/ad-
+ /google_ad.
+ /images_ad.
+ _images/ad.
+
+ We get the following internal representation:
+
+ +-----------+ +-----------+ +---+
+ | |---->| |---->| 0 |
+ +-----------+ +-----------+ +---+ +-----------+
+ | 0 | +--| | | |---->| 0 |
+ +-----------+ | +-----------+ +---+ +-----------+
+ | ad | | | - | | 0 | | 0 |
+ +-----------+ | +-----------+ +---+ +-----------+
+ | | -images/ |
+ | +-----------+ +---+ +-----------+
+ +->| |---->| 0 |
+ +-----------+ +---+ +-----------+ +-----------+
+ | 0 | | |---->| |---->| 0 |
+ +-----------+ +---+ +-----------+ +-----------+
+ | . | | 0 | +--| | +--| |
+ +-----------+ +---+ | +-----------+ | +-----------+
+ | | _ | | | /google |
+ | +-----------+ | +-----------+
+ | |
+ | | +-----------+
+ | +->| 0 |
+ | +-----------+
+ | | 0 |
+ | +-----------+
+ | | /images |
+ | +-----------+
+ |
+ | +-----------+
+ +->| 0 |
+ +-----------+
+ | 0 |
+ +-----------+
+ | _images/ |
+ +-----------+
+
+*/
+
+const PAGE_SIZE = 65536*2;
+const HAYSTACK_START = 0;
+const HAYSTACK_SIZE = 2048; // i32 / i8
+const HAYSTACK_SIZE_SLOT = HAYSTACK_SIZE >>> 2; // 512 / 2048
+const TRIE0_SLOT = HAYSTACK_SIZE_SLOT + 1; // 513 / 2052
+const TRIE1_SLOT = HAYSTACK_SIZE_SLOT + 2; // 514 / 2056
+const CHAR0_SLOT = HAYSTACK_SIZE_SLOT + 3; // 515 / 2060
+const CHAR1_SLOT = HAYSTACK_SIZE_SLOT + 4; // 516 / 2064
+const RESULT_L_SLOT = HAYSTACK_SIZE_SLOT + 5; // 517 / 2068
+const RESULT_R_SLOT = HAYSTACK_SIZE_SLOT + 6; // 518 / 2072
+const RESULT_IU_SLOT = HAYSTACK_SIZE_SLOT + 7; // 519 / 2076
+const TRIE0_START = HAYSTACK_SIZE_SLOT + 8 << 2; // 2080
+
+const CELL_BYTE_LENGTH = 12;
+const MIN_FREE_CELL_BYTE_LENGTH = CELL_BYTE_LENGTH * 8;
+
+const CELL_AND = 0;
+const CELL_OR = 1;
+const SEGMENT_INFO = 2;
+const BCELL_NEXT_AND = 0;
+const BCELL_ALT_AND = 1;
+const BCELL_EXTRA = 2;
+const BCELL_EXTRA_MAX = 0x00FFFFFF;
+
+const toSegmentInfo = (aL, l, r) => ((r - l) << 24) | (aL + l);
+const roundToPageSize = v => (v + PAGE_SIZE-1) & ~(PAGE_SIZE-1);
+
+
+class BidiTrieContainer {
+
+ constructor(extraHandler) {
+ const len = PAGE_SIZE * 4;
+ this.buf8 = new Uint8Array(len);
+ this.buf32 = new Uint32Array(this.buf8.buffer);
+ this.buf32[TRIE0_SLOT] = TRIE0_START;
+ this.buf32[TRIE1_SLOT] = this.buf32[TRIE0_SLOT];
+ this.buf32[CHAR0_SLOT] = len >>> 1;
+ this.buf32[CHAR1_SLOT] = this.buf32[CHAR0_SLOT];
+ this.haystack = this.buf8.subarray(
+ HAYSTACK_START,
+ HAYSTACK_START + HAYSTACK_SIZE
+ );
+ this.extraHandler = extraHandler;
+ this.textDecoder = null;
+ this.wasmMemory = null;
+
+ this.lastStored = '';
+ this.lastStoredLen = this.lastStoredIndex = 0;
+ }
+
+ //--------------------------------------------------------------------------
+ // Public methods
+ //--------------------------------------------------------------------------
+
+ get haystackLen() {
+ return this.buf32[HAYSTACK_SIZE_SLOT];
+ }
+
+ set haystackLen(v) {
+ this.buf32[HAYSTACK_SIZE_SLOT] = v;
+ }
+
+ reset(details) {
+ if (
+ details instanceof Object &&
+ typeof details.byteLength === 'number' &&
+ typeof details.char0 === 'number'
+ ) {
+ if ( details.byteLength > this.buf8.byteLength ) {
+ this.reallocateBuf(details.byteLength);
+ }
+ this.buf32[CHAR0_SLOT] = details.char0;
+ }
+ this.buf32[TRIE1_SLOT] = this.buf32[TRIE0_SLOT];
+ this.buf32[CHAR1_SLOT] = this.buf32[CHAR0_SLOT];
+
+ this.lastStored = '';
+ this.lastStoredLen = this.lastStoredIndex = 0;
+ }
+
+ createTrie() {
+ // grow buffer if needed
+ if ( (this.buf32[CHAR0_SLOT] - this.buf32[TRIE1_SLOT]) < CELL_BYTE_LENGTH ) {
+ this.growBuf(CELL_BYTE_LENGTH, 0);
+ }
+ const iroot = this.buf32[TRIE1_SLOT] >>> 2;
+ this.buf32[TRIE1_SLOT] += CELL_BYTE_LENGTH;
+ this.buf32[iroot+CELL_OR] = 0;
+ this.buf32[iroot+CELL_AND] = 0;
+ this.buf32[iroot+SEGMENT_INFO] = 0;
+ return iroot;
+ }
+
+ matches(icell, ai) {
+ const buf32 = this.buf32;
+ const buf8 = this.buf8;
+ const char0 = buf32[CHAR0_SLOT];
+ const aR = buf32[HAYSTACK_SIZE_SLOT];
+ let al = ai, x = 0, y = 0;
+ for (;;) {
+ x = buf8[al];
+ al += 1;
+ // find matching segment
+ for (;;) {
+ y = buf32[icell+SEGMENT_INFO];
+ let bl = char0 + (y & 0x00FFFFFF);
+ if ( buf8[bl] === x ) {
+ y = (y >>> 24) - 1;
+ if ( y !== 0 ) {
+ x = al + y;
+ if ( x > aR ) { return 0; }
+ for (;;) {
+ bl += 1;
+ if ( buf8[bl] !== buf8[al] ) { return 0; }
+ al += 1;
+ if ( al === x ) { break; }
+ }
+ }
+ break;
+ }
+ icell = buf32[icell+CELL_OR];
+ if ( icell === 0 ) { return 0; }
+ }
+ // next segment
+ icell = buf32[icell+CELL_AND];
+ x = buf32[icell+BCELL_EXTRA];
+ if ( x <= BCELL_EXTRA_MAX ) {
+ if ( x !== 0 && this.matchesExtra(ai, al, x) !== 0 ) {
+ return 1;
+ }
+ x = buf32[icell+BCELL_ALT_AND];
+ if ( x !== 0 && this.matchesLeft(x, ai, al) !== 0 ) {
+ return 1;
+ }
+ icell = buf32[icell+BCELL_NEXT_AND];
+ if ( icell === 0 ) { return 0; }
+ }
+ if ( al === aR ) { return 0; }
+ }
+ return 0; // eslint-disable-line no-unreachable
+ }
+
+ matchesLeft(icell, ar, r) {
+ const buf32 = this.buf32;
+ const buf8 = this.buf8;
+ const char0 = buf32[CHAR0_SLOT];
+ let x = 0, y = 0;
+ for (;;) {
+ if ( ar === 0 ) { return 0; }
+ ar -= 1;
+ x = buf8[ar];
+ // find first segment with a first-character match
+ for (;;) {
+ y = buf32[icell+SEGMENT_INFO];
+ let br = char0 + (y & 0x00FFFFFF);
+ y = (y >>> 24) - 1;
+ br += y;
+ if ( buf8[br] === x ) { // all characters in segment must match
+ if ( y !== 0 ) {
+ x = ar - y;
+ if ( x < 0 ) { return 0; }
+ for (;;) {
+ ar -= 1; br -= 1;
+ if ( buf8[ar] !== buf8[br] ) { return 0; }
+ if ( ar === x ) { break; }
+ }
+ }
+ break;
+ }
+ icell = buf32[icell+CELL_OR];
+ if ( icell === 0 ) { return 0; }
+ }
+ // next segment
+ icell = buf32[icell+CELL_AND];
+ x = buf32[icell+BCELL_EXTRA];
+ if ( x <= BCELL_EXTRA_MAX ) {
+ if ( x !== 0 && this.matchesExtra(ar, r, x) !== 0 ) {
+ return 1;
+ }
+ icell = buf32[icell+BCELL_NEXT_AND];
+ if ( icell === 0 ) { return 0; }
+ }
+ }
+ return 0; // eslint-disable-line no-unreachable
+ }
+
+ matchesExtra(l, r, ix) {
+ let iu = 0;
+ if ( ix !== 1 ) {
+ iu = this.extraHandler(l, r, ix);
+ if ( iu === 0 ) { return 0; }
+ } else {
+ iu = -1;
+ }
+ this.buf32[RESULT_IU_SLOT] = iu;
+ this.buf32[RESULT_L_SLOT] = l;
+ this.buf32[RESULT_R_SLOT] = r;
+ return 1;
+ }
+
+ get $l() { return this.buf32[RESULT_L_SLOT] | 0; }
+ get $r() { return this.buf32[RESULT_R_SLOT] | 0; }
+ get $iu() { return this.buf32[RESULT_IU_SLOT] | 0; }
+
+ add(iroot, aL0, n, pivot = 0) {
+ const aR = n;
+ if ( aR === 0 ) { return 0; }
+ // Grow buffer if needed. The characters are already in our character
+ // data buffer, so we do not need to grow character data buffer.
+ if (
+ (this.buf32[CHAR0_SLOT] - this.buf32[TRIE1_SLOT]) <
+ MIN_FREE_CELL_BYTE_LENGTH
+ ) {
+ this.growBuf(MIN_FREE_CELL_BYTE_LENGTH, 0);
+ }
+ const buf32 = this.buf32;
+ const char0 = buf32[CHAR0_SLOT];
+ let icell = iroot;
+ let aL = char0 + aL0;
+ // special case: first node in trie
+ if ( buf32[icell+SEGMENT_INFO] === 0 ) {
+ buf32[icell+SEGMENT_INFO] = toSegmentInfo(aL0, pivot, aR);
+ return this.addLeft(icell, aL0, pivot);
+ }
+ const buf8 = this.buf8;
+ let al = pivot;
+ let inext;
+ // find a matching cell: move down
+ for (;;) {
+ const binfo = buf32[icell+SEGMENT_INFO];
+ // length of segment
+ const bR = binfo >>> 24;
+ // skip boundary cells
+ if ( bR === 0 ) {
+ icell = buf32[icell+BCELL_NEXT_AND];
+ continue;
+ }
+ let bl = char0 + (binfo & 0x00FFFFFF);
+ // if first character is no match, move to next descendant
+ if ( buf8[bl] !== buf8[aL+al] ) {
+ inext = buf32[icell+CELL_OR];
+ if ( inext === 0 ) {
+ inext = this.addCell(0, 0, toSegmentInfo(aL0, al, aR));
+ buf32[icell+CELL_OR] = inext;
+ return this.addLeft(inext, aL0, pivot);
+ }
+ icell = inext;
+ continue;
+ }
+ // 1st character was tested
+ let bi = 1;
+ al += 1;
+ // find 1st mismatch in rest of segment
+ if ( bR !== 1 ) {
+ for (;;) {
+ if ( bi === bR ) { break; }
+ if ( al === aR ) { break; }
+ if ( buf8[bl+bi] !== buf8[aL+al] ) { break; }
+ bi += 1;
+ al += 1;
+ }
+ }
+ // all segment characters matched
+ if ( bi === bR ) {
+ // needle remainder: no
+ if ( al === aR ) {
+ return this.addLeft(icell, aL0, pivot);
+ }
+ // needle remainder: yes
+ inext = buf32[icell+CELL_AND];
+ if ( buf32[inext+CELL_AND] !== 0 ) {
+ icell = inext;
+ continue;
+ }
+ // add needle remainder
+ icell = this.addCell(0, 0, toSegmentInfo(aL0, al, aR));
+ buf32[inext+CELL_AND] = icell;
+ return this.addLeft(icell, aL0, pivot);
+ }
+ // some characters matched
+ // split current segment
+ bl -= char0;
+ buf32[icell+SEGMENT_INFO] = bi << 24 | bl;
+ inext = this.addCell(
+ buf32[icell+CELL_AND], 0, bR - bi << 24 | bl + bi
+ );
+ buf32[icell+CELL_AND] = inext;
+ // needle remainder: no = need boundary cell
+ if ( al === aR ) {
+ return this.addLeft(icell, aL0, pivot);
+ }
+ // needle remainder: yes = need new cell for remaining characters
+ icell = this.addCell(0, 0, toSegmentInfo(aL0, al, aR));
+ buf32[inext+CELL_OR] = icell;
+ return this.addLeft(icell, aL0, pivot);
+ }
+ }
+
+ addLeft(icell, aL0, pivot) {
+ const buf32 = this.buf32;
+ const char0 = buf32[CHAR0_SLOT];
+ let aL = aL0 + char0;
+ // fetch boundary cell
+ let iboundary = buf32[icell+CELL_AND];
+ // add boundary cell if none exist
+ if (
+ iboundary === 0 ||
+ buf32[iboundary+SEGMENT_INFO] > BCELL_EXTRA_MAX
+ ) {
+ const inext = iboundary;
+ iboundary = this.allocateCell();
+ buf32[icell+CELL_AND] = iboundary;
+ buf32[iboundary+BCELL_NEXT_AND] = inext;
+ if ( pivot === 0 ) { return iboundary; }
+ }
+ // shortest match with no extra conditions will always win
+ if ( buf32[iboundary+BCELL_EXTRA] === 1 ) {
+ return iboundary;
+ }
+ // bail out if no left segment
+ if ( pivot === 0 ) { return iboundary; }
+ // fetch root cell of left segment
+ icell = buf32[iboundary+BCELL_ALT_AND];
+ if ( icell === 0 ) {
+ icell = this.allocateCell();
+ buf32[iboundary+BCELL_ALT_AND] = icell;
+ }
+ // special case: first node in trie
+ if ( buf32[icell+SEGMENT_INFO] === 0 ) {
+ buf32[icell+SEGMENT_INFO] = toSegmentInfo(aL0, 0, pivot);
+ iboundary = this.allocateCell();
+ buf32[icell+CELL_AND] = iboundary;
+ return iboundary;
+ }
+ const buf8 = this.buf8;
+ let ar = pivot, inext;
+ // find a matching cell: move down
+ for (;;) {
+ const binfo = buf32[icell+SEGMENT_INFO];
+ // skip boundary cells
+ if ( binfo <= BCELL_EXTRA_MAX ) {
+ inext = buf32[icell+CELL_AND];
+ if ( inext !== 0 ) {
+ icell = inext;
+ continue;
+ }
+ iboundary = this.allocateCell();
+ buf32[icell+CELL_AND] =
+ this.addCell(iboundary, 0, toSegmentInfo(aL0, 0, ar));
+ // TODO: boundary cell might be last
+ // add remainder + boundary cell
+ return iboundary;
+ }
+ const bL = char0 + (binfo & 0x00FFFFFF);
+ const bR = bL + (binfo >>> 24);
+ let br = bR;
+ // if first character is no match, move to next descendant
+ if ( buf8[br-1] !== buf8[aL+ar-1] ) {
+ inext = buf32[icell+CELL_OR];
+ if ( inext === 0 ) {
+ iboundary = this.allocateCell();
+ inext = this.addCell(
+ iboundary, 0, toSegmentInfo(aL0, 0, ar)
+ );
+ buf32[icell+CELL_OR] = inext;
+ return iboundary;
+ }
+ icell = inext;
+ continue;
+ }
+ // 1st character was tested
+ br -= 1;
+ ar -= 1;
+ // find 1st mismatch in rest of segment
+ if ( br !== bL ) {
+ for (;;) {
+ if ( br === bL ) { break; }
+ if ( ar === 0 ) { break; }
+ if ( buf8[br-1] !== buf8[aL+ar-1] ) { break; }
+ br -= 1;
+ ar -= 1;
+ }
+ }
+ // all segment characters matched
+ // a: ...vvvvvvv
+ // b: vvvvvvv
+ if ( br === bL ) {
+ inext = buf32[icell+CELL_AND];
+ // needle remainder: no
+ // a: vvvvvvv
+ // b: vvvvvvv
+ // r: 0 & vvvvvvv
+ if ( ar === 0 ) {
+ // boundary cell already present
+ if ( buf32[inext+BCELL_EXTRA] <= BCELL_EXTRA_MAX ) {
+ return inext;
+ }
+ // need boundary cell
+ iboundary = this.allocateCell();
+ buf32[iboundary+CELL_AND] = inext;
+ buf32[icell+CELL_AND] = iboundary;
+ return iboundary;
+ }
+ // needle remainder: yes
+ // a: yyyyyyyvvvvvvv
+ // b: vvvvvvv
+ else {
+ if ( inext !== 0 ) {
+ icell = inext;
+ continue;
+ }
+ // TODO: we should never reach here because there will
+ // always be a boundary cell.
+ // eslint-disable-next-line no-debugger
+ debugger; // jshint ignore:line
+ // boundary cell + needle remainder
+ inext = this.addCell(0, 0, 0);
+ buf32[icell+CELL_AND] = inext;
+ buf32[inext+CELL_AND] =
+ this.addCell(0, 0, toSegmentInfo(aL0, 0, ar));
+ }
+ }
+ // some segment characters matched
+ // a: ...vvvvvvv
+ // b: yyyyyyyvvvvvvv
+ else {
+ // split current cell
+ buf32[icell+SEGMENT_INFO] = (bR - br) << 24 | (br - char0);
+ inext = this.addCell(
+ buf32[icell+CELL_AND],
+ 0,
+ (br - bL) << 24 | (bL - char0)
+ );
+ // needle remainder: no = need boundary cell
+ // a: vvvvvvv
+ // b: yyyyyyyvvvvvvv
+ // r: yyyyyyy & 0 & vvvvvvv
+ if ( ar === 0 ) {
+ iboundary = this.allocateCell();
+ buf32[icell+CELL_AND] = iboundary;
+ buf32[iboundary+CELL_AND] = inext;
+ return iboundary;
+ }
+ // needle remainder: yes = need new cell for remaining
+ // characters
+ // a: wwwwvvvvvvv
+ // b: yyyyyyyvvvvvvv
+ // r: (0 & wwww | yyyyyyy) & vvvvvvv
+ else {
+ buf32[icell+CELL_AND] = inext;
+ iboundary = this.allocateCell();
+ buf32[inext+CELL_OR] = this.addCell(
+ iboundary, 0, toSegmentInfo(aL0, 0, ar)
+ );
+ return iboundary;
+ }
+ }
+ //debugger; // jshint ignore:line
+ }
+ }
+
+ getExtra(iboundary) {
+ return this.buf32[iboundary+BCELL_EXTRA];
+ }
+
+ setExtra(iboundary, v) {
+ this.buf32[iboundary+BCELL_EXTRA] = v;
+ }
+
+ optimize(shrink = false) {
+ if ( shrink ) {
+ this.shrinkBuf();
+ }
+ return {
+ byteLength: this.buf8.byteLength,
+ char0: this.buf32[CHAR0_SLOT],
+ };
+ }
+
+ serialize(encoder) {
+ if ( encoder instanceof Object ) {
+ return encoder.encode(
+ this.buf32.buffer,
+ this.buf32[CHAR1_SLOT]
+ );
+ }
+ return Array.from(
+ new Uint32Array(
+ this.buf32.buffer,
+ 0,
+ this.buf32[CHAR1_SLOT] + 3 >>> 2
+ )
+ );
+ }
+
+ unserialize(selfie, decoder) {
+ const shouldDecode = typeof selfie === 'string';
+ let byteLength = shouldDecode
+ ? decoder.decodeSize(selfie)
+ : selfie.length << 2;
+ if ( byteLength === 0 ) { return false; }
+ this.reallocateBuf(byteLength);
+ if ( shouldDecode ) {
+ decoder.decode(selfie, this.buf8.buffer);
+ } else {
+ this.buf32.set(selfie);
+ }
+ return true;
+ }
+
+ storeString(s) {
+ const n = s.length;
+ if ( n === this.lastStoredLen && s === this.lastStored ) {
+ return this.lastStoredIndex;
+ }
+ this.lastStored = s;
+ this.lastStoredLen = n;
+ if ( (this.buf8.length - this.buf32[CHAR1_SLOT]) < n ) {
+ this.growBuf(0, n);
+ }
+ const offset = this.buf32[CHAR1_SLOT];
+ this.buf32[CHAR1_SLOT] = offset + n;
+ const buf8 = this.buf8;
+ for ( let i = 0; i < n; i++ ) {
+ buf8[offset+i] = s.charCodeAt(i);
+ }
+ return (this.lastStoredIndex = offset - this.buf32[CHAR0_SLOT]);
+ }
+
+ extractString(i, n) {
+ if ( this.textDecoder === null ) {
+ this.textDecoder = new TextDecoder();
+ }
+ const offset = this.buf32[CHAR0_SLOT] + i;
+ return this.textDecoder.decode(
+ this.buf8.subarray(offset, offset + n)
+ );
+ }
+
+ // WASMable.
+ startsWith(haystackLeft, haystackRight, needleLeft, needleLen) {
+ if ( haystackLeft < 0 || (haystackLeft + needleLen) > haystackRight ) {
+ return 0;
+ }
+ const charCodes = this.buf8;
+ needleLeft += this.buf32[CHAR0_SLOT];
+ const needleRight = needleLeft + needleLen;
+ while ( charCodes[haystackLeft] === charCodes[needleLeft] ) {
+ needleLeft += 1;
+ if ( needleLeft === needleRight ) { return 1; }
+ haystackLeft += 1;
+ }
+ return 0;
+ }
+
+ // Find the left-most instance of substring in main string
+ // WASMable.
+ indexOf(haystackLeft, haystackEnd, needleLeft, needleLen) {
+ if ( needleLen === 0 ) { return haystackLeft; }
+ haystackEnd -= needleLen;
+ if ( haystackEnd < haystackLeft ) { return -1; }
+ needleLeft += this.buf32[CHAR0_SLOT];
+ const needleRight = needleLeft + needleLen;
+ const charCodes = this.buf8;
+ for (;;) {
+ let i = haystackLeft;
+ let j = needleLeft;
+ while ( charCodes[i] === charCodes[j] ) {
+ j += 1;
+ if ( j === needleRight ) { return haystackLeft; }
+ i += 1;
+ }
+ haystackLeft += 1;
+ if ( haystackLeft > haystackEnd ) { break; }
+ }
+ return -1;
+ }
+
+ // Find the right-most instance of substring in main string.
+ // WASMable.
+ lastIndexOf(haystackBeg, haystackEnd, needleLeft, needleLen) {
+ if ( needleLen === 0 ) { return haystackBeg; }
+ let haystackLeft = haystackEnd - needleLen;
+ if ( haystackLeft < haystackBeg ) { return -1; }
+ needleLeft += this.buf32[CHAR0_SLOT];
+ const needleRight = needleLeft + needleLen;
+ const charCodes = this.buf8;
+ for (;;) {
+ let i = haystackLeft;
+ let j = needleLeft;
+ while ( charCodes[i] === charCodes[j] ) {
+ j += 1;
+ if ( j === needleRight ) { return haystackLeft; }
+ i += 1;
+ }
+ if ( haystackLeft === haystackBeg ) { break; }
+ haystackLeft -= 1;
+ }
+ return -1;
+ }
+
+ dumpTrie(iroot) {
+ for ( const s of this.trieIterator(iroot) ) {
+ console.log(s);
+ }
+ }
+
+ trieIterator(iroot) {
+ return {
+ value: undefined,
+ done: false,
+ next() {
+ if ( this.icell === 0 ) {
+ if ( this.forks.length === 0 ) {
+ this.value = undefined;
+ this.done = true;
+ return this;
+ }
+ this.pattern = this.forks.pop();
+ this.dir = this.forks.pop();
+ this.icell = this.forks.pop();
+ }
+ const buf32 = this.container.buf32;
+ const buf8 = this.container.buf8;
+ for (;;) {
+ const ialt = buf32[this.icell+CELL_OR];
+ const v = buf32[this.icell+SEGMENT_INFO];
+ const offset = v & 0x00FFFFFF;
+ let i0 = buf32[CHAR0_SLOT] + offset;
+ const len = v >>> 24;
+ for ( let i = 0; i < len; i++ ) {
+ this.charBuf[i] = buf8[i0+i];
+ }
+ if ( len !== 0 && ialt !== 0 ) {
+ this.forks.push(ialt, this.dir, this.pattern);
+ }
+ const inext = buf32[this.icell+CELL_AND];
+ if ( len !== 0 ) {
+ const s = this.textDecoder.decode(
+ new Uint8Array(this.charBuf.buffer, 0, len)
+ );
+ if ( this.dir > 0 ) {
+ this.pattern += s;
+ } else if ( this.dir < 0 ) {
+ this.pattern = s + this.pattern;
+ }
+ }
+ this.icell = inext;
+ if ( len !== 0 ) { continue; }
+ // boundary cell
+ if ( ialt !== 0 ) {
+ if ( inext === 0 ) {
+ this.icell = ialt;
+ this.dir = -1;
+ } else {
+ this.forks.push(ialt, -1, this.pattern);
+ }
+ }
+ if ( offset !== 0 ) {
+ this.value = { pattern: this.pattern, iextra: offset };
+ return this;
+ }
+ }
+ },
+ container: this,
+ icell: iroot,
+ charBuf: new Uint8Array(256),
+ pattern: '',
+ dir: 1,
+ forks: [],
+ textDecoder: new TextDecoder(),
+ [Symbol.iterator]() { return this; },
+ };
+ }
+
+ async enableWASM(wasmModuleFetcher, path) {
+ if ( typeof WebAssembly !== 'object' ) { return false; }
+ if ( this.wasmMemory instanceof WebAssembly.Memory ) { return true; }
+ const module = await getWasmModule(wasmModuleFetcher, path);
+ if ( module instanceof WebAssembly.Module === false ) { return false; }
+ const memory = new WebAssembly.Memory({
+ initial: roundToPageSize(this.buf8.length) >>> 16
+ });
+ const instance = await WebAssembly.instantiate(module, {
+ imports: { memory, extraHandler: this.extraHandler }
+ });
+ if ( instance instanceof WebAssembly.Instance === false ) {
+ return false;
+ }
+ this.wasmMemory = memory;
+ const curPageCount = memory.buffer.byteLength >>> 16;
+ const newPageCount = roundToPageSize(this.buf8.byteLength) >>> 16;
+ if ( newPageCount > curPageCount ) {
+ memory.grow(newPageCount - curPageCount);
+ }
+ const buf8 = new Uint8Array(memory.buffer);
+ buf8.set(this.buf8);
+ this.buf8 = buf8;
+ this.buf32 = new Uint32Array(this.buf8.buffer);
+ this.haystack = this.buf8.subarray(
+ HAYSTACK_START,
+ HAYSTACK_START + HAYSTACK_SIZE
+ );
+ this.matches = instance.exports.matches;
+ this.startsWith = instance.exports.startsWith;
+ this.indexOf = instance.exports.indexOf;
+ this.lastIndexOf = instance.exports.lastIndexOf;
+ return true;
+ }
+
+ dumpInfo() {
+ return [
+ `Buffer size (Uint8Array): ${this.buf32[CHAR1_SLOT].toLocaleString('en')}`,
+ `WASM: ${this.wasmMemory === null ? 'disabled' : 'enabled'}`,
+ ].join('\n');
+ }
+
+ //--------------------------------------------------------------------------
+ // Private methods
+ //--------------------------------------------------------------------------
+
+ allocateCell() {
+ let icell = this.buf32[TRIE1_SLOT];
+ this.buf32[TRIE1_SLOT] = icell + CELL_BYTE_LENGTH;
+ icell >>>= 2;
+ this.buf32[icell+0] = 0;
+ this.buf32[icell+1] = 0;
+ this.buf32[icell+2] = 0;
+ return icell;
+ }
+
+ addCell(iand, ior, v) {
+ const icell = this.allocateCell();
+ this.buf32[icell+CELL_AND] = iand;
+ this.buf32[icell+CELL_OR] = ior;
+ this.buf32[icell+SEGMENT_INFO] = v;
+ return icell;
+ }
+
+ growBuf(trieGrow, charGrow) {
+ const char0 = Math.max(
+ roundToPageSize(this.buf32[TRIE1_SLOT] + trieGrow),
+ this.buf32[CHAR0_SLOT]
+ );
+ const char1 = char0 + this.buf32[CHAR1_SLOT] - this.buf32[CHAR0_SLOT];
+ const bufLen = Math.max(
+ roundToPageSize(char1 + charGrow),
+ this.buf8.length
+ );
+ if ( bufLen > this.buf8.length ) {
+ this.reallocateBuf(bufLen);
+ }
+ if ( char0 !== this.buf32[CHAR0_SLOT] ) {
+ this.buf8.copyWithin(
+ char0,
+ this.buf32[CHAR0_SLOT],
+ this.buf32[CHAR1_SLOT]
+ );
+ this.buf32[CHAR0_SLOT] = char0;
+ this.buf32[CHAR1_SLOT] = char1;
+ }
+ }
+
+ shrinkBuf() {
+ const char0 = this.buf32[TRIE1_SLOT] + MIN_FREE_CELL_BYTE_LENGTH;
+ const char1 = char0 + this.buf32[CHAR1_SLOT] - this.buf32[CHAR0_SLOT];
+ const bufLen = char1 + 256;
+ if ( char0 !== this.buf32[CHAR0_SLOT] ) {
+ this.buf8.copyWithin(
+ char0,
+ this.buf32[CHAR0_SLOT],
+ this.buf32[CHAR1_SLOT]
+ );
+ this.buf32[CHAR0_SLOT] = char0;
+ this.buf32[CHAR1_SLOT] = char1;
+ }
+ if ( bufLen < this.buf8.length ) {
+ this.reallocateBuf(bufLen);
+ }
+ }
+
+ reallocateBuf(newSize) {
+ newSize = roundToPageSize(newSize);
+ if ( newSize === this.buf8.length ) { return; }
+ if ( this.wasmMemory === null ) {
+ const newBuf = new Uint8Array(newSize);
+ newBuf.set(
+ newBuf.length < this.buf8.length
+ ? this.buf8.subarray(0, newBuf.length)
+ : this.buf8
+ );
+ this.buf8 = newBuf;
+ } else {
+ const growBy =
+ ((newSize + 0xFFFF) >>> 16) - (this.buf8.length >>> 16);
+ if ( growBy <= 0 ) { return; }
+ this.wasmMemory.grow(growBy);
+ this.buf8 = new Uint8Array(this.wasmMemory.buffer);
+ }
+ this.buf32 = new Uint32Array(this.buf8.buffer);
+ this.haystack = this.buf8.subarray(
+ HAYSTACK_START,
+ HAYSTACK_START + HAYSTACK_SIZE
+ );
+ }
+}
+
+/******************************************************************************/
+
+// Code below is to attempt to load a WASM module which implements:
+//
+// - BidiTrieContainer.startsWith()
+//
+// The WASM module is entirely optional, the JS implementations will be
+// used should the WASM module be unavailable for whatever reason.
+
+const getWasmModule = (( ) => {
+ let wasmModulePromise;
+
+ return async function(wasmModuleFetcher, path) {
+ if ( wasmModulePromise instanceof Promise ) {
+ return wasmModulePromise;
+ }
+
+ if ( typeof WebAssembly !== 'object' ) { return; }
+
+ // Soft-dependency on vAPI so that the code here can be used outside of
+ // uBO (i.e. tests, benchmarks)
+ if ( typeof vAPI === 'object' && vAPI.canWASM !== true ) { return; }
+
+ // The wasm module will work only if CPU is natively little-endian,
+ // as we use native uint32 array in our js code.
+ const uint32s = new Uint32Array(1);
+ const uint8s = new Uint8Array(uint32s.buffer);
+ uint32s[0] = 1;
+ if ( uint8s[0] !== 1 ) { return; }
+
+ wasmModulePromise = wasmModuleFetcher(`${path}biditrie`).catch(reason => {
+ console.info(reason);
+ });
+
+ return wasmModulePromise;
+ };
+})();
+
+/******************************************************************************/
+
+export default BidiTrieContainer;