/******************************************************************************* uBlock Origin - a browser extension to block requests. Copyright (C) 2024-present Raymond Hill This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see {http://www.gnu.org/licenses/}. Home: https://github.com/gorhill/uBlock */ 'use strict'; /******************************************************************************* * * Structured-Cloneable to Unicode-Only SERIALIZER * * Purpose: * * Serialize/deserialize arbitrary JS data to/from well-formed Unicode strings. * * The browser does not expose an API to serialize structured-cloneable types * into a single string. JSON.stringify() does not support complex JavaScript * objects, and does not support references to composite types. Unless the * data to serialize is only JS strings, it is difficult to easily switch * from one type of storage to another. * * Serializing to a well-formed Unicode string allows to store structured- * cloneable data to any storage. Not all storages support storing binary data, * but all storages support storing Unicode strings. * * Structured-cloneable types: * https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm#supported_types * * ----------------+------------------+------------------+---------------------- * Data types | String | JSONable | structured-cloneable * ================+============================================================ * document.cookie | Yes | No | No * ----------------+------------------+------------------+---------------------- * localStorage | Yes | No | No * ----------------+------------------+------------------+---------------------- * IndexedDB | Yes | Yes | Yes * ----------------+------------------+------------------+---------------------- * browser.storage | Yes | Yes | No * ----------------+------------------+------------------+---------------------- * Cache API | Yes | No | No * ----------------+------------------+------------------+---------------------- * * The above table shows that only JS strings can be persisted natively to all * types of storage. The purpose of this library is to convert * structure-cloneable data (which is a superset of JSONable data) into a * single JS string. The resulting string is meant to be as small as possible. * As a result, it is not human-readable, though it contains only printable * ASCII characters -- and possibly Unicode characters beyond ASCII. * * The resulting JS string will not contain characters which require escaping * should it be converted to a JSON value. However it may contain characters * which require escaping should it be converted to a URI component. * * Characteristics: * * - Serializes/deserializes data to/from a single well-formed Unicode string * - Strings do not require escaping, i.e. they are stored as-is * - Supports multiple references to same object * - Supports reference cycles * - Supports synchronous and asynchronous API * - Supports usage of Worker * - Optionally supports LZ4 compression * * TODO: * * - Harden against unexpected conditions, such as corrupted string during * deserialization. * - Evaluate supporting checksum. * * */ const VERSION = 1; const SEPARATORCHAR = ' '; const SEPARATORCHARCODE = SEPARATORCHAR.charCodeAt(0); const SENTINELCHAR = '!'; const SENTINELCHARCODE = SENTINELCHAR.charCodeAt(0); const MAGICPREFIX = `UOSC_${VERSION}${SEPARATORCHAR}`; const MAGICLZ4PREFIX = `UOSC/lz4_${VERSION}${SEPARATORCHAR}`; const FAILMARK = Number.MAX_SAFE_INTEGER; // Avoid characters which require escaping when serialized to JSON: const SAFECHARS = "&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~"; const NUMSAFECHARS = SAFECHARS.length; const BITS_PER_SAFECHARS = Math.log2(NUMSAFECHARS); const { intToChar, intToCharCode, charCodeToInt } = (( ) => { const intToChar = []; const intToCharCode = []; const charCodeToInt = []; for ( let i = 0; i < NUMSAFECHARS; i++ ) { intToChar[i] = SAFECHARS.charAt(i); intToCharCode[i] = SAFECHARS.charCodeAt(i); charCodeToInt[i] = 0; } for ( let i = NUMSAFECHARS; i < 128; i++ ) { intToChar[i] = ''; intToCharCode[i] = 0; charCodeToInt[i] = 0; } for ( let i = 0; i < SAFECHARS.length; i++ ) { charCodeToInt[SAFECHARS.charCodeAt(i)] = i; } return { intToChar, intToCharCode, charCodeToInt }; })(); let iota = 1; const I_STRING_SMALL = iota++; const I_STRING_LARGE = iota++; const I_ZERO = iota++; const I_INTEGER_SMALL_POS = iota++; const I_INTEGER_SMALL_NEG = iota++; const I_INTEGER_LARGE_POS = iota++; const I_INTEGER_LARGE_NEG = iota++; const I_BOOL_FALSE = iota++; const I_BOOL_TRUE = iota++; const I_NULL = iota++; const I_UNDEFINED = iota++; const I_FLOAT = iota++; const I_REGEXP = iota++; const I_DATE = iota++; const I_REFERENCE = iota++; const I_OBJECT_SMALL = iota++; const I_OBJECT_LARGE = iota++; const I_ARRAY_SMALL = iota++; const I_ARRAY_LARGE = iota++; const I_SET_SMALL = iota++; const I_SET_LARGE = iota++; const I_MAP_SMALL = iota++; const I_MAP_LARGE = iota++; const I_ARRAYBUFFER = iota++; const I_INT8ARRAY = iota++; const I_UINT8ARRAY = iota++; const I_UINT8CLAMPEDARRAY = iota++; const I_INT16ARRAY = iota++; const I_UINT16ARRAY = iota++; const I_INT32ARRAY = iota++; const I_UINT32ARRAY = iota++; const I_FLOAT32ARRAY = iota++; const I_FLOAT64ARRAY = iota++; const I_DATAVIEW = iota++; const C_STRING_SMALL = intToChar[I_STRING_SMALL]; const C_STRING_LARGE = intToChar[I_STRING_LARGE]; const C_ZERO = intToChar[I_ZERO]; const C_INTEGER_SMALL_POS = intToChar[I_INTEGER_SMALL_POS]; const C_INTEGER_SMALL_NEG = intToChar[I_INTEGER_SMALL_NEG]; const C_INTEGER_LARGE_POS = intToChar[I_INTEGER_LARGE_POS]; const C_INTEGER_LARGE_NEG = intToChar[I_INTEGER_LARGE_NEG]; const C_BOOL_FALSE = intToChar[I_BOOL_FALSE]; const C_BOOL_TRUE = intToChar[I_BOOL_TRUE]; const C_NULL = intToChar[I_NULL]; const C_UNDEFINED = intToChar[I_UNDEFINED]; const C_FLOAT = intToChar[I_FLOAT]; const C_REGEXP = intToChar[I_REGEXP]; const C_DATE = intToChar[I_DATE]; const C_REFERENCE = intToChar[I_REFERENCE]; const C_OBJECT_SMALL = intToChar[I_OBJECT_SMALL]; const C_OBJECT_LARGE = intToChar[I_OBJECT_LARGE]; const C_ARRAY_SMALL = intToChar[I_ARRAY_SMALL]; const C_ARRAY_LARGE = intToChar[I_ARRAY_LARGE]; const C_SET_SMALL = intToChar[I_SET_SMALL]; const C_SET_LARGE = intToChar[I_SET_LARGE]; const C_MAP_SMALL = intToChar[I_MAP_SMALL]; const C_MAP_LARGE = intToChar[I_MAP_LARGE]; const C_ARRAYBUFFER = intToChar[I_ARRAYBUFFER]; const C_INT8ARRAY = intToChar[I_INT8ARRAY]; const C_UINT8ARRAY = intToChar[I_UINT8ARRAY]; const C_UINT8CLAMPEDARRAY = intToChar[I_UINT8CLAMPEDARRAY]; const C_INT16ARRAY = intToChar[I_INT16ARRAY]; const C_UINT16ARRAY = intToChar[I_UINT16ARRAY]; const C_INT32ARRAY = intToChar[I_INT32ARRAY]; const C_UINT32ARRAY = intToChar[I_UINT32ARRAY]; const C_FLOAT32ARRAY = intToChar[I_FLOAT32ARRAY]; const C_FLOAT64ARRAY = intToChar[I_FLOAT64ARRAY]; const C_DATAVIEW = intToChar[I_DATAVIEW]; // Just reuse already defined constants, we just need distinct values const I_STRING = I_STRING_SMALL; const I_NUMBER = I_FLOAT; const I_BOOL = I_BOOL_FALSE; const I_OBJECT = I_OBJECT_SMALL; const I_ARRAY = I_ARRAY_SMALL; const I_SET = I_SET_SMALL; const I_MAP = I_MAP_SMALL; const typeToSerializedInt = { 'string': I_STRING, 'number': I_NUMBER, 'boolean': I_BOOL, 'object': I_OBJECT, }; const xtypeToSerializedInt = { '[object RegExp]': I_REGEXP, '[object Date]': I_DATE, '[object Array]': I_ARRAY, '[object Set]': I_SET, '[object Map]': I_MAP, '[object ArrayBuffer]': I_ARRAYBUFFER, '[object Int8Array]': I_INT8ARRAY, '[object Uint8Array]': I_UINT8ARRAY, '[object Uint8ClampedArray]': I_UINT8CLAMPEDARRAY, '[object Int16Array]': I_INT16ARRAY, '[object Uint16Array]': I_UINT16ARRAY, '[object Int32Array]': I_INT32ARRAY, '[object Uint32Array]': I_UINT32ARRAY, '[object Float32Array]': I_FLOAT32ARRAY, '[object Float64Array]': I_FLOAT64ARRAY, '[object DataView]': I_DATAVIEW, }; const xtypeToSerializedChar = { '[object Int8Array]': C_INT8ARRAY, '[object Uint8Array]': C_UINT8ARRAY, '[object Uint8ClampedArray]': C_UINT8CLAMPEDARRAY, '[object Int16Array]': C_INT16ARRAY, '[object Uint16Array]': C_UINT16ARRAY, '[object Int32Array]': C_INT32ARRAY, '[object Uint32Array]': C_UINT32ARRAY, '[object Float32Array]': C_FLOAT32ARRAY, '[object Float64Array]': C_FLOAT64ARRAY, }; const toArrayBufferViewConstructor = { [`${I_INT8ARRAY}`]: Int8Array, [`${I_UINT8ARRAY}`]: Uint8Array, [`${I_UINT8CLAMPEDARRAY}`]: Uint8ClampedArray, [`${I_INT16ARRAY}`]: Int16Array, [`${I_UINT16ARRAY}`]: Uint16Array, [`${I_INT32ARRAY}`]: Int32Array, [`${I_UINT32ARRAY}`]: Uint32Array, [`${I_FLOAT32ARRAY}`]: Float32Array, [`${I_FLOAT64ARRAY}`]: Float64Array, [`${I_DATAVIEW}`]: DataView, }; /******************************************************************************/ const textDecoder = new TextDecoder(); const textEncoder = new TextEncoder(); const isInteger = Number.isInteger; const writeRefs = new Map(); const writeBuffer = []; const readRefs = new Map(); let readStr = ''; let readPtr = 0; let readEnd = 0; let refCounter = 1; let uint8Input = null; const uint8InputFromAsciiStr = s => { if ( uint8Input === null || uint8Input.length < s.length ) { uint8Input = new Uint8Array(s.length + 0x03FF & ~0x03FF); } textEncoder.encodeInto(s, uint8Input); return uint8Input; }; const isInstanceOf = (o, s) => { return typeof o === 'object' && o !== null && ( s === 'Object' || Object.prototype.toString.call(o) === `[object ${s}]` ); }; const shouldCompress = (s, options) => options.compress === true && ( options.compressThreshold === undefined || options.compressThreshold <= s.length ); /******************************************************************************* * * A large Uint is always a positive integer (can be zero), assumed to be * large, i.e. > NUMSAFECHARS -- but not necessarily. The serialized value has * always at least one digit, and is always followed by a separator. * * */ const strFromLargeUint = i => { let r = 0, s = ''; for (;;) { r = i % NUMSAFECHARS; s += intToChar[r]; i -= r; if ( i === 0 ) { break; } i /= NUMSAFECHARS; } return s + SEPARATORCHAR; }; const deserializeLargeUint = ( ) => { let c = readStr.charCodeAt(readPtr++); let n = charCodeToInt[c]; let m = 1; while ( (c = readStr.charCodeAt(readPtr++)) !== SEPARATORCHARCODE ) { m *= NUMSAFECHARS; n += m * charCodeToInt[c]; } return n; }; /******************************************************************************* * * Methods specific to ArrayBuffer objects to serialize optimally according to * the content of the buffer. * * In sparse mode, number of output bytes per input int32 (4-byte) value: * [v === zero]: 1 byte (separator) * [v !== zero]: n digits + 1 byte (separator) * * */ const sparseValueLen = v => v !== 0 ? (Math.log2(v) / BITS_PER_SAFECHARS | 0) + 2 : 1; const analyzeArrayBuffer = arrbuf => { const byteLength = arrbuf.byteLength; const uint32len = byteLength >>> 2; const uint32arr = new Uint32Array(arrbuf, 0, uint32len); let notzeroCount = 0; for ( let i = uint32len-1; i >= 0; i-- ) { if ( uint32arr[i] === 0 ) { continue; } notzeroCount = i + 1; break; } const end = notzeroCount + 1 <= uint32len ? notzeroCount << 2 : byteLength; const endUint32 = end >>> 2; const remUint8 = end & 0b11; const denseSize = endUint32 * 5 + (remUint8 ? remUint8 + 1 : 0); let sparseSize = 0; for ( let i = 0; i < endUint32; i++ ) { sparseSize += sparseValueLen(uint32arr[i]); if ( sparseSize > denseSize ) { return { end, dense: true, denseSize }; } } if ( remUint8 !== 0 ) { sparseSize += 1; // sentinel const uint8arr = new Uint8Array(arrbuf, endUint32 << 2); for ( let i = 0; i < remUint8; i++ ) { sparseSize += sparseValueLen(uint8arr[i]); } } return { end, dense: false, sparseSize }; }; const denseArrayBufferToStr = (arrbuf, details) => { const end = details.end; const m = end % 4; const n = end - m; const uin32len = n >>> 2; const uint32arr = new Uint32Array(arrbuf, 0, uin32len); const output = new Uint8Array(details.denseSize); let j = 0, v = 0; for ( let i = 0; i < uin32len; i++ ) { v = uint32arr[i]; output[j+0] = intToCharCode[v % NUMSAFECHARS]; v = v / NUMSAFECHARS | 0; output[j+1] = intToCharCode[v % NUMSAFECHARS]; v = v / NUMSAFECHARS | 0; output[j+2] = intToCharCode[v % NUMSAFECHARS]; v = v / NUMSAFECHARS | 0; output[j+3] = intToCharCode[v % NUMSAFECHARS]; v = v / NUMSAFECHARS | 0; output[j+4] = intToCharCode[v]; j += 5; } if ( m !== 0 ) { const uint8arr = new Uint8Array(arrbuf, n); v = uint8arr[0]; if ( m > 1 ) { v += uint8arr[1] << 8; if ( m > 2 ) { v += uint8arr[2] << 16; } } output[j+0] = intToCharCode[v % NUMSAFECHARS]; v = v / NUMSAFECHARS | 0; output[j+1] = intToCharCode[v % NUMSAFECHARS]; if ( m > 1 ) { v = v / NUMSAFECHARS | 0; output[j+2] = intToCharCode[v % NUMSAFECHARS]; if ( m > 2 ) { v = v / NUMSAFECHARS | 0; output[j+3] = intToCharCode[v % NUMSAFECHARS]; } } } return textDecoder.decode(output); }; const BASE88_POW1 = NUMSAFECHARS; const BASE88_POW2 = NUMSAFECHARS * BASE88_POW1; const BASE88_POW3 = NUMSAFECHARS * BASE88_POW2; const BASE88_POW4 = NUMSAFECHARS * BASE88_POW3; const denseArrayBufferFromStr = (denseStr, arrbuf) => { const input = uint8InputFromAsciiStr(denseStr); const end = denseStr.length; const m = end % 5; const n = end - m; const uin32len = n / 5 * 4 >>> 2; const uint32arr = new Uint32Array(arrbuf, 0, uin32len); let j = 0, v = 0; for ( let i = 0; i < n; i += 5 ) { v = charCodeToInt[input[i+0]]; v += charCodeToInt[input[i+1]] * BASE88_POW1; v += charCodeToInt[input[i+2]] * BASE88_POW2; v += charCodeToInt[input[i+3]] * BASE88_POW3; v += charCodeToInt[input[i+4]] * BASE88_POW4; uint32arr[j++] = v; } if ( m === 0 ) { return; } v = charCodeToInt[input[n+0]] + charCodeToInt[input[n+1]] * BASE88_POW1; if ( m > 2 ) { v += charCodeToInt[input[n+2]] * BASE88_POW2; if ( m > 3 ) { v += charCodeToInt[input[n+3]] * BASE88_POW3; } } const uint8arr = new Uint8Array(arrbuf, j << 2); uint8arr[0] = v & 255; if ( v !== 0 ) { v >>>= 8; uint8arr[1] = v & 255; if ( v !== 0 ) { v >>>= 8; uint8arr[2] = v & 255; } } }; const sparseArrayBufferToStr = (arrbuf, details) => { const end = details.end; const uint8out = new Uint8Array(details.sparseSize); const uint32len = end >>> 2; const uint32arr = new Uint32Array(arrbuf, 0, uint32len); let j = 0, n = 0, r = 0; for ( let i = 0; i < uint32len; i++ ) { n = uint32arr[i]; if ( n !== 0 ) { for (;;) { r = n % NUMSAFECHARS; uint8out[j++] = intToCharCode[r]; n -= r; if ( n === 0 ) { break; } n /= NUMSAFECHARS; } } uint8out[j++] = SEPARATORCHARCODE; } const uint8rem = end & 0b11; if ( uint8rem !== 0 ) { uint8out[j++] = SENTINELCHARCODE; const uint8arr = new Uint8Array(arrbuf, end - uint8rem, uint8rem); for ( let i = 0; i < uint8rem; i++ ) { n = uint8arr[i]; if ( n !== 0 ) { for (;;) { r = n % NUMSAFECHARS; uint8out[j++] = intToCharCode[r]; n -= r; if ( n === 0 ) { break; } n /= NUMSAFECHARS; } } uint8out[j++] = SEPARATORCHARCODE; } } return textDecoder.decode(uint8out); }; const sparseArrayBufferFromStr = (sparseStr, arrbuf) => { const sparseLen = sparseStr.length; const input = uint8InputFromAsciiStr(sparseStr); const end = arrbuf.byteLength; const uint32len = end >>> 2; const uint32arr = new Uint32Array(arrbuf, 0, uint32len); let i = 0, j = 0, c = 0, n = 0, m = 0; for ( ; j < sparseLen; i++ ) { c = input[j++]; if ( c === SEPARATORCHARCODE ) { continue; } if ( c === SENTINELCHARCODE ) { break; } n = charCodeToInt[c]; m = 1; for (;;) { c = input[j++]; if ( c === SEPARATORCHARCODE ) { break; } m *= NUMSAFECHARS; n += m * charCodeToInt[c]; } uint32arr[i] = n; } if ( c === SENTINELCHARCODE ) { i <<= 2; const uint8arr = new Uint8Array(arrbuf, i); for ( ; j < sparseLen; i++ ) { c = input[j++]; if ( c === SEPARATORCHARCODE ) { continue; } n = charCodeToInt[c]; m = 1; for (;;) { c = input[j++]; if ( c === SEPARATORCHARCODE ) { break; } m *= NUMSAFECHARS; n += m * charCodeToInt[c]; } uint8arr[i] = n; } } }; /******************************************************************************/ const _serialize = data => { // Primitive types if ( data === 0 ) { writeBuffer.push(C_ZERO); return; } if ( data === null ) { writeBuffer.push(C_NULL); return; } if ( data === undefined ) { writeBuffer.push(C_UNDEFINED); return; } // Type name switch ( typeToSerializedInt[typeof data] ) { case I_STRING: { const length = data.length; if ( length < NUMSAFECHARS ) { writeBuffer.push(C_STRING_SMALL + intToChar[length], data); } else { writeBuffer.push(C_STRING_LARGE + strFromLargeUint(length), data); } return; } case I_NUMBER: if ( isInteger(data) ) { if ( data >= NUMSAFECHARS ) { writeBuffer.push(C_INTEGER_LARGE_POS + strFromLargeUint(data)); } else if ( data > 0 ) { writeBuffer.push(C_INTEGER_SMALL_POS + intToChar[data]); } else if ( data > -NUMSAFECHARS ) { writeBuffer.push(C_INTEGER_SMALL_NEG + intToChar[-data]); } else { writeBuffer.push(C_INTEGER_LARGE_NEG + strFromLargeUint(-data)); } } else { const s = `${data}`; writeBuffer.push(C_FLOAT + strFromLargeUint(s.length) + s); } return; case I_BOOL: writeBuffer.push(data ? C_BOOL_TRUE : C_BOOL_FALSE); return; case I_OBJECT: break; default: return; } const xtypeName = Object.prototype.toString.call(data); const xtypeInt = xtypeToSerializedInt[xtypeName]; if ( xtypeInt === I_REGEXP ) { writeBuffer.push(C_REGEXP); _serialize(data.source); _serialize(data.flags); return; } if ( xtypeInt === I_DATE ) { writeBuffer.push(C_DATE + _serialize(data.getTime())); return; } // Reference to composite types const ref = writeRefs.get(data); if ( ref !== undefined ) { writeBuffer.push(C_REFERENCE + strFromLargeUint(ref)); return; } // Remember reference writeRefs.set(data, refCounter++); // Extended type name switch ( xtypeInt ) { case I_ARRAY: { const size = data.length; if ( size < NUMSAFECHARS ) { writeBuffer.push(C_ARRAY_SMALL + intToChar[size]); } else { writeBuffer.push(C_ARRAY_LARGE + strFromLargeUint(size)); } for ( const v of data ) { _serialize(v); } return; } case I_SET: { const size = data.size; if ( size < NUMSAFECHARS ) { writeBuffer.push(C_SET_SMALL + intToChar[size]); } else { writeBuffer.push(C_SET_LARGE + strFromLargeUint(size)); } for ( const v of data ) { _serialize(v); } return; } case I_MAP: { const size = data.size; if ( size < NUMSAFECHARS ) { writeBuffer.push(C_MAP_SMALL + intToChar[size]); } else { writeBuffer.push(C_MAP_LARGE + strFromLargeUint(size)); } for ( const [ k, v ] of data ) { _serialize(k); _serialize(v); } return; } case I_ARRAYBUFFER: { const byteLength = data.byteLength; writeBuffer.push(C_ARRAYBUFFER + strFromLargeUint(byteLength)); _serialize(data.maxByteLength); const arrbuffDetails = analyzeArrayBuffer(data); _serialize(arrbuffDetails.dense); const str = arrbuffDetails.dense ? denseArrayBufferToStr(data, arrbuffDetails) : sparseArrayBufferToStr(data, arrbuffDetails); _serialize(str); //console.log(`arrbuf size=${byteLength} content size=${arrbuffDetails.end} dense=${arrbuffDetails.dense} array size=${arrbuffDetails.dense ? arrbuffDetails.denseSize : arrbuffDetails.sparseSize} serialized size=${str.length}`); return; } case I_INT8ARRAY: case I_UINT8ARRAY: case I_UINT8CLAMPEDARRAY: case I_INT16ARRAY: case I_UINT16ARRAY: case I_INT32ARRAY: case I_UINT32ARRAY: case I_FLOAT32ARRAY: case I_FLOAT64ARRAY: writeBuffer.push( xtypeToSerializedChar[xtypeName], strFromLargeUint(data.byteOffset), strFromLargeUint(data.length) ); _serialize(data.buffer); return; case I_DATAVIEW: writeBuffer.push(C_DATAVIEW, strFromLargeUint(data.byteOffset), strFromLargeUint(data.byteLength)); _serialize(data.buffer); return; default: { const keys = Object.keys(data); const size = keys.length; if ( size < NUMSAFECHARS ) { writeBuffer.push(C_OBJECT_SMALL + intToChar[size]); } else { writeBuffer.push(C_OBJECT_LARGE + strFromLargeUint(size)); } for ( const key of keys ) { _serialize(key); _serialize(data[key]); } break; } } }; /******************************************************************************/ const _deserialize = ( ) => { if ( readPtr >= readEnd ) { return; } const type = charCodeToInt[readStr.charCodeAt(readPtr++)]; switch ( type ) { // Primitive types case I_STRING_SMALL: case I_STRING_LARGE: { const size = type === I_STRING_SMALL ? charCodeToInt[readStr.charCodeAt(readPtr++)] : deserializeLargeUint(); const beg = readPtr; readPtr += size; return readStr.slice(beg, readPtr); } case I_ZERO: return 0; case I_INTEGER_SMALL_POS: return charCodeToInt[readStr.charCodeAt(readPtr++)]; case I_INTEGER_SMALL_NEG: return -charCodeToInt[readStr.charCodeAt(readPtr++)]; case I_INTEGER_LARGE_POS: return deserializeLargeUint(); case I_INTEGER_LARGE_NEG: return -deserializeLargeUint(); case I_BOOL_FALSE: return false; case I_BOOL_TRUE: return true; case I_NULL: return null; case I_UNDEFINED: return; case I_FLOAT: { const size = deserializeLargeUint(); const beg = readPtr; readPtr += size; return parseFloat(readStr.slice(beg, readPtr)); } case I_REGEXP: { const source = _deserialize(); const flags = _deserialize(); return new RegExp(source, flags); } case I_DATE: { const time = _deserialize(); return new Date(time); } case I_REFERENCE: { const ref = deserializeLargeUint(); return readRefs.get(ref); } case I_OBJECT_SMALL: case I_OBJECT_LARGE: { const entries = []; const size = type === I_OBJECT_SMALL ? charCodeToInt[readStr.charCodeAt(readPtr++)] : deserializeLargeUint(); for ( let i = 0; i < size; i++ ) { const k = _deserialize(); const v = _deserialize(); entries.push([ k, v ]); } const out = Object.fromEntries(entries); readRefs.set(refCounter++, out); return out; } case I_ARRAY_SMALL: case I_ARRAY_LARGE: { const out = []; const size = type === I_ARRAY_SMALL ? charCodeToInt[readStr.charCodeAt(readPtr++)] : deserializeLargeUint(); for ( let i = 0; i < size; i++ ) { out.push(_deserialize()); } readRefs.set(refCounter++, out); return out; } case I_SET_SMALL: case I_SET_LARGE: { const entries = []; const size = type === I_SET_SMALL ? charCodeToInt[readStr.charCodeAt(readPtr++)] : deserializeLargeUint(); for ( let i = 0; i < size; i++ ) { entries.push(_deserialize()); } const out = new Set(entries); readRefs.set(refCounter++, out); return out; } case I_MAP_SMALL: case I_MAP_LARGE: { const entries = []; const size = type === I_MAP_SMALL ? charCodeToInt[readStr.charCodeAt(readPtr++)] : deserializeLargeUint(); for ( let i = 0; i < size; i++ ) { const k = _deserialize(); const v = _deserialize(); entries.push([ k, v ]); } const out = new Map(entries); readRefs.set(refCounter++, out); return out; } case I_ARRAYBUFFER: { const byteLength = deserializeLargeUint(); const maxByteLength = _deserialize(); let options; if ( maxByteLength !== 0 && maxByteLength !== byteLength ) { options = { maxByteLength }; } const arrbuf = new ArrayBuffer(byteLength, options); const dense = _deserialize(); const str = _deserialize(); if ( dense ) { denseArrayBufferFromStr(str, arrbuf); } else { sparseArrayBufferFromStr(str, arrbuf); } readRefs.set(refCounter++, arrbuf); return arrbuf; } case I_INT8ARRAY: case I_UINT8ARRAY: case I_UINT8CLAMPEDARRAY: case I_INT16ARRAY: case I_UINT16ARRAY: case I_INT32ARRAY: case I_UINT32ARRAY: case I_FLOAT32ARRAY: case I_FLOAT64ARRAY: case I_DATAVIEW: { const byteOffset = deserializeLargeUint(); const length = deserializeLargeUint(); const arrayBuffer = _deserialize(); const ctor = toArrayBufferViewConstructor[`${type}`]; const out = new ctor(arrayBuffer, byteOffset, length); readRefs.set(refCounter++, out); return out; } default: break; } readPtr = FAILMARK; }; /******************************************************************************* * * LZ4 block compression/decompression * * Imported from: * https://github.com/gorhill/lz4-wasm/blob/8995cdef7b/dist/lz4-block-codec-js.js * * Customized to avoid external dependencies as I entertain the idea of * spinning off the serializer as a standalone utility for all to use. * * */ class LZ4BlockJS { constructor() { this.hashTable = undefined; this.outputBuffer = undefined; } reset() { this.hashTable = undefined; this.outputBuffer = undefined; } growOutputBuffer(size) { if ( this.outputBuffer !== undefined ) { if ( this.outputBuffer.byteLength >= size ) { return; } } this.outputBuffer = new ArrayBuffer(size + 0xFFFF & 0x7FFF0000); } encodeBound(size) { return size > 0x7E000000 ? 0 : size + (size / 255 | 0) + 16; } encodeBlock(iBuf, oOffset) { const iLen = iBuf.byteLength; if ( iLen >= 0x7E000000 ) { throw new RangeError(); } // "The last match must start at least 12 bytes before end of block" const lastMatchPos = iLen - 12; // "The last 5 bytes are always literals" const lastLiteralPos = iLen - 5; if ( this.hashTable === undefined ) { this.hashTable = new Int32Array(65536); } this.hashTable.fill(-65536); if ( isInstanceOf(iBuf, 'ArrayBuffer') ) { iBuf = new Uint8Array(iBuf); } const oLen = oOffset + this.encodeBound(iLen); this.growOutputBuffer(oLen); const oBuf = new Uint8Array(this.outputBuffer, 0, oLen); let iPos = 0; let oPos = oOffset; let anchorPos = 0; // sequence-finding loop for (;;) { let refPos; let mOffset; let sequence = iBuf[iPos] << 8 | iBuf[iPos+1] << 16 | iBuf[iPos+2] << 24; // match-finding loop while ( iPos <= lastMatchPos ) { sequence = sequence >>> 8 | iBuf[iPos+3] << 24; const hash = (sequence * 0x9E37 & 0xFFFF) + (sequence * 0x79B1 >>> 16) & 0xFFFF; refPos = this.hashTable[hash]; this.hashTable[hash] = iPos; mOffset = iPos - refPos; if ( mOffset < 65536 && iBuf[refPos+0] === ((sequence ) & 0xFF) && iBuf[refPos+1] === ((sequence >>> 8) & 0xFF) && iBuf[refPos+2] === ((sequence >>> 16) & 0xFF) && iBuf[refPos+3] === ((sequence >>> 24) & 0xFF) ) { break; } iPos += 1; } // no match found if ( iPos > lastMatchPos ) { break; } // match found let lLen = iPos - anchorPos; let mLen = iPos; iPos += 4; refPos += 4; while ( iPos < lastLiteralPos && iBuf[iPos] === iBuf[refPos] ) { iPos += 1; refPos += 1; } mLen = iPos - mLen; const token = mLen < 19 ? mLen - 4 : 15; // write token, length of literals if needed if ( lLen >= 15 ) { oBuf[oPos++] = 0xF0 | token; let l = lLen - 15; while ( l >= 255 ) { oBuf[oPos++] = 255; l -= 255; } oBuf[oPos++] = l; } else { oBuf[oPos++] = (lLen << 4) | token; } // write literals while ( lLen-- ) { oBuf[oPos++] = iBuf[anchorPos++]; } if ( mLen === 0 ) { break; } // write offset of match oBuf[oPos+0] = mOffset; oBuf[oPos+1] = mOffset >>> 8; oPos += 2; // write length of match if needed if ( mLen >= 19 ) { let l = mLen - 19; while ( l >= 255 ) { oBuf[oPos++] = 255; l -= 255; } oBuf[oPos++] = l; } anchorPos = iPos; } // last sequence is literals only let lLen = iLen - anchorPos; if ( lLen >= 15 ) { oBuf[oPos++] = 0xF0; let l = lLen - 15; while ( l >= 255 ) { oBuf[oPos++] = 255; l -= 255; } oBuf[oPos++] = l; } else { oBuf[oPos++] = lLen << 4; } while ( lLen-- ) { oBuf[oPos++] = iBuf[anchorPos++]; } return new Uint8Array(oBuf.buffer, 0, oPos); } decodeBlock(iBuf, iOffset, oLen) { const iLen = iBuf.byteLength; this.growOutputBuffer(oLen); const oBuf = new Uint8Array(this.outputBuffer, 0, oLen); let iPos = iOffset, oPos = 0; while ( iPos < iLen ) { const token = iBuf[iPos++]; // literals let clen = token >>> 4; // length of literals if ( clen !== 0 ) { if ( clen === 15 ) { let l; for (;;) { l = iBuf[iPos++]; if ( l !== 255 ) { break; } clen += 255; } clen += l; } // copy literals const end = iPos + clen; while ( iPos < end ) { oBuf[oPos++] = iBuf[iPos++]; } if ( iPos === iLen ) { break; } } // match const mOffset = iBuf[iPos+0] | (iBuf[iPos+1] << 8); if ( mOffset === 0 || mOffset > oPos ) { return; } iPos += 2; // length of match clen = (token & 0x0F) + 4; if ( clen === 19 ) { let l; for (;;) { l = iBuf[iPos++]; if ( l !== 255 ) { break; } clen += 255; } clen += l; } // copy match const end = oPos + clen; let mPos = oPos - mOffset; while ( oPos < end ) { oBuf[oPos++] = oBuf[mPos++]; } } return oBuf; } encode(input, outputOffset) { if ( isInstanceOf(input, 'ArrayBuffer') ) { input = new Uint8Array(input); } else if ( isInstanceOf(input, 'Uint8Array') === false ) { throw new TypeError(); } return this.encodeBlock(input, outputOffset); } decode(input, inputOffset, outputSize) { if ( isInstanceOf(input, 'ArrayBuffer') ) { input = new Uint8Array(input); } else if ( isInstanceOf(input, 'Uint8Array') === false ) { throw new TypeError(); } return this.decodeBlock(input, inputOffset, outputSize); } } /******************************************************************************* * * Synchronous APIs * * */ export const serialize = (data, options = {}) => { refCounter = 1; _serialize(data); writeBuffer.unshift(MAGICPREFIX); const s = writeBuffer.join(''); writeRefs.clear(); writeBuffer.length = 0; if ( shouldCompress(s, options) === false ) { return s; } const lz4Util = new LZ4BlockJS(); const uint8ArrayBefore = textEncoder.encode(s); const uint8ArrayAfter = lz4Util.encode(uint8ArrayBefore, 0); const lz4 = { size: uint8ArrayBefore.length, data: new Uint8Array(uint8ArrayAfter), }; refCounter = 1; _serialize(lz4); writeBuffer.unshift(MAGICLZ4PREFIX); const t = writeBuffer.join(''); writeRefs.clear(); writeBuffer.length = 0; const ratio = t.length / s.length; return ratio <= 0.85 ? t : s; }; export const deserialize = s => { if ( s.startsWith(MAGICLZ4PREFIX) ) { refCounter = 1; readStr = s; readEnd = s.length; readPtr = MAGICLZ4PREFIX.length; const lz4 = _deserialize(); readRefs.clear(); readStr = ''; const lz4Util = new LZ4BlockJS(); const uint8ArrayAfter = lz4Util.decode(lz4.data, 0, lz4.size); s = textDecoder.decode(new Uint8Array(uint8ArrayAfter)); } if ( s.startsWith(MAGICPREFIX) === false ) { return; } refCounter = 1; readStr = s; readEnd = s.length; readPtr = MAGICPREFIX.length; const data = _deserialize(); readRefs.clear(); readStr = ''; uint8Input = null; if ( readPtr === FAILMARK ) { return; } return data; }; export const isSerialized = s => typeof s === 'string' && (s.startsWith(MAGICLZ4PREFIX) || s.startsWith(MAGICPREFIX)); export const isCompressed = s => typeof s === 'string' && s.startsWith(MAGICLZ4PREFIX); /******************************************************************************* * * Configuration * * */ const defaultConfig = { threadTTL: 3000, }; const validateConfig = { threadTTL: val => val > 0, }; const currentConfig = Object.assign({}, defaultConfig); export const getConfig = ( ) => Object.assign({}, currentConfig); export const setConfig = config => { for ( const key in Object.keys(config) ) { if ( defaultConfig.hasOwnProperty(key) === false ) { continue; } const val = config[key]; if ( typeof val !== typeof defaultConfig[key] ) { continue; } if ( (validateConfig[key])(val) === false ) { continue; } currentConfig[key] = val; } }; /******************************************************************************* * * Asynchronous APIs * * Being asynchronous allows to support workers and future features such as * checksums. * * */ const THREAD_AREYOUREADY = 1; const THREAD_IAMREADY = 2; const THREAD_SERIALIZE = 3; const THREAD_DESERIALIZE = 4; class MainThread { constructor() { this.name = 'main'; this.jobs = []; this.workload = 0; this.timer = undefined; this.busy = 2; } process() { if ( this.jobs.length === 0 ) { return; } const job = this.jobs.shift(); this.workload -= job.size; const result = job.what === THREAD_SERIALIZE ? serialize(job.data, job.options) : deserialize(job.data); job.resolve(result); this.processAsync(); if ( this.jobs.length === 0 ) { this.busy = 2; } else if ( this.busy > 2 ) { this.busy -= 1; } } processAsync() { if ( this.timer !== undefined ) { return; } if ( this.jobs.length === 0 ) { return; } this.timer = globalThis.requestIdleCallback(deadline => { this.timer = undefined; globalThis.queueMicrotask(( ) => { this.process(); }); if ( deadline.timeRemaining() === 0 ) { this.busy += 1; } }, { timeout: 5 }); } serialize(data, options) { return new Promise(resolve => { this.workload += 1; this.jobs.push({ what: THREAD_SERIALIZE, data, options, size: 1, resolve }); this.processAsync(); }); } deserialize(data, options) { return new Promise(resolve => { const size = data.length; this.workload += size; this.jobs.push({ what: THREAD_DESERIALIZE, data, options, size, resolve }); this.processAsync(); }); } get queueSize() { return this.jobs.length; } get workSize() { return this.workload * this.busy; } } class Thread { constructor(gcer) { this.name = 'worker'; this.jobs = new Map(); this.jobIdGenerator = 1; this.workload = 0; this.workerAccessTime = 0; this.workerTimer = undefined; this.gcer = gcer; this.workerPromise = new Promise(resolve => { let worker = null; try { worker = new Worker('js/s14e-serializer.js', { type: 'module' }); worker.onmessage = ev => { const msg = ev.data; if ( isInstanceOf(msg, 'Object') === false ) { return; } if ( msg.what === THREAD_IAMREADY ) { worker.onmessage = ev => { this.onmessage(ev); }; worker.onerror = null; resolve(worker); } }; worker.onerror = ( ) => { worker.onmessage = worker.onerror = null; resolve(null); }; worker.postMessage({ what: THREAD_AREYOUREADY, config: currentConfig, }); } catch(ex) { console.info(ex); worker.onmessage = worker.onerror = null; resolve(null); } }); } countdownWorker() { if ( this.workerTimer !== undefined ) { return; } this.workerTimer = setTimeout(async ( ) => { this.workerTimer = undefined; if ( this.jobs.size !== 0 ) { return; } const idleTime = Date.now() - this.workerAccessTime; if ( idleTime < currentConfig.threadTTL ) { return this.countdownWorker(); } const worker = await this.workerPromise; if ( this.jobs.size !== 0 ) { return; } this.gcer(this); if ( worker === null ) { return; } worker.onmessage = worker.onerror = null; worker.terminate(); }, currentConfig.threadTTL); } onmessage(ev) { this.ondone(ev.data); } ondone(job) { const resolve = this.jobs.get(job.id); if ( resolve === undefined ) { return; } this.jobs.delete(job.id); resolve(job.result); this.workload -= job.size; if ( this.jobs.size !== 0 ) { return; } this.countdownWorker(); } async serialize(data, options) { return new Promise(resolve => { const id = this.jobIdGenerator++; this.workload += 1; this.jobs.set(id, resolve); return this.workerPromise.then(worker => { this.workerAccessTime = Date.now(); if ( worker === null ) { this.ondone({ id, result: serialize(data, options), size: 1 }); } else { worker.postMessage({ what: THREAD_SERIALIZE, id, data, options, size: 1 }); } }); }); } async deserialize(data, options) { return new Promise(resolve => { const id = this.jobIdGenerator++; const size = data.length; this.workload += size; this.jobs.set(id, resolve); return this.workerPromise.then(worker => { this.workerAccessTime = Date.now(); if ( worker === null ) { this.ondone({ id, result: deserialize(data, options), size }); } else { worker.postMessage({ what: THREAD_DESERIALIZE, id, data, options, size }); } }); }); } get queueSize() { return this.jobs.size; } get workSize() { return this.workload; } } const threads = { pool: [ new MainThread() ], thread(maxPoolSize) { const poolSize = this.pool.length; if ( poolSize !== 0 && poolSize >= maxPoolSize ) { if ( poolSize === 1 ) { return this.pool[0]; } return this.pool.reduce((a, b) => { //console.log(`${a.name}: q=${a.queueSize} w=${a.workSize} ${b.name}: q=${b.queueSize} w=${b.workSize}`); if ( b.queueSize === 0 ) { return b; } if ( a.queueSize === 0 ) { return a; } return b.workSize < a.workSize ? b : a; }); } const thread = new Thread(thread => { const pos = this.pool.indexOf(thread); if ( pos === -1 ) { return; } this.pool.splice(pos, 1); }); this.pool.push(thread); return thread; }, }; export async function serializeAsync(data, options = {}) { const maxThreadCount = options.multithreaded || 0; if ( maxThreadCount === 0 ) { return serialize(data, options); } const thread = threads.thread(maxThreadCount); //console.log(`serializeAsync: thread=${thread.name} workload=${thread.workSize}`); const result = await thread.serialize(data, options); if ( result !== undefined ) { return result; } return serialize(data, options); } export async function deserializeAsync(data, options = {}) { if ( isSerialized(data) === false ) { return data; } const maxThreadCount = options.multithreaded || 0; if ( maxThreadCount === 0 ) { return deserialize(data, options); } const thread = threads.thread(maxThreadCount); //console.log(`deserializeAsync: thread=${thread.name} data=${data.length} workload=${thread.workSize}`); const result = await thread.deserialize(data, options); if ( result !== undefined ) { return result; } return deserialize(data, options); } /******************************************************************************* * * Worker-only code * * */ if ( isInstanceOf(globalThis, 'DedicatedWorkerGlobalScope') ) { globalThis.onmessage = ev => { const msg = ev.data; switch ( msg.what ) { case THREAD_AREYOUREADY: setConfig(msg.config); globalThis.postMessage({ what: THREAD_IAMREADY }); break; case THREAD_SERIALIZE: const result = serialize(msg.data, msg.options); globalThis.postMessage({ id: msg.id, size: msg.size, result }); break; case THREAD_DESERIALIZE: { const result = deserialize(msg.data); globalThis.postMessage({ id: msg.id, size: msg.size, result }); break; } } }; } /******************************************************************************/