diff options
Diffstat (limited to 'ext/wasm/common/whwasmutil.js')
-rw-r--r-- | ext/wasm/common/whwasmutil.js | 1706 |
1 files changed, 1706 insertions, 0 deletions
diff --git a/ext/wasm/common/whwasmutil.js b/ext/wasm/common/whwasmutil.js new file mode 100644 index 0000000..7e5e798 --- /dev/null +++ b/ext/wasm/common/whwasmutil.js @@ -0,0 +1,1706 @@ +/** + 2022-07-08 + + The author disclaims copyright to this source code. In place of a + legal notice, here is a blessing: + + * May you do good and not evil. + * May you find forgiveness for yourself and forgive others. + * May you share freely, never taking more than you give. + + *********************************************************************** + + The whwasmutil is developed in conjunction with the Jaccwabyt + project: + + https://fossil.wanderinghorse.net/r/jaccwabyt + + and sqlite3: + + https://sqlite.org + + This file is kept in sync between both of those trees. + + Maintenance reminder: If you're reading this in a tree other than + one of those listed above, note that this copy may be replaced with + upstream copies of that one from time to time. Thus the code + installed by this function "should not" be edited outside of those + projects, else it risks getting overwritten. +*/ +/** + This function is intended to simplify porting around various bits + of WASM-related utility code from project to project. + + The primary goal of this code is to replace, where possible, + Emscripten-generated glue code with equivalent utility code which + can be used in arbitrary WASM environments built with toolchains + other than Emscripten. As of this writing, this code is capable of + acting as a replacement for Emscripten's generated glue code + _except_ that the latter installs handlers for Emscripten-provided + APIs such as its "FS" (virtual filesystem) API. Loading of such + things still requires using Emscripten's glue, but the post-load + utility APIs provided by this code are still usable as replacements + for their sub-optimally-documented Emscripten counterparts. + + Intended usage: + + ``` + self.WhWasmUtilInstaller(appObject); + delete self.WhWasmUtilInstaller; + ``` + + Its global-scope symbol is intended only to provide an easy way to + make it available to 3rd-party scripts and "should" be deleted + after calling it. That symbols is _not_ used within the library. + + Forewarning: this API explicitly targets only browser + environments. If a given non-browser environment has the + capabilities needed for a given feature (e.g. TextEncoder), great, + but it does not go out of its way to account for them and does not + provide compatibility crutches for them. + + It currently offers alternatives to the following + Emscripten-generated APIs: + + - OPTIONALLY memory allocation, but how this gets imported is + environment-specific. Most of the following features only work + if allocation is available. + + - WASM-exported "indirect function table" access and + manipulation. e.g. creating new WASM-side functions using JS + functions, analog to Emscripten's addFunction() and + uninstallFunction() but slightly different. + + - Get/set specific heap memory values, analog to Emscripten's + getValue() and setValue(). + + - String length counting in UTF-8 bytes (C-style and JS strings). + + - JS string to C-string conversion and vice versa, analog to + Emscripten's stringToUTF8Array() and friends, but with slighter + different interfaces. + + - JS string to Uint8Array conversion, noting that browsers actually + already have this built in via TextEncoder. + + - "Scoped" allocation, such that allocations made inside of a given + explicit scope will be automatically cleaned up when the scope is + closed. This is fundamentally similar to Emscripten's + stackAlloc() and friends but uses the heap instead of the stack + because access to the stack requires C code. + + - Create JS wrappers for WASM functions, analog to Emscripten's + ccall() and cwrap() functions, except that the automatic + conversions for function arguments and return values can be + easily customized by the client by assigning custom function + signature type names to conversion functions. Essentially, + it's ccall() and cwrap() on steroids. + + How to install... + + Passing an object to this function will install the functionality + into that object. Afterwards, client code "should" delete the global + symbol. + + This code requires that the target object have the following + properties, noting that they needn't be available until the first + time one of the installed APIs is used (as opposed to when this + function is called) except where explicitly noted: + + - `exports` must be a property of the target object OR a property + of `target.instance` (a WebAssembly.Module instance) and it must + contain the symbols exported by the WASM module associated with + this code. In an Enscripten environment it must be set to + `Module['asm']`. The exports object must contain a minimum of the + following symbols: + + - `memory`: a WebAssembly.Memory object representing the WASM + memory. _Alternately_, the `memory` property can be set as + `target.memory`, in particular if the WASM heap memory is + initialized in JS an _imported_ into WASM, as opposed to being + initialized in WASM and exported to JS. + + - `__indirect_function_table`: the WebAssembly.Table object which + holds WASM-exported functions. This API does not strictly + require that the table be able to grow but it will throw if its + `installFunction()` is called and the table cannot grow. + + In order to simplify downstream usage, if `target.exports` is not + set when this is called then a property access interceptor + (read-only, configurable, enumerable) gets installed as `exports` + which resolves to `target.instance.exports`, noting that the latter + property need not exist until the first time `target.exports` is + accessed. + + Some APIs _optionally_ make use of the `bigIntEnabled` property of + the target object. It "should" be set to true if the WASM + environment is compiled with BigInt support, else it must be + false. If it is false, certain BigInt-related features will trigger + an exception if invoked. This property, if not set when this is + called, will get a default value of true only if the BigInt64Array + constructor is available, else it will default to false. Note that + having the BigInt type is not sufficient for full int64 integration + with WASM: the target WASM file must also have been built with + that support. In Emscripten that's done using the `-sWASM_BIGINT` + flag. + + Some optional APIs require that the target have the following + methods: + + - 'alloc()` must behave like C's `malloc()`, allocating N bytes of + memory and returning its pointer. In Emscripten this is + conventionally made available via `Module['_malloc']`. This API + requires that the alloc routine throw on allocation error, as + opposed to returning null or 0. + + - 'dealloc()` must behave like C's `free()`, accepting either a + pointer returned from its allocation counterpart or the values + null/0 (for which it must be a no-op). allocating N bytes of + memory and returning its pointer. In Emscripten this is + conventionally made available via `Module['_free']`. + + APIs which require allocation routines are explicitly documented as + such and/or have "alloc" in their names. + + This code is developed and maintained in conjunction with the + Jaccwabyt project: + + https://fossil.wanderinghorse.net/r/jaccwabbyt + + More specifically: + + https://fossil.wanderinghorse.net/r/jaccwabbyt/file/common/whwasmutil.js +*/ +self.WhWasmUtilInstaller = function(target){ + 'use strict'; + if(undefined===target.bigIntEnabled){ + target.bigIntEnabled = !!self['BigInt64Array']; + } + + /** Throws a new Error, the message of which is the concatenation of + all args with a space between each. */ + const toss = (...args)=>{throw new Error(args.join(' '))}; + + if(!target.exports){ + Object.defineProperty(target, 'exports', { + enumerable: true, configurable: true, + get: ()=>(target.instance && target.instance.exports) + }); + } + + /********* + alloc()/dealloc() auto-install... + + This would be convenient but it can also cause us to pick up + malloc() even when the client code is using a different exported + allocator (who, me?), which is bad. malloc() may be exported even + if we're not explicitly using it and overriding the malloc() + function, linking ours first, is not always feasible when using a + malloc() proxy, as it can lead to recursion and stack overflow + (who, me?). So... we really need the downstream code to set up + target.alloc/dealloc() itself. + ******/ + /****** + if(target.exports){ + //Maybe auto-install alloc()/dealloc()... + if(!target.alloc && target.exports.malloc){ + target.alloc = function(n){ + const m = this(n); + return m || toss("Allocation of",n,"byte(s) failed."); + }.bind(target.exports.malloc); + } + + if(!target.dealloc && target.exports.free){ + target.dealloc = function(ptr){ + if(ptr) this(ptr); + }.bind(target.exports.free); + } + }*******/ + + /** + Pointers in WASM are currently assumed to be 32-bit, but someday + that will certainly change. + */ + const ptrIR = target.pointerIR || 'i32'; + const ptrSizeof = target.ptrSizeof = + ('i32'===ptrIR ? 4 + : ('i64'===ptrIR + ? 8 : toss("Unhandled ptrSizeof:",ptrIR))); + /** Stores various cached state. */ + const cache = Object.create(null); + /** Previously-recorded size of cache.memory.buffer, noted so that + we can recreate the view objects if the heap grows. */ + cache.heapSize = 0; + /** WebAssembly.Memory object extracted from target.memory or + target.exports.memory the first time heapWrappers() is + called. */ + cache.memory = null; + /** uninstallFunction() puts table indexes in here for reuse and + installFunction() extracts them. */ + cache.freeFuncIndexes = []; + /** + Used by scopedAlloc() and friends. + */ + cache.scopedAlloc = []; + + cache.utf8Decoder = new TextDecoder(); + cache.utf8Encoder = new TextEncoder('utf-8'); + + /** + If (cache.heapSize !== cache.memory.buffer.byteLength), i.e. if + the heap has grown since the last call, updates cache.HEAPxyz. + Returns the cache object. + */ + const heapWrappers = function(){ + if(!cache.memory){ + cache.memory = (target.memory instanceof WebAssembly.Memory) + ? target.memory : target.exports.memory; + }else if(cache.heapSize === cache.memory.buffer.byteLength){ + return cache; + } + // heap is newly-acquired or has been resized.... + const b = cache.memory.buffer; + cache.HEAP8 = new Int8Array(b); cache.HEAP8U = new Uint8Array(b); + cache.HEAP16 = new Int16Array(b); cache.HEAP16U = new Uint16Array(b); + cache.HEAP32 = new Int32Array(b); cache.HEAP32U = new Uint32Array(b); + if(target.bigIntEnabled){ + cache.HEAP64 = new BigInt64Array(b); cache.HEAP64U = new BigUint64Array(b); + } + cache.HEAP32F = new Float32Array(b); cache.HEAP64F = new Float64Array(b); + cache.heapSize = b.byteLength; + return cache; + }; + + /** Convenience equivalent of this.heapForSize(8,false). */ + target.heap8 = ()=>heapWrappers().HEAP8; + + /** Convenience equivalent of this.heapForSize(8,true). */ + target.heap8u = ()=>heapWrappers().HEAP8U; + + /** Convenience equivalent of this.heapForSize(16,false). */ + target.heap16 = ()=>heapWrappers().HEAP16; + + /** Convenience equivalent of this.heapForSize(16,true). */ + target.heap16u = ()=>heapWrappers().HEAP16U; + + /** Convenience equivalent of this.heapForSize(32,false). */ + target.heap32 = ()=>heapWrappers().HEAP32; + + /** Convenience equivalent of this.heapForSize(32,true). */ + target.heap32u = ()=>heapWrappers().HEAP32U; + + /** + Requires n to be one of: + + - integer 8, 16, or 32. + - A integer-type TypedArray constructor: Int8Array, Int16Array, + Int32Array, or their Uint counterparts. + + If this.bigIntEnabled is true, it also accepts the value 64 or a + BigInt64Array/BigUint64Array, else it throws if passed 64 or one + of those constructors. + + Returns an integer-based TypedArray view of the WASM heap + memory buffer associated with the given block size. If passed + an integer as the first argument and unsigned is truthy then + the "U" (unsigned) variant of that view is returned, else the + signed variant is returned. If passed a TypedArray value, the + 2nd argument is ignored. Note that Float32Array and + Float64Array views are not supported by this function. + + Note that growth of the heap will invalidate any references to + this heap, so do not hold a reference longer than needed and do + not use a reference after any operation which may + allocate. Instead, re-fetch the reference by calling this + function again. + + Throws if passed an invalid n. + + Pedantic side note: the name "heap" is a bit of a misnomer. In an + Emscripten environment, the memory managed via the stack + allocation API is in the same Memory object as the heap (which + makes sense because otherwise arbitrary pointer X would be + ambiguous: is it in the heap or the stack?). + */ + target.heapForSize = function(n,unsigned = false){ + let ctor; + const c = (cache.memory && cache.heapSize === cache.memory.buffer.byteLength) + ? cache : heapWrappers(); + switch(n){ + case Int8Array: return c.HEAP8; case Uint8Array: return c.HEAP8U; + case Int16Array: return c.HEAP16; case Uint16Array: return c.HEAP16U; + case Int32Array: return c.HEAP32; case Uint32Array: return c.HEAP32U; + case 8: return unsigned ? c.HEAP8U : c.HEAP8; + case 16: return unsigned ? c.HEAP16U : c.HEAP16; + case 32: return unsigned ? c.HEAP32U : c.HEAP32; + case 64: + if(c.HEAP64) return unsigned ? c.HEAP64U : c.HEAP64; + break; + default: + if(target.bigIntEnabled){ + if(n===self['BigUint64Array']) return c.HEAP64U; + else if(n===self['BigInt64Array']) return c.HEAP64; + break; + } + } + toss("Invalid heapForSize() size: expecting 8, 16, 32,", + "or (if BigInt is enabled) 64."); + }; + + /** + Returns the WASM-exported "indirect function table." + */ + target.functionTable = function(){ + return target.exports.__indirect_function_table; + /** -----------------^^^^^ "seems" to be a standardized export name. + From Emscripten release notes from 2020-09-10: + - Use `__indirect_function_table` as the import name for the + table, which is what LLVM does. + */ + }; + + /** + Given a function pointer, returns the WASM function table entry + if found, else returns a falsy value. + */ + target.functionEntry = function(fptr){ + const ft = target.functionTable(); + return fptr < ft.length ? ft.get(fptr) : undefined; + }; + + /** + Creates a WASM function which wraps the given JS function and + returns the JS binding of that WASM function. The signature + string must be the Jaccwabyt-format or Emscripten + addFunction()-format function signature string. In short: in may + have one of the following formats: + + - Emscripten: `"x..."`, where the first x is a letter representing + the result type and subsequent letters represent the argument + types. Functions with no arguments have only a single + letter. See below. + + - Jaccwabyt: `"x(...)"` where `x` is the letter representing the + result type and letters in the parens (if any) represent the + argument types. Functions with no arguments use `x()`. See + below. + + Supported letters: + + - `i` = int32 + - `p` = int32 ("pointer") + - `j` = int64 + - `f` = float32 + - `d` = float64 + - `v` = void, only legal for use as the result type + + It throws if an invalid signature letter is used. + + Jaccwabyt-format signatures support some additional letters which + have no special meaning here but (in this context) act as aliases + for other letters: + + - `s`, `P`: same as `p` + + Sidebar: this code is developed together with Jaccwabyt, thus the + support for its signature format. + + The arguments may be supplied in either order: (func,sig) or + (sig,func). + */ + target.jsFuncToWasm = function f(func, sig){ + /** Attribution: adapted up from Emscripten-generated glue code, + refactored primarily for efficiency's sake, eliminating + call-local functions and superfluous temporary arrays. */ + if(!f._){/*static init...*/ + f._ = { + // Map of signature letters to type IR values + sigTypes: Object.assign(Object.create(null),{ + i: 'i32', p: 'i32', P: 'i32', s: 'i32', + j: 'i64', f: 'f32', d: 'f64' + }), + // Map of type IR values to WASM type code values + typeCodes: Object.assign(Object.create(null),{ + f64: 0x7c, f32: 0x7d, i64: 0x7e, i32: 0x7f + }), + /** Encodes n, which must be <2^14 (16384), into target array + tgt, as a little-endian value, using the given method + ('push' or 'unshift'). */ + uleb128Encode: function(tgt, method, n){ + if(n<128) tgt[method](n); + else tgt[method]( (n % 128) | 128, n>>7); + }, + /** Intentionally-lax pattern for Jaccwabyt-format function + pointer signatures, the intent of which is simply to + distinguish them from Emscripten-format signatures. The + downstream checks are less lax. */ + rxJSig: /^(\w)\((\w*)\)$/, + /** Returns the parameter-value part of the given signature + string. */ + sigParams: function(sig){ + const m = f._.rxJSig.exec(sig); + return m ? m[2] : sig.substr(1); + }, + /** Returns the IR value for the given letter or throws + if the letter is invalid. */ + letterType: (x)=>f._.sigTypes[x] || toss("Invalid signature letter:",x), + /** Returns an object describing the result type and parameter + type(s) of the given function signature, or throws if the + signature is invalid. */ + /******** // only valid for use with the WebAssembly.Function ctor, which + // is not yet documented on MDN. + sigToWasm: function(sig){ + const rc = {parameters:[], results: []}; + if('v'!==sig[0]) rc.results.push(f.sigTypes(sig[0])); + for(const x of f._.sigParams(sig)){ + rc.parameters.push(f._.typeCodes(x)); + } + return rc; + },************/ + /** Pushes the WASM data type code for the given signature + letter to the given target array. Throws if letter is + invalid. */ + pushSigType: (dest, letter)=>dest.push(f._.typeCodes[f._.letterType(letter)]) + }; + }/*static init*/ + if('string'===typeof func){ + const x = sig; + sig = func; + func = x; + } + const sigParams = f._.sigParams(sig); + const wasmCode = [0x01/*count: 1*/, 0x60/*function*/]; + f._.uleb128Encode(wasmCode, 'push', sigParams.length); + for(const x of sigParams) f._.pushSigType(wasmCode, x); + if('v'===sig[0]) wasmCode.push(0); + else{ + wasmCode.push(1); + f._.pushSigType(wasmCode, sig[0]); + } + f._.uleb128Encode(wasmCode, 'unshift', wasmCode.length)/* type section length */; + wasmCode.unshift( + 0x00, 0x61, 0x73, 0x6d, /* magic: "\0asm" */ + 0x01, 0x00, 0x00, 0x00, /* version: 1 */ + 0x01 /* type section code */ + ); + wasmCode.push( + /* import section: */ 0x02, 0x07, + /* (import "e" "f" (func 0 (type 0))): */ + 0x01, 0x01, 0x65, 0x01, 0x66, 0x00, 0x00, + /* export section: */ 0x07, 0x05, + /* (export "f" (func 0 (type 0))): */ + 0x01, 0x01, 0x66, 0x00, 0x00 + ); + return (new WebAssembly.Instance( + new WebAssembly.Module(new Uint8Array(wasmCode)), { + e: { f: func } + })).exports['f']; + }/*jsFuncToWasm()*/; + + /** + Expects a JS function and signature, exactly as for + this.jsFuncToWasm(). It uses that function to create a + WASM-exported function, installs that function to the next + available slot of this.functionTable(), and returns the + function's index in that table (which acts as a pointer to that + function). The returned pointer can be passed to + uninstallFunction() to uninstall it and free up the table slot for + reuse. + + If passed (string,function) arguments then it treats the first + argument as the signature and second as the function. + + As a special case, if the passed-in function is a WASM-exported + function then the signature argument is ignored and func is + installed as-is, without requiring re-compilation/re-wrapping. + + This function will propagate an exception if + WebAssembly.Table.grow() throws or this.jsFuncToWasm() throws. + The former case can happen in an Emscripten-compiled + environment when building without Emscripten's + `-sALLOW_TABLE_GROWTH` flag. + + Sidebar: this function differs from Emscripten's addFunction() + _primarily_ in that it does not share that function's + undocumented behavior of reusing a function if it's passed to + addFunction() more than once, which leads to uninstallFunction() + breaking clients which do not take care to avoid that case: + + https://github.com/emscripten-core/emscripten/issues/17323 + */ + target.installFunction = function f(func, sig){ + if(2!==arguments.length){ + toss("installFunction() requires exactly 2 arguments"); + } + if('string'===typeof func){ + const x = sig; + sig = func; + func = x; + } + const ft = target.functionTable(); + const oldLen = ft.length; + let ptr; + while(cache.freeFuncIndexes.length){ + ptr = cache.freeFuncIndexes.pop(); + if(ft.get(ptr)){ /* Table was modified via a different API */ + ptr = null; + continue; + }else{ + break; + } + } + if(!ptr){ + ptr = oldLen; + ft.grow(1); + } + try{ + /*this will only work if func is a WASM-exported function*/ + ft.set(ptr, func); + return ptr; + }catch(e){ + if(!(e instanceof TypeError)){ + if(ptr===oldLen) cache.freeFuncIndexes.push(oldLen); + throw e; + } + } + // It's not a WASM-exported function, so compile one... + try { + ft.set(ptr, target.jsFuncToWasm(func, sig)); + }catch(e){ + if(ptr===oldLen) cache.freeFuncIndexes.push(oldLen); + throw e; + } + return ptr; + }; + + /** + Requires a pointer value previously returned from + this.installFunction(). Removes that function from the WASM + function table, marks its table slot as free for re-use, and + returns that function. It is illegal to call this before + installFunction() has been called and results are undefined if + ptr was not returned by that function. The returned function + may be passed back to installFunction() to reinstall it. + */ + target.uninstallFunction = function(ptr){ + const fi = cache.freeFuncIndexes; + const ft = target.functionTable(); + fi.push(ptr); + const rc = ft.get(ptr); + ft.set(ptr, null); + return rc; + }; + + /** + Given a WASM heap memory address and a data type name in the form + (i8, i16, i32, i64, float (or f32), double (or f64)), this + fetches the numeric value from that address and returns it as a + number or, for the case of type='i64', a BigInt (noting that that + type triggers an exception if this.bigIntEnabled is + falsy). Throws if given an invalid type. + + As a special case, if type ends with a `*`, it is considered to + be a pointer type and is treated as the WASM numeric type + appropriate for the pointer size (`i32`). + + While likely not obvious, this routine and its setMemValue() + counterpart are how pointer-to-value _output_ parameters + in WASM-compiled C code can be interacted with: + + ``` + const ptr = alloc(4); + setMemValue(ptr, 0, 'i32'); // clear the ptr's value + aCFuncWithOutputPtrToInt32Arg( ptr ); // e.g. void foo(int *x); + const result = getMemValue(ptr, 'i32'); // fetch ptr's value + dealloc(ptr); + ``` + + scopedAlloc() and friends can be used to make handling of + `ptr` safe against leaks in the case of an exception: + + ``` + let result; + const scope = scopedAllocPush(); + try{ + const ptr = scopedAlloc(4); + setMemValue(ptr, 0, 'i32'); + aCFuncWithOutputPtrArg( ptr ); + result = getMemValue(ptr, 'i32'); + }finally{ + scopedAllocPop(scope); + } + ``` + + As a rule setMemValue() must be called to set (typically zero + out) the pointer's value, else it will contain an essentially + random value. + + ACHTUNG: calling this often, e.g. in a loop, can have a noticably + painful impact on performance. Rather than doing so, use + heapForSize() to fetch the heap object and read directly from it. + + See: setMemValue() + */ + target.getMemValue = function(ptr, type='i8'){ + if(type.endsWith('*')) type = ptrIR; + const c = (cache.memory && cache.heapSize === cache.memory.buffer.byteLength) + ? cache : heapWrappers(); + switch(type){ + case 'i1': + case 'i8': return c.HEAP8[ptr>>0]; + case 'i16': return c.HEAP16[ptr>>1]; + case 'i32': return c.HEAP32[ptr>>2]; + case 'i64': + if(target.bigIntEnabled) return BigInt(c.HEAP64[ptr>>3]); + break; + case 'float': case 'f32': return c.HEAP32F[ptr>>2]; + case 'double': case 'f64': return Number(c.HEAP64F[ptr>>3]); + default: break; + } + toss('Invalid type for getMemValue():',type); + }; + + /** + The counterpart of getMemValue(), this sets a numeric value at + the given WASM heap address, using the type to define how many + bytes are written. Throws if given an invalid type. See + getMemValue() for details about the type argument. If the 3rd + argument ends with `*` then it is treated as a pointer type and + this function behaves as if the 3rd argument were `i32`. + + This function returns itself. + + ACHTUNG: calling this often, e.g. in a loop, can have a noticably + painful impact on performance. Rather than doing so, use + heapForSize() to fetch the heap object and assign directly to it. + */ + target.setMemValue = function f(ptr, value, type='i8'){ + if (type.endsWith('*')) type = ptrIR; + const c = (cache.memory && cache.heapSize === cache.memory.buffer.byteLength) + ? cache : heapWrappers(); + switch (type) { + case 'i1': + case 'i8': c.HEAP8[ptr>>0] = value; return f; + case 'i16': c.HEAP16[ptr>>1] = value; return f; + case 'i32': c.HEAP32[ptr>>2] = value; return f; + case 'i64': + if(c.HEAP64){ + c.HEAP64[ptr>>3] = BigInt(value); + return f; + } + break; + case 'float': case 'f32': c.HEAP32F[ptr>>2] = value; return f; + case 'double': case 'f64': c.HEAP64F[ptr>>3] = value; return f; + } + toss('Invalid type for setMemValue(): ' + type); + }; + + + /** Convenience form of getMemValue() intended for fetching + pointer-to-pointer values. */ + target.getPtrValue = (ptr)=>target.getMemValue(ptr, ptrIR); + + /** Convenience form of setMemValue() intended for setting + pointer-to-pointer values. */ + target.setPtrValue = (ptr, value)=>target.setMemValue(ptr, value, ptrIR); + + /** + Returns true if the given value appears to be legal for use as + a WASM pointer value. Its _range_ of values is not (cannot be) + validated except to ensure that it is a 32-bit integer with a + value of 0 or greater. Likewise, it cannot verify whether the + value actually refers to allocated memory in the WASM heap. + */ + target.isPtr32 = (ptr)=>('number'===typeof ptr && (ptr===(ptr|0)) && ptr>=0); + + /** + isPtr() is an alias for isPtr32(). If/when 64-bit WASM pointer + support becomes widespread, it will become an alias for either + isPtr32() or the as-yet-hypothetical isPtr64(), depending on a + configuration option. + */ + target.isPtr = target.isPtr32; + + /** + Expects ptr to be a pointer into the WASM heap memory which + refers to a NUL-terminated C-style string encoded as UTF-8. + Returns the length, in bytes, of the string, as for `strlen(3)`. + As a special case, if !ptr then it it returns `null`. Throws if + ptr is out of range for target.heap8u(). + */ + target.cstrlen = function(ptr){ + if(!ptr) return null; + const h = heapWrappers().HEAP8U; + let pos = ptr; + for( ; h[pos] !== 0; ++pos ){} + return pos - ptr; + }; + + /** Internal helper to use in operations which need to distinguish + between SharedArrayBuffer heap memory and non-shared heap. */ + const __SAB = ('undefined'===typeof SharedArrayBuffer) + ? function(){} : SharedArrayBuffer; + const __utf8Decode = function(arrayBuffer, begin, end){ + return cache.utf8Decoder.decode( + (arrayBuffer.buffer instanceof __SAB) + ? arrayBuffer.slice(begin, end) + : arrayBuffer.subarray(begin, end) + ); + }; + + /** + Expects ptr to be a pointer into the WASM heap memory which + refers to a NUL-terminated C-style string encoded as UTF-8. This + function counts its byte length using cstrlen() then returns a + JS-format string representing its contents. As a special case, if + ptr is falsy, `null` is returned. + */ + target.cstringToJs = function(ptr){ + const n = target.cstrlen(ptr); + return n ? __utf8Decode(heapWrappers().HEAP8U, ptr, ptr+n) : (null===n ? n : ""); + }; + + /** + Given a JS string, this function returns its UTF-8 length in + bytes. Returns null if str is not a string. + */ + target.jstrlen = function(str){ + /** Attribution: derived from Emscripten's lengthBytesUTF8() */ + if('string'!==typeof str) return null; + const n = str.length; + let len = 0; + for(let i = 0; i < n; ++i){ + let u = str.charCodeAt(i); + if(u>=0xd800 && u<=0xdfff){ + u = 0x10000 + ((u & 0x3FF) << 10) | (str.charCodeAt(++i) & 0x3FF); + } + if(u<=0x7f) ++len; + else if(u<=0x7ff) len += 2; + else if(u<=0xffff) len += 3; + else len += 4; + } + return len; + }; + + /** + Encodes the given JS string as UTF8 into the given TypedArray + tgt, starting at the given offset and writing, at most, maxBytes + bytes (including the NUL terminator if addNul is true, else no + NUL is added). If it writes any bytes at all and addNul is true, + it always NUL-terminates the output, even if doing so means that + the NUL byte is all that it writes. + + If maxBytes is negative (the default) then it is treated as the + remaining length of tgt, starting at the given offset. + + If writing the last character would surpass the maxBytes count + because the character is multi-byte, that character will not be + written (as opposed to writing a truncated multi-byte character). + This can lead to it writing as many as 3 fewer bytes than + maxBytes specifies. + + Returns the number of bytes written to the target, _including_ + the NUL terminator (if any). If it returns 0, it wrote nothing at + all, which can happen if: + + - str is empty and addNul is false. + - offset < 0. + - maxBytes == 0. + - maxBytes is less than the byte length of a multi-byte str[0]. + + Throws if tgt is not an Int8Array or Uint8Array. + + Design notes: + + - In C's strcpy(), the destination pointer is the first + argument. That is not the case here primarily because the 3rd+ + arguments are all referring to the destination, so it seems to + make sense to have them grouped with it. + + - Emscripten's counterpart of this function (stringToUTF8Array()) + returns the number of bytes written sans NUL terminator. That + is, however, ambiguous: str.length===0 or maxBytes===(0 or 1) + all cause 0 to be returned. + */ + target.jstrcpy = function(jstr, tgt, offset = 0, maxBytes = -1, addNul = true){ + /** Attribution: the encoding bits are taken from Emscripten's + stringToUTF8Array(). */ + if(!tgt || (!(tgt instanceof Int8Array) && !(tgt instanceof Uint8Array))){ + toss("jstrcpy() target must be an Int8Array or Uint8Array."); + } + if(maxBytes<0) maxBytes = tgt.length - offset; + if(!(maxBytes>0) || !(offset>=0)) return 0; + let i = 0, max = jstr.length; + const begin = offset, end = offset + maxBytes - (addNul ? 1 : 0); + for(; i < max && offset < end; ++i){ + let u = jstr.charCodeAt(i); + if(u>=0xd800 && u<=0xdfff){ + u = 0x10000 + ((u & 0x3FF) << 10) | (jstr.charCodeAt(++i) & 0x3FF); + } + if(u<=0x7f){ + if(offset >= end) break; + tgt[offset++] = u; + }else if(u<=0x7ff){ + if(offset + 1 >= end) break; + tgt[offset++] = 0xC0 | (u >> 6); + tgt[offset++] = 0x80 | (u & 0x3f); + }else if(u<=0xffff){ + if(offset + 2 >= end) break; + tgt[offset++] = 0xe0 | (u >> 12); + tgt[offset++] = 0x80 | ((u >> 6) & 0x3f); + tgt[offset++] = 0x80 | (u & 0x3f); + }else{ + if(offset + 3 >= end) break; + tgt[offset++] = 0xf0 | (u >> 18); + tgt[offset++] = 0x80 | ((u >> 12) & 0x3f); + tgt[offset++] = 0x80 | ((u >> 6) & 0x3f); + tgt[offset++] = 0x80 | (u & 0x3f); + } + } + if(addNul) tgt[offset++] = 0; + return offset - begin; + }; + + /** + Works similarly to C's strncpy(), copying, at most, n bytes (not + characters) from srcPtr to tgtPtr. It copies until n bytes have + been copied or a 0 byte is reached in src. _Unlike_ strncpy(), it + returns the number of bytes it assigns in tgtPtr, _including_ the + NUL byte (if any). If n is reached before a NUL byte in srcPtr, + tgtPtr will _not_ be NULL-terminated. If a NUL byte is reached + before n bytes are copied, tgtPtr will be NUL-terminated. + + If n is negative, cstrlen(srcPtr)+1 is used to calculate it, the + +1 being for the NUL byte. + + Throws if tgtPtr or srcPtr are falsy. Results are undefined if: + + - either is not a pointer into the WASM heap or + + - srcPtr is not NUL-terminated AND n is less than srcPtr's + logical length. + + ACHTUNG: it is possible to copy partial multi-byte characters + this way, and converting such strings back to JS strings will + have undefined results. + */ + target.cstrncpy = function(tgtPtr, srcPtr, n){ + if(!tgtPtr || !srcPtr) toss("cstrncpy() does not accept NULL strings."); + if(n<0) n = target.cstrlen(strPtr)+1; + else if(!(n>0)) return 0; + const heap = target.heap8u(); + let i = 0, ch; + for(; i < n && (ch = heap[srcPtr+i]); ++i){ + heap[tgtPtr+i] = ch; + } + if(i<n) heap[tgtPtr + i++] = 0; + return i; + }; + + /** + For the given JS string, returns a Uint8Array of its contents + encoded as UTF-8. If addNul is true, the returned array will have + a trailing 0 entry, else it will not. + */ + target.jstrToUintArray = (str, addNul=false)=>{ + return cache.utf8Encoder.encode(addNul ? (str+"\0") : str); + // Or the hard way... + /** Attribution: derived from Emscripten's stringToUTF8Array() */ + //const a = [], max = str.length; + //let i = 0, pos = 0; + //for(; i < max; ++i){ + // let u = str.charCodeAt(i); + // if(u>=0xd800 && u<=0xdfff){ + // u = 0x10000 + ((u & 0x3FF) << 10) | (str.charCodeAt(++i) & 0x3FF); + // } + // if(u<=0x7f) a[pos++] = u; + // else if(u<=0x7ff){ + // a[pos++] = 0xC0 | (u >> 6); + // a[pos++] = 0x80 | (u & 63); + // }else if(u<=0xffff){ + // a[pos++] = 0xe0 | (u >> 12); + // a[pos++] = 0x80 | ((u >> 6) & 63); + // a[pos++] = 0x80 | (u & 63); + // }else{ + // a[pos++] = 0xf0 | (u >> 18); + // a[pos++] = 0x80 | ((u >> 12) & 63); + // a[pos++] = 0x80 | ((u >> 6) & 63); + // a[pos++] = 0x80 | (u & 63); + // } + // } + // return new Uint8Array(a); + }; + + const __affirmAlloc = (obj,funcName)=>{ + if(!(obj.alloc instanceof Function) || + !(obj.dealloc instanceof Function)){ + toss("Object is missing alloc() and/or dealloc() function(s)", + "required by",funcName+"()."); + } + }; + + const __allocCStr = function(jstr, returnWithLength, allocator, funcName){ + __affirmAlloc(target, funcName); + if('string'!==typeof jstr) return null; + const n = target.jstrlen(jstr), + ptr = allocator(n+1); + target.jstrcpy(jstr, target.heap8u(), ptr, n+1, true); + return returnWithLength ? [ptr, n] : ptr; + }; + + /** + Uses target.alloc() to allocate enough memory for jstrlen(jstr)+1 + bytes of memory, copies jstr to that memory using jstrcpy(), + NUL-terminates it, and returns the pointer to that C-string. + Ownership of the pointer is transfered to the caller, who must + eventually pass the pointer to dealloc() to free it. + + If passed a truthy 2nd argument then its return semantics change: + it returns [ptr,n], where ptr is the C-string's pointer and n is + its cstrlen(). + + Throws if `target.alloc` or `target.dealloc` are not functions. + */ + target.allocCString = + (jstr, returnWithLength=false)=>__allocCStr(jstr, returnWithLength, + target.alloc, 'allocCString()'); + + /** + Starts an "allocation scope." All allocations made using + scopedAlloc() are recorded in this scope and are freed when the + value returned from this function is passed to + scopedAllocPop(). + + This family of functions requires that the API's object have both + `alloc()` and `dealloc()` methods, else this function will throw. + + Intended usage: + + ``` + const scope = scopedAllocPush(); + try { + const ptr1 = scopedAlloc(100); + const ptr2 = scopedAlloc(200); + const ptr3 = scopedAlloc(300); + ... + // Note that only allocations made via scopedAlloc() + // are managed by this allocation scope. + }finally{ + scopedAllocPop(scope); + } + ``` + + The value returned by this function must be treated as opaque by + the caller, suitable _only_ for passing to scopedAllocPop(). + Its type and value are not part of this function's API and may + change in any given version of this code. + + `scopedAlloc.level` can be used to determine how many scoped + alloc levels are currently active. + */ + target.scopedAllocPush = function(){ + __affirmAlloc(target, 'scopedAllocPush'); + const a = []; + cache.scopedAlloc.push(a); + return a; + }; + + /** + Cleans up all allocations made using scopedAlloc() in the context + of the given opaque state object, which must be a value returned + by scopedAllocPush(). See that function for an example of how to + use this function. + + Though scoped allocations are managed like a stack, this API + behaves properly if allocation scopes are popped in an order + other than the order they were pushed. + + If called with no arguments, it pops the most recent + scopedAllocPush() result: + + ``` + scopedAllocPush(); + try{ ... } finally { scopedAllocPop(); } + ``` + + It's generally recommended that it be passed an explicit argument + to help ensure that push/push are used in matching pairs, but in + trivial code that may be a non-issue. + */ + target.scopedAllocPop = function(state){ + __affirmAlloc(target, 'scopedAllocPop'); + const n = arguments.length + ? cache.scopedAlloc.indexOf(state) + : cache.scopedAlloc.length-1; + if(n<0) toss("Invalid state object for scopedAllocPop()."); + if(0===arguments.length) state = cache.scopedAlloc[n]; + cache.scopedAlloc.splice(n,1); + for(let p; (p = state.pop()); ) target.dealloc(p); + }; + + /** + Allocates n bytes of memory using this.alloc() and records that + fact in the state for the most recent call of scopedAllocPush(). + Ownership of the memory is given to scopedAllocPop(), which + will clean it up when it is called. The memory _must not_ be + passed to this.dealloc(). Throws if this API object is missing + the required `alloc()` or `dealloc()` functions or no scoped + alloc is active. + + See scopedAllocPush() for an example of how to use this function. + + The `level` property of this function can be queried to query how + many scoped allocation levels are currently active. + + See also: scopedAllocPtr(), scopedAllocCString() + */ + target.scopedAlloc = function(n){ + if(!cache.scopedAlloc.length){ + toss("No scopedAllocPush() scope is active."); + } + const p = target.alloc(n); + cache.scopedAlloc[cache.scopedAlloc.length-1].push(p); + return p; + }; + + Object.defineProperty(target.scopedAlloc, 'level', { + configurable: false, enumerable: false, + get: ()=>cache.scopedAlloc.length, + set: ()=>toss("The 'active' property is read-only.") + }); + + /** + Works identically to allocCString() except that it allocates the + memory using scopedAlloc(). + + Will throw if no scopedAllocPush() call is active. + */ + target.scopedAllocCString = + (jstr, returnWithLength=false)=>__allocCStr(jstr, returnWithLength, + target.scopedAlloc, 'scopedAllocCString()'); + + // impl for allocMainArgv() and scopedAllocMainArgv(). + const __allocMainArgv = function(isScoped, list){ + if(!list.length) toss("Cannot allocate empty array."); + const pList = target[ + isScoped ? 'scopedAlloc' : 'alloc' + ](list.length * target.ptrSizeof); + let i = 0; + list.forEach((e)=>{ + target.setPtrValue(pList + (target.ptrSizeof * i++), + target[ + isScoped ? 'scopedAllocCString' : 'allocCString' + ](""+e)); + }); + return pList; + }; + + /** + Creates an array, using scopedAlloc(), suitable for passing to a + C-level main() routine. The input is a collection with a length + property and a forEach() method. A block of memory list.length + entries long is allocated and each pointer-sized block of that + memory is populated with a scopedAllocCString() conversion of the + (""+value) of each element. Returns a pointer to the start of the + list, suitable for passing as the 2nd argument to a C-style + main() function. + + Throws if list.length is falsy or scopedAllocPush() is not active. + */ + target.scopedAllocMainArgv = (list)=>__allocMainArgv(true, list); + + /** + Identical to scopedAllocMainArgv() but uses alloc() instead of + scopedAllocMainArgv + */ + target.allocMainArgv = (list)=>__allocMainArgv(false, list); + + /** + Wraps function call func() in a scopedAllocPush() and + scopedAllocPop() block, such that all calls to scopedAlloc() and + friends from within that call will have their memory freed + automatically when func() returns. If func throws or propagates + an exception, the scope is still popped, otherwise it returns the + result of calling func(). + */ + target.scopedAllocCall = function(func){ + target.scopedAllocPush(); + try{ return func() } finally{ target.scopedAllocPop() } + }; + + /** Internal impl for allocPtr() and scopedAllocPtr(). */ + const __allocPtr = function(howMany, safePtrSize, method){ + __affirmAlloc(target, method); + const pIr = safePtrSize ? 'i64' : ptrIR; + let m = target[method](howMany * (safePtrSize ? 8 : ptrSizeof)); + target.setMemValue(m, 0, pIr) + if(1===howMany){ + return m; + } + const a = [m]; + for(let i = 1; i < howMany; ++i){ + m += (safePtrSize ? 8 : ptrSizeof); + a[i] = m; + target.setMemValue(m, 0, pIr); + } + return a; + }; + + /** + Allocates one or more pointers as a single chunk of memory and + zeroes them out. + + The first argument is the number of pointers to allocate. The + second specifies whether they should use a "safe" pointer size (8 + bytes) or whether they may use the default pointer size + (typically 4 but also possibly 8). + + How the result is returned depends on its first argument: if + passed 1, it returns the allocated memory address. If passed more + than one then an array of pointer addresses is returned, which + can optionally be used with "destructuring assignment" like this: + + ``` + const [p1, p2, p3] = allocPtr(3); + ``` + + ACHTUNG: when freeing the memory, pass only the _first_ result + value to dealloc(). The others are part of the same memory chunk + and must not be freed separately. + + The reason for the 2nd argument is.. + + When one of the returned pointers will refer to a 64-bit value, + e.g. a double or int64, an that value must be written or fetched, + e.g. using setMemValue() or getMemValue(), it is important that + the pointer in question be aligned to an 8-byte boundary or else + it will not be fetched or written properly and will corrupt or + read neighboring memory. It is only safe to pass false when the + client code is certain that it will only get/fetch 4-byte values + (or smaller). + */ + target.allocPtr = + (howMany=1, safePtrSize=true)=>__allocPtr(howMany, safePtrSize, 'alloc'); + + /** + Identical to allocPtr() except that it allocates using scopedAlloc() + instead of alloc(). + */ + target.scopedAllocPtr = + (howMany=1, safePtrSize=true)=>__allocPtr(howMany, safePtrSize, 'scopedAlloc'); + + /** + If target.exports[name] exists, it is returned, else an + exception is thrown. + */ + target.xGet = function(name){ + return target.exports[name] || toss("Cannot find exported symbol:",name); + }; + + const __argcMismatch = + (f,n)=>toss(f+"() requires",n,"argument(s)."); + + /** + Looks up a WASM-exported function named fname from + target.exports. If found, it is called, passed all remaining + arguments, and its return value is returned to xCall's caller. If + not found, an exception is thrown. This function does no + conversion of argument or return types, but see xWrap() and + xCallWrapped() for variants which do. + + As a special case, if passed only 1 argument after the name and + that argument in an Array, that array's entries become the + function arguments. (This is not an ambiguous case because it's + not legal to pass an Array object to a WASM function.) + */ + target.xCall = function(fname, ...args){ + const f = target.xGet(fname); + if(!(f instanceof Function)) toss("Exported symbol",fname,"is not a function."); + if(f.length!==args.length) __argcMismatch(fname,f.length) + /* This is arguably over-pedantic but we want to help clients keep + from shooting themselves in the foot when calling C APIs. */; + return (2===arguments.length && Array.isArray(arguments[1])) + ? f.apply(null, arguments[1]) + : f.apply(null, args); + }; + + /** + State for use with xWrap() + */ + cache.xWrap = Object.create(null); + const xcv = cache.xWrap.convert = Object.create(null); + /** Map of type names to argument conversion functions. */ + cache.xWrap.convert.arg = Object.create(null); + /** Map of type names to return result conversion functions. */ + cache.xWrap.convert.result = Object.create(null); + + if(target.bigIntEnabled){ + xcv.arg.i64 = (i)=>BigInt(i); + } + xcv.arg.i32 = (i)=>(i | 0); + xcv.arg.i16 = (i)=>((i | 0) & 0xFFFF); + xcv.arg.i8 = (i)=>((i | 0) & 0xFF); + xcv.arg.f32 = xcv.arg.float = (i)=>Number(i).valueOf(); + xcv.arg.f64 = xcv.arg.double = xcv.arg.f32; + xcv.arg.int = xcv.arg.i32; + xcv.result['*'] = xcv.result['pointer'] = xcv.arg['**'] = xcv.arg[ptrIR]; + xcv.result['number'] = (v)=>Number(v); + + { /* Copy certain xcv.arg[...] handlers to xcv.result[...] and + add pointer-style variants of them. */ + const copyToResult = ['i8', 'i16', 'i32', 'int', + 'f32', 'float', 'f64', 'double']; + if(target.bigIntEnabled) copyToResult.push('i64'); + for(const t of copyToResult){ + xcv.arg[t+'*'] = xcv.result[t+'*'] = xcv.arg[ptrIR]; + xcv.result[t] = xcv.arg[t] || toss("Missing arg converter:",t); + } + } + + /** + In order for args of type string to work in various contexts in + the sqlite3 API, we need to pass them on as, variably, a C-string + or a pointer value. Thus for ARGs of type 'string' and + '*'/'pointer' we behave differently depending on whether the + argument is a string or not: + + - If v is a string, scopeAlloc() a new C-string from it and return + that temp string's pointer. + + - Else return the value from the arg adaptor defined for ptrIR. + + TODO? Permit an Int8Array/Uint8Array and convert it to a string? + Would that be too much magic concentrated in one place, ready to + backfire? + */ + xcv.arg.string = xcv.arg.utf8 = xcv.arg['pointer'] = xcv.arg['*'] + = function(v){ + if('string'===typeof v) return target.scopedAllocCString(v); + return v ? xcv.arg[ptrIR](v) : null; + }; + xcv.result.string = xcv.result.utf8 = (i)=>target.cstringToJs(i); + xcv.result['string:free'] = xcv.result['utf8:free'] = (i)=>{ + try { return i ? target.cstringToJs(i) : null } + finally{ target.dealloc(i) } + }; + xcv.result.json = (i)=>JSON.parse(target.cstringToJs(i)); + xcv.result['json:free'] = (i)=>{ + try{ return i ? JSON.parse(target.cstringToJs(i)) : null } + finally{ target.dealloc(i) } + } + xcv.result['void'] = (v)=>undefined; + xcv.result['null'] = (v)=>v; + + if(0){ + /*** + This idea can't currently work because we don't know the + signature for the func and don't have a way for the user to + convey it. To do this we likely need to be able to match + arg/result handlers by a regex, but that would incur an O(N) + cost as we check the regex one at a time. Another use case for + such a thing would be pseudotypes like "int:-1" to say that + the value will always be treated like -1 (which has a useful + case in the sqlite3 bindings). + */ + xcv.arg['func-ptr'] = function(v){ + if(!(v instanceof Function)) return xcv.arg[ptrIR]; + const f = target.jsFuncToWasm(v, WHAT_SIGNATURE); + }; + } + + const __xArgAdapterCheck = + (t)=>xcv.arg[t] || toss("Argument adapter not found:",t); + + const __xResultAdapterCheck = + (t)=>xcv.result[t] || toss("Result adapter not found:",t); + + cache.xWrap.convertArg = (t,v)=>__xArgAdapterCheck(t)(v); + cache.xWrap.convertResult = + (t,v)=>(null===t ? v : (t ? __xResultAdapterCheck(t)(v) : undefined)); + + /** + Creates a wrapper for the WASM-exported function fname. Uses + xGet() to fetch the exported function (which throws on + error) and returns either that function or a wrapper for that + function which converts the JS-side argument types into WASM-side + types and converts the result type. If the function takes no + arguments and resultType is `null` then the function is returned + as-is, else a wrapper is created for it to adapt its arguments + and result value, as described below. + + (If you're familiar with Emscripten's ccall() and cwrap(), this + function is essentially cwrap() on steroids.) + + This function's arguments are: + + - fname: the exported function's name. xGet() is used to fetch + this, so will throw if no exported function is found with that + name. + + - resultType: the name of the result type. A literal `null` means + to return the original function's value as-is (mnemonic: there + is "null" conversion going on). Literal `undefined` or the + string `"void"` mean to ignore the function's result and return + `undefined`. Aside from those two special cases, it may be one + of the values described below or any mapping installed by the + client using xWrap.resultAdapter(). + + If passed 3 arguments and the final one is an array, that array + must contain a list of type names (see below) for adapting the + arguments from JS to WASM. If passed 2 arguments, more than 3, + or the 3rd is not an array, all arguments after the 2nd (if any) + are treated as type names. i.e.: + + ``` + xWrap('funcname', 'i32', 'string', 'f64'); + // is equivalent to: + xWrap('funcname', 'i32', ['string', 'f64']); + ``` + + Type names are symbolic names which map the arguments to an + adapter function to convert, if needed, the value before passing + it on to WASM or to convert a return result from WASM. The list + of built-in names: + + - `i8`, `i16`, `i32` (args and results): all integer conversions + which convert their argument to an integer and truncate it to + the given bit length. + + - `N*` (args): a type name in the form `N*`, where N is a numeric + type name, is treated the same as WASM pointer. + + - `*` and `pointer` (args): have multple semantics. They + behave exactly as described below for `string` args. + + - `*` and `pointer` (results): are aliases for the current + WASM pointer numeric type. + + - `**` (args): is simply a descriptive alias for the WASM pointer + type. It's primarily intended to mark output-pointer arguments. + + - `i64` (args and results): passes the value to BigInt() to + convert it to an int64. Only available if bigIntEnabled is + true. + + - `f32` (`float`), `f64` (`double`) (args and results): pass + their argument to Number(). i.e. the adaptor does not currently + distinguish between the two types of floating-point numbers. + + - `number` (results): converts the result to a JS Number using + Number(theValue).valueOf(). Note that this is for result + conversions only, as it's not possible to generically know + which type of number to convert arguments to. + + Non-numeric conversions include: + + - `string` or `utf8` (args): has two different semantics in order + to accommodate various uses of certain C APIs + (e.g. output-style strings)... + + - If the arg is a string, it creates a _temporary_ + UTF-8-encoded C-string to pass to the exported function, + cleaning it up before the wrapper returns. If a long-lived + C-string pointer is required, that requires client-side code + to create the string, then pass its pointer to the function. + + - Else the arg is assumed to be a pointer to a string the + client has already allocated and it's passed on as + a WASM pointer. + + - `string` or `utf8` (results): treats the result value as a + const C-string, encoded as UTF-8, copies it to a JS string, + and returns that JS string. + + - `string:free` or `utf8:free) (results): treats the result value + as a non-const UTF-8 C-string, ownership of which has just been + transfered to the caller. It copies the C-string to a JS + string, frees the C-string, and returns the JS string. If such + a result value is NULL, the JS result is `null`. Achtung: when + using an API which returns results from a specific allocator, + e.g. `my_malloc()`, this conversion _is not legal_. Instead, an + equivalent conversion which uses the appropriate deallocator is + required. For example: + +```js + target.xWrap.resultAdaptor('string:my_free',(i)=>{ + try { return i ? target.cstringToJs(i) : null } + finally{ target.exports.my_free(i) } + }; +``` + + - `json` (results): treats the result as a const C-string and + returns the result of passing the converted-to-JS string to + JSON.parse(). Returns `null` if the C-string is a NULL pointer. + + - `json:free` (results): works exactly like `string:free` but + returns the same thing as the `json` adapter. Note the + warning in `string:free` regarding maching allocators and + deallocators. + + The type names for results and arguments are validated when + xWrap() is called and any unknown names will trigger an + exception. + + Clients may map their own result and argument adapters using + xWrap.resultAdapter() and xWrap.argAdaptor(), noting that not all + type conversions are valid for both arguments _and_ result types + as they often have different memory ownership requirements. + + TODOs: + + - Figure out how/whether we can (semi-)transparently handle + pointer-type _output_ arguments. Those currently require + explicit handling by allocating pointers, assigning them before + the call using setMemValue(), and fetching them with + getMemValue() after the call. We may be able to automate some + or all of that. + + - Figure out whether it makes sense to extend the arg adapter + interface such that each arg adapter gets an array containing + the results of the previous arguments in the current call. That + might allow some interesting type-conversion feature. Use case: + handling of the final argument to sqlite3_prepare_v2() depends + on the type (pointer vs JS string) of its 2nd + argument. Currently that distinction requires hand-writing a + wrapper for that function. That case is unusual enough that + abstracting it into this API (and taking on the associated + costs) may well not make good sense. + */ + target.xWrap = function(fname, resultType, ...argTypes){ + if(3===arguments.length && Array.isArray(arguments[2])){ + argTypes = arguments[2]; + } + const xf = target.xGet(fname); + if(argTypes.length!==xf.length) __argcMismatch(fname, xf.length); + if((null===resultType) && 0===xf.length){ + /* Func taking no args with an as-is return. We don't need a wrapper. */ + return xf; + } + /*Verify the arg type conversions are valid...*/; + if(undefined!==resultType && null!==resultType) __xResultAdapterCheck(resultType); + argTypes.forEach(__xArgAdapterCheck); + if(0===xf.length){ + // No args to convert, so we can create a simpler wrapper... + return (...args)=>(args.length + ? __argcMismatch(fname, xf.length) + : cache.xWrap.convertResult(resultType, xf.call(null))); + } + return function(...args){ + if(args.length!==xf.length) __argcMismatch(fname, xf.length); + const scope = target.scopedAllocPush(); + try{ + const rc = xf.apply(null,args.map((v,i)=>cache.xWrap.convertArg(argTypes[i], v))); + return cache.xWrap.convertResult(resultType, rc); + }finally{ + target.scopedAllocPop(scope); + } + }; + }/*xWrap()*/; + + /** Internal impl for xWrap.resultAdapter() and argAdaptor(). */ + const __xAdapter = function(func, argc, typeName, adapter, modeName, xcvPart){ + if('string'===typeof typeName){ + if(1===argc) return xcvPart[typeName]; + else if(2===argc){ + if(!adapter){ + delete xcvPart[typeName]; + return func; + }else if(!(adapter instanceof Function)){ + toss(modeName,"requires a function argument."); + } + xcvPart[typeName] = adapter; + return func; + } + } + toss("Invalid arguments to",modeName); + }; + + /** + Gets, sets, or removes a result value adapter for use with + xWrap(). If passed only 1 argument, the adapter function for the + given type name is returned. If the second argument is explicit + falsy (as opposed to defaulted), the adapter named by the first + argument is removed. If the 2nd argument is not falsy, it must be + a function which takes one value and returns a value appropriate + for the given type name. The adapter may throw if its argument is + not of a type it can work with. This function throws for invalid + arguments. + + Example: + + ``` + xWrap.resultAdapter('twice',(v)=>v+v); + ``` + + xWrap.resultAdapter() MUST NOT use the scopedAlloc() family of + APIs to allocate a result value. xWrap()-generated wrappers run + in the context of scopedAllocPush() so that argument adapters can + easily convert, e.g., to C-strings, and have them cleaned up + automatically before the wrapper returns to the caller. Likewise, + if a _result_ adapter uses scoped allocation, the result will be + freed before because they would be freed before the wrapper + returns, leading to chaos and undefined behavior. + + Except when called as a getter, this function returns itself. + */ + target.xWrap.resultAdapter = function f(typeName, adapter){ + return __xAdapter(f, arguments.length, typeName, adapter, + 'resultAdaptor()', xcv.result); + }; + + /** + Functions identically to xWrap.resultAdapter() but applies to + call argument conversions instead of result value conversions. + + xWrap()-generated wrappers perform argument conversion in the + context of a scopedAllocPush(), so any memory allocation + performed by argument adapters really, really, really should be + made using the scopedAlloc() family of functions unless + specifically necessary. For example: + + ``` + xWrap.argAdapter('my-string', function(v){ + return ('string'===typeof v) + ? myWasmObj.scopedAllocCString(v) : null; + }; + ``` + + Contrariwise, xWrap.resultAdapter() must _not_ use scopedAlloc() + to allocate its results because they would be freed before the + xWrap()-created wrapper returns. + + Note that it is perfectly legitimate to use these adapters to + perform argument validation, as opposed (or in addition) to + conversion. + */ + target.xWrap.argAdapter = function f(typeName, adapter){ + return __xAdapter(f, arguments.length, typeName, adapter, + 'argAdaptor()', xcv.arg); + }; + + /** + Functions like xCall() but performs argument and result type + conversions as for xWrap(). The first argument is the name of the + exported function to call. The 2nd its the name of its result + type, as documented for xWrap(). The 3rd is an array of argument + type name, as documented for xWrap() (use a falsy value or an + empty array for nullary functions). The 4th+ arguments are + arguments for the call, with the special case that if the 4th + argument is an array, it is used as the arguments for the + call. Returns the converted result of the call. + + This is just a thin wrapper around xWrap(). If the given function + is to be called more than once, it's more efficient to use + xWrap() to create a wrapper, then to call that wrapper as many + times as needed. For one-shot calls, however, this variant is + arguably more efficient because it will hypothetically free the + wrapper function quickly. + */ + target.xCallWrapped = function(fname, resultType, argTypes, ...args){ + if(Array.isArray(arguments[3])) args = arguments[3]; + return target.xWrap(fname, resultType, argTypes||[]).apply(null, args||[]); + }; + + return target; +}; + +/** + yawl (Yet Another Wasm Loader) provides very basic wasm loader. + It requires a config object: + + - `uri`: required URI of the WASM file to load. + + - `onload(loadResult,config)`: optional callback. The first + argument is the result object from + WebAssembly.instantiate[Streaming](). The 2nd is the config + object passed to this function. Described in more detail below. + + - `imports`: optional imports object for + WebAssembly.instantiate[Streaming](). The default is an empty set + of imports. If the module requires any imports, this object + must include them. + + - `wasmUtilTarget`: optional object suitable for passing to + WhWasmUtilInstaller(). If set, it gets passed to that function + after the promise resolves. This function sets several properties + on it before passing it on to that function (which sets many + more): + + - `module`, `instance`: the properties from the + instantiate[Streaming]() result. + + - If `instance.exports.memory` is _not_ set then it requires that + `config.imports.env.memory` be set (else it throws), and + assigns that to `target.memory`. + + - If `wasmUtilTarget.alloc` is not set and + `instance.exports.malloc` is, it installs + `wasmUtilTarget.alloc()` and `wasmUtilTarget.dealloc()` + wrappers for the exports `malloc` and `free` functions. + + It returns a function which, when called, initiates loading of the + module and returns a Promise. When that Promise resolves, it calls + the `config.onload` callback (if set) and passes it + `(loadResult,config)`, where `loadResult` is the result of + WebAssembly.instantiate[Streaming](): an object in the form: + + ``` + { + module: a WebAssembly.Module, + instance: a WebAssembly.Instance + } + ``` + + (Note that the initial `then()` attached to the promise gets only + that object, and not the `config` one.) + + Error handling is up to the caller, who may attach a `catch()` call + to the promise. +*/ +self.WhWasmUtilInstaller.yawl = function(config){ + const wfetch = ()=>fetch(config.uri, {credentials: 'same-origin'}); + const wui = this; + const finalThen = function(arg){ + //log("finalThen()",arg); + if(config.wasmUtilTarget){ + const toss = (...args)=>{throw new Error(args.join(' '))}; + const tgt = config.wasmUtilTarget; + tgt.module = arg.module; + tgt.instance = arg.instance; + //tgt.exports = tgt.instance.exports; + if(!tgt.instance.exports.memory){ + /** + WhWasmUtilInstaller requires either tgt.exports.memory + (exported from WASM) or tgt.memory (JS-provided memory + imported into WASM). + */ + tgt.memory = (config.imports && config.imports.env + && config.imports.env.memory) + || toss("Missing 'memory' object!"); + } + if(!tgt.alloc && arg.instance.exports.malloc){ + const exports = arg.instance.exports; + tgt.alloc = function(n){ + return exports.malloc(n) || toss("Allocation of",n,"bytes failed."); + }; + tgt.dealloc = function(m){exports.free(m)}; + } + wui(tgt); + } + if(config.onload) config.onload(arg,config); + return arg /* for any then() handler attached to + yetAnotherWasmLoader()'s return value */; + }; + const loadWasm = WebAssembly.instantiateStreaming + ? function loadWasmStreaming(){ + return WebAssembly.instantiateStreaming(wfetch(), config.imports||{}) + .then(finalThen); + } + : function loadWasmOldSchool(){ // Safari < v15 + return wfetch() + .then(response => response.arrayBuffer()) + .then(bytes => WebAssembly.instantiate(bytes, config.imports||{})) + .then(finalThen); + }; + return loadWasm; +}.bind(self.WhWasmUtilInstaller)/*yawl()*/; |