From 63847496f14c813a5d80efd5b7de0f1294ffe1e3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 13 Apr 2024 16:07:11 +0200 Subject: Adding upstream version 3.45.1. Signed-off-by: Daniel Baumann --- ext/wasm/jaccwabyt/jaccwabyt.js | 696 +++++++++++++++++++++++++ ext/wasm/jaccwabyt/jaccwabyt.md | 1065 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 1761 insertions(+) create mode 100644 ext/wasm/jaccwabyt/jaccwabyt.js create mode 100644 ext/wasm/jaccwabyt/jaccwabyt.md (limited to 'ext/wasm/jaccwabyt') diff --git a/ext/wasm/jaccwabyt/jaccwabyt.js b/ext/wasm/jaccwabyt/jaccwabyt.js new file mode 100644 index 0000000..1846441 --- /dev/null +++ b/ext/wasm/jaccwabyt/jaccwabyt.js @@ -0,0 +1,696 @@ +/** + 2022-06-30 + + The author disclaims copyright to this source code. In place of a + legal notice, here is a blessing: + + * May you do good and not evil. + * May you find forgiveness for yourself and forgive others. + * May you share freely, never taking more than you give. + + *********************************************************************** + + The Jaccwabyt API is documented in detail in an external file, + _possibly_ called jaccwabyt.md in the same directory as this file. + + Project homes: + - https://fossil.wanderinghorse.net/r/jaccwabyt + - https://sqlite.org/src/dir/ext/wasm/jaccwabyt + +*/ +'use strict'; +globalThis.Jaccwabyt = function StructBinderFactory(config){ +/* ^^^^ it is recommended that clients move that object into wherever + they'd like to have it and delete the self-held copy ("self" being + the global window or worker object). This API does not require the + global reference - it is simply installed as a convenience for + connecting these bits to other co-developed code before it gets + removed from the global namespace. +*/ + + /** Throws a new Error, the message of which is the concatenation + all args with a space between each. */ + const toss = (...args)=>{throw new Error(args.join(' '))}; + + /** + Implementing function bindings revealed significant + shortcomings in Emscripten's addFunction()/removeFunction() + interfaces: + + https://github.com/emscripten-core/emscripten/issues/17323 + + Until those are resolved, or a suitable replacement can be + implemented, our function-binding API will be more limited + and/or clumsier to use than initially hoped. + */ + if(!(config.heap instanceof WebAssembly.Memory) + && !(config.heap instanceof Function)){ + toss("config.heap must be WebAssembly.Memory instance or a function."); + } + ['alloc','dealloc'].forEach(function(k){ + (config[k] instanceof Function) || + toss("Config option '"+k+"' must be a function."); + }); + const SBF = StructBinderFactory; + const heap = (config.heap instanceof Function) + ? config.heap : (()=>new Uint8Array(config.heap.buffer)), + alloc = config.alloc, + dealloc = config.dealloc, + log = config.log || console.log.bind(console), + memberPrefix = (config.memberPrefix || ""), + memberSuffix = (config.memberSuffix || ""), + bigIntEnabled = (undefined===config.bigIntEnabled + ? !!globalThis['BigInt64Array'] : !!config.bigIntEnabled), + BigInt = globalThis['BigInt'], + BigInt64Array = globalThis['BigInt64Array'], + /* Undocumented (on purpose) config options: */ + ptrSizeof = config.ptrSizeof || 4, + ptrIR = config.ptrIR || 'i32' + ; + + if(!SBF.debugFlags){ + SBF.__makeDebugFlags = function(deriveFrom=null){ + /* This is disgustingly overengineered. :/ */ + if(deriveFrom && deriveFrom.__flags) deriveFrom = deriveFrom.__flags; + const f = function f(flags){ + if(0===arguments.length){ + return f.__flags; + } + if(flags<0){ + delete f.__flags.getter; delete f.__flags.setter; + delete f.__flags.alloc; delete f.__flags.dealloc; + }else{ + f.__flags.getter = 0!==(0x01 & flags); + f.__flags.setter = 0!==(0x02 & flags); + f.__flags.alloc = 0!==(0x04 & flags); + f.__flags.dealloc = 0!==(0x08 & flags); + } + return f._flags; + }; + Object.defineProperty(f,'__flags', { + iterable: false, writable: false, + value: Object.create(deriveFrom) + }); + if(!deriveFrom) f(0); + return f; + }; + SBF.debugFlags = SBF.__makeDebugFlags(); + }/*static init*/ + + const isLittleEndian = (function() { + const buffer = new ArrayBuffer(2); + new DataView(buffer).setInt16(0, 256, true /* littleEndian */); + // Int16Array uses the platform's endianness. + return new Int16Array(buffer)[0] === 256; + })(); + /** + Some terms used in the internal docs: + + StructType: a struct-wrapping class generated by this + framework. + DEF: struct description object. + SIG: struct member signature string. + */ + + /** True if SIG s looks like a function signature, else + false. */ + const isFuncSig = (s)=>'('===s[1]; + /** True if SIG s is-a pointer signature. */ + const isPtrSig = (s)=>'p'===s || 'P'===s; + const isAutoPtrSig = (s)=>'P'===s /*EXPERIMENTAL*/; + const sigLetter = (s)=>isFuncSig(s) ? 'p' : s[0]; + /** Returns the WASM IR form of the Emscripten-conventional letter + at SIG s[0]. Throws for an unknown SIG. */ + const sigIR = function(s){ + switch(sigLetter(s)){ + case 'c': case 'C': return 'i8'; + case 'i': return 'i32'; + case 'p': case 'P': case 's': return ptrIR; + case 'j': return 'i64'; + case 'f': return 'float'; + case 'd': return 'double'; + } + toss("Unhandled signature IR:",s); + }; + + const affirmBigIntArray = BigInt64Array + ? ()=>true : ()=>toss('BigInt64Array is not available.'); + /** Returns the name of a DataView getter method corresponding + to the given SIG. */ + const sigDVGetter = function(s){ + switch(sigLetter(s)) { + case 'p': case 'P': case 's': { + switch(ptrSizeof){ + case 4: return 'getInt32'; + case 8: return affirmBigIntArray() && 'getBigInt64'; + } + break; + } + case 'i': return 'getInt32'; + case 'c': return 'getInt8'; + case 'C': return 'getUint8'; + case 'j': return affirmBigIntArray() && 'getBigInt64'; + case 'f': return 'getFloat32'; + case 'd': return 'getFloat64'; + } + toss("Unhandled DataView getter for signature:",s); + }; + /** Returns the name of a DataView setter method corresponding + to the given SIG. */ + const sigDVSetter = function(s){ + switch(sigLetter(s)){ + case 'p': case 'P': case 's': { + switch(ptrSizeof){ + case 4: return 'setInt32'; + case 8: return affirmBigIntArray() && 'setBigInt64'; + } + break; + } + case 'i': return 'setInt32'; + case 'c': return 'setInt8'; + case 'C': return 'setUint8'; + case 'j': return affirmBigIntArray() && 'setBigInt64'; + case 'f': return 'setFloat32'; + case 'd': return 'setFloat64'; + } + toss("Unhandled DataView setter for signature:",s); + }; + /** + Returns either Number of BigInt, depending on the given + SIG. This constructor is used in property setters to coerce + the being-set value to the correct size. + */ + const sigDVSetWrapper = function(s){ + switch(sigLetter(s)) { + case 'i': case 'f': case 'c': case 'C': case 'd': return Number; + case 'j': return affirmBigIntArray() && BigInt; + case 'p': case 'P': case 's': + switch(ptrSizeof){ + case 4: return Number; + case 8: return affirmBigIntArray() && BigInt; + } + break; + } + toss("Unhandled DataView set wrapper for signature:",s); + }; + + /** Returns the given struct and member name in a form suitable for + debugging and error output. */ + const sPropName = (s,k)=>s+'::'+k; + + const __propThrowOnSet = function(structName,propName){ + return ()=>toss(sPropName(structName,propName),"is read-only."); + }; + + /** + In order to completely hide StructBinder-bound struct + pointers from JS code, we store them in a scope-local + WeakMap which maps the struct-bound objects to their WASM + pointers. The pointers are accessible via + boundObject.pointer, which is gated behind an accessor + function, but are not exposed anywhere else in the + object. The main intention of that is to make it impossible + for stale copies to be made. + */ + const __instancePointerMap = new WeakMap(); + + /** Property name for the pointer-is-external marker. */ + const xPtrPropName = '(pointer-is-external)'; + + /** Frees the obj.pointer memory and clears the pointer + property. */ + const __freeStruct = function(ctor, obj, m){ + if(!m) m = __instancePointerMap.get(obj); + if(m) { + __instancePointerMap.delete(obj); + if(Array.isArray(obj.ondispose)){ + let x; + while((x = obj.ondispose.shift())){ + try{ + if(x instanceof Function) x.call(obj); + else if(x instanceof StructType) x.dispose(); + else if('number' === typeof x) dealloc(x); + // else ignore. Strings are permitted to annotate entries + // to assist in debugging. + }catch(e){ + console.warn("ondispose() for",ctor.structName,'@', + m,'threw. NOT propagating it.',e); + } + } + }else if(obj.ondispose instanceof Function){ + try{obj.ondispose()} + catch(e){ + /*do not rethrow: destructors must not throw*/ + console.warn("ondispose() for",ctor.structName,'@', + m,'threw. NOT propagating it.',e); + } + } + delete obj.ondispose; + if(ctor.debugFlags.__flags.dealloc){ + log("debug.dealloc:",(obj[xPtrPropName]?"EXTERNAL":""), + ctor.structName,"instance:", + ctor.structInfo.sizeof,"bytes @"+m); + } + if(!obj[xPtrPropName]) dealloc(m); + } + }; + + /** Returns a skeleton for a read-only property accessor wrapping + value v. */ + const rop = (v)=>{return {configurable: false, writable: false, + iterable: false, value: v}}; + + /** Allocates obj's memory buffer based on the size defined in + ctor.structInfo.sizeof. */ + const __allocStruct = function(ctor, obj, m){ + let fill = !m; + if(m) Object.defineProperty(obj, xPtrPropName, rop(m)); + else{ + m = alloc(ctor.structInfo.sizeof); + if(!m) toss("Allocation of",ctor.structName,"structure failed."); + } + try { + if(ctor.debugFlags.__flags.alloc){ + log("debug.alloc:",(fill?"":"EXTERNAL"), + ctor.structName,"instance:", + ctor.structInfo.sizeof,"bytes @"+m); + } + if(fill) heap().fill(0, m, m + ctor.structInfo.sizeof); + __instancePointerMap.set(obj, m); + }catch(e){ + __freeStruct(ctor, obj, m); + throw e; + } + }; + /** Gets installed as the memoryDump() method of all structs. */ + const __memoryDump = function(){ + const p = this.pointer; + return p + ? new Uint8Array(heap().slice(p, p+this.structInfo.sizeof)) + : null; + }; + + const __memberKey = (k)=>memberPrefix + k + memberSuffix; + const __memberKeyProp = rop(__memberKey); + + /** + Looks up a struct member in structInfo.members. Throws if found + if tossIfNotFound is true, else returns undefined if not + found. The given name may be either the name of the + structInfo.members key (faster) or the key as modified by the + memberPrefix and memberSuffix settings. + */ + const __lookupMember = function(structInfo, memberName, tossIfNotFound=true){ + let m = structInfo.members[memberName]; + if(!m && (memberPrefix || memberSuffix)){ + // Check for a match on members[X].key + for(const v of Object.values(structInfo.members)){ + if(v.key===memberName){ m = v; break; } + } + if(!m && tossIfNotFound){ + toss(sPropName(structInfo.name,memberName),'is not a mapped struct member.'); + } + } + return m; + }; + + /** + Uses __lookupMember(obj.structInfo,memberName) to find a member, + throwing if not found. Returns its signature, either in this + framework's native format or in Emscripten format. + */ + const __memberSignature = function f(obj,memberName,emscriptenFormat=false){ + if(!f._) f._ = (x)=>x.replace(/[^vipPsjrdcC]/g,"").replace(/[pPscC]/g,'i'); + const m = __lookupMember(obj.structInfo, memberName, true); + return emscriptenFormat ? f._(m.signature) : m.signature; + }; + + const __ptrPropDescriptor = { + configurable: false, enumerable: false, + get: function(){return __instancePointerMap.get(this)}, + set: ()=>toss("Cannot assign the 'pointer' property of a struct.") + // Reminder: leaving `set` undefined makes assignments + // to the property _silently_ do nothing. Current unit tests + // rely on it throwing, though. + }; + + /** Impl of X.memberKeys() for StructType and struct ctors. */ + const __structMemberKeys = rop(function(){ + const a = []; + for(const k of Object.keys(this.structInfo.members)){ + a.push(this.memberKey(k)); + } + return a; + }); + + const __utf8Decoder = new TextDecoder('utf-8'); + const __utf8Encoder = new TextEncoder(); + /** Internal helper to use in operations which need to distinguish + between SharedArrayBuffer heap memory and non-shared heap. */ + const __SAB = ('undefined'===typeof SharedArrayBuffer) + ? function(){} : SharedArrayBuffer; + const __utf8Decode = function(arrayBuffer, begin, end){ + return __utf8Decoder.decode( + (arrayBuffer.buffer instanceof __SAB) + ? arrayBuffer.slice(begin, end) + : arrayBuffer.subarray(begin, end) + ); + }; + /** + Uses __lookupMember() to find the given obj.structInfo key. + Returns that member if it is a string, else returns false. If the + member is not found, throws if tossIfNotFound is true, else + returns false. + */ + const __memberIsString = function(obj,memberName, tossIfNotFound=false){ + const m = __lookupMember(obj.structInfo, memberName, tossIfNotFound); + return (m && 1===m.signature.length && 's'===m.signature[0]) ? m : false; + }; + + /** + Given a member description object, throws if member.signature is + not valid for assigning to or interpretation as a C-style string. + It optimistically assumes that any signature of (i,p,s) is + C-string compatible. + */ + const __affirmCStringSignature = function(member){ + if('s'===member.signature) return; + toss("Invalid member type signature for C-string value:", + JSON.stringify(member)); + }; + + /** + Looks up the given member in obj.structInfo. If it has a + signature of 's' then it is assumed to be a C-style UTF-8 string + and a decoded copy of the string at its address is returned. If + the signature is of any other type, it throws. If an s-type + member's address is 0, `null` is returned. + */ + const __memberToJsString = function f(obj,memberName){ + const m = __lookupMember(obj.structInfo, memberName, true); + __affirmCStringSignature(m); + const addr = obj[m.key]; + //log("addr =",addr,memberName,"m =",m); + if(!addr) return null; + let pos = addr; + const mem = heap(); + for( ; mem[pos]!==0; ++pos ) { + //log("mem[",pos,"]",mem[pos]); + }; + //log("addr =",addr,"pos =",pos); + return (addr===pos) ? "" : __utf8Decode(mem, addr, pos); + }; + + /** + Adds value v to obj.ondispose, creating ondispose, + or converting it to an array, if needed. + */ + const __addOnDispose = function(obj, ...v){ + if(obj.ondispose){ + if(!Array.isArray(obj.ondispose)){ + obj.ondispose = [obj.ondispose]; + } + }else{ + obj.ondispose = []; + } + obj.ondispose.push(...v); + }; + + /** + Allocates a new UTF-8-encoded, NUL-terminated copy of the given + JS string and returns its address relative to heap(). If + allocation returns 0 this function throws. Ownership of the + memory is transfered to the caller, who must eventually pass it + to the configured dealloc() function. + */ + const __allocCString = function(str){ + const u = __utf8Encoder.encode(str); + const mem = alloc(u.length+1); + if(!mem) toss("Allocation error while duplicating string:",str); + const h = heap(); + //let i = 0; + //for( ; i < u.length; ++i ) h[mem + i] = u[i]; + h.set(u, mem); + h[mem + u.length] = 0; + //log("allocCString @",mem," =",u); + return mem; + }; + + /** + Sets the given struct member of obj to a dynamically-allocated, + UTF-8-encoded, NUL-terminated copy of str. It is up to the caller + to free any prior memory, if appropriate. The newly-allocated + string is added to obj.ondispose so will be freed when the object + is disposed. + + The given name may be either the name of the structInfo.members + key (faster) or the key as modified by the memberPrefix and + memberSuffix settings. + */ + const __setMemberCString = function(obj, memberName, str){ + const m = __lookupMember(obj.structInfo, memberName, true); + __affirmCStringSignature(m); + /* Potential TODO: if obj.ondispose contains obj[m.key] then + dealloc that value and clear that ondispose entry */ + const mem = __allocCString(str); + obj[m.key] = mem; + __addOnDispose(obj, mem); + return obj; + }; + + /** + Prototype for all StructFactory instances (the constructors + returned from StructBinder). + */ + const StructType = function ctor(structName, structInfo){ + if(arguments[2]!==rop){ + toss("Do not call the StructType constructor", + "from client-level code."); + } + Object.defineProperties(this,{ + //isA: rop((v)=>v instanceof ctor), + structName: rop(structName), + structInfo: rop(structInfo) + }); + }; + + /** + Properties inherited by struct-type-specific StructType instances + and (indirectly) concrete struct-type instances. + */ + StructType.prototype = Object.create(null, { + dispose: rop(function(){__freeStruct(this.constructor, this)}), + lookupMember: rop(function(memberName, tossIfNotFound=true){ + return __lookupMember(this.structInfo, memberName, tossIfNotFound); + }), + memberToJsString: rop(function(memberName){ + return __memberToJsString(this, memberName); + }), + memberIsString: rop(function(memberName, tossIfNotFound=true){ + return __memberIsString(this, memberName, tossIfNotFound); + }), + memberKey: __memberKeyProp, + memberKeys: __structMemberKeys, + memberSignature: rop(function(memberName, emscriptenFormat=false){ + return __memberSignature(this, memberName, emscriptenFormat); + }), + memoryDump: rop(__memoryDump), + pointer: __ptrPropDescriptor, + setMemberCString: rop(function(memberName, str){ + return __setMemberCString(this, memberName, str); + }) + }); + // Function-type non-Property inherited members + Object.assign(StructType.prototype,{ + addOnDispose: function(...v){ + __addOnDispose(this,...v); + return this; + } + }); + + /** + "Static" properties for StructType. + */ + Object.defineProperties(StructType, { + allocCString: rop(__allocCString), + isA: rop((v)=>v instanceof StructType), + hasExternalPointer: rop((v)=>(v instanceof StructType) && !!v[xPtrPropName]), + memberKey: __memberKeyProp + }); + + const isNumericValue = (v)=>Number.isFinite(v) || (v instanceof (BigInt || Number)); + + /** + Pass this a StructBinder-generated prototype, and the struct + member description object. It will define property accessors for + proto[memberKey] which read from/write to memory in + this.pointer. It modifies descr to make certain downstream + operations much simpler. + */ + const makeMemberWrapper = function f(ctor,name, descr){ + if(!f._){ + /*cache all available getters/setters/set-wrappers for + direct reuse in each accessor function. */ + f._ = {getters: {}, setters: {}, sw:{}}; + const a = ['i','c','C','p','P','s','f','d','v()']; + if(bigIntEnabled) a.push('j'); + a.forEach(function(v){ + //const ir = sigIR(v); + f._.getters[v] = sigDVGetter(v) /* DataView[MethodName] values for GETTERS */; + f._.setters[v] = sigDVSetter(v) /* DataView[MethodName] values for SETTERS */; + f._.sw[v] = sigDVSetWrapper(v) /* BigInt or Number ctor to wrap around values + for conversion */; + }); + const rxSig1 = /^[ipPsjfdcC]$/, + rxSig2 = /^[vipPsjfdcC]\([ipPsjfdcC]*\)$/; + f.sigCheck = function(obj, name, key,sig){ + if(Object.prototype.hasOwnProperty.call(obj, key)){ + toss(obj.structName,'already has a property named',key+'.'); + } + rxSig1.test(sig) || rxSig2.test(sig) + || toss("Malformed signature for", + sPropName(obj.structName,name)+":",sig); + }; + } + const key = ctor.memberKey(name); + f.sigCheck(ctor.prototype, name, key, descr.signature); + descr.key = key; + descr.name = name; + const sigGlyph = sigLetter(descr.signature); + const xPropName = sPropName(ctor.prototype.structName,key); + const dbg = ctor.prototype.debugFlags.__flags; + /* + TODO?: set prototype of descr to an object which can set/fetch + its prefered representation, e.g. conversion to string or mapped + function. Advantage: we can avoid doing that via if/else if/else + in the get/set methods. + */ + const prop = Object.create(null); + prop.configurable = false; + prop.enumerable = false; + prop.get = function(){ + if(dbg.getter){ + log("debug.getter:",f._.getters[sigGlyph],"for", sigIR(sigGlyph), + xPropName,'@', this.pointer,'+',descr.offset,'sz',descr.sizeof); + } + let rc = ( + new DataView(heap().buffer, this.pointer + descr.offset, descr.sizeof) + )[f._.getters[sigGlyph]](0, isLittleEndian); + if(dbg.getter) log("debug.getter:",xPropName,"result =",rc); + return rc; + }; + if(descr.readOnly){ + prop.set = __propThrowOnSet(ctor.prototype.structName,key); + }else{ + prop.set = function(v){ + if(dbg.setter){ + log("debug.setter:",f._.setters[sigGlyph],"for", sigIR(sigGlyph), + xPropName,'@', this.pointer,'+',descr.offset,'sz',descr.sizeof, v); + } + if(!this.pointer){ + toss("Cannot set struct property on disposed instance."); + } + if(null===v) v = 0; + else while(!isNumericValue(v)){ + if(isAutoPtrSig(descr.signature) && (v instanceof StructType)){ + // It's a struct instance: let's store its pointer value! + v = v.pointer || 0; + if(dbg.setter) log("debug.setter:",xPropName,"resolved to",v); + break; + } + toss("Invalid value for pointer-type",xPropName+'.'); + } + ( + new DataView(heap().buffer, this.pointer + descr.offset, descr.sizeof) + )[f._.setters[sigGlyph]](0, f._.sw[sigGlyph](v), isLittleEndian); + }; + } + Object.defineProperty(ctor.prototype, key, prop); + }/*makeMemberWrapper*/; + + /** + The main factory function which will be returned to the + caller. + */ + const StructBinder = function StructBinder(structName, structInfo){ + if(1===arguments.length){ + structInfo = structName; + structName = structInfo.name; + }else if(!structInfo.name){ + structInfo.name = structName; + } + if(!structName) toss("Struct name is required."); + let lastMember = false; + Object.keys(structInfo.members).forEach((k)=>{ + // Sanity checks of sizeof/offset info... + const m = structInfo.members[k]; + if(!m.sizeof) toss(structName,"member",k,"is missing sizeof."); + else if(m.sizeof===1){ + (m.signature === 'c' || m.signature === 'C') || + toss("Unexpected sizeof==1 member", + sPropName(structInfo.name,k), + "with signature",m.signature); + }else{ + // sizes and offsets of size-1 members may be odd values, but + // others may not. + if(0!==(m.sizeof%4)){ + console.warn("Invalid struct member description =",m,"from",structInfo); + toss(structName,"member",k,"sizeof is not aligned. sizeof="+m.sizeof); + } + if(0!==(m.offset%4)){ + console.warn("Invalid struct member description =",m,"from",structInfo); + toss(structName,"member",k,"offset is not aligned. offset="+m.offset); + } + } + if(!lastMember || lastMember.offset < m.offset) lastMember = m; + }); + if(!lastMember) toss("No member property descriptions found."); + else if(structInfo.sizeof < lastMember.offset+lastMember.sizeof){ + toss("Invalid struct config:",structName, + "max member offset ("+lastMember.offset+") ", + "extends past end of struct (sizeof="+structInfo.sizeof+")."); + } + const debugFlags = rop(SBF.__makeDebugFlags(StructBinder.debugFlags)); + /** Constructor for the StructCtor. */ + const StructCtor = function StructCtor(externalMemory){ + if(!(this instanceof StructCtor)){ + toss("The",structName,"constructor may only be called via 'new'."); + }else if(arguments.length){ + if(externalMemory!==(externalMemory|0) || externalMemory<=0){ + toss("Invalid pointer value for",structName,"constructor."); + } + __allocStruct(StructCtor, this, externalMemory); + }else{ + __allocStruct(StructCtor, this); + } + }; + Object.defineProperties(StructCtor,{ + debugFlags: debugFlags, + isA: rop((v)=>v instanceof StructCtor), + memberKey: __memberKeyProp, + memberKeys: __structMemberKeys, + methodInfoForKey: rop(function(mKey){ + }), + structInfo: rop(structInfo), + structName: rop(structName) + }); + StructCtor.prototype = new StructType(structName, structInfo, rop); + Object.defineProperties(StructCtor.prototype,{ + debugFlags: debugFlags, + constructor: rop(StructCtor) + /*if we assign StructCtor.prototype and don't do + this then StructCtor!==instance.constructor!*/ + }); + Object.keys(structInfo.members).forEach( + (name)=>makeMemberWrapper(StructCtor, name, structInfo.members[name]) + ); + return StructCtor; + }; + StructBinder.StructType = StructType; + StructBinder.config = config; + StructBinder.allocCString = __allocCString; + if(!StructBinder.debugFlags){ + StructBinder.debugFlags = SBF.__makeDebugFlags(SBF.debugFlags); + } + return StructBinder; +}/*StructBinderFactory*/; diff --git a/ext/wasm/jaccwabyt/jaccwabyt.md b/ext/wasm/jaccwabyt/jaccwabyt.md new file mode 100644 index 0000000..431741e --- /dev/null +++ b/ext/wasm/jaccwabyt/jaccwabyt.md @@ -0,0 +1,1065 @@ +Jaccwabyt 🐇 +============================================================ + +**Jaccwabyt**: _JavaScript ⇄ C Struct Communication via WASM Byte +Arrays_ + +Welcome to Jaccwabyt, a JavaScript API which creates bindings for +WASM-compiled C structs, defining them in such a way that changes to +their state in JS are visible in C/WASM, and vice versa, permitting +two-way interchange of struct state with very little user-side +friction. + +(If that means nothing to you, neither will the rest of this page!) + +**Browser compatibility**: this library requires a _recent_ browser +and makes no attempt whatsoever to accommodate "older" or +lesser-capable ones, where "recent," _very roughly_, means released in +mid-2018 or later, with late 2021 releases required for some optional +features in some browsers (e.g. [BigInt64Array][] in Safari). It also +relies on a couple non-standard, but widespread, features, namely +[TextEncoder][] and [TextDecoder][]. It is developed primarily on +Firefox and Chrome on Linux and all claims of Safari compatibility +are based solely on feature compatibility tables provided at +[MDN][]. + +**Formalities:** + +- Author: [Stephan Beal][sgb] +- Project Homes: + - \ + Is the primary home but... + - \ + ... most development happens here. + +The license for both this documentation and the software it documents +is the same as [sqlite3][], the project from which this spinoff +project was spawned: + +----- + +> 2022-06-30: +> +> The author disclaims copyright to this source code. In place of a +> legal notice, here is a blessing: +> +> May you do good and not evil. +> May you find forgiveness for yourself and forgive others. +> May you share freely, never taking more than you give. + +----- + + +Table of Contents +============================================================ + +- [Overview](#overview) + - [Architecture](#architecture) +- [Creating and Binding Structs](#creating-binding) + - [Step 1: Configure Jaccwabyt](#step-1) + - [Step 2: Struct Description](#step-2) + - [`P` vs `p`](#step-2-pvsp) + - [Step 3: Binding a Struct](#step-3) + - [Step 4: Creating, Using, and Destroying Instances](#step-4) +- APIs + - [Struct Binder Factory](#api-binderfactory) + - [Struct Binder](#api-structbinder) + - [Struct Type](#api-structtype) + - [Struct Constructors](#api-structctor) + - [Struct Protypes](#api-structprototype) + - [Struct Instances](#api-structinstance) +- Appendices + - [Appendix A: Limitations, TODOs, etc.](#appendix-a) + - [Appendix D: Debug Info](#appendix-d) + - [Appendix G: Generating Struct Descriptions](#appendix-g) + + +Overview +============================================================ + +Management summary: this JavaScript-only framework provides limited +two-way bindings between C structs and JavaScript objects, such that +changes to the struct in one environment are visible in the other. + +Details... + +It works by creating JavaScript proxies for C structs. Reads and +writes of the JS-side members are marshaled through a flat byte array +allocated from the WASM heap. As that heap is shared with the C-side +code, and the memory block is written using the same approach C does, +that byte array can be used to access and manipulate a given struct +instance from both JS and C. + +Motivating use case: this API was initially developed as an +experiment to determine whether it would be feasible to implement, +completely in JS, custom "VFS" and "virtual table" objects for the +WASM build of [sqlite3][]. Doing so was going to require some form of +two-way binding of several structs. Once the proof of concept was +demonstrated, a rabbit hole appeared and _down we went_... It has +since grown beyond its humble proof-of-concept origins and is believed +to be a useful (or at least interesting) tool for mixed JS/C +applications. + +Portability notes: + +- These docs sometimes use [Emscripten][] as a point of reference + because it is the most widespread WASM toolchain, but this code is + specifically designed to be usable in arbitrary WASM environments. + It abstracts away a few Emscripten-specific features into + configurable options. Similarly, the build tree requires Emscripten + but Jaccwabyt does not have any hard Emscripten dependencies. +- This code is encapsulated into a single JavaScript function. It + should be trivial to copy/paste into arbitrary WASM/JS-using + projects. +- The source tree includes C code, but only for testing and + demonstration purposes. It is not part of the core distributable. + + +Architecture +------------------------------------------------------------ + + + +```pikchr +BSBF: box rad 0.3*boxht "StructBinderFactory" fit fill lightblue +BSB: box same "StructBinder" fit at 0.75 e of 0.7 s of BSBF.c +BST: box same "StructType" fit at 1.5 e of BSBF +BSC: box same "Struct" "Ctor" fit at 1.5 s of BST +BSI: box same "Struct" "Instances" fit at 1 right of BSB.e +BC: box same at 0.25 right of 1.6 e of BST "C Structs" fit fill lightgrey + +arrow -> from BSBF.s to BSB.w "Generates" aligned above +arrow -> from BSB.n to BST.sw "Contains" aligned above +arrow -> from BSB.s to BSC.nw "Generates" aligned below +arrow -> from BSC.ne to BSI.s "Constructs" aligned below +arrow <- from BST.se to BSI.n "Inherits" aligned above +arrow <-> from BSI.e to BC.s dotted "Shared" aligned above "Memory" aligned below +arrow -> from BST.e to BC.w dotted "Mirrors Struct" aligned above "Model From" aligned below +arrow -> from BST.s to BSC.n "Prototype of" aligned above +``` + +Its major classes and functions are: + +- **[StructBinderFactory][StructBinderFactory]** is a factory function which + accepts a configuration object to customize it for a given WASM + environment. A client will typically call this only one time, with + an appropriate configuration, to generate a single... +- **[StructBinder][]** is a factory function which converts an + arbitrary number struct descriptions into... +- **[StructTypes][StructCtors]** are constructors, one per struct + description, which inherit from + **[`StructBinder.StructType`][StructType]** and are used to instantiate... +- **[Struct instances][StructInstance]** are objects representing + individual instances of generated struct types. + +An app may have any number of StructBinders, but will typically +need only one. Each StructBinder is effectively a separate +namespace for struct creation. + + + +Creating and Binding Structs +============================================================ + +From the amount of documentation provided, it may seem that +creating and using struct bindings is a daunting task, but it +essentially boils down to: + +1. [Confire Jaccwabyt for your WASM environment](#step-1). This is a + one-time task per project and results is a factory function which + can create new struct bindings. +2. [Create a JSON-format description of your C structs](#step-2). This is + required once for each struct and required updating if the C + structs change. +3. [Feed (2) to the function generated by (1)](#step-3) to create JS + constuctor functions for each struct. This is done at runtime, as + opposed to during a build-process step, and can be set up in such a + way that it does not require any maintenace after its initial + setup. +4. [Create and use instances of those structs](#step-4). + +Detailed instructions for each of those steps follows... + + +Step 1: Configure Jaccwabyt for the Environment +------------------------------------------------------------ + +Jaccwabyt's highest-level API is a single function. It creates a +factory for processing struct descriptions, but does not process any +descriptions itself. This level of abstraction exist primarily so that +the struct-specific factories can be configured for a given WASM +environment. Its usage looks like: + +> +```javascript +const MyBinder = StructBinderFactory({ + // These config options are all required: + heap: WebAssembly.Memory instance or a function which returns + a Uint8Array or Int8Array view of the WASM memory, + alloc: function(howMuchMemory){...}, + dealloc: function(pointerToFree){...} +}); +``` + +It also offers a number of other settings, but all are optional except +for the ones shown above. Those three config options abstract away +details which are specific to a given WASM environment. They provide +the WASM "heap" memory, the memory allocator, and the deallocator. In +a conventional Emscripten setup, that config might simply look like: + +> +```javascript +{ + heap: Module['asm']['memory'], + //Or: + // heap: ()=>Module['HEAP8'], + alloc: (n)=>Module['_malloc'](n), + dealloc: (m)=>Module['_free'](m) +} +``` + +The StructBinder factory function returns a function which can then be +used to create bindings for our structs. + + +Step 2: Create a Struct Description +------------------------------------------------------------ + +The primary input for this framework is a JSON-compatible construct +which describes a struct we want to bind. For example, given this C +struct: + +> +```c +// C-side: +struct Foo { + int member1; + void * member2; + int64_t member3; +}; +``` + +Its JSON description looks like: + +> +```json +{ + "name": "Foo", + "sizeof": 16, + "members": { + "member1": {"offset": 0,"sizeof": 4,"signature": "i"}, + "member2": {"offset": 4,"sizeof": 4,"signature": "p"}, + "member3": {"offset": 8,"sizeof": 8,"signature": "j"} + } +} +``` + +These data _must_ match up with the C-side definition of the struct +(if any). See [Appendix G][appendix-g] for one way to easily generate +these from C code. + +Each entry in the `members` object maps the member's name to +its low-level layout: + +- `offset`: the byte offset from the start of the struct, as reported + by C's `offsetof()` feature. +- `sizeof`: as reported by C's `sizeof()`. +- `signature`: described below. +- `readOnly`: optional. If set to true, the binding layer will + throw if JS code tries to set that property. + +The order of the `members` entries is not important: their memory +layout is determined by their `offset` and `sizeof` members. The +`name` property is technically optional, but one of the steps in the +binding process requires that either it be passed an explicit name or +there be one in the struct description. The names of the `members` +entries need not match their C counterparts. Project conventions may +call for giving them different names in the JS side and the +[StructBinderFactory][] can be configured to automatically add a +prefix and/or suffix to their names. + +Nested structs are as-yet unsupported by this tool. + +Struct member "signatures" describe the data types of the members and +are an extended variant of the format used by Emscripten's +`addFunction()`. A signature for a non-function-pointer member, or +function pointer member which is to be modelled as an opaque pointer, +is a single letter. A signature for a function pointer may also be +modelled as a series of letters describing the call signature. The +supported letters are: + +- **`v`** = `void` (only used as return type for function pointer members) +- **`i`** = `int32` (4 bytes) +- **`j`** = `int64` (8 bytes) is only really usable if this code is built + with BigInt support (e.g. using the Emscripten `-sWASM_BIGINT` build + flag). Without that, this API may throw when encountering the `j` + signature entry. +- **`f`** = `float` (4 bytes) +- **`d`** = `double` (8 bytes) +- **`c`** = `int8` (1 byte) char - see notes below! +- **`C`** = `uint8` (1 byte) unsigned char - see notes below! +- **`p`** = `int32` (see notes below!) +- **`P`** = Like `p` but with extra handling. Described below. +- **`s`** = like `int32` but is a _hint_ that it's a pointer to a + string so that _some_ (very limited) contexts may treat it as such, + noting that such algorithms must, for lack of information to the + contrary, assume both that the encoding is UTF-8 and that the + pointer's member is NUL-terminated. If that is _not_ the case for a + given string member, do not use `s`: use `i` or `p` instead and do + any string handling yourself. + +Noting that: + +- **All of these types are numeric**. Attempting to set any + struct-bound property to a non-numeric value will trigger an + exception except in cases explicitly noted otherwise. +- **"Char" types**: WASM does not define an `int8` type, nor does it + distinguish between signed and unsigned. This API treats `c` as + `int8` and `C` as `uint8` for purposes of getting and setting values + when using the `DataView` class. It is _not_ recommended that client + code use these types in new WASM-capable code, but they were added + for the sake of binding some immutable legacy code to WASM. + +> Sidebar: Emscripten's public docs do not mention `p`, but their +generated code includes `p` as an alias for `i`, presumably to mean +"pointer". Though `i` is legal for pointer types in the signature, `p` +is more descriptive, so this framework encourages the use of `p` for +pointer-type members. Using `p` for pointers also helps future-proof +the signatures against the eventuality that WASM eventually supports +64-bit pointers. Note that sometimes `p` really means +pointer-to-pointer, but the Emscripten JS/WASM glue does not offer +that level of expressiveness in these signatures. We simply have to be +aware of when we need to deal with pointers and pointers-to-pointers +in JS code. + +> Trivia: this API treates `p` as distinctly different from `i` in +some contexts, so its use is encouraged for pointer types. + +Signatures in the form `x(...)` denote function-pointer members and +`x` denotes non-function members. Functions with no arguments use the +form `x()`. For function-type signatures, the strings are formulated +such that they can be passed to Emscripten's `addFunction()` after +stripping out the `(` and `)` characters. For good measure, to match +the public Emscripten docs, `p`, `c`, and `C`, should also be replaced +with `i`. In JavaScript that might look like: + +> +``` +signature.replace(/[^vipPsjfdcC]/g,'').replace(/[pPscC]/g,'i'); +``` + + +### `P` vs `p` in Method Signatures + +*This support is experimental and subject to change.* + +The method signature letter `p` means "pointer," which, in WASM, means +"integer." `p` is treated as an integer for most contexts, while still +also being a separate type (analog to how pointers in C are just a +special use of unsigned numbers). A capital `P` changes the semantics +of plain member pointers (but not, as of this writing, function +pointer members) as follows: + +- When a `P`-type member is **set** via `myStruct.x=y`, if + [`(y instanceof StructType)`][StructType] then the value of `y.pointer` is + stored in `myStruct.x`. If `y` is neither a number nor + a [StructType][], an exception is triggered (regardless of whether + `p` or `P` is used). + + + +Step 3: Binding the Struct +------------------------------------------------------------ + +We can now use the results of steps 1 and 2: + +> +```javascript +const MyStruct = MyBinder(myStructDescription); +``` + +That creates a new constructor function, `MyStruct`, which can be used +to instantiate new instances. The binder will throw if it encounters +any problems. + +That's all there is to it. + +> Sidebar: that function may modify the struct description object +and/or its sub-objects, or may even replace sub-objects, in order to +simplify certain later operations. If that is not desired, then feed +it a copy of the original, e.g. by passing it +`JSON.parse(JSON.stringify(structDefinition))`. + + +Step 4: Creating, Using, and Destroying Struct Instances +------------------------------------------------------------ + +Now that we have our constructor... + +> +```javascript +const my = new MyStruct(); +``` + +It is important to understand that creating a new instance allocates +memory on the WASM heap. We must not simply rely on garbage collection +to clean up the instances because doing so will not free up the WASM +heap memory. The correct way to free up that memory is to use the +object's `dispose()` method. + +The following usage pattern offers one way to easily ensure proper +cleanup of struct instances: + +> +```javascript +const my = new MyStruct(); +try { + console.log(my.member1, my.member2, my.member3); + my.member1 = 12; + assert(12 === my.member1); + /* ^^^ it may seem silly to test that, but recall that assigning that + property encodes the value into a byte array in heap memory, not + a normal JS property. Similarly, fetching the property decodes it + from the byte array. */ + // Pass the struct to C code which takes a MyStruct pointer: + aCFunction( my.pointer ); +} finally { + my.dispose(); +} +``` + +> Sidebar: the `finally` block will be run no matter how the `try` +exits, whether it runs to completion, propagates an exception, or uses +flow-control keywords like `return` or `break`. It is perfectly legal +to use `try`/`finally` without a `catch`, and doing so is an ideal +match for the memory management requirements of Jaccwaby-bound struct +instances. + +It is often useful to wrap an existing instance of a C-side struct +without taking over ownership of its memory. That can be achieved by +simply passing a pointer to the constructor. For example: + +```js +const m = new MyStruct( functionReturningASharedPtr() ); +// calling m.dispose() will _not_ free the wrapped C-side instance +// but will trigger any ondispose handler. +``` + +Now that we have struct instances, there are a number of things we +can do with them, as covered in the rest of this document. + + + +API Reference +============================================================ + + +API: Binder Factory +------------------------------------------------------------ + +This is the top-most function of the API, from which all other +functions and types are generated. The binder factory's signature is: + +> +``` +Function StructBinderFactory(object configOptions); +``` + +It returns a function which these docs refer to as a [StructBinder][] +(covered in the next section). It throws on error. + +The binder factory supports the following options in its +configuration object argument: + + +- `heap` + Must be either a `WebAssembly.Memory` instance representing the WASM + heap memory OR a function which returns an Int8Array or Uint8Array + view of the WASM heap. In the latter case the function should, if + appropriate for the environment, account for the heap being able to + grow. Jaccwabyt uses this property in such a way that it "should" be + okay for the WASM heap to grow at runtime (that case is, however, + untested). + +- `alloc` + Must be a function semantically compatible with Emscripten's + `Module._malloc()`. That is, it is passed the number of bytes to + allocate and it returns a pointer. On allocation failure it may + either return 0 or throw an exception. This API will throw an + exception if allocation fails or will propagate whatever exception + the allocator throws. The allocator _must_ use the same heap as the + `heap` config option. + +- `dealloc` + Must be a function semantically compatible with Emscripten's + `Module._free()`. That is, it takes a pointer returned from + `alloc()` and releases that memory. It must never throw and must + accept a value of 0/null to mean "do nothing" (noting that 0 is + _technically_ a legal memory address in WASM, but that seems like a + design flaw). + +- `bigIntEnabled` (bool=true if BigInt64Array is available, else false) + If true, the WASM bits this code is used with must have been + compiled with int64 support (e.g. using Emscripten's `-sWASM_BIGINT` + flag). If that's not the case, this flag should be set to false. If + it's enabled, BigInt support is assumed to work and certain extra + features are enabled. Trying to use features which requires BigInt + when it is disabled (e.g. using 64-bit integer types) will trigger + an exception. + +- `memberPrefix` and `memberSuffix` (string="") + If set, struct-defined properties get bound to JS with this string + as a prefix resp. suffix. This can be used to avoid symbol name + collisions between the struct-side members and the JS-side ones + and/or to make more explicit which object-level properties belong to + the struct mapping and which to the JS side. This does not modify + the values in the struct description objects, just the property + names through which they are accessed via property access operations + and the various a [StructInstance][] APIs (noting that the latter + tend to permit both the original names and the names as modified by + these settings). + +- `log` + Optional function used for debugging output. By default + `console.log` is used but by default no debug output is generated. + This API assumes that the function will space-separate each argument + (like `console.log` does). See [Appendix D](#appendix-d) for info + about enabling debugging output. + + + +API: Struct Binder +------------------------------------------------------------ + +Struct Binders are factories which are created by the +[StructBinderFactory][]. A given Struct Binder can process any number +of distinct structs. In a typical setup, an app will have ony one +shared Binder Factory and one Struct Binder. Struct Binders which are +created via different [StructBinderFactory][] calls are unrelated to each +other, sharing no state except, perhaps, indirectly via +[StructBinderFactory][] configuration (e.g. the memory heap). + +These factories have two call signatures: + +> +```javascript +Function StructBinder([string structName,] object structDescription) +``` + +If the struct description argument has a `name` property then the name +argument is optional, otherwise it is required. + +The returned object is a constructor for instances of the struct +described by its argument(s), each of which derives from +a separate [StructType][] instance. + +The Struct Binder has the following members: + +- `allocCString(str)` + Allocates a new UTF-8-encoded, NUL-terminated copy of the given JS + string and returns its address relative to `config.heap()`. If + allocation returns 0 this function throws. Ownership of the memory + is transfered to the caller, who must eventually pass it to the + configured `config.dealloc()` function. + +- `config` + The configuration object passed to the [StructBinderFactory][], + primarily for accessing the memory (de)allocator and memory. Modifying + any of its "significant" configuration values may have undefined + results. + + +API: Struct Type +------------------------------------------------------------ + +The StructType class is a property of the [StructBinder][] function. + +Each constructor created by a [StructBinder][] inherits from _its own +instance_ of the StructType class, which contains state specific to +that struct type (e.g. the struct name and description metadata). +StructTypes which are created via different [StructBinder][] instances +are unrelated to each other, sharing no state except [StructBinderFactory][] +config options. + +The StructType constructor cannot be called from client code. It is +only called by the [StructBinder][]-generated +[constructors][StructCtors]. The `StructBinder.StructType` object +has the following "static" properties (^Which are accessible from +individual instances via `theInstance.constructor`.): + +- `addOnDispose(...value)`\ + If this object has no `ondispose` property, this function creates it + as an array and pushes the given value(s) onto it. If the object has + a function-typed `ondispose` property, this call replaces it with an + array and moves that function into the array. In all other cases, + `ondispose` is assumed to be an array and the argument(s) is/are + appended to it. Returns `this`. + +- `allocCString(str)` + Identical to the [StructBinder][] method of the same name. + +- `hasExternalPointer(object)` + Returns true if the given object's `pointer` member refers to an + "external" object. That is the case when a pointer is passed to a + [struct's constructor][StructCtors]. If true, the memory is owned by + someone other than the object and must outlive the object. + +- `isA(value)` + Returns true if its argument is a StructType instance _from the same + [StructBinder][]_ as this StructType. + +- `memberKey(string)` + Returns the given string wrapped in the configured `memberPrefix` + and `memberSuffix` values. e.g. if passed `"x"` and `memberPrefix` + is `"$"` then it returns `"$x"`. This does not verify that the + property is actually a struct a member, it simply transforms the + given string. TODO(?): add a 2nd parameter indicating whether it + should validate that it's a known member name. + +The base StructType prototype has the following members, all of which +are inherited by [struct instances](#api-structinstance) and may only +legally be called on concrete struct instances unless noted otherwise: + +- `dispose()` + Frees, if appropriate, the WASM-allocated memory which is allocated + by the constructor. If this is not called before the JS engine + cleans up the object, a leak in the WASM heap memory pool will result. + When `dispose()` is called, if the object has a property named `ondispose` + then it is treated as follows: + - If it is a function, it is called with the struct object as its `this`. + That method must not throw - if it does, the exception will be + ignored. + - If it is an array, it may contain functions, pointers, other + [StructType] instances, and/or JS strings. If an entry is a + function, it is called as described above. If it's a number, it's + assumed to be a pointer and is passed to the `dealloc()` function + configured for the parent [StructBinder][]. If it's a + [StructType][] instance then its `dispose()` method is called. If + it's a JS string, it's assumed to be a helpful description of the + next entry in the list and is simply ignored. Strings are + supported primarily for use as debugging information. + - Some struct APIs will manipulate the `ondispose` member, creating + it as an array or converting it from a function to array as + needed. + +- `lookupMember(memberName,throwIfNotFound=true)` + Given the name of a mapped struct member, it returns the member + description object. If not found, it either throws (if the 2nd + argument is true) or returns `undefined` (if the second argument is + false). The first argument may be either the member name as it is + mapped in the struct description or that same name with the + configured `memberPrefix` and `memberSuffix` applied, noting that + the lookup in the former case is faster.\ + This method may be called directly on the prototype, without a + struct instance. + +- `memberToJsString(memberName)` + Uses `this.lookupMember(memberName,true)` to look up the given + member. If its signature is `s` then it is assumed to refer to a + NUL-terminated, UTF-8-encoded string and its memory is decoded as + such. If its signature is not one of those then an exception is + thrown. If its address is 0, `null` is returned. See also: + `setMemberCString()`. + +- `memberIsString(memberName [,throwIfNotFound=true])` + Uses `this.lookupMember(memberName,throwIfNotFound)` to look up the + given member. Returns the member description object if the member + has a signature of `s`, else returns false. If the given member is + not found, it throws if the 2nd argument is true, else it returns + false. + +- `memberKey(string)` + Works identically to `StructBinder.StructType.memberKey()`. + +- `memberKeys()` + Returns an array of the names of the properties of this object + which refer to C-side struct counterparts. + +- `memberSignature(memberName [,emscriptenFormat=false])` + Returns the signature for a given a member property, either in this + framework's format or, if passed a truthy 2nd argument, in a format + suitable for the 2nd argument to Emscripten's `addFunction()`. + Throws if the first argument does not resolve to a struct-bound + member name. The member name is resolved using `this.lookupMember()` + and throws if the member is found mapped. + +- `memoryDump()` + Returns a Uint8Array which contains the current state of this + object's raw memory buffer. Potentially useful for debugging, but + not much else. Note that the memory is necessarily, for + compatibility with C, written in the host platform's endianness and + is thus not useful as a persistent/portable serialization format. + +- `setMemberCString(memberName,str)` + Uses `StructType.allocCString()` to allocate a new C-style string, + assign it to the given member, and add the new string to this + object's `ondispose` list for cleanup when `this.dispose()` is + called. This function throws if `lookupMember()` fails for the given + member name, if allocation of the string fails, or if the member has + a signature value of anything other than `s`. Returns `this`. + *Achtung*: calling this repeatedly will not immediately free the + previous values because this code cannot know whether they are in + use in other places, namely C. Instead, each time this is called, + the prior value is retained in the `ondispose` list for cleanup when + the struct is disposed of. Because of the complexities and general + uncertainties of memory ownership and lifetime in such + constellations, it is recommended that the use of C-string members + from JS be kept to a minimum or that the relationship be one-way: + let C manage the strings and only fetch them from JS using, e.g., + `memberToJsString()`. + + + +API: Struct Constructors +------------------------------------------------------------ + +Struct constructors (the functions returned from [StructBinder][]) +are used for, intuitively enough, creating new instances of a given +struct type: + +> +``` +const x = new MyStruct; +``` + +Normally they should be passed no arguments, but they optionally +accept a single argument: a WASM heap pointer address of memory +which the object will use for storage. It does _not_ take over +ownership of that memory and that memory must be valid at +for least as long as this struct instance. This is used, for example, +to proxy static/shared C-side instances: + +> +``` +const x = new MyStruct( someCFuncWhichReturnsAMyStructPointer() ); +... +x.dispose(); // does NOT free the memory +``` + +The JS-side construct does not own the memory in that case and has no +way of knowing when the C-side struct is destroyed. Results are +specifically undefined if the JS-side struct is used after the C-side +struct's member is freed. + +> Potential TODO: add a way of passing ownership of the C-side struct +to the JS-side object. e.g. maybe simply pass `true` as the second +argument to tell the constructor to take over ownership. Currently the +pointer can be taken over using something like +`myStruct.ondispose=[myStruct.pointer]` immediately after creation. + +These constructors have the following "static" members: + +- `isA(value)` + Returns true if its argument was created by this constructor. + +- `memberKey(string)` + Works exactly as documented for [StructType][]. + +- `memberKeys(string)` + Works exactly as documented for [StructType][]. + +- `structInfo` + The structure description passed to [StructBinder][] when this + constructor was generated. + +- `structName` + The structure name passed to [StructBinder][] when this constructor + was generated. + + + +API: Struct Prototypes +------------------------------------------------------------ + +The prototypes of structs created via [the constructors described in +the previous section][StructCtors] are each a struct-type-specific +instance of [StructType][] and add the following struct-type-specific +properties to the mix: + +- `structInfo` + The struct description metadata, as it was given to the + [StructBinder][] which created this class. + +- `structName` + The name of the struct, as it was given to the [StructBinder][] which + created this class. + + +API: Struct Instances +------------------------------------------------------------------------ + +Instances of structs created via [the constructors described +above][StructCtors] each have the following instance-specific state in +common: + +- `pointer` + A read-only numeric property which is the "pointer" returned by the + configured allocator when this object is constructed. After + `dispose()` (inherited from [StructType][]) is called, this property + has the `undefined` value. When calling C-side code which takes a + pointer to a struct of this type, simply pass it `myStruct.pointer`. + + +Appendices +============================================================ + + +Appendix A: Limitations, TODOs, and Non-TODOs +------------------------------------------------------------ + +- This library only supports the basic set of member types supported + by WASM: numbers (which includes pointers). Nested structs are not + handled except that a member may be a _pointer_ to such a + struct. Whether or not it ever will depends entirely on whether its + developer ever needs that support. Conversion of strings between + JS and C requires infrastructure specific to each WASM environment + and is not directly supported by this library. + +- Binding functions to struct instances, such that C can see and call + JS-defined functions, is not as transparent as it really could be, + due to [shortcomings in the Emscripten + `addFunction()`/`removeFunction()` + interfaces](https://github.com/emscripten-core/emscripten/issues/17323). Until + a replacement for that API can be written, this support will be + quite limited. It _is_ possible to bind a JS-defined function to a + C-side function pointer and call that function from C. What's + missing is easier-to-use/more transparent support for doing so. + - In the meantime, a [standalone + subproject](/file/common/whwasmutil.js) of Jaccwabyt provides such a + binding mechanism, but integrating it directly with Jaccwabyt would + not only more than double its size but somehow feels inappropriate, so + experimentation is in order for how to offer that capability via + completely optional [StructBinderFactory][] config options. + +- It "might be interesting" to move access of the C-bound members into + a sub-object. e.g., from JS they might be accessed via + `myStructInstance.s.structMember`. The main advantage is that it would + eliminate any potential confusion about which members are part of + the C struct and which exist purely in JS. "The problem" with that + is that it requires internally mapping the `s` member back to the + object which contains it, which makes the whole thing more costly + and adds one more moving part which can break. Even so, it's + something to try out one rainy day. Maybe even make it optional and + make the `s` name configurable via the [StructBinderFactory][] + options. (Over-engineering is an arguably bad habit of mine.) + +- It "might be interesting" to offer (de)serialization support. It + would be very limited, e.g. we can't serialize arbitrary pointers in + any meaningful way, but "might" be useful for structs which contain + only numeric or C-string state. As it is, it's easy enough for + client code to write wrappers for that and handle the members in + ways appropriate to their apps. Any impl provided in this library + would have the shortcoming that it may inadvertently serialize + pointers (since they're just integers), resulting in potential chaos + after deserialization. Perhaps the struct description can be + extended to tag specific members as serializable and how to + serialize them. + + +Appendix D: Debug Info +------------------------------------------------------------ + +The [StructBinderFactory][], [StructBinder][], and [StructType][] classes +all have the following "unsupported" method intended primarily +to assist in their own development, as opposed to being for use in +client code: + +- `debugFlags(flags)` (integer) + An "unsupported" debugging option which may change or be removed at + any time. Its argument is a set of flags to enable/disable certain + debug/tracing output for property accessors: 0x01 for getters, 0x02 + for setters, 0x04 for allocations, 0x08 for deallocations. Pass 0 to + disable all flags and pass a negative value to _completely_ clear + all flags. The latter has the side effect of telling the flags to be + inherited from the next-higher-up class in the hierarchy, with + [StructBinderFactory][] being top-most, followed by [StructBinder][], then + [StructType][]. + + + +Appendix G: Generating Struct Descriptions From C +------------------------------------------------------------ + +Struct definitions are _ideally_ generated from WASM-compiled C, as +opposed to simply guessing the sizeofs and offsets, so that the sizeof +and offset information can be collected using C's `sizeof()` and +`offsetof()` features (noting that struct padding may impact offsets +in ways which might not be immediately obvious, so writing them by +hand is _most certainly not recommended_). + +How exactly the desciption is generated is necessarily +project-dependent. It's tempting say, "oh, that's easy! We'll just +write it by hand!" but that would be folly. The struct sizes and byte +offsets into the struct _must_ be precisely how C-side code sees the +struct or the runtime results are completely undefined. + +The approach used in developing and testing _this_ software is... + +Below is a complete copy/pastable example of how we can use a small +set of macros to generate struct descriptions from C99 or later into +static string memory. Simply add such a file to your WASM build, +arrange for its function to be exported[^export-func], and call it +from JS (noting that it requires environment-specific JS glue to +convert the returned pointer to a JS-side string). Use `JSON.parse()` +to process it, then feed the included struct descriptions into the +binder factory at your leisure. + +------------------------------------------------------------ + +```c +#include /* memset() */ +#include /* offsetof() */ +#include /* snprintf() */ +#include /* int64_t */ +#include + +struct ExampleStruct { + int v4; + void * ppV; + int64_t v8; + void (*xFunc)(void*); +}; +typedef struct ExampleStruct ExampleStruct; + +const char * wasm__ctype_json(void){ + static char strBuf[512 * 8] = {0} + /* Static buffer which must be sized large enough for + our JSON. The string-generation macros try very + hard to assert() if this buffer is too small. */; + int n = 0, structCount = 0 /* counters for the macros */; + char * pos = &strBuf[1] + /* Write-position cursor. Skip the first byte for now to help + protect against a small race condition */; + char const * const zEnd = pos + sizeof(strBuf) + /* one-past-the-end cursor (virtual EOF) */; + if(strBuf[0]) return strBuf; // Was set up in a previous call. + + //////////////////////////////////////////////////////////////////// + // First we need to build up our macro framework... + + //////////////////////////////////////////////////////////////////// + // Core output-generating macros... +#define lenCheck assert(pos < zEnd - 100) +#define outf(format,...) \ + pos += snprintf(pos, ((size_t)(zEnd - pos)), format, __VA_ARGS__); \ + lenCheck +#define out(TXT) outf("%s",TXT) +#define CloseBrace(LEVEL) \ + assert(LEVEL<5); memset(pos, '}', LEVEL); pos+=LEVEL; lenCheck + + //////////////////////////////////////////////////////////////////// + // Macros for emiting StructBinders... +#define StructBinder__(TYPE) \ + n = 0; \ + outf("%s{", (structCount++ ? ", " : "")); \ + out("\"name\": \"" # TYPE "\","); \ + outf("\"sizeof\": %d", (int)sizeof(TYPE)); \ + out(",\"members\": {"); +#define StructBinder_(T) StructBinder__(T) +// ^^^ extra indirection needed to expand CurrentStruct +#define StructBinder StructBinder_(CurrentStruct) +#define _StructBinder CloseBrace(2) +#define M(MEMBER,SIG) \ + outf("%s\"%s\": " \ + "{\"offset\":%d,\"sizeof\": %d,\"signature\":\"%s\"}", \ + (n++ ? ", " : ""), #MEMBER, \ + (int)offsetof(CurrentStruct,MEMBER), \ + (int)sizeof(((CurrentStruct*)0)->MEMBER), \ + SIG) + // End of macros. + //////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////// + // With that out of the way, we can do what we came here to do. + out("\"structs\": ["); { + +// For each struct description, do... +#define CurrentStruct ExampleStruct + StructBinder { + M(v4,"i"); + M(ppV,"p"); + M(v8,"j"); + M(xFunc,"v(p)"); + } _StructBinder; +#undef CurrentStruct + + } out( "]"/*structs*/); + //////////////////////////////////////////////////////////////////// + // Done! Finalize the output... + out("}"/*top-level wrapper*/); + *pos = 0; + strBuf[0] = '{'/*end of the race-condition workaround*/; + return strBuf; + +// If this file will ever be concatenated or #included with others, +// it's good practice to clean up our macros: +#undef StructBinder +#undef StructBinder_ +#undef StructBinder__ +#undef M +#undef _StructBinder +#undef CloseBrace +#undef out +#undef outf +#undef lenCheck +} +``` + +------------------------------------------------------------ + + + +[sqlite3]: https://sqlite.org +[emscripten]: https://emscripten.org +[sgb]: https://wanderinghorse.net/home/stephan/ +[appendix-g]: #appendix-g +[StructBinderFactory]: #api-binderfactory +[StructCtors]: #api-structctor +[StructType]: #api-structtype +[StructBinder]: #api-structbinder +[StructInstance]: #api-structinstance +[^export-func]: In Emscripten, add its name, prefixed with `_`, to the + project's `EXPORT_FUNCTIONS` list. +[BigInt64Array]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/BigInt64Array +[TextDecoder]: https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder +[TextEncoder]: https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder +[MDN]: https://developer.mozilla.org/docs/Web/API -- cgit v1.2.3