summaryrefslogtreecommitdiffstats
path: root/third_party/rust/lucet-module-wasmsbx/src
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/lucet-module-wasmsbx/src')
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/bindings.rs203
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/error.rs17
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/functions.rs192
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/globals.rs164
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/lib.rs44
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/linear_memory.rs191
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/module.rs27
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/module_data.rs303
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/runtime.rs8
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/signature.rs209
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/tables.rs14
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/traps.rs63
-rw-r--r--third_party/rust/lucet-module-wasmsbx/src/types.rs72
13 files changed, 1507 insertions, 0 deletions
diff --git a/third_party/rust/lucet-module-wasmsbx/src/bindings.rs b/third_party/rust/lucet-module-wasmsbx/src/bindings.rs
new file mode 100644
index 0000000000..3c1dab3855
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/bindings.rs
@@ -0,0 +1,203 @@
+use failure::{format_err, Error};
+use serde_json::{self, Map, Value};
+use std::collections::{hash_map::Entry, HashMap};
+use std::fs;
+use std::path::Path;
+
+/// A two-level lookup table of symbol bindings: module name first, then symbol name within that
+/// module, yielding the string the symbol is bound to.
+#[derive(Debug, Clone)]
+pub struct Bindings {
+ // module name -> (symbol name -> bound name); `translate` performs exactly this lookup.
+ bindings: HashMap<String, HashMap<String, String>>,
+}
+
+impl Bindings {
+    /// Wrap an already-built `module -> symbol -> binding` table.
+    pub fn new(bindings: HashMap<String, HashMap<String, String>>) -> Bindings {
+        Self { bindings }
+    }
+
+    /// Build bindings holding a single module named `"env"` with the given symbol table.
+    pub fn env(env: HashMap<String, String>) -> Bindings {
+        let mut bindings = HashMap::new();
+        bindings.insert("env".to_owned(), env);
+        Self::new(bindings)
+    }
+
+    /// Build bindings that contain no modules.
+    pub fn empty() -> Bindings {
+        Self::new(HashMap::new())
+    }
+
+    /// Parse bindings from a JSON value shaped like
+    /// `{ "module": { "symbol": "binding", ... }, ... }`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the top-level value is not an object, if any module entry is not an object, or
+    /// if any binding is not a string.
+    pub fn from_json(v: &Value) -> Result<Bindings, Error> {
+        match v.as_object() {
+            Some(modules) => Self::parse_modules_json_obj(modules),
+            None => Err(format_err!("top level json expected to be object")),
+        }
+    }
+
+    /// Parse bindings from JSON text; see [`Bindings::from_json`] for the expected shape.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `s` is not valid JSON or does not have the expected shape.
+    pub fn from_str(s: &str) -> Result<Bindings, Error> {
+        let top: Value = serde_json::from_str(s)?;
+        Self::from_json(&top)
+    }
+
+    /// Read the file at `path` and parse its contents with [`Bindings::from_str`].
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file cannot be read as a string or its contents do not parse.
+    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Bindings, Error> {
+        let contents = fs::read_to_string(path.as_ref())?;
+        Self::from_str(&contents)
+    }
+
+    /// Merge every binding of `other` into `self`.
+    ///
+    /// A symbol present in both is accepted only when both sides bind it to the same string.
+    ///
+    /// # Errors
+    ///
+    /// Fails on the first symbol that `other` binds differently from `self`. The merge happens
+    /// in place, so bindings copied before the conflict was found stay in `self`.
+    pub fn extend(&mut self, other: &Bindings) -> Result<(), Error> {
+        for (modname, othermodbindings) in &other.bindings {
+            match self.bindings.entry(modname.clone()) {
+                Entry::Vacant(e) => {
+                    e.insert(othermodbindings.clone());
+                }
+                Entry::Occupied(mut e) => {
+                    let existing = e.get_mut();
+                    for (bindname, binding) in othermodbindings {
+                        match existing.entry(bindname.clone()) {
+                            Entry::Vacant(e) => {
+                                e.insert(binding.clone());
+                            }
+                            Entry::Occupied(e) => {
+                                if binding != e.get() {
+                                    // "from" is the binding already held, "to" is the one
+                                    // `other` tried to install.
+                                    return Err(format_err!(
+                                        "cannot re-bind {} from {} to {}",
+                                        e.key(),
+                                        e.get(),
+                                        binding
+                                    ));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Look up what `symbol` of `module` is bound to.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `module` is unknown, or if it is known but has no binding for `symbol`.
+    pub fn translate(&self, module: &str, symbol: &str) -> Result<&str, Error> {
+        let symbols = self.bindings.get(module).ok_or_else(|| {
+            format_err!("Unknown module for symbol `{}::{}`", module, symbol)
+        })?;
+        symbols
+            .get(symbol)
+            .map(String::as_str)
+            .ok_or_else(|| format_err!("Unknown symbol `{}::{}`", module, symbol))
+    }
+
+    /// Parse the top-level JSON object: each key is a module name and each value must be an
+    /// object of that module's bindings.
+    fn parse_modules_json_obj(m: &Map<String, Value>) -> Result<Self, Error> {
+        let mut res = HashMap::new();
+        for (modulename, values) in m {
+            let methods = values.as_object().ok_or_else(|| {
+                format_err!(
+                    "bindings for module `{}` expected to be a json object",
+                    modulename
+                )
+            })?;
+            res.insert(modulename.to_owned(), Self::parse_methods_json_obj(methods)?);
+        }
+        Ok(Self::new(res))
+    }
+
+    /// Parse one module's JSON object: each key is a symbol name and each value must be a
+    /// string naming its binding.
+    fn parse_methods_json_obj(m: &Map<String, Value>) -> Result<HashMap<String, String>, Error> {
+        let mut res = HashMap::new();
+        for (method, i) in m {
+            let importbinding = i.as_str().ok_or_else(|| {
+                format_err!("binding for symbol `{}` expected to be a json string", method)
+            })?;
+            res.insert(method.to_owned(), importbinding.to_owned());
+        }
+        Ok(res)
+    }
+
+    /// Serialize the bindings to JSON text, in the shape accepted by [`Bindings::from_str`].
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error reported by `serde_json::to_string`.
+    pub fn to_string(&self) -> Result<String, Error> {
+        let s = serde_json::to_string(&self.to_json())?;
+        Ok(s)
+    }
+
+    /// Convert the bindings to a JSON object value, in the shape accepted by
+    /// [`Bindings::from_json`].
+    pub fn to_json(&self) -> Value {
+        Value::from(self.serialize_modules_json_obj())
+    }
+
+    /// Build the top-level JSON object: one nested object per module.
+    fn serialize_modules_json_obj(&self) -> Map<String, Value> {
+        self.bindings
+            .iter()
+            .map(|(modulename, methods)| {
+                (
+                    modulename.to_owned(),
+                    Value::from(Self::serialize_methods_json_obj(methods)),
+                )
+            })
+            .collect()
+    }
+
+    /// Build one module's JSON object: symbol name to binding string.
+    fn serialize_methods_json_obj(methods: &HashMap<String, String>) -> Map<String, Value> {
+        methods
+            .iter()
+            .map(|(methodname, symbol)| (methodname.to_owned(), Value::from(symbol.to_owned())))
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Bindings;
+    use std::collections::HashMap;
+    use std::path::PathBuf;
+
+    /// Path of a fixture under `tests/bindings/`, relative to the directory the tests run from.
+    fn test_file(f: &str) -> PathBuf {
+        PathBuf::from(format!("tests/bindings/{}", f))
+    }
+
+    /// Bindings built in memory resolve known symbols and reject unknown ones.
+    #[test]
+    fn explicit() {
+        let mut explicit_map = HashMap::new();
+        explicit_map.insert(String::from("hello"), String::from("goodbye"));
+        let map = Bindings::env(explicit_map);
+
+        let result = map.translate("env", "hello").unwrap();
+        assert_eq!(result, "goodbye");
+
+        assert!(
+            map.translate("env", "nonexistent").is_err(),
+            "explicit bindings returned value for non-existent symbol"
+        );
+    }
+
+    /// Loading from a path that does not exist is an error, not a panic.
+    #[test]
+    fn explicit_from_nonexistent_file() {
+        let fail_map = Bindings::from_file(test_file("nonexistent_bindings.json"));
+        assert!(
+            fail_map.is_err(),
+            "Bindings::from_file did not fail on a non-existent file"
+        );
+    }
+
+    /// Loading a file whose contents are not valid bindings is an error.
+    #[test]
+    fn explicit_from_garbage_file() {
+        let fail_map = Bindings::from_file(test_file("garbage.json"));
+        assert!(
+            fail_map.is_err(),
+            "Bindings::from_file did not fail on a garbage file"
+        );
+    }
+
+    /// A valid bindings file resolves known symbols and rejects unknown ones.
+    #[test]
+    fn explicit_from_file() {
+        let map = Bindings::from_file(test_file("bindings_test.json"))
+            .expect("load valid bindings from file");
+        let result = map.translate("env", "hello").expect("hello has a binding");
+        assert_eq!(result, "json is cool");
+
+        assert!(
+            map.translate("env", "nonexistent").is_err(),
+            "bindings from file returned value for non-existent symbol"
+        );
+    }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/error.rs b/third_party/rust/lucet-module-wasmsbx/src/error.rs
new file mode 100644
index 0000000000..a19ec8a334
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/error.rs
@@ -0,0 +1,17 @@
+use failure::Fail;
+
+/// Module data (de)serialization errors.
+#[derive(Debug, Fail)]
+pub enum Error {
+ /// A sparse-data page was supplied whose length is not exactly 4096 bytes.
+ #[fail(display = "Sparse data contained a page with length other than 4096")]
+ IncorrectPageSize,
+ /// Wraps a `bincode` error; displayed as a deserialization failure.
+ #[fail(display = "Deserialization error: {}", _0)]
+ DeserializationError(#[cause] bincode::Error),
+ /// Wraps a `bincode` error; displayed as a serialization failure.
+ #[fail(display = "Serialization error: {}", _0)]
+ SerializationError(#[cause] bincode::Error),
+ /// Wraps a `minisign` error. Only compiled in when the `signature_checking` feature is on.
+ #[cfg(feature = "signature_checking")]
+ #[fail(display = "Module signature error: {}", _0)]
+ ModuleSignatureError(#[cause] minisign::PError),
+ /// Wraps a standard-library I/O error.
+ #[fail(display = "I/O error: {}", _0)]
+ IOError(#[cause] std::io::Error),
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/functions.rs b/third_party/rust/lucet-module-wasmsbx/src/functions.rs
new file mode 100644
index 0000000000..87f0b68d97
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/functions.rs
@@ -0,0 +1,192 @@
+use crate::traps::{TrapManifest, TrapSite};
+use cranelift_entity::entity_impl;
+use serde::{Deserialize, Serialize};
+
+use std::slice::from_raw_parts;
+
+/// Identifies one function of a module, whether it is imported, exported, or internal.
+///
+/// All of these kinds draw their indices from one shared space, so neighbouring indices may refer
+/// to functions of different kinds: `FunctionIndex(N)` may be an exported function,
+/// `FunctionIndex(N + 1)` an internal one, and `FunctionIndex(N + 2)` an imported one.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
+pub struct FunctionIndex(u32);
+
+impl FunctionIndex {
+    /// Wrap a raw `u32` index.
+    pub fn from_u32(idx: u32) -> FunctionIndex {
+        Self(idx)
+    }
+
+    /// The raw `u32` index this value wraps.
+    pub fn as_u32(&self) -> u32 {
+        let FunctionIndex(raw) = *self;
+        raw
+    }
+}
+
+/// ImportFunction describes an imported function - its internal function index and the name/module
+/// pair that function should be found in.
+///
+/// The `&'a str` fields borrow the module and function names rather than owning them.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
+pub struct ImportFunction<'a> {
+ pub fn_idx: FunctionIndex,
+ pub module: &'a str,
+ pub name: &'a str,
+}
+
+/// ExportFunction describes an exported function - its internal function index and the names that
+/// function has been exported under (one function may carry several export names).
+#[derive(Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
+pub struct ExportFunction<'a> {
+ pub fn_idx: FunctionIndex,
+ // `borrow` lets the deserialized names point into the input buffer instead of being copied.
+ #[serde(borrow)]
+ pub names: Vec<&'a str>,
+}
+
+/// A variant of [`ExportFunction`] that owns its export names.
+pub struct OwnedExportFunction {
+    pub fn_idx: FunctionIndex,
+    pub names: Vec<String>,
+}
+
+impl OwnedExportFunction {
+    /// Create an [`ExportFunction`] that borrows the names stored in this value.
+    pub fn to_ref<'a>(&'a self) -> ExportFunction<'a> {
+        ExportFunction {
+            // `FunctionIndex` is `Copy`, so a plain copy replaces the explicit `clone()`.
+            fn_idx: self.fn_idx,
+            names: self.names.iter().map(String::as_str).collect(),
+        }
+    }
+}
+
+/// A variant of [`ImportFunction`] that owns its module and function names.
+pub struct OwnedImportFunction {
+    pub fn_idx: FunctionIndex,
+    pub module: String,
+    pub name: String,
+}
+
+impl OwnedImportFunction {
+    /// Create an [`ImportFunction`] that borrows the strings stored in this value.
+    pub fn to_ref<'a>(&'a self) -> ImportFunction<'a> {
+        ImportFunction {
+            // `FunctionIndex` is `Copy`, so a plain copy replaces the explicit `clone()`.
+            fn_idx: self.fn_idx,
+            module: self.module.as_str(),
+            name: self.name.as_str(),
+        }
+    }
+}
+
+/// UniqueSignatureIndex names a signature after collapsing duplicate signatures to a single
+/// identifier, whereas SignatureIndex is directly what the original module specifies, and may
+/// specify duplicates of types that are structurally equal.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)]
+pub struct UniqueSignatureIndex(u32);
+// NOTE(review): `entity_impl!` comes from `cranelift_entity`; presumably it makes this type usable
+// as a cranelift entity reference (e.g. as a key of its entity maps) - confirm against that crate.
+entity_impl!(UniqueSignatureIndex);
+
+/// A function's address, wrapped so it is not passed around as a bare `u64` or `usize`.
+///
+/// Using this wrapper also avoids spelling function pointers as `fn` types, which cannot be
+/// freely cast from one to another with `as`. To call through a `FunctionPointer`, take
+/// `as_usize()` and transmute the result to a `fn` type with the appropriate signature.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)]
+pub struct FunctionPointer(usize);
+
+impl FunctionPointer {
+    /// Wrap a raw address.
+    pub fn from_usize(ptr: usize) -> FunctionPointer {
+        Self(ptr)
+    }
+
+    /// The raw address this value wraps.
+    pub fn as_usize(&self) -> usize {
+        let FunctionPointer(addr) = *self;
+        addr
+    }
+}
+
+/// Information about the corresponding function.
+///
+/// This is split from but closely related to a [`FunctionSpec`]. The distinction is largely for
+/// serialization/deserialization simplicity, as [`FunctionSpec`] contains fields that need
+/// cooperation from a loader, with manual layout and serialization as a result.
+/// [`FunctionMetadata`] is the remainder of fields that can be automatically
+/// serialized/deserialized and are small enough that copying isn't a large concern.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct FunctionMetadata<'a> {
+ /// Index of this function's signature among the module's de-duplicated signatures.
+ pub signature: UniqueSignatureIndex,
+ /// the "name" field is some human-friendly name, not necessarily the same as used to reach
+ /// this function (through an export, for example), and may not even indicate that a function
+ /// is exported at all.
+ /// TODO: at some point when possible, this field ought to be set from the names section of a
+ /// wasm module. At the moment that information is lost at parse time.
+ #[serde(borrow)]
+ pub name: Option<&'a str>,
+}
+
+/// A variant of [`FunctionMetadata`] that owns its optional name.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct OwnedFunctionMetadata {
+    pub signature: UniqueSignatureIndex,
+    pub name: Option<String>,
+}
+
+impl OwnedFunctionMetadata {
+    /// Create a [`FunctionMetadata`] that borrows the name stored in this value.
+    pub fn to_ref(&self) -> FunctionMetadata<'_> {
+        FunctionMetadata {
+            // `UniqueSignatureIndex` is `Copy`, so a plain copy replaces the explicit `clone()`.
+            signature: self.signature,
+            name: self.name.as_ref().map(String::as_str),
+        }
+    }
+}
+
+/// Pairs a function's address (`ptr`) with its index in the module (`id`).
+pub struct FunctionHandle {
+ pub ptr: FunctionPointer,
+ pub id: FunctionIndex,
+}
+
+// The layout of this struct is very tightly coupled to lucetc's `write_function_manifest`!
+//
+// Specifically, `write_function_manifest` sets up relocations on `code_addr` and `traps_addr`.
+// It does not explicitly serialize a correctly formed `FunctionSpec`, because addresses
+// for these fields do not exist until the object is loaded in the future.
+//
+// So `write_function_manifest` has implicit knowledge of the layout of this structure
+// (including padding bytes between `code_len` and `traps_addr`)
+//
+// Do not reorder, resize, or add fields without updating that writer as well.
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct FunctionSpec {
+ // Address of the function's code; `ptr()` hands it out as a `FunctionPointer`.
+ code_addr: u64,
+ // Length of the function's code; `contains`/`relative_addr` use it as the upper bound.
+ code_len: u32,
+ // Address of the first `TrapSite`; `traps()` treats a zero (null) address as "no traps".
+ traps_addr: u64,
+ // Number of `TrapSite` elements at `traps_addr` (the slice length used by `traps()`).
+ traps_len: u64,
+}
+
+impl FunctionSpec {
+    /// Assemble a `FunctionSpec` from its four raw fields.
+    pub fn new(code_addr: u64, code_len: u32, traps_addr: u64, traps_len: u64) -> Self {
+        Self {
+            code_addr,
+            code_len,
+            traps_addr,
+            traps_len,
+        }
+    }
+
+    /// The code address, wrapped as a `FunctionPointer`.
+    pub fn ptr(&self) -> FunctionPointer {
+        let addr = self.code_addr as usize;
+        FunctionPointer::from_usize(addr)
+    }
+
+    /// Length of the function's code.
+    pub fn code_len(&self) -> u32 {
+        self.code_len
+    }
+
+    /// Number of entries in the function's trap table.
+    pub fn traps_len(&self) -> u64 {
+        self.traps_len
+    }
+
+    /// Whether `addr` lies in `[code_addr, code_addr + code_len)`.
+    pub fn contains(&self, addr: u64) -> bool {
+        match addr.checked_sub(self.code_addr) {
+            Some(offset) => offset < u64::from(self.code_len),
+            // `addr` is below the start of the code.
+            None => false,
+        }
+    }
+
+    /// Offset of `addr` from the start of the function's code, or `None` when `addr` is not
+    /// inside the code.
+    pub fn relative_addr(&self, addr: u64) -> Option<u32> {
+        addr.checked_sub(self.code_addr)
+            .filter(|&offset| offset < u64::from(self.code_len))
+            // `offset < code_len` and `code_len` is a `u32`, so this cast cannot truncate.
+            .map(|offset| offset as u32)
+    }
+
+    /// The function's trap table, or `None` when `traps_addr` is null.
+    pub fn traps(&self) -> Option<TrapManifest<'_>> {
+        let traps_ptr = self.traps_addr as *const TrapSite;
+        if traps_ptr.is_null() {
+            return None;
+        }
+
+        // SAFETY (assumed, not checked here): a non-null `traps_addr` must point at `traps_len`
+        // valid `TrapSite` values that outlive `self` - TODO confirm against the loader that
+        // fills these fields in.
+        let sites = unsafe { from_raw_parts(traps_ptr, self.traps_len as usize) };
+        Some(TrapManifest::new(sites))
+    }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/globals.rs b/third_party/rust/lucet-module-wasmsbx/src/globals.rs
new file mode 100644
index 0000000000..801d0c9d8e
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/globals.rs
@@ -0,0 +1,164 @@
+use serde::{Deserialize, Serialize};
+
+/// A WebAssembly global along with its export specification.
+///
+/// The lifetime parameter exists to support zero-copy deserialization for the `&str` fields at the
+/// leaves of the structure. For a variant with owned types at the leaves, see
+/// [`OwnedGlobalSpec`](owned/struct.OwnedGlobalSpec.html).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct GlobalSpec<'a> {
+    #[serde(borrow)]
+    global: Global<'a>,
+    export_names: Vec<&'a str>,
+}
+
+impl<'a> GlobalSpec<'a> {
+    /// Create a spec from a global and the names it is exported under.
+    pub fn new(global: Global<'a>, export_names: Vec<&'a str>) -> Self {
+        Self {
+            global,
+            export_names,
+        }
+    }
+
+    /// Create a new global definition with an initial value and export names.
+    ///
+    /// The definition is always an `I64` global.
+    pub fn new_def(init_val: i64, export_names: Vec<&'a str>) -> Self {
+        Self::new(Global::Def(GlobalDef::I64(init_val)), export_names)
+    }
+
+    /// Create a new global import definition with a module and field name, and export names.
+    pub fn new_import(module: &'a str, field: &'a str, export_names: Vec<&'a str>) -> Self {
+        Self::new(Global::Import { module, field }, export_names)
+    }
+
+    /// The global this spec describes.
+    pub fn global(&self) -> &Global<'_> {
+        &self.global
+    }
+
+    /// The names this global is exported under; empty when it is not exported.
+    pub fn export_names(&self) -> &[&str] {
+        &self.export_names
+    }
+
+    /// Whether the global is internal, i.e. has no export names.
+    pub fn is_internal(&self) -> bool {
+        self.export_names.is_empty()
+    }
+}
+
+/// A WebAssembly global is either defined locally, or is defined in relation to a field of another
+/// WebAssembly module.
+///
+/// The lifetime parameter exists to support zero-copy deserialization for the `&str` fields at the
+/// leaves of the structure. For a variant with owned types at the leaves, see
+/// [`OwnedGlobal`](owned/struct.OwnedGlobal.html).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub enum Global<'a> {
+ /// A global defined in this module, carrying its typed initial value.
+ Def(GlobalDef),
+ /// A global imported from the field `field` of the module named `module`.
+ Import { module: &'a str, field: &'a str },
+}
+
+/// A global defined by this module itself (as opposed to imported), tagged with its value type
+/// and carrying its initial value.
+#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)]
+pub enum GlobalDef {
+    I32(i32),
+    I64(i64),
+    F32(f32),
+    F64(f64),
+}
+
+impl GlobalDef {
+    /// The initial value, stored in the `GlobalValue` union field that matches the variant.
+    pub fn init_val(&self) -> GlobalValue {
+        match *self {
+            GlobalDef::I32(i_32) => GlobalValue { i_32 },
+            GlobalDef::I64(i_64) => GlobalValue { i_64 },
+            GlobalDef::F32(f_32) => GlobalValue { f_32 },
+            GlobalDef::F64(f_64) => GlobalValue { f_64 },
+        }
+    }
+}
+
+/// Untagged storage for a global's value: one of the four primitive types below.
+///
+/// Being a `union`, it does not record which field was written; reading any field requires
+/// `unsafe`, and the reader must know which field is the valid one.
+#[derive(Copy, Clone)]
+pub union GlobalValue {
+ pub i_32: i32,
+ pub i_64: i64,
+ pub f_32: f32,
+ pub f_64: f64,
+}
+
+impl std::fmt::Debug for GlobalValue {
+    /// Print the stored bits reinterpreted as every one of the union's fields.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Every field of GlobalValue is a primitive, so viewing the stored bits as an i64, f64,
+        // or any other field is fine representation-wise. It may nevertheless be UB: when the
+        // union was initialized through a 32-bit field, reading a 64-bit field (as done below)
+        // reads uninitialized memory.
+        //
+        // In short, avoid using `<GlobalValue as Debug>`::fmt, please.
+        let (as_i32, as_i64, as_f32, as_f64) =
+            unsafe { (self.i_32, self.i_64, self.f_32, self.f_64) };
+
+        writeln!(f, "GlobalValue {{")?;
+        writeln!(f, " i_32: {},", as_i32)?;
+        writeln!(f, " i_64: {},", as_i64)?;
+        writeln!(f, " f_32: {},", as_f32)?;
+        writeln!(f, " f_64: {},", as_f64)?;
+        writeln!(f, "}}")
+    }
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// The owned-string counterpart of [`GlobalSpec`](../struct.GlobalSpec.html).
+///
+/// Use this type to build up a value that will then be serialized.
+pub struct OwnedGlobalSpec {
+    global: OwnedGlobal,
+    export_names: Vec<String>,
+}
+
+impl OwnedGlobalSpec {
+    /// Create a spec from an owned global and the names it is exported under.
+    pub fn new(global: OwnedGlobal, export_names: Vec<String>) -> Self {
+        OwnedGlobalSpec {
+            global,
+            export_names,
+        }
+    }
+
+    /// Create a spec for a locally defined `I64` global with the given initial value.
+    pub fn new_def(init_val: i64, export_names: Vec<String>) -> Self {
+        let global = OwnedGlobal::Def(GlobalDef::I64(init_val));
+        Self::new(global, export_names)
+    }
+
+    /// Create a spec for a global imported from `field` of the module named `module`.
+    pub fn new_import(module: String, field: String, export_names: Vec<String>) -> Self {
+        let global = OwnedGlobal::Import { module, field };
+        Self::new(global, export_names)
+    }
+
+    /// Create a [`GlobalSpec`](../struct.GlobalSpec.html) that borrows the strings held by this
+    /// `OwnedGlobalSpec`.
+    pub fn to_ref<'a>(&'a self) -> GlobalSpec<'a> {
+        let global = self.global.to_ref();
+        let export_names = self.export_names.iter().map(String::as_str).collect();
+        GlobalSpec::new(global, export_names)
+    }
+}
+
+/// A variant of [`Global`](../enum.Global.html) with owned strings throughout.
+///
+/// This type is useful when directly building up a value to be serialized.
+pub enum OwnedGlobal {
+    /// A global defined in this module, carrying its typed initial value.
+    Def(GlobalDef),
+    /// A global imported from the field `field` of the module named `module`.
+    Import { module: String, field: String },
+}
+
+impl OwnedGlobal {
+    /// Create a [`Global`](../enum.Global.html) backed by the values in this `OwnedGlobal`.
+    pub fn to_ref<'a>(&'a self) -> Global<'a> {
+        match self {
+            // `GlobalDef` is `Copy`, so a plain copy replaces the explicit `clone()`.
+            OwnedGlobal::Def(def) => Global::Def(*def),
+            OwnedGlobal::Import { module, field } => Global::Import {
+                module: module.as_str(),
+                field: field.as_str(),
+            },
+        }
+    }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/lib.rs b/third_party/rust/lucet-module-wasmsbx/src/lib.rs
new file mode 100644
index 0000000000..9582113c5d
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/lib.rs
@@ -0,0 +1,44 @@
+//! Common types for representing Lucet modules.
+//!
+//! These types are used both in `lucetc` and `lucet-runtime`, with values serialized in
+//! [`bincode`](https://github.com/TyOverby/bincode) format to the compiled Lucet modules.
+
+#![deny(bare_trait_objects)]
+
+pub mod bindings;
+mod error;
+mod functions;
+mod globals;
+mod linear_memory;
+mod module;
+mod module_data;
+mod runtime;
+mod signature;
+mod tables;
+mod traps;
+mod types;
+
+pub use crate::error::Error;
+pub use crate::functions::{
+ ExportFunction, FunctionHandle, FunctionIndex, FunctionMetadata, FunctionPointer, FunctionSpec,
+ ImportFunction, UniqueSignatureIndex,
+};
+pub use crate::globals::{Global, GlobalDef, GlobalSpec, GlobalValue};
+pub use crate::linear_memory::{HeapSpec, LinearMemorySpec, SparseData};
+pub use crate::module::{Module, SerializedModule, LUCET_MODULE_SYM};
+pub use crate::module_data::{ModuleData, ModuleFeatures, MODULE_DATA_SYM};
+pub use crate::runtime::InstanceRuntimeData;
+pub use crate::signature::ModuleSignature;
+#[cfg(feature = "signature_checking")]
+pub use crate::signature::PublicKey;
+pub use crate::tables::TableElement;
+pub use crate::traps::{TrapCode, TrapManifest, TrapSite};
+pub use crate::types::{Signature, ValueType};
+
+/// Owned variants of the module data types, useful for serialization and testing.
+///
+/// Each type re-exported here owns its strings/bytes; the borrowed counterparts are exported from
+/// the crate root.
+pub mod owned {
+ pub use crate::functions::{OwnedExportFunction, OwnedFunctionMetadata, OwnedImportFunction};
+ pub use crate::globals::OwnedGlobalSpec;
+ pub use crate::linear_memory::{OwnedLinearMemorySpec, OwnedSparseData};
+ pub use crate::module_data::OwnedModuleData;
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/linear_memory.rs b/third_party/rust/lucet-module-wasmsbx/src/linear_memory.rs
new file mode 100644
index 0000000000..d2107c421a
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/linear_memory.rs
@@ -0,0 +1,191 @@
+use crate::Error;
+use serde::{Deserialize, Serialize};
+
+/// Specification of the linear memory of a module
+///
+/// The lifetime parameter is that of the page data borrowed by `initializer`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LinearMemorySpec<'a> {
+ /// Specification of the heap used to implement the linear memory
+ pub heap: HeapSpec,
+ /// Initialization values for linear memory
+ #[serde(borrow)]
+ pub initializer: SparseData<'a>,
+}
+
+/// Specification of the linear memory of a module, owning its initializer data.
+///
+/// This mirrors [`LinearMemorySpec`](../struct.LinearMemorySpec.html) but holds an
+/// `OwnedSparseData` as the initializer, which makes it convenient for building up a value that
+/// will then be serialized.
+pub struct OwnedLinearMemorySpec {
+    /// Specification of the heap used to implement the linear memory
+    pub heap: HeapSpec,
+    /// Initialization values for linear memory
+    pub initializer: OwnedSparseData,
+}
+
+impl OwnedLinearMemorySpec {
+    /// Create a `LinearMemorySpec` with a copy of the heap spec and an initializer that borrows
+    /// the pages held by this value.
+    pub fn to_ref<'a>(&'a self) -> LinearMemorySpec<'a> {
+        let heap = self.heap.clone();
+        let initializer = self.initializer.to_ref();
+        LinearMemorySpec { heap, initializer }
+    }
+}
+
+/// Specifications about the heap of a Lucet module.
+///
+/// The derived `Default` yields all sizes zero and no maximum, the same value as
+/// `HeapSpec::empty()`.
+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct HeapSpec {
+ /// Total bytes of memory for the heap to possibly expand into, as configured for Cranelift
+ /// codegen.
+ ///
+ /// All of this memory is addressable. Only some part of it is accessible - from 0 to the
+ /// initial size, guaranteed, and up to the `max_size`. This size allows Cranelift to elide
+ /// checks of the *base pointer*. At the moment that just means checking if it is greater than
+ /// 4gb, in which case it can elide the base pointer check completely. In the future, Cranelift
+ /// could use a solver to elide more base pointer checks if it can prove the calculation will
+ /// always be less than this bound.
+ ///
+ /// Specified in bytes, and must be evenly divisible by the host page size (4K).
+ pub reserved_size: u64,
+
+ /// Total bytes of memory *after* the reserved area, as configured for Cranelift codegen.
+ ///
+ /// All of this memory is addressable, but it is never accessible - it is guaranteed to trap if
+ /// an access happens in this region. This size allows Cranelift to use *common subexpression
+ /// elimination* to reduce checks of the *sum of base pointer and offset* (where the offset is
+ /// always rounded up to a multiple of the guard size, to be friendly to CSE).
+ ///
+ /// Specified in bytes, and must be evenly divisible by the host page size (4K).
+ pub guard_size: u64,
+
+ /// Total bytes of memory for the WebAssembly program's linear memory upon initialization.
+ ///
+ /// Specified in bytes, must be evenly divisible by the WebAssembly page size (64K), and must be
+ /// less than or equal to `reserved_size`.
+ pub initial_size: u64,
+
+ /// Maximum bytes of memory for the WebAssembly program's linear memory at any time.
+ ///
+ /// This is not necessarily the same as `reserved_size` - we want to be able to tune the check
+ /// bound there separately than the declaration of a max size in the client program.
+ ///
+ /// The program may optionally define this value. If it does, it must be less than the
+ /// `reserved_size`. If it does not, the max size is left up to the runtime, and is allowed to
+ /// be less than `reserved_size`.
+ pub max_size: Option<u64>,
+}
+
+impl HeapSpec {
+    /// Assemble a `HeapSpec` from its four fields, in declaration order.
+    pub fn new(
+        reserved_size: u64,
+        guard_size: u64,
+        initial_size: u64,
+        max_size: Option<u64>,
+    ) -> Self {
+        HeapSpec {
+            reserved_size,
+            guard_size,
+            initial_size,
+            max_size,
+        }
+    }
+
+    /// A heap spec with every size zero and no maximum.
+    ///
+    /// Some very small test programs don't specify a memory import or definition.
+    pub fn empty() -> Self {
+        Self::new(0, 0, 0, None)
+    }
+}
+
+/// A sparse representation of a Lucet module's initial heap.
+///
+/// The lifetime parameter exists to support zero-copy deserialization for the `&[u8]` slices
+/// representing non-zero pages. For a variant with owned `Vec<u8>` pages, see
+/// [`OwnedSparseData`](owned/struct.OwnedSparseData.html).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SparseData<'a> {
+    /// Indices into the vector correspond to the offset, in host page (4k) increments, from the
+    /// base of the instance heap.
+    ///
+    /// If the option at a given index is None, the page is initialized as zeros. Otherwise,
+    /// the contents of the page are given as a slice of exactly 4k bytes.
+    ///
+    /// The deserializer of this data structure does not make sure the 4k invariant holds,
+    /// but the constructor on the serializer side does.
+    #[serde(borrow)]
+    pages: Vec<Option<&'a [u8]>>,
+}
+
+impl<'a> SparseData<'a> {
+    /// Create a new `SparseData` from its constituent pages.
+    ///
+    /// Every `Some` entry of `pages` must be a slice of exactly the host page size (4096 bytes);
+    /// if any is not, `Error::IncorrectPageSize` is returned. `None` entries stand for empty
+    /// pages, which will be zeroed by the runtime.
+    pub fn new(pages: Vec<Option<&'a [u8]>>) -> Result<Self, Error> {
+        let has_bad_page = pages.iter().flatten().any(|contents| contents.len() != 4096);
+        if has_bad_page {
+            Err(Error::IncorrectPageSize)
+        } else {
+            Ok(Self { pages })
+        }
+    }
+
+    /// All pages, indexed by page offset from the base of the heap.
+    pub fn pages(&self) -> &[Option<&'a [u8]>] {
+        self.pages.as_slice()
+    }
+
+    /// The page at `offset`; `None` both for an all-zero page and for an offset past the last
+    /// page.
+    pub fn get_page(&self, offset: usize) -> &Option<&'a [u8]> {
+        match self.pages.get(offset) {
+            Some(page) => page,
+            None => &None,
+        }
+    }
+
+    /// Number of pages, counting `None` entries.
+    pub fn len(&self) -> usize {
+        self.pages.len()
+    }
+}
+
+/// A sparse representation of a Lucet module's initial heap.
+///
+/// This is a version of [`SparseData`](../struct.SparseData.html) with owned `Vec<u8>`s
+/// representing pages. This type is useful when directly building up a value to be serialized.
+pub struct OwnedSparseData {
+    pages: Vec<Option<Vec<u8>>>,
+}
+
+impl OwnedSparseData {
+    /// Create a new `OwnedSparseData` from its constituent pages.
+    ///
+    /// Entries in the `pages` argument which are `Some` must contain a vector of exactly the host
+    /// page size (4096), otherwise this function returns `Error::IncorrectPageSize`. Entries which
+    /// are `None` are interpreted as empty pages, which will be zeroed by the runtime.
+    pub fn new(pages: Vec<Option<Vec<u8>>>) -> Result<Self, Error> {
+        if pages.iter().flatten().any(|contents| contents.len() != 4096) {
+            return Err(Error::IncorrectPageSize);
+        }
+        Ok(Self { pages })
+    }
+
+    /// Create a [`SparseData`](../struct.SparseData.html) backed by the values in this
+    /// `OwnedSparseData`.
+    ///
+    /// # Panics
+    ///
+    /// Only if a stored page is not 4096 bytes long, which `new` rules out.
+    pub fn to_ref<'a>(&'a self) -> SparseData<'a> {
+        let pages = self
+            .pages
+            .iter()
+            .map(|page| page.as_ref().map(Vec::as_slice))
+            .collect();
+        SparseData::new(pages)
+            .expect("SparseData invariant enforced by OwnedSparseData constructor")
+    }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/module.rs b/third_party/rust/lucet-module-wasmsbx/src/module.rs
new file mode 100644
index 0000000000..793a943822
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/module.rs
@@ -0,0 +1,27 @@
+use crate::functions::FunctionSpec;
+use crate::module_data::ModuleData;
+use crate::tables::TableElement;
+
+/// Symbol name `"lucet_module"`.
+// NOTE(review): presumably the symbol under which a compiled object exposes its
+// `SerializedModule` - confirm against the compiler/loader that use this constant.
+pub const LUCET_MODULE_SYM: &str = "lucet_module";
+
+/// Module is the exposed structure that contains all the data backing a Lucet-compiled object.
+///
+/// Everything here is borrowed for `'a`: the deserialized module data, the tables (a slice of
+/// slices of `TableElement`), and the function manifest (one `FunctionSpec` per entry).
+#[derive(Debug)]
+pub struct Module<'a> {
+ pub module_data: ModuleData<'a>,
+ pub tables: &'a [&'a [TableElement]],
+ pub function_manifest: &'a [FunctionSpec],
+}
+
+/// SerializedModule is a serialization-friendly form of Module, in that the `module_data_*` fields
+/// here refer to a serialized `ModuleData`, while `tables_*` and `function_manifest_*` refer to
+/// the actual tables and function manifest written in the binary.
+///
+/// Each `*_ptr`/`*_len` pair is stored as two raw `u64`s, and `#[repr(C)]` fixes the field order
+/// and layout.
+#[repr(C)]
+#[derive(Debug)]
+pub struct SerializedModule {
+ pub module_data_ptr: u64,
+ pub module_data_len: u64,
+ pub tables_ptr: u64,
+ pub tables_len: u64,
+ pub function_manifest_ptr: u64,
+ pub function_manifest_len: u64,
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/module_data.rs b/third_party/rust/lucet-module-wasmsbx/src/module_data.rs
new file mode 100644
index 0000000000..e55c33a186
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/module_data.rs
@@ -0,0 +1,303 @@
+use crate::{
+ functions::{
+ ExportFunction, FunctionIndex, FunctionMetadata, ImportFunction, OwnedFunctionMetadata,
+ },
+ globals::GlobalSpec,
+ linear_memory::{HeapSpec, LinearMemorySpec, SparseData},
+ types::Signature,
+ Error,
+};
+#[cfg(feature = "signature_checking")]
+use minisign::SignatureBones;
+use serde::{Deserialize, Serialize};
+
+pub const MODULE_DATA_SYM: &str = "lucet_module_data"; // name of the object-file symbol whose data is the serialized `ModuleData`
+
+/// The metadata (and some data) for a Lucet module.
+///
+/// The lifetime parameter exists to support zero-copy deserialization for the `&str` and `&[u8]`
+/// fields at the leaves of the structure. For a variant with owned types at the leaves, see
+/// [`OwnedModuleData`](owned/struct.OwnedModuleData.html).
+///
+/// The goal is for this structure to eventually include everything except the code for the guest
+/// functions themselves.
+#[derive(Debug, Serialize, Deserialize)] // serialized with bincode (positional), so keep the field order stable
+pub struct ModuleData<'a> {
+ #[serde(borrow)]
+ linear_memory: Option<LinearMemorySpec<'a>>, // `None` makes `heap_spec()` and `sparse_data()` return `None`
+ #[serde(borrow)]
+ globals_spec: Vec<GlobalSpec<'a>>, // exposed through `globals_spec()`
+ #[serde(borrow)]
+ function_info: Vec<FunctionMetadata<'a>>, // indexed by `FunctionIndex` in `get_signature`
+ #[serde(borrow)]
+ import_functions: Vec<ImportFunction<'a>>, // exposed through `import_functions()`
+ #[serde(borrow)]
+ export_functions: Vec<ExportFunction<'a>>, // searched by name in `get_export_func_id`
+ signatures: Vec<Signature>, // indexed by the `signature` index stored in each `function_info` entry
+ module_signature: Vec<u8>, // zero-filled placeholder (`signature_checking`) or empty; overwritten by `patch_module_signature`
+ features: ModuleFeatures, // exposed through `features()`
+}
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct ModuleFeatures { // one boolean per feature; NOTE(review): names look like x86 ISA extensions, presumably those the module was compiled to use — confirm
+ pub sse3: bool,
+ pub ssse3: bool,
+ pub sse41: bool,
+ pub sse42: bool,
+ pub avx: bool,
+ pub bmi1: bool,
+ pub bmi2: bool,
+ pub lzcnt: bool,
+ pub popcnt: bool,
+ _hidden: (), // private zero-sized field: blocks struct-literal construction outside this module
+}
+
+impl ModuleFeatures {
+    /// Returns a `ModuleFeatures` in which every feature flag is `false`.
+    pub fn none() -> Self {
+        const OFF: bool = false;
+        ModuleFeatures {
+            sse3: OFF,
+            ssse3: OFF,
+            sse41: OFF,
+            sse42: OFF,
+            avx: OFF,
+            bmi1: OFF,
+            bmi2: OFF,
+            lzcnt: OFF,
+            popcnt: OFF,
+            _hidden: (),
+        }
+    }
+}
+
+impl<'a> ModuleData<'a> {
+ /// Build a `ModuleData` from its parts.
+ ///
+ /// The module signature is not an argument. With the `signature_checking` feature it starts
+ /// out as a zero-filled buffer of `SignatureBones::BYTES` bytes; without the feature it is
+ /// empty.
+ pub fn new(
+     linear_memory: Option<LinearMemorySpec<'a>>,
+     globals_spec: Vec<GlobalSpec<'a>>,
+     function_info: Vec<FunctionMetadata<'a>>,
+     import_functions: Vec<ImportFunction<'a>>,
+     export_functions: Vec<ExportFunction<'a>>,
+     signatures: Vec<Signature>,
+     features: ModuleFeatures,
+ ) -> Self {
+     // Only the initial signature buffer depends on the feature, so select just that.
+     #[cfg(feature = "signature_checking")]
+     let module_signature: Vec<u8> = vec![0u8; SignatureBones::BYTES];
+     #[cfg(not(feature = "signature_checking"))]
+     let module_signature: Vec<u8> = Vec::new();
+
+     Self {
+         linear_memory,
+         globals_spec,
+         function_info,
+         import_functions,
+         export_functions,
+         signatures,
+         module_signature,
+         features,
+     }
+ }
+
+ /// Heap specification of the linear memory, or `None` when there is no linear memory.
+ pub fn heap_spec(&self) -> Option<&HeapSpec> {
+     self.linear_memory.as_ref().map(|memory| &memory.heap)
+ }
+
+ /// Initializer data of the linear memory, or `None` when there is no linear memory.
+ pub fn sparse_data(&self) -> Option<&SparseData<'a>> {
+     self.linear_memory
+         .as_ref()
+         .map(|memory| &memory.initializer)
+ }
+
+ /// All global specifications stored in this module data.
+ pub fn globals_spec(&self) -> &[GlobalSpec<'a>] {
+     self.globals_spec.as_slice()
+ }
+
+ /// Metadata for every function, in `FunctionIndex` order (see `get_signature`).
+ pub fn function_info(&self) -> &[FunctionMetadata<'a>] {
+     self.function_info.as_slice()
+ }
+
+ /// All imported functions stored in this module data.
+ pub fn import_functions(&self) -> &[ImportFunction<'_>] {
+     self.import_functions.as_slice()
+ }
+
+ /// All exported functions stored in this module data.
+ pub fn export_functions(&self) -> &[ExportFunction<'_>] {
+     self.export_functions.as_slice()
+ }
+
+ /// Look up the signature of the function identified by `fn_id`.
+ ///
+ /// `fn_id` is an index over all functions in the module. This is a different index space
+ /// than the one used by `get_func_from_idx`, which indexes a table of function elements.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `fn_id`, or the signature index recorded for that function, is out of bounds.
+ pub fn get_signature(&self, fn_id: FunctionIndex) -> &Signature {
+     let function = &self.function_info[fn_id.as_u32() as usize];
+     let signature_slot = function.signature.as_u32() as usize;
+     &self.signatures[signature_slot]
+ }
+
+ /// Find the index of the first exported function that lists `name` among its names.
+ ///
+ /// Returns `None` when no export carries that name.
+ pub fn get_export_func_id(&self, name: &str) -> Option<FunctionIndex> {
+     for export in self.export_functions.iter() {
+         if export.names.contains(&name) {
+             return Some(export.fn_idx);
+         }
+     }
+     None
+ }
+
+ /// All function signatures referenced by the entries of `function_info`.
+ pub fn signatures(&self) -> &[Signature] {
+     self.signatures.as_slice()
+ }
+
+ /// Raw bytes of the module signature stored in this module data.
+ pub fn get_module_signature(&self) -> &[u8] {
+     self.module_signature.as_slice()
+ }
+
+ /// The feature flags recorded for this module.
+ pub fn features(&self) -> &ModuleFeatures {
+     let flags: &ModuleFeatures = &self.features;
+     flags
+ }
+
+ /// Return a copy of the serialized module data `module_data_bin` in which the stored
+ /// signature bytes are replaced by `module_signature`.
+ ///
+ /// # Errors
+ ///
+ /// Fails if `module_data_bin` cannot be deserialized or the patched value cannot be
+ /// serialized again.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `module_signature` is not exactly `SignatureBones::BYTES` long, if the signature
+ /// stored in `module_data_bin` has a different length, or if the patched serialization is not
+ /// the same size as the input.
+ #[cfg(feature = "signature_checking")]
+ pub fn patch_module_signature(
+     module_data_bin: &'a [u8],
+     module_signature: &[u8],
+ ) -> Result<Vec<u8>, Error> {
+     assert_eq!(module_signature.len(), SignatureBones::BYTES);
+     let mut parsed = Self::deserialize(module_data_bin)?;
+     parsed.module_signature.copy_from_slice(module_signature);
+     let patched = parsed.serialize()?;
+     // The result replaces the original bytes in place, so the size must not change.
+     assert_eq!(patched.len(), module_data_bin.len());
+     Ok(patched)
+ }
+
+ /// Return a copy of the serialized module data `module_data_bin` with its signature bytes
+ /// reset to zeros.
+ ///
+ /// See `patch_module_signature` for the error and panic conditions.
+ #[cfg(feature = "signature_checking")]
+ pub fn clear_module_signature(module_data_bin: &'a [u8]) -> Result<Vec<u8>, Error> {
+     let zeroed_signature = vec![0u8; SignatureBones::BYTES];
+     Self::patch_module_signature(module_data_bin, zeroed_signature.as_slice())
+ }
+
+ /// Serialize to [`bincode`](https://github.com/TyOverby/bincode).
+ ///
+ /// A bincode failure is reported as `Error::SerializationError`.
+ pub fn serialize(&self) -> Result<Vec<u8>, Error> {
+     match bincode::serialize(self) {
+         Ok(encoded) => Ok(encoded),
+         Err(cause) => Err(Error::SerializationError(cause)),
+     }
+ }
+
+ /// Deserialize from [`bincode`](https://github.com/TyOverby/bincode).
+ ///
+ /// The returned value borrows from `buf`. A bincode failure is reported as
+ /// `Error::DeserializationError`.
+ pub fn deserialize(buf: &'a [u8]) -> Result<ModuleData<'a>, Error> {
+     match bincode::deserialize(buf) {
+         Ok(decoded) => Ok(decoded),
+         Err(cause) => Err(Error::DeserializationError(cause)),
+     }
+ }
+}
+
+use crate::{
+ functions::{OwnedExportFunction, OwnedImportFunction},
+ globals::OwnedGlobalSpec,
+ linear_memory::{OwnedLinearMemorySpec, OwnedSparseData},
+};
+
+/// The metadata (and some data) for a Lucet module.
+///
+/// This is a version of [`ModuleData`](../struct.ModuleData.html) with owned types throughout,
+/// rather than references to support zero-copy deserialization. This type is useful when directly
+/// building up a value to be serialized.
+pub struct OwnedModuleData {
+ linear_memory: Option<OwnedLinearMemorySpec>, // set or replaced by `with_heap_spec`
+ globals_spec: Vec<OwnedGlobalSpec>, // each entry is borrowed via `to_ref()` when building a `ModuleData`
+ function_info: Vec<OwnedFunctionMetadata>, // each entry is borrowed via `to_ref()` when building a `ModuleData`
+ imports: Vec<OwnedImportFunction>, // passed as `import_functions` to `ModuleData::new` by `to_ref`
+ exports: Vec<OwnedExportFunction>, // passed as `export_functions` to `ModuleData::new` by `to_ref`
+ signatures: Vec<Signature>, // cloned into the `ModuleData` produced by `to_ref`
+ features: ModuleFeatures, // copied into the `ModuleData` produced by `to_ref`
+}
+
+impl OwnedModuleData {
+ /// Assemble an `OwnedModuleData` from its parts; every argument is stored as given.
+ pub fn new(
+     linear_memory: Option<OwnedLinearMemorySpec>,
+     globals_spec: Vec<OwnedGlobalSpec>,
+     function_info: Vec<OwnedFunctionMetadata>,
+     imports: Vec<OwnedImportFunction>,
+     exports: Vec<OwnedExportFunction>,
+     signatures: Vec<Signature>,
+     features: ModuleFeatures,
+ ) -> Self {
+     OwnedModuleData {
+         linear_memory: linear_memory,
+         globals_spec: globals_spec,
+         function_info: function_info,
+         imports: imports,
+         exports: exports,
+         signatures: signatures,
+         features: features,
+     }
+ }
+
+ /// Build a [`ModuleData`](../struct.ModuleData.html) that borrows from this
+ /// `OwnedModuleData`.
+ ///
+ /// Each owned component is converted with its own `to_ref`; the signatures are cloned and
+ /// the feature flags copied.
+ pub fn to_ref<'a>(&'a self) -> ModuleData<'a> {
+     let linear_memory = self.linear_memory.as_ref().map(|owned| owned.to_ref());
+     let globals_spec = self
+         .globals_spec
+         .iter()
+         .map(|global| global.to_ref())
+         .collect();
+     let function_info = self
+         .function_info
+         .iter()
+         .map(|function| function.to_ref())
+         .collect();
+     let import_functions = self.imports.iter().map(|import| import.to_ref()).collect();
+     let export_functions = self.exports.iter().map(|export| export.to_ref()).collect();
+     ModuleData::new(
+         linear_memory,
+         globals_spec,
+         function_info,
+         import_functions,
+         export_functions,
+         self.signatures.clone(),
+         // `ModuleFeatures` is `Copy`, so no explicit clone is needed.
+         self.features,
+     )
+ }
+
+ /// An `OwnedModuleData` with no linear memory, empty lists and no features enabled.
+ pub fn empty() -> Self {
+     Self::new(
+         None,
+         Vec::new(),
+         Vec::new(),
+         Vec::new(),
+         Vec::new(),
+         Vec::new(),
+         ModuleFeatures::none(),
+     )
+ }
+
+ /// Return `self` with its heap specification set to `heap_spec`.
+ ///
+ /// An existing linear memory keeps its initializer; if there was no linear memory, one is
+ /// created with an empty initializer.
+ pub fn with_heap_spec(mut self, heap_spec: HeapSpec) -> Self {
+     match self.linear_memory {
+         Some(ref mut existing) => existing.heap = heap_spec,
+         None => {
+             self.linear_memory = Some(OwnedLinearMemorySpec {
+                 heap: heap_spec,
+                 // With no pages there is nothing for the constructor to reject.
+                 initializer: OwnedSparseData::new(vec![]).unwrap(),
+             });
+         }
+     }
+     self
+ }
+}
+
+/// The default `OwnedModuleData` is the empty one.
+impl Default for OwnedModuleData {
+    fn default() -> Self {
+        Self::empty()
+    }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/runtime.rs b/third_party/rust/lucet-module-wasmsbx/src/runtime.rs
new file mode 100644
index 0000000000..864344dc9b
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/runtime.rs
@@ -0,0 +1,8 @@
+/// This struct describes the handful of fields that Lucet-compiled programs may directly interact with, but
+/// are provided through VMContext.
+#[repr(C)] // C layout, so field offsets are fixed by declaration order
+#[repr(align(8))]
+pub struct InstanceRuntimeData {
+ pub globals_ptr: *mut i64, // NOTE(review): presumably points at the instance's globals as 64-bit slots — confirm
+ pub instruction_count: u64, // NOTE(review): presumably a running count of executed instructions — confirm
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/signature.rs b/third_party/rust/lucet-module-wasmsbx/src/signature.rs
new file mode 100644
index 0000000000..e1b9a2351f
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/signature.rs
@@ -0,0 +1,209 @@
+#[cfg(feature = "signature_checking")]
+use crate::error::Error::{self, IOError, ModuleSignatureError};
+use crate::module::LUCET_MODULE_SYM;
+use crate::module_data::MODULE_DATA_SYM;
+#[cfg(feature = "signature_checking")]
+use crate::ModuleData;
+use byteorder::{ByteOrder, LittleEndian};
+#[cfg(feature = "signature_checking")]
+pub use minisign::{PublicKey, SecretKey};
+#[cfg(feature = "signature_checking")]
+use minisign::{SignatureBones, SignatureBox};
+use object::*;
+use std::fs::{File, OpenOptions};
+#[cfg(feature = "signature_checking")]
+use std::io::Cursor;
+use std::io::{self, Read, Seek, SeekFrom, Write};
+use std::path::Path;
+
+pub struct ModuleSignature; // namespace type: `verify` and `sign` are only available with the `signature_checking` feature
+
+#[cfg(feature = "signature_checking")]
+impl ModuleSignature {
+ /// Check the object file at `so_path` against the signature stored in `module_data`, using
+ /// the public key `pk`.
+ ///
+ /// The file is read into memory and the signature bytes inside its embedded module data are
+ /// zeroed before verification, because `sign` computes the signature before writing it into
+ /// the file.
+ ///
+ /// # Errors
+ ///
+ /// Returns `ModuleSignatureError` if the stored signature cannot be parsed or does not
+ /// verify, and `IOError` if the file cannot be loaded.
+ pub fn verify<P: AsRef<Path>>(
+     so_path: P,
+     pk: &PublicKey,
+     module_data: &ModuleData,
+ ) -> Result<(), Error> {
+     let signature_bones = SignatureBones::from_bytes(&module_data.get_module_signature())
+         .map_err(ModuleSignatureError)?;
+     let signature_box: SignatureBox = signature_bones.into();
+
+     let mut object = RawModuleAndData::from_file(&so_path).map_err(IOError)?;
+     let unsigned_module_data = ModuleData::clear_module_signature(object.module_data_bin())?;
+     object.patch_module_data(&unsigned_module_data);
+
+     let contents = Cursor::new(&object.obj_bin);
+     minisign::verify(&pk, &signature_box, contents, true, false).map_err(ModuleSignatureError)
+ }
+
+ /// Sign the object file at `path` with the secret key `sk` and store the signature in it.
+ ///
+ /// The signature is computed over the file contents as read, then written in place into the
+ /// module data embedded in the file.
+ ///
+ /// # Errors
+ ///
+ /// Returns `IOError` if the file cannot be loaded or rewritten, and `ModuleSignatureError`
+ /// if signing fails.
+ pub fn sign<P: AsRef<Path>>(path: P, sk: &SecretKey) -> Result<(), Error> {
+     let object = RawModuleAndData::from_file(&path).map_err(IOError)?;
+
+     let contents = Cursor::new(&object.obj_bin);
+     let signature_box =
+         minisign::sign(None, sk, contents, true, None, None).map_err(ModuleSignatureError)?;
+     let signature_bones: SignatureBones = signature_box.into();
+
+     let signed_module_data = ModuleData::patch_module_signature(
+         object.module_data_bin(),
+         &signature_bones.to_bytes(),
+     )?;
+     object
+         .write_patched_module_data(&path, &signed_module_data)
+         .map_err(IOError)
+ }
+}
+
+#[allow(dead_code)] // NOTE(review): presumably needed because the only users in this file are behind `signature_checking` — confirm
+struct SymbolData {
+ offset: usize, // file offset of the symbol's data inside the object file
+ len: usize, // size of the symbol as reported by the symbol table
+}
+
+#[allow(dead_code)] // NOTE(review): presumably needed because the only users in this file are behind `signature_checking` — confirm
+struct RawModuleAndData {
+ pub obj_bin: Vec<u8>, // entire contents of the object file
+ pub module_data_offset: usize, // file offset of the serialized module data inside `obj_bin`
+ pub module_data_len: usize, // length in bytes of the serialized module data
+}
+
+#[allow(dead_code)]
+impl RawModuleAndData {
+ /// Read the object file at `path` and locate the serialized module data inside it.
+ ///
+ /// # Errors
+ ///
+ /// Returns an `io::Error` if the file cannot be read or parsed, if the `LUCET_MODULE_SYM` or
+ /// `MODULE_DATA_SYM` symbol is missing, or if the location recorded for the module data does
+ /// not lie entirely inside the file.
+ pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> {
+     let mut obj_bin: Vec<u8> = Vec::new();
+     File::open(&path)?.read_to_end(&mut obj_bin)?;
+
+     // `ok_or_else` builds the error (and formats its message) only when the symbol is missing.
+     let native_data_symbol_data = Self::symbol_data(&obj_bin, LUCET_MODULE_SYM, true)?
+         .ok_or_else(|| {
+             io::Error::new(
+                 io::ErrorKind::InvalidInput,
+                 format!("`{}` symbol not present", LUCET_MODULE_SYM),
+             )
+         })?;
+
+     // While `module_data` is the first field of the `SerializedModule` that `lucet_module` points
+     // to, it is a virtual address, not a file offset. The translation is somewhat tricky at
+     // the moment, so just look at the corresponding `lucet_module_data` symbol for now.
+     let module_data_symbol_data = Self::symbol_data(&obj_bin, MODULE_DATA_SYM, true)?
+         .ok_or_else(|| {
+             io::Error::new(
+                 io::ErrorKind::InvalidInput,
+                 format!("`{}` symbol not present", MODULE_DATA_SYM),
+             )
+         })?;
+
+     // The length is the `u64` stored 8 bytes into the `lucet_module` symbol. Slice with
+     // checked bounds so a truncated or corrupt file produces an error instead of a panic.
+     let len_field_start = native_data_symbol_data.offset.checked_add(8);
+     let len_field_end = native_data_symbol_data.offset.checked_add(16);
+     let len_field = match (len_field_start, len_field_end) {
+         (Some(start), Some(end)) => obj_bin.get(start..end),
+         _ => None,
+     }
+     .ok_or_else(|| {
+         io::Error::new(
+             io::ErrorKind::InvalidData,
+             format!("`{}` symbol does not fit inside the file", LUCET_MODULE_SYM),
+         )
+     })?;
+     let module_data_len = LittleEndian::read_u64(len_field);
+
+     // Reject a module data range that runs past the end of the file; otherwise the slicing in
+     // `module_data_bin` would panic later.
+     let module_data_offset = module_data_symbol_data.offset;
+     let fits_in_file = (module_data_offset as u64)
+         .checked_add(module_data_len)
+         .map_or(false, |end| end <= obj_bin.len() as u64);
+     if !fits_in_file {
+         return Err(io::Error::new(
+             io::ErrorKind::InvalidData,
+             format!("`{}` data extends past the end of the file", MODULE_DATA_SYM),
+         ));
+     }
+
+     Ok(RawModuleAndData {
+         obj_bin,
+         module_data_offset,
+         // Cannot truncate: the check above bounds the length by `obj_bin.len()`.
+         module_data_len: module_data_len as usize,
+     })
+ }
+
+ /// The serialized module data embedded in the object file.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the recorded offset and length do not lie inside `obj_bin`.
+ pub fn module_data_bin(&self) -> &[u8] {
+     let start = self.module_data_offset;
+     let end = start + self.module_data_len;
+     &self.obj_bin[start..end]
+ }
+
+ /// Mutable view of the serialized module data embedded in the object file.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the recorded offset and length do not lie inside `obj_bin`.
+ pub fn module_data_bin_mut(&mut self) -> &mut [u8] {
+     let start = self.module_data_offset;
+     let end = start + self.module_data_len;
+     &mut self.obj_bin[start..end]
+ }
+
+ /// Overwrite the in-memory copy of the module data with `module_data_bin`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `module_data_bin` is not exactly as long as the existing module data.
+ pub fn patch_module_data(&mut self, module_data_bin: &[u8]) {
+     let target = self.module_data_bin_mut();
+     target.copy_from_slice(module_data_bin);
+ }
+
+ /// Overwrite the module data stored in the file at `path` with `patched_module_data_bin`.
+ ///
+ /// The bytes are written in place at `module_data_offset`; the file must already exist and is
+ /// neither created nor truncated.
+ ///
+ /// # Errors
+ ///
+ /// Returns `InvalidInput` if `patched_module_data_bin` is not exactly `module_data_len` bytes
+ /// long, since a different size would leave stale bytes or overwrite whatever follows the
+ /// module data. Any error from opening, seeking or writing the file is returned as is.
+ pub fn write_patched_module_data<P: AsRef<Path>>(
+     &self,
+     path: P,
+     patched_module_data_bin: &[u8],
+ ) -> Result<(), io::Error> {
+     // Same requirement as the in-memory `patch_module_data`, reported as an error here.
+     if patched_module_data_bin.len() != self.module_data_len {
+         return Err(io::Error::new(
+             io::ErrorKind::InvalidInput,
+             format!(
+                 "patched module data is {} bytes long, expected {}",
+                 patched_module_data_bin.len(),
+                 self.module_data_len
+             ),
+         ));
+     }
+
+     let mut fp = OpenOptions::new()
+         .write(true)
+         .create_new(false)
+         .open(&path)?;
+     fp.seek(SeekFrom::Start(self.module_data_offset as u64))?;
+     fp.write_all(patched_module_data_bin)?;
+     Ok(())
+ }
+
+ // Retrieving the offset of a symbol is not supported by the object crate.
+ // In Mach-O, actual file offsets are encoded, whereas Elf encodes virtual
+ // addresses, requiring extra steps to retrieve the section, its base
+ // address as well as the section offset.
+
+ /// ELF implementation: find the data symbol `symbol_name` and return its file offset and
+ /// size, or `Ok(None)` if no such symbol exists.
+ ///
+ /// ELF symbols carry virtual addresses, so the address is translated to a file offset using
+ /// the address and file offset of the section that contains the symbol. `_mangle` is ignored.
+ ///
+ /// # Errors
+ ///
+ /// Returns `InvalidInput` if `obj_bin` cannot be parsed, and `InvalidData` if the symbol
+ /// refers to a section that does not exist or lies outside the section it claims to be in.
+ #[cfg(all(target_family = "unix", not(target_os = "macos")))]
+ fn symbol_data(
+     obj_bin: &[u8],
+     symbol_name: &str,
+     _mangle: bool,
+ ) -> Result<Option<SymbolData>, io::Error> {
+     let obj = object::ElfFile::parse(obj_bin)
+         .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
+     let symbol_map = obj.symbol_map();
+     for symbol in symbol_map.symbols() {
+         if symbol.kind() != SymbolKind::Data || symbol.name() != Some(symbol_name) {
+             continue;
+         }
+         let section_index = match symbol.section_index() {
+             Some(section_index) => section_index,
+             None => continue,
+         };
+         // Checked lookup and arithmetic: a malformed file must produce an error, not a panic
+         // or a wrapped-around offset.
+         let section = obj
+             .elf()
+             .section_headers
+             .get(section_index.0)
+             .ok_or_else(|| {
+                 io::Error::new(
+                     io::ErrorKind::InvalidData,
+                     format!("`{}` refers to a section that does not exist", symbol_name),
+                 )
+             })?;
+         let file_offset = symbol
+             .address()
+             .checked_sub(section.sh_addr)
+             .and_then(|relative| relative.checked_add(section.sh_offset))
+             .ok_or_else(|| {
+                 io::Error::new(
+                     io::ErrorKind::InvalidData,
+                     format!("`{}` lies outside of its section", symbol_name),
+                 )
+             })?;
+         let offset = file_offset as usize;
+         let len = symbol.size() as usize;
+         return Ok(Some(SymbolData { offset, len }));
+     }
+     Ok(None)
+ }
+
+ /// Mach-O implementation: find the symbol `symbol_name` and return its file offset and size,
+ /// or `Ok(None)` if no such symbol exists.
+ ///
+ /// When `mangle` is true the name is looked up with a leading underscore. Symbols of kind
+ /// `Data` and `Unknown` are both accepted, and the symbol address is used directly as the
+ /// file offset.
+ #[cfg(target_os = "macos")]
+ fn symbol_data(
+     obj_bin: &[u8],
+     symbol_name: &str,
+     mangle: bool,
+ ) -> Result<Option<SymbolData>, io::Error> {
+     let obj = object::File::parse(obj_bin)
+         .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
+     let symbol_map = obj.symbol_map();
+     let mangled_symbol_name = format!("_{}", symbol_name);
+     let wanted: &str = if mangle {
+         mangled_symbol_name.as_str()
+     } else {
+         symbol_name
+     };
+     for symbol in symbol_map.symbols() {
+         let kind = symbol.kind();
+         let is_candidate = kind == SymbolKind::Data || kind == SymbolKind::Unknown;
+         if !is_candidate || symbol.name() != Some(wanted) {
+             continue;
+         }
+         return Ok(Some(SymbolData {
+             offset: symbol.address() as usize,
+             len: symbol.size() as usize,
+         }));
+     }
+     Ok(None)
+ }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/tables.rs b/third_party/rust/lucet-module-wasmsbx/src/tables.rs
new file mode 100644
index 0000000000..b8ecb0d0f3
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/tables.rs
@@ -0,0 +1,14 @@
+use crate::functions::FunctionPointer;
+
+#[repr(C)] // C layout, so field offsets are fixed by declaration order
+#[derive(Clone, Debug)]
+pub struct TableElement {
+ ty: u64, // NOTE(review): presumably identifies the element's function type/signature — confirm
+ func: u64, // function address; turned into a `FunctionPointer` by `function_pointer()`
+}
+
+impl TableElement {
+    /// The function this element refers to, built from the stored `func` value.
+    pub fn function_pointer(&self) -> FunctionPointer {
+        let address = self.func as usize;
+        FunctionPointer::from_usize(address)
+    }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/traps.rs b/third_party/rust/lucet-module-wasmsbx/src/traps.rs
new file mode 100644
index 0000000000..28f01c60ef
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/traps.rs
@@ -0,0 +1,63 @@
+use num_derive::FromPrimitive;
+use num_traits::FromPrimitive;
+
+/// The type of a WebAssembly
+/// [trap](http://webassembly.github.io/spec/core/intro/overview.html#trap).
+#[repr(u32)] // explicit `u32` discriminants; `try_from_u32` maps raw values back to variants
+#[derive(Copy, Clone, Debug, FromPrimitive, PartialEq)]
+pub enum TrapCode {
+ StackOverflow = 0,
+ HeapOutOfBounds = 1,
+ OutOfBounds = 2,
+ IndirectCallToNull = 3,
+ BadSignature = 4,
+ IntegerOverflow = 5,
+ IntegerDivByZero = 6,
+ BadConversionToInteger = 7,
+ Interrupt = 8,
+ TableOutOfBounds = 9,
+ Unreachable = 10,
+}
+
+impl TrapCode {
+    /// Convert a raw `u32` into the `TrapCode` with that discriminant, or `None` if no variant
+    /// has it.
+    pub fn try_from_u32(v: u32) -> Option<TrapCode> {
+        <TrapCode as FromPrimitive>::from_u32(v)
+    }
+}
+
+/// Trap information for an address in a compiled function
+///
+/// To support zero-copy deserialization of trap tables, this
+/// must be repr(C) [to avoid cases where Rust may change the
+/// layout in some future version, mangling the interpretation
+/// of an old TrapSite struct]
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct TrapSite {
+ pub offset: u32, // compared against the `addr` passed to `TrapManifest::lookup_addr`; NOTE(review): presumably relative to the function start — confirm
+ pub code: TrapCode, // the trap reported for this site
+}
+
+/// A collection of trap sites, typically obtained from a
+/// single function (see [`FunctionSpec::traps`])
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct TrapManifest<'a> {
+ pub traps: &'a [TrapSite], // must be sorted by ascending `offset`: `lookup_addr` binary-searches it
+}
+
+impl<'a> TrapManifest<'a> {
+    /// Wrap a slice of trap sites; the slice is borrowed, not copied.
+    pub fn new(traps: &'a [TrapSite]) -> TrapManifest<'_> {
+        TrapManifest { traps: traps }
+    }
+
+    /// Return the trap code recorded for exactly the offset `addr`, if there is one.
+    ///
+    /// The lookup is a binary search on `offset`, so it is only reliable when `traps` is
+    /// sorted by ascending offset.
+    pub fn lookup_addr(&self, addr: u32) -> Option<TrapCode> {
+        self.traps
+            .binary_search_by(|site| site.offset.cmp(&addr))
+            .ok()
+            .map(|found| self.traps[found].code)
+    }
+}
diff --git a/third_party/rust/lucet-module-wasmsbx/src/types.rs b/third_party/rust/lucet-module-wasmsbx/src/types.rs
new file mode 100644
index 0000000000..e2f2fe681c
--- /dev/null
+++ b/third_party/rust/lucet-module-wasmsbx/src/types.rs
@@ -0,0 +1,72 @@
+use serde::{Deserialize, Serialize};
+use std::fmt::{Display, Formatter};
+
+#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
+pub enum ValueType { // value types usable in a `Signature`'s parameters and return type
+ I32,
+ I64,
+ F32,
+ F64,
+}
+
+/// Formats a `ValueType` as its variant name (`I32`, `I64`, `F32` or `F64`).
+impl Display for ValueType {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let name = match *self {
+            ValueType::I32 => "I32",
+            ValueType::I64 => "I64",
+            ValueType::F32 => "F32",
+            ValueType::F64 => "F64",
+        };
+        f.write_str(name)
+    }
+}
+
+/// A signature for a function in a wasm module.
+///
+/// Note that this does not explicitly name VMContext as a parameter! It is assumed that all wasm
+/// functions take VMContext as their first parameter.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+pub struct Signature {
+ pub params: Vec<ValueType>, // parameter types in declaration order (VMContext excluded)
+ // In the future, wasm may permit this to be a Vec of ValueType
+ pub ret_ty: Option<ValueType>, // `None` means no return value; displayed as `()`
+}
+
+/// Formats a `Signature` as `(P1, P2, ...) -> R`, with `()` standing in for a missing return
+/// type.
+impl Display for Signature {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_str("(")?;
+        let mut params = self.params.iter();
+        // The first parameter has no separator; every later one is preceded by ", ".
+        if let Some(first) = params.next() {
+            write!(f, "{}", first)?;
+            for param in params {
+                write!(f, ", {}", param)?;
+            }
+        }
+        f.write_str(") -> ")?;
+        if let Some(ty) = self.ret_ty {
+            write!(f, "{}", ty)
+        } else {
+            f.write_str("()")
+        }
+    }
+}
+
+#[macro_export]
+macro_rules! lucet_signature { // builds a `Signature` from `(ParamTypes, ...) -> ReturnType`, using `ValueType` variant names
+ ((() -> ())) => { // the `() -> ()` signature wrapped in one extra pair of parentheses
+ $crate::Signature {
+ params: vec![],
+ ret_ty: None
+ }
+ };
+ (($($arg_ty:ident),*) -> ()) => { // any number of parameters (including none), no return value
+ $crate::Signature {
+ params: vec![$($crate::ValueType::$arg_ty),*],
+ ret_ty: None,
+ }
+ };
+ (($($arg_ty:ident),*) -> $ret_ty:ident) => { // any number of parameters with a single return type
+ $crate::Signature {
+ params: vec![$($crate::ValueType::$arg_ty),*],
+ ret_ty: Some($crate::ValueType::$ret_ty),
+ }
+ };
+}