diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/lucet-module-wasmsbx | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/lucet-module-wasmsbx')
18 files changed, 1544 insertions, 0 deletions
diff --git a/third_party/rust/lucet-module-wasmsbx/.cargo-checksum.json b/third_party/rust/lucet-module-wasmsbx/.cargo-checksum.json new file mode 100644 index 0000000000..b5656b73bf --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"9049dc7763fd6af5c4d4f9e924f61dfe99e63c03f627c4dc2880b01cc1dfa3fd","src/bindings.rs":"bb0bcfb2e328f139d7d34b95d91ac64008c54fbf2748caf818b4a6af6c3bec14","src/error.rs":"1c938dfd33b790eb97e920785aadc7b4a2796d8e2e806b6083593ccf173d43d1","src/functions.rs":"29f4e4bd91986680d7d06ed6f5ccbb8bc734b52316e5bf9ac40ced45cbc8b149","src/globals.rs":"18841f6f5d6163673de5e9dfd641b1c558e9ad984418cbf94691059a1c4ed2a2","src/lib.rs":"759adce1db8c138294edcc47610f905e89da2e3ba5e62e3cef769ba1556ae174","src/linear_memory.rs":"aa62a57ad0783508465dc43bd22167c33d15efb32e44b9aeba6ff86ea72051ef","src/module.rs":"56f37d2892fe91d19afc49d2f62b562f0f71a21f21aec205b5368c733329d629","src/module_data.rs":"9e08e44ce8aaa90ae60240eb8feb693d5ca36dc42df55147617f5c3b636b9b8b","src/runtime.rs":"0532a55d2649eb6ea486a70c278ccbc243255cdbf5462739c0db6a2e73cb4d1a","src/signature.rs":"c1606419111edec1895762c212c7116a0d39670c8d303b5af0f58295ba15cfc4","src/tables.rs":"efe9aa8ae3602cce8aa093e220d63490b17907be8ed192024939445ff8b8922b","src/traps.rs":"2ff45ff2438f18ca1740182cf9735cbb789125bd57214e5a140b3bcd5295a433","src/types.rs":"d354b3a9f96b768dc7a8885b9fefa2efc1e01bcb66e5028e5f787247d1b2abb2","tests/bindings/bad_bindings.json":"7c5ad85eb0a222985857b6ff35246dccfa2a35de415d57fd712264ca7e078934","tests/bindings/bindings_test.json":"2e3ed6ff5d18da665d5c2fb24e279f009092f21bba6deff221db7a5770a6a3d4","tests/bindings/garbage.json":"f956f64c8c23ecf2aa6226fa7abc5f52eee2fa861e12ceac3bddbdf0602a174b"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/lucet-module-wasmsbx/Cargo.toml b/third_party/rust/lucet-module-wasmsbx/Cargo.toml new file mode 100644 index 0000000000..5dd20ba20a --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "lucet-module-wasmsbx" +version = "0.1.1" +description = "A structured interface for Lucet modules" +homepage = "https://github.com/fastly/lucet" +repository = "https://github.com/fastly/lucet" +license = "Apache-2.0 WITH LLVM-exception" +categories = ["wasm"] +authors = ["Lucet team <lucet@fastly.com>"] +edition = "2018" + +[dependencies] +cranelift-entity = { path = "../cranelift/cranelift-entity", version = "0.41.0" } +failure = "0.1" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +bincode = "1.1.4" +num-derive = "0.3" +num-traits = "0.2" +# minisign = { version = "0.5.11", optional = true } +object = ">=0.12, <0.18" +byteorder = "1.3" + +[features] +# default = ["signature_checking"] +default = [] +signature_checking = [] diff --git a/third_party/rust/lucet-module-wasmsbx/src/bindings.rs b/third_party/rust/lucet-module-wasmsbx/src/bindings.rs new file mode 100644 index 0000000000..3c1dab3855 --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/bindings.rs @@ -0,0 +1,203 @@ +use failure::{format_err, Error}; +use serde_json::{self, Map, Value}; +use std::collections::{hash_map::Entry, HashMap}; +use std::fs; +use std::path::Path; + +#[derive(Debug, Clone)] +pub struct Bindings { + bindings: HashMap<String, HashMap<String, String>>, +} + +impl Bindings { + pub fn new(bindings: HashMap<String, HashMap<String, String>>) -> Bindings { + Self { bindings: bindings } + } + + pub fn env(env: HashMap<String, String>) -> Bindings { + let mut bindings = HashMap::new(); + bindings.insert("env".to_owned(), env); + Self::new(bindings) + } + + pub fn empty() -> Bindings { + Self::new(HashMap::new()) + } + + pub fn from_json(v: &Value) -> 
Result<Bindings, Error> { + match v.as_object() { + Some(modules) => Self::parse_modules_json_obj(modules), + None => Err(format_err!("top level json expected to be object"))?, + } + } + + pub fn from_str(s: &str) -> Result<Bindings, Error> { + let top: Value = serde_json::from_str(s)?; + Ok(Self::from_json(&top)?) + } + + pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Bindings, Error> { + let contents = fs::read_to_string(path.as_ref())?; + Ok(Self::from_str(&contents)?) + } + + pub fn extend(&mut self, other: &Bindings) -> Result<(), Error> { + for (modname, othermodbindings) in other.bindings.iter() { + match self.bindings.entry(modname.clone()) { + Entry::Occupied(mut e) => { + let existing = e.get_mut(); + for (bindname, binding) in othermodbindings { + match existing.entry(bindname.clone()) { + Entry::Vacant(e) => { + e.insert(binding.clone()); + } + Entry::Occupied(e) => { + if binding != e.get() { + Err(format_err!( + "cannot re-bind {} from {} to {}", + e.key(), + binding, + e.get() + ))?; + } + } + } + } + } + Entry::Vacant(e) => { + e.insert(othermodbindings.clone()); + } + } + } + Ok(()) + } + + pub fn translate(&self, module: &str, symbol: &str) -> Result<&str, Error> { + match self.bindings.get(module) { + Some(m) => match m.get(symbol) { + Some(s) => Ok(s), + None => Err(format_err!("Unknown symbol `{}::{}`", module, symbol)), + }, + None => Err(format_err!( + "Unknown module for symbol `{}::{}`", + module, + symbol + )), + } + } + + fn parse_modules_json_obj(m: &Map<String, Value>) -> Result<Self, Error> { + let mut res = HashMap::new(); + for (modulename, values) in m { + match values.as_object() { + Some(methods) => { + let methodmap = Self::parse_methods_json_obj(methods)?; + res.insert(modulename.to_owned(), methodmap); + } + None => Err(format_err!(""))?, + } + } + Ok(Self::new(res)) + } + + fn parse_methods_json_obj(m: &Map<String, Value>) -> Result<HashMap<String, String>, Error> { + let mut res = HashMap::new(); + for (method, i) in m { 
+ match i.as_str() { + Some(importbinding) => { + res.insert(method.to_owned(), importbinding.to_owned()); + } + None => Err(format_err!(""))?, + } + } + Ok(res) + } + + pub fn to_string(&self) -> Result<String, Error> { + let s = serde_json::to_string(&self.to_json())?; + Ok(s) + } + + pub fn to_json(&self) -> Value { + Value::from(self.serialize_modules_json_obj()) + } + + fn serialize_modules_json_obj(&self) -> Map<String, Value> { + let mut m = Map::new(); + for (modulename, values) in self.bindings.iter() { + m.insert( + modulename.to_owned(), + Value::from(Self::serialize_methods_json_obj(values)), + ); + } + m + } + + fn serialize_methods_json_obj(methods: &HashMap<String, String>) -> Map<String, Value> { + let mut m = Map::new(); + for (methodname, symbol) in methods.iter() { + m.insert(methodname.to_owned(), Value::from(symbol.to_owned())); + } + m + } +} + +#[cfg(test)] +mod tests { + fn test_file(f: &str) -> PathBuf { + PathBuf::from(format!("tests/bindings/{}", f)) + } + + use super::Bindings; + use std::collections::HashMap; + use std::path::PathBuf; + + #[test] + fn explicit() { + let mut explicit_map = HashMap::new(); + explicit_map.insert(String::from("hello"), String::from("goodbye")); + let map = Bindings::env(explicit_map); + + let result = map.translate("env", "hello").unwrap(); + assert!(result == "goodbye"); + + let result = map.translate("env", "nonexistent"); + if let Ok(_) = result { + assert!( + false, + "explicit import map returned value for non-existent symbol" + ) + } + } + + #[test] + fn explicit_from_nonexistent_file() { + let fail_map = Bindings::from_file(&test_file("nonexistent_bindings.json")); + assert!( + fail_map.is_err(), + "ImportMap::explicit_from_file did not fail on a non-existent file" + ); + } + + #[test] + fn explicit_from_garbage_file() { + let fail_map = Bindings::from_file(&test_file("garbage.json")); + assert!( + fail_map.is_err(), + "ImportMap::explicit_from_file did not fail on a garbage file" + ); + } + + 
#[test] + fn explicit_from_file() { + let map = Bindings::from_file(&test_file("bindings_test.json")) + .expect("load valid bindings from file"); + let result = map.translate("env", "hello").expect("hello has a binding"); + assert!(result == "json is cool"); + + assert!( + map.translate("env", "nonexistent").is_err(), + "bindings from file returned value for non-existent symbol" + ); + } +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/error.rs b/third_party/rust/lucet-module-wasmsbx/src/error.rs new file mode 100644 index 0000000000..a19ec8a334 --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/error.rs @@ -0,0 +1,17 @@ +use failure::Fail; + +/// Module data (de)serialization errors. +#[derive(Debug, Fail)] +pub enum Error { + #[fail(display = "Sparse data contained a page with length other than 4096")] + IncorrectPageSize, + #[fail(display = "Deserialization error: {}", _0)] + DeserializationError(#[cause] bincode::Error), + #[fail(display = "Serialization error: {}", _0)] + SerializationError(#[cause] bincode::Error), + #[cfg(feature = "signature_checking")] + #[fail(display = "Module signature error: {}", _0)] + ModuleSignatureError(#[cause] minisign::PError), + #[fail(display = "I/O error: {}", _0)] + IOError(#[cause] std::io::Error), +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/functions.rs b/third_party/rust/lucet-module-wasmsbx/src/functions.rs new file mode 100644 index 0000000000..87f0b68d97 --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/functions.rs @@ -0,0 +1,192 @@ +use crate::traps::{TrapManifest, TrapSite}; +use cranelift_entity::entity_impl; +use serde::{Deserialize, Serialize}; + +use std::slice::from_raw_parts; + +/// FunctionIndex is an identifier for a function, imported, exported, or external. 
The space of +/// FunctionIndex is shared for all of these, so `FunctionIndex(N)` may identify exported function +/// #2, `FunctionIndex(N + 1)` may identify an internal function, and `FunctionIndex(N + 2)` may +/// identify an imported function. +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)] +pub struct FunctionIndex(u32); + +impl FunctionIndex { + pub fn from_u32(idx: u32) -> FunctionIndex { + FunctionIndex(idx) + } + pub fn as_u32(&self) -> u32 { + self.0 + } +} + +/// ImportFunction describes an internal function - its internal function index and the name/module +/// pair that function should be found in. +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)] +pub struct ImportFunction<'a> { + pub fn_idx: FunctionIndex, + pub module: &'a str, + pub name: &'a str, +} + +/// ExportFunction describes an exported function - its internal function index and a name that +/// function has been exported under. +#[derive(Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)] +pub struct ExportFunction<'a> { + pub fn_idx: FunctionIndex, + #[serde(borrow)] + pub names: Vec<&'a str>, +} + +pub struct OwnedExportFunction { + pub fn_idx: FunctionIndex, + pub names: Vec<String>, +} + +impl OwnedExportFunction { + pub fn to_ref<'a>(&'a self) -> ExportFunction<'a> { + ExportFunction { + fn_idx: self.fn_idx.clone(), + names: self.names.iter().map(|x| x.as_str()).collect(), + } + } +} + +pub struct OwnedImportFunction { + pub fn_idx: FunctionIndex, + pub module: String, + pub name: String, +} + +impl OwnedImportFunction { + pub fn to_ref<'a>(&'a self) -> ImportFunction<'a> { + ImportFunction { + fn_idx: self.fn_idx.clone(), + module: self.module.as_str(), + name: self.name.as_str(), + } + } +} + +/// UniqueSignatureIndex names a signature after collapsing duplicate signatures to a single +/// identifier, whereas SignatureIndex is directly what the original module specifies, and may +/// specify duplicates of types that are 
structurally equal. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)] +pub struct UniqueSignatureIndex(u32); +entity_impl!(UniqueSignatureIndex); + +/// FunctionPointer serves entirely as a safer way to work with function pointers than as raw u64 +/// or usize values. It also avoids the need to write them as `fn` types, which cannot be freely +/// cast from one to another with `as`. If you need to call a `FunctionPointer`, use `as_usize()` +/// and transmute the resulting usize to a `fn` type with appropriate signature. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)] +pub struct FunctionPointer(usize); + +impl FunctionPointer { + pub fn from_usize(ptr: usize) -> FunctionPointer { + FunctionPointer(ptr) + } + pub fn as_usize(&self) -> usize { + self.0 + } +} + +/// Information about the corresponding function. +/// +/// This is split from but closely related to a [`FunctionSpec`]. The distinction is largely for +/// serialization/deserialization simplicity, as [`FunctionSpec`] contains fields that need +/// cooperation from a loader, with manual layout and serialization as a result. +/// [`FunctionMetadata`] is the remainder of fields that can be automatically +/// serialized/deserialied and are small enough copying isn't a large concern. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FunctionMetadata<'a> { + pub signature: UniqueSignatureIndex, + /// the "name" field is some human-friendly name, not necessarily the same as used to reach + /// this function (through an export, for example), and may not even indicate that a function + /// is exported at all. + /// TODO: at some point when possible, this field ought to be set from the names section of a + /// wasm module. At the moment that information is lost at parse time. 
+ #[serde(borrow)] + pub name: Option<&'a str>, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct OwnedFunctionMetadata { + pub signature: UniqueSignatureIndex, + pub name: Option<String>, +} + +impl OwnedFunctionMetadata { + pub fn to_ref(&self) -> FunctionMetadata<'_> { + FunctionMetadata { + signature: self.signature.clone(), + name: self.name.as_ref().map(|n| n.as_str()), + } + } +} + +pub struct FunctionHandle { + pub ptr: FunctionPointer, + pub id: FunctionIndex, +} + +// The layout of this struct is very tightly coupled to lucetc's `write_function_manifest`! +// +// Specifically, `write_function_manifest` sets up relocations on `code_addr` and `traps_addr`. +// It does not explicitly serialize a correctly formed `FunctionSpec`, because addresses +// for these fields do not exist until the object is loaded in the future. +// +// So `write_function_manifest` has implicit knowledge of the layout of this structure +// (including padding bytes between `code_len` and `traps_addr`) +#[repr(C)] +#[derive(Clone, Debug)] +pub struct FunctionSpec { + code_addr: u64, + code_len: u32, + traps_addr: u64, + traps_len: u64, +} + +impl FunctionSpec { + pub fn new(code_addr: u64, code_len: u32, traps_addr: u64, traps_len: u64) -> Self { + FunctionSpec { + code_addr, + code_len, + traps_addr, + traps_len, + } + } + pub fn ptr(&self) -> FunctionPointer { + FunctionPointer::from_usize(self.code_addr as usize) + } + pub fn code_len(&self) -> u32 { + self.code_len + } + pub fn traps_len(&self) -> u64 { + self.traps_len + } + pub fn contains(&self, addr: u64) -> bool { + addr >= self.code_addr && (addr - self.code_addr) < (self.code_len as u64) + } + pub fn relative_addr(&self, addr: u64) -> Option<u32> { + if let Some(offset) = addr.checked_sub(self.code_addr) { + if offset < (self.code_len as u64) { + // self.code_len is u32, so if the above check succeeded + // offset must implicitly be <= u32::MAX - the following + // conversion will not truncate bits + return 
Some(offset as u32); + } + } + + None + } + pub fn traps(&self) -> Option<TrapManifest<'_>> { + let traps_ptr = self.traps_addr as *const TrapSite; + if !traps_ptr.is_null() { + let traps_slice = unsafe { from_raw_parts(traps_ptr, self.traps_len as usize) }; + Some(TrapManifest::new(traps_slice)) + } else { + None + } + } +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/globals.rs b/third_party/rust/lucet-module-wasmsbx/src/globals.rs new file mode 100644 index 0000000000..801d0c9d8e --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/globals.rs @@ -0,0 +1,164 @@ +use serde::{Deserialize, Serialize}; + +/// A WebAssembly global along with its export specification. +/// +/// The lifetime parameter exists to support zero-copy deserialization for the `&str` fields at the +/// leaves of the structure. For a variant with owned types at the leaves, see +/// [`OwnedGlobalSpec`](owned/struct.OwnedGlobalSpec.html). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct GlobalSpec<'a> { + #[serde(borrow)] + global: Global<'a>, + export_names: Vec<&'a str>, +} + +impl<'a> GlobalSpec<'a> { + pub fn new(global: Global<'a>, export_names: Vec<&'a str>) -> Self { + Self { + global, + export_names, + } + } + + /// Create a new global definition with an initial value and export names. + pub fn new_def(init_val: i64, export_names: Vec<&'a str>) -> Self { + Self::new(Global::Def(GlobalDef::I64(init_val)), export_names) + } + + /// Create a new global import definition with a module and field name, and export names. 
+ pub fn new_import(module: &'a str, field: &'a str, export_names: Vec<&'a str>) -> Self { + Self::new(Global::Import { module, field }, export_names) + } + + pub fn global(&self) -> &Global<'_> { + &self.global + } + + pub fn export_names(&self) -> &[&str] { + &self.export_names + } + + pub fn is_internal(&self) -> bool { + self.export_names.len() == 0 + } +} + +/// A WebAssembly global is either defined locally, or is defined in relation to a field of another +/// WebAssembly module. +/// +/// The lifetime parameter exists to support zero-copy deserialization for the `&str` fields at the +/// leaves of the structure. For a variant with owned types at the leaves, see +/// [`OwnedGlobal`](owned/struct.OwnedGlobal.html). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum Global<'a> { + Def(GlobalDef), + Import { module: &'a str, field: &'a str }, +} + +/// Definition for a global in this module (not imported). +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] +pub enum GlobalDef { + I32(i32), + I64(i64), + F32(f32), + F64(f64), +} + +impl GlobalDef { + pub fn init_val(&self) -> GlobalValue { + match self { + GlobalDef::I32(i) => GlobalValue { i_32: *i }, + GlobalDef::I64(i) => GlobalValue { i_64: *i }, + GlobalDef::F32(f) => GlobalValue { f_32: *f }, + GlobalDef::F64(f) => GlobalValue { f_64: *f }, + } + } +} + +#[derive(Copy, Clone)] +pub union GlobalValue { + pub i_32: i32, + pub i_64: i64, + pub f_32: f32, + pub f_64: f64, +} + +impl std::fmt::Debug for GlobalValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Because GlobalValue is a union of primitives, there won't be anything wrong, + // representation-wise, with printing the underlying data as an i64, f64, or + // another primitive. 
This still may incur UB by doing something like trying to + // read data from an uninitialized memory, if the union is initialized with a + // 32-bit value, and then read as a 64-bit value (as this code is about to do). + // + // In short, avoid using `<GlobalValue as Debug>`::fmt, please. + + writeln!(f, "GlobalValue {{")?; + unsafe { + writeln!(f, " i_32: {},", self.i_32)?; + writeln!(f, " i_64: {},", self.i_64)?; + writeln!(f, " f_32: {},", self.f_32)?; + writeln!(f, " f_64: {},", self.f_64)?; + } + writeln!(f, "}}") + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////// + +/// A variant of [`GlobalSpec`](../struct.GlobalSpec.html) with owned strings throughout. +/// +/// This type is useful when directly building up a value to be serialized. +pub struct OwnedGlobalSpec { + global: OwnedGlobal, + export_names: Vec<String>, +} + +impl OwnedGlobalSpec { + pub fn new(global: OwnedGlobal, export_names: Vec<String>) -> Self { + Self { + global, + export_names, + } + } + + /// Create a new global definition with an initial value and export names. + pub fn new_def(init_val: i64, export_names: Vec<String>) -> Self { + Self::new(OwnedGlobal::Def(GlobalDef::I64(init_val)), export_names) + } + + /// Create a new global import definition with a module and field name, and export names. + pub fn new_import(module: String, field: String, export_names: Vec<String>) -> Self { + Self::new(OwnedGlobal::Import { module, field }, export_names) + } + + /// Create a [`GlobalSpec`](../struct.GlobalSpec.html) backed by the values in this + /// `OwnedGlobalSpec`. + pub fn to_ref<'a>(&'a self) -> GlobalSpec<'a> { + GlobalSpec::new( + self.global.to_ref(), + self.export_names.iter().map(|x| x.as_str()).collect(), + ) + } +} + +/// A variant of [`Global`](../struct.Global.html) with owned strings throughout. +/// +/// This type is useful when directly building up a value to be serialized. 
+pub enum OwnedGlobal { + Def(GlobalDef), + Import { module: String, field: String }, +} + +impl OwnedGlobal { + /// Create a [`Global`](../struct.Global.html) backed by the values in this `OwnedGlobal`. + pub fn to_ref<'a>(&'a self) -> Global<'a> { + match self { + OwnedGlobal::Def(def) => Global::Def(def.clone()), + OwnedGlobal::Import { module, field } => Global::Import { + module: module.as_str(), + field: field.as_str(), + }, + } + } +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/lib.rs b/third_party/rust/lucet-module-wasmsbx/src/lib.rs new file mode 100644 index 0000000000..9582113c5d --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/lib.rs @@ -0,0 +1,44 @@ +//! Common types for representing Lucet modules. +//! +//! These types are used both in `lucetc` and `lucet-runtime`, with values serialized in +//! [`bincode`](https://github.com/TyOverby/bincode) format to the compiled Lucet modules. + +#![deny(bare_trait_objects)] + +pub mod bindings; +mod error; +mod functions; +mod globals; +mod linear_memory; +mod module; +mod module_data; +mod runtime; +mod signature; +mod tables; +mod traps; +mod types; + +pub use crate::error::Error; +pub use crate::functions::{ + ExportFunction, FunctionHandle, FunctionIndex, FunctionMetadata, FunctionPointer, FunctionSpec, + ImportFunction, UniqueSignatureIndex, +}; +pub use crate::globals::{Global, GlobalDef, GlobalSpec, GlobalValue}; +pub use crate::linear_memory::{HeapSpec, LinearMemorySpec, SparseData}; +pub use crate::module::{Module, SerializedModule, LUCET_MODULE_SYM}; +pub use crate::module_data::{ModuleData, ModuleFeatures, MODULE_DATA_SYM}; +pub use crate::runtime::InstanceRuntimeData; +pub use crate::signature::ModuleSignature; +#[cfg(feature = "signature_checking")] +pub use crate::signature::PublicKey; +pub use crate::tables::TableElement; +pub use crate::traps::{TrapCode, TrapManifest, TrapSite}; +pub use crate::types::{Signature, ValueType}; + +/// Owned variants of the module data types, 
useful for serialization and testing. +pub mod owned { + pub use crate::functions::{OwnedExportFunction, OwnedFunctionMetadata, OwnedImportFunction}; + pub use crate::globals::OwnedGlobalSpec; + pub use crate::linear_memory::{OwnedLinearMemorySpec, OwnedSparseData}; + pub use crate::module_data::OwnedModuleData; +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/linear_memory.rs b/third_party/rust/lucet-module-wasmsbx/src/linear_memory.rs new file mode 100644 index 0000000000..d2107c421a --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/linear_memory.rs @@ -0,0 +1,191 @@ +use crate::Error; +use serde::{Deserialize, Serialize}; + +/// Specification of the linear memory of a module +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LinearMemorySpec<'a> { + /// Specification of the heap used to implement the linear memory + pub heap: HeapSpec, + /// Initialization values for linear memory + #[serde(borrow)] + pub initializer: SparseData<'a>, +} + +/// Specification of the linear memory of a module +/// +/// This is a version of [`LinearMemorySpec`](../struct.LinearMemorySpec.html) with an +/// `OwnedSparseData` for the initializer. +/// This type is useful when directly building up a value to be serialized. +pub struct OwnedLinearMemorySpec { + /// Specification of the heap used to implement the linear memory + pub heap: HeapSpec, + /// Initialization values for linear memory + pub initializer: OwnedSparseData, +} + +impl OwnedLinearMemorySpec { + pub fn to_ref<'a>(&'a self) -> LinearMemorySpec<'a> { + LinearMemorySpec { + heap: self.heap.clone(), + initializer: self.initializer.to_ref(), + } + } +} + +/// Specifications about the heap of a Lucet module. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct HeapSpec { + /// Total bytes of memory for the heap to possibly expand into, as configured for Cranelift + /// codegen. + /// + /// All of this memory is addressable. 
Only some part of it is accessible - from 0 to the + /// initial size, guaranteed, and up to the `max_size`. This size allows Cranelift to elide + /// checks of the *base pointer*. At the moment that just means checking if it is greater than + /// 4gb, in which case it can elide the base pointer check completely. In the future, Cranelift + /// could use a solver to elide more base pointer checks if it can prove the calculation will + /// always be less than this bound. + /// + /// Specified in bytes, and must be evenly divisible by the host page size (4K). + pub reserved_size: u64, + + /// Total bytes of memory *after* the reserved area, as configured for Cranelift codegen. + /// + /// All of this memory is addressable, but it is never accessible - it is guaranteed to trap if + /// an access happens in this region. This size allows Cranelift to use *common subexpression + /// elimination* to reduce checks of the *sum of base pointer and offset* (where the offset is + /// always rounded up to a multiple of the guard size, to be friendly to CSE). + /// + /// Specified in bytes, and must be evenly divisible by the host page size (4K). + pub guard_size: u64, + + /// Total bytes of memory for the WebAssembly program's linear memory upon initialization. + /// + /// Specified in bytes, must be evenly divisible by the WebAssembly page size (64K), and must be + /// less than or equal to `reserved_size`. + pub initial_size: u64, + + /// Maximum bytes of memory for the WebAssembly program's linear memory at any time. + /// + /// This is not necessarily the same as `reserved_size` - we want to be able to tune the check + /// bound there separately than the declaration of a max size in the client program. + /// + /// The program may optionally define this value. If it does, it must be less than the + /// `reserved_size`. If it does not, the max size is left up to the runtime, and is allowed to + /// be less than `reserved_size`. 
+ pub max_size: Option<u64>, +} + +impl HeapSpec { + pub fn new( + reserved_size: u64, + guard_size: u64, + initial_size: u64, + max_size: Option<u64>, + ) -> Self { + Self { + reserved_size, + guard_size, + initial_size, + max_size, + } + } + + /// Some very small test programs dont specify a memory import or definition. + pub fn empty() -> Self { + Self { + reserved_size: 0, + guard_size: 0, + initial_size: 0, + max_size: None, + } + } +} + +/// A sparse representation of a Lucet module's initial heap. +/// +/// The lifetime parameter exists to support zero-copy deserialization for the `&[u8]` slices +/// representing non-zero pages. For a variant with owned `Vec<u8>` pages, see +/// [`OwnedSparseData`](owned/struct.OwnedSparseData.html). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SparseData<'a> { + /// Indices into the vector correspond to the offset, in host page (4k) increments, from the + /// base of the instance heap. + /// + /// If the option at a given index is None, the page is initialized as zeros. Otherwise, + /// the contents of the page are given as a slice of exactly 4k bytes. + /// + /// The deserializer of this datastructure does not make sure the 4k invariant holds, + /// but the constructor on the serializier side does. + #[serde(borrow)] + pages: Vec<Option<&'a [u8]>>, +} + +impl<'a> SparseData<'a> { + /// Create a new `SparseData` from its constituent pages. + /// + /// Entries in the `pages` argument which are `Some` must contain a slice of exactly the host + /// page size (4096), otherwise this function returns `Error::IncorrectPageSize`. Entries which + /// are `None` are interpreted as empty pages, which will be zeroed by the runtime. 
+ pub fn new(pages: Vec<Option<&'a [u8]>>) -> Result<Self, Error> { + if !pages.iter().all(|page| match page { + Some(contents) => contents.len() == 4096, + None => true, + }) { + return Err(Error::IncorrectPageSize); + } + + Ok(Self { pages }) + } + + pub fn pages(&self) -> &[Option<&'a [u8]>] { + &self.pages + } + + pub fn get_page(&self, offset: usize) -> &Option<&'a [u8]> { + self.pages.get(offset).unwrap_or(&None) + } + + pub fn len(&self) -> usize { + self.pages.len() + } +} + +/// A sparse representation of a Lucet module's initial heap. +/// +/// This is a version of [`SparseData`](../struct.SparseData.html) with owned `Vec<u8>`s +/// representing pages. This type is useful when directly building up a value to be serialized. +pub struct OwnedSparseData { + pages: Vec<Option<Vec<u8>>>, +} + +impl OwnedSparseData { + /// Create a new `OwnedSparseData` from its consitutent pages. + /// + /// Entries in the `pages` argument which are `Some` must contain a vector of exactly the host + /// page size (4096), otherwise this function returns `Error::IncorrectPageSize`. Entries which + /// are `None` are interpreted as empty pages, which will be zeroed by the runtime. + pub fn new(pages: Vec<Option<Vec<u8>>>) -> Result<Self, Error> { + if !pages.iter().all(|page| match page { + Some(contents) => contents.len() == 4096, + None => true, + }) { + return Err(Error::IncorrectPageSize); + } + Ok(Self { pages }) + } + + /// Create a [`SparseData`](../struct.SparseData.html) backed by the values in this + /// `OwnedSparseData`. 
+ pub fn to_ref<'a>(&'a self) -> SparseData<'a> { + SparseData::new( + self.pages + .iter() + .map(|c| match c { + Some(data) => Some(data.as_slice()), + None => None, + }) + .collect(), + ) + .expect("SparseData invariant enforced by OwnedSparseData constructor") + } +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/module.rs b/third_party/rust/lucet-module-wasmsbx/src/module.rs new file mode 100644 index 0000000000..793a943822 --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/module.rs @@ -0,0 +1,27 @@ +use crate::functions::FunctionSpec; +use crate::module_data::ModuleData; +use crate::tables::TableElement; + +pub const LUCET_MODULE_SYM: &str = "lucet_module"; + +/// Module is the exposed structure that contains all the data backing a Lucet-compiled object. +#[derive(Debug)] +pub struct Module<'a> { + pub module_data: ModuleData<'a>, + pub tables: &'a [&'a [TableElement]], + pub function_manifest: &'a [FunctionSpec], +} + +/// SerializedModule is a serialization-friendly form of Module, in that the `module_data_*` fields +/// here refer to a serialized `ModuleData`, while `tables_*` and `function_manifest_*` refer to +/// the actual tables and function manifest written in the binary. 
+#[repr(C)] +#[derive(Debug)] +pub struct SerializedModule { + pub module_data_ptr: u64, + pub module_data_len: u64, + pub tables_ptr: u64, + pub tables_len: u64, + pub function_manifest_ptr: u64, + pub function_manifest_len: u64, +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/module_data.rs b/third_party/rust/lucet-module-wasmsbx/src/module_data.rs new file mode 100644 index 0000000000..e55c33a186 --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/module_data.rs @@ -0,0 +1,303 @@ +use crate::{ + functions::{ + ExportFunction, FunctionIndex, FunctionMetadata, ImportFunction, OwnedFunctionMetadata, + }, + globals::GlobalSpec, + linear_memory::{HeapSpec, LinearMemorySpec, SparseData}, + types::Signature, + Error, +}; +#[cfg(feature = "signature_checking")] +use minisign::SignatureBones; +use serde::{Deserialize, Serialize}; + +pub const MODULE_DATA_SYM: &str = "lucet_module_data"; + +/// The metadata (and some data) for a Lucet module. +/// +/// The lifetime parameter exists to support zero-copy deserialization for the `&str` and `&[u8]` +/// fields at the leaves of the structure. For a variant with owned types at the leaves, see +/// [`OwnedModuleData`](owned/struct.OwnedModuleData.html). +/// +/// The goal is for this structure to eventually include everything except the code for the guest +/// functions themselves. 
// NOTE(review): this struct is (de)serialized with bincode through the derived serde impls, so
// the field order below is presumably part of the on-disk format; confirm before reordering.
#[derive(Debug, Serialize, Deserialize)]
pub struct ModuleData<'a> {
    // Heap specification plus sparse initializer data, exposed through `heap_spec()` and
    // `sparse_data()`. Both accessors return `None` when this is `None`.
    #[serde(borrow)]
    linear_memory: Option<LinearMemorySpec<'a>>,
    // Global variable specifications, exposed through `globals_spec()`.
    #[serde(borrow)]
    globals_spec: Vec<GlobalSpec<'a>>,
    // Per-function metadata. `get_signature` indexes this vector with a `FunctionIndex` and
    // follows each entry's `signature` index into `signatures`.
    #[serde(borrow)]
    function_info: Vec<FunctionMetadata<'a>>,
    // Imported functions, exposed through `import_functions()`.
    #[serde(borrow)]
    import_functions: Vec<ImportFunction<'a>>,
    // Exported functions; `get_export_func_id` searches their `names` to resolve an export name
    // to a `FunctionIndex`.
    #[serde(borrow)]
    export_functions: Vec<ExportFunction<'a>>,
    // Signature table referenced by the `signature` index stored in each `function_info` entry.
    signatures: Vec<Signature>,
    // Raw module signature bytes. `new` fills this with `SignatureBones::BYTES` zero bytes when
    // the `signature_checking` feature is enabled and leaves it empty otherwise;
    // `patch_module_signature` later overwrites the placeholder without changing its length.
    module_signature: Vec<u8>,
    // Per-feature boolean flags recorded for the module (see `ModuleFeatures`).
    features: ModuleFeatures,
}
function_info, + import_functions, + export_functions, + signatures, + module_signature, + features, + } + } + + pub fn heap_spec(&self) -> Option<&HeapSpec> { + if let Some(ref linear_memory) = self.linear_memory { + Some(&linear_memory.heap) + } else { + None + } + } + + pub fn sparse_data(&self) -> Option<&SparseData<'a>> { + if let Some(ref linear_memory) = self.linear_memory { + Some(&linear_memory.initializer) + } else { + None + } + } + + pub fn globals_spec(&self) -> &[GlobalSpec<'a>] { + &self.globals_spec + } + + pub fn function_info(&self) -> &[FunctionMetadata<'a>] { + &self.function_info + } + + pub fn import_functions(&self) -> &[ImportFunction<'_>] { + &self.import_functions + } + + pub fn export_functions(&self) -> &[ExportFunction<'_>] { + &self.export_functions + } + + // Function index here is a different index space than `get_func_from_idx`, which + // uses function index as an index into a table of function elements. + // + // This is an index of all functions in the module. 
+ pub fn get_signature(&self, fn_id: FunctionIndex) -> &Signature { + let sig_idx = self.function_info[fn_id.as_u32() as usize].signature; + &self.signatures[sig_idx.as_u32() as usize] + } + + pub fn get_export_func_id(&self, name: &str) -> Option<FunctionIndex> { + self.export_functions + .iter() + .find(|export| export.names.contains(&name)) + .map(|export| export.fn_idx) + } + + pub fn signatures(&self) -> &[Signature] { + &self.signatures + } + + pub fn get_module_signature(&self) -> &[u8] { + &self.module_signature + } + + pub fn features(&self) -> &ModuleFeatures { + &self.features + } + + #[cfg(feature = "signature_checking")] + pub fn patch_module_signature( + module_data_bin: &'a [u8], + module_signature: &[u8], + ) -> Result<Vec<u8>, Error> { + assert_eq!(module_signature.len(), SignatureBones::BYTES); + let mut module_data = Self::deserialize(module_data_bin)?; + module_data + .module_signature + .copy_from_slice(module_signature); + let patched_module_data_bin = module_data.serialize()?; + assert_eq!(patched_module_data_bin.len(), module_data_bin.len()); + Ok(patched_module_data_bin) + } + + #[cfg(feature = "signature_checking")] + pub fn clear_module_signature(module_data_bin: &'a [u8]) -> Result<Vec<u8>, Error> { + let module_signature = vec![0u8; SignatureBones::BYTES]; + Self::patch_module_signature(module_data_bin, &module_signature) + } + + /// Serialize to [`bincode`](https://github.com/TyOverby/bincode). + pub fn serialize(&self) -> Result<Vec<u8>, Error> { + bincode::serialize(self).map_err(Error::SerializationError) + } + + /// Deserialize from [`bincode`](https://github.com/TyOverby/bincode). 
+ pub fn deserialize(buf: &'a [u8]) -> Result<ModuleData<'a>, Error> { + bincode::deserialize(buf).map_err(Error::DeserializationError) + } +} + +use crate::{ + functions::{OwnedExportFunction, OwnedImportFunction}, + globals::OwnedGlobalSpec, + linear_memory::{OwnedLinearMemorySpec, OwnedSparseData}, +}; + +/// The metadata (and some data) for a Lucet module. +/// +/// This is a version of [`ModuleData`](../struct.ModuleData.html) with owned types throughout, +/// rather than references to support zero-copy deserialization. This type is useful when directly +/// building up a value to be serialized. +pub struct OwnedModuleData { + linear_memory: Option<OwnedLinearMemorySpec>, + globals_spec: Vec<OwnedGlobalSpec>, + function_info: Vec<OwnedFunctionMetadata>, + imports: Vec<OwnedImportFunction>, + exports: Vec<OwnedExportFunction>, + signatures: Vec<Signature>, + features: ModuleFeatures, +} + +impl OwnedModuleData { + pub fn new( + linear_memory: Option<OwnedLinearMemorySpec>, + globals_spec: Vec<OwnedGlobalSpec>, + function_info: Vec<OwnedFunctionMetadata>, + imports: Vec<OwnedImportFunction>, + exports: Vec<OwnedExportFunction>, + signatures: Vec<Signature>, + features: ModuleFeatures, + ) -> Self { + Self { + linear_memory, + globals_spec, + function_info, + imports, + exports, + signatures, + features, + } + } + + /// Create a [`ModuleData`](../struct.ModuleData.html) backed by the values in this + /// `OwnedModuleData`. 
+ pub fn to_ref<'a>(&'a self) -> ModuleData<'a> { + ModuleData::new( + if let Some(ref owned_linear_memory) = self.linear_memory { + Some(owned_linear_memory.to_ref()) + } else { + None + }, + self.globals_spec.iter().map(|gs| gs.to_ref()).collect(), + self.function_info + .iter() + .map(|info| info.to_ref()) + .collect(), + self.imports.iter().map(|imp| imp.to_ref()).collect(), + self.exports.iter().map(|exp| exp.to_ref()).collect(), + self.signatures.clone(), + self.features.clone(), + ) + } + + pub fn empty() -> Self { + Self::new( + None, + vec![], + vec![], + vec![], + vec![], + vec![], + ModuleFeatures::none(), + ) + } + + pub fn with_heap_spec(mut self, heap_spec: HeapSpec) -> Self { + if let Some(ref mut linear_memory) = self.linear_memory { + linear_memory.heap = heap_spec; + } else { + self.linear_memory = Some(OwnedLinearMemorySpec { + heap: heap_spec, + initializer: OwnedSparseData::new(vec![]).unwrap(), + }); + } + self + } +} + +impl Default for OwnedModuleData { + fn default() -> Self { + OwnedModuleData::empty() + } +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/runtime.rs b/third_party/rust/lucet-module-wasmsbx/src/runtime.rs new file mode 100644 index 0000000000..864344dc9b --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/runtime.rs @@ -0,0 +1,8 @@ +/// This struct describes the handful of fields that Lucet-compiled programs may directly interact with, but +/// are provided through VMContext. 
// `#[repr(C)]` fixes the field order and `#[repr(align(8))]` guarantees at least 8-byte alignment.
// NOTE(review): presumably compiled guest code accesses these fields by fixed offset, which is why
// the layout is pinned; confirm against the code generator before changing anything here.
#[repr(C)]
#[repr(align(8))]
pub struct InstanceRuntimeData {
    // NOTE(review): presumably points at the instance's global-variable storage, viewed as `i64`
    // slots; verify against the runtime crate.
    pub globals_ptr: *mut i64,
    // NOTE(review): presumably a running count of executed guest instructions; verify against
    // the code that updates it.
    pub instruction_count: u64,
}
+ .into(); + + let mut raw_module_and_data = + RawModuleAndData::from_file(&so_path).map_err(|e| IOError(e))?; + let cleared_module_data_bin = + ModuleData::clear_module_signature(raw_module_and_data.module_data_bin())?; + raw_module_and_data.patch_module_data(&cleared_module_data_bin); + + minisign::verify( + &pk, + &signature_box, + Cursor::new(&raw_module_and_data.obj_bin), + true, + false, + ) + .map_err(|e| ModuleSignatureError(e)) + } + + pub fn sign<P: AsRef<Path>>(path: P, sk: &SecretKey) -> Result<(), Error> { + let raw_module_and_data = RawModuleAndData::from_file(&path).map_err(|e| IOError(e))?; + let signature_box = minisign::sign( + None, + sk, + Cursor::new(&raw_module_and_data.obj_bin), + true, + None, + None, + ) + .map_err(|e| ModuleSignatureError(e))?; + let signature_bones: SignatureBones = signature_box.into(); + let patched_module_data_bin = ModuleData::patch_module_signature( + raw_module_and_data.module_data_bin(), + &signature_bones.to_bytes(), + )?; + raw_module_and_data + .write_patched_module_data(&path, &patched_module_data_bin) + .map_err(|e| IOError(e))?; + Ok(()) + } +} + +#[allow(dead_code)] +struct SymbolData { + offset: usize, + len: usize, +} + +#[allow(dead_code)] +struct RawModuleAndData { + pub obj_bin: Vec<u8>, + pub module_data_offset: usize, + pub module_data_len: usize, +} + +#[allow(dead_code)] +impl RawModuleAndData { + pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, io::Error> { + let mut obj_bin: Vec<u8> = Vec::new(); + File::open(&path)?.read_to_end(&mut obj_bin)?; + + let native_data_symbol_data = + Self::symbol_data(&obj_bin, LUCET_MODULE_SYM, true)?.ok_or(io::Error::new( + io::ErrorKind::InvalidInput, + format!("`{}` symbol not present", LUCET_MODULE_SYM), + ))?; + + // While `module_data` is the first field of the `SerializedModule` that `lucet_module` points + // to, it is a virtual address, not a file offset. 
The translation is somewhat tricky at + // the moment, so just look at the corresponding `lucet_module_data` symbol for now. + let module_data_symbol_data = + Self::symbol_data(&obj_bin, MODULE_DATA_SYM, true)?.ok_or(io::Error::new( + io::ErrorKind::InvalidInput, + format!("`{}` symbol not present", MODULE_DATA_SYM), + ))?; + + let module_data_len = + LittleEndian::read_u64(&obj_bin[(native_data_symbol_data.offset + 8)..]) as usize; + + Ok(RawModuleAndData { + obj_bin, + module_data_offset: module_data_symbol_data.offset, + module_data_len: module_data_len, + }) + } + + pub fn module_data_bin(&self) -> &[u8] { + &self.obj_bin[self.module_data_offset as usize + ..self.module_data_offset as usize + self.module_data_len] + } + + pub fn module_data_bin_mut(&mut self) -> &mut [u8] { + &mut self.obj_bin[self.module_data_offset as usize + ..self.module_data_offset as usize + self.module_data_len] + } + + pub fn patch_module_data(&mut self, module_data_bin: &[u8]) { + self.module_data_bin_mut().copy_from_slice(&module_data_bin); + } + + pub fn write_patched_module_data<P: AsRef<Path>>( + &self, + path: P, + patched_module_data_bin: &[u8], + ) -> Result<(), io::Error> { + let mut fp = OpenOptions::new() + .write(true) + .create_new(false) + .open(&path)?; + fp.seek(SeekFrom::Start(self.module_data_offset as u64))?; + fp.write_all(&patched_module_data_bin)?; + Ok(()) + } + + // Retrieving the offset of a symbol is not supported by the object crate. + // In Mach-O, actual file offsets are encoded, whereas Elf encodes virtual + // addresses, requiring extra steps to retrieve the section, its base + // address as well as the section offset. 
+ + // Elf + #[cfg(all(target_family = "unix", not(target_os = "macos")))] + fn symbol_data( + obj_bin: &[u8], + symbol_name: &str, + _mangle: bool, + ) -> Result<Option<SymbolData>, io::Error> { + let obj = object::ElfFile::parse(obj_bin) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + let symbol_map = obj.symbol_map(); + for symbol in symbol_map.symbols() { + let kind = symbol.kind(); + if kind != SymbolKind::Data { + continue; + } + if symbol.name() != Some(symbol_name) { + continue; + } + let section_index = match symbol.section_index() { + Some(section_index) => section_index, + None => continue, + }; + let section = &obj.elf().section_headers[section_index.0]; + let offset = (symbol.address() - section.sh_addr + section.sh_offset) as usize; + let len = symbol.size() as usize; + return Ok(Some(SymbolData { offset, len })); + } + Ok(None) + } + + // Mach-O + #[cfg(target_os = "macos")] + fn symbol_data( + obj_bin: &[u8], + symbol_name: &str, + mangle: bool, + ) -> Result<Option<SymbolData>, io::Error> { + let obj = object::File::parse(obj_bin) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + let symbol_map = obj.symbol_map(); + let mangled_symbol_name = format!("_{}", symbol_name); + let symbol_name = if mangle { + &mangled_symbol_name + } else { + symbol_name + }; + for symbol in symbol_map.symbols() { + let kind = symbol.kind(); + if kind != SymbolKind::Data && kind != SymbolKind::Unknown { + continue; + } + if symbol.name() != Some(symbol_name) { + continue; + } + let offset = symbol.address() as usize; + let len = symbol.size() as usize; + return Ok(Some(SymbolData { offset, len })); + } + Ok(None) + } +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/tables.rs b/third_party/rust/lucet-module-wasmsbx/src/tables.rs new file mode 100644 index 0000000000..b8ecb0d0f3 --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/tables.rs @@ -0,0 +1,14 @@ +use crate::functions::FunctionPointer; + +#[repr(C)] 
+#[derive(Clone, Debug)] +pub struct TableElement { + ty: u64, + func: u64, +} + +impl TableElement { + pub fn function_pointer(&self) -> FunctionPointer { + FunctionPointer::from_usize(self.func as usize) + } +} diff --git a/third_party/rust/lucet-module-wasmsbx/src/traps.rs b/third_party/rust/lucet-module-wasmsbx/src/traps.rs new file mode 100644 index 0000000000..28f01c60ef --- /dev/null +++ b/third_party/rust/lucet-module-wasmsbx/src/traps.rs @@ -0,0 +1,63 @@ +use num_derive::FromPrimitive; +use num_traits::FromPrimitive; + +/// The type of a WebAssembly +/// [trap](http://webassembly.github.io/spec/core/intro/overview.html#trap). +#[repr(u32)] +#[derive(Copy, Clone, Debug, FromPrimitive, PartialEq)] +pub enum TrapCode { + StackOverflow = 0, + HeapOutOfBounds = 1, + OutOfBounds = 2, + IndirectCallToNull = 3, + BadSignature = 4, + IntegerOverflow = 5, + IntegerDivByZero = 6, + BadConversionToInteger = 7, + Interrupt = 8, + TableOutOfBounds = 9, + Unreachable = 10, +} + +impl TrapCode { + pub fn try_from_u32(v: u32) -> Option<TrapCode> { + Self::from_u32(v) + } +} + +/// Trap information for an address in a compiled function +/// +/// To support zero-copy deserialization of trap tables, this +/// must be repr(C) [to avoid cases where Rust may change the +/// layout in some future version, mangling the interpretation +/// of an old TrapSite struct] +#[repr(C)] +#[derive(Clone, Debug)] +pub struct TrapSite { + pub offset: u32, + pub code: TrapCode, +} + +/// A collection of trap sites, typically obtained from a +/// single function (see [`FunctionSpec::traps`]) +#[repr(C)] +#[derive(Clone, Debug)] +pub struct TrapManifest<'a> { + pub traps: &'a [TrapSite], +} + +impl<'a> TrapManifest<'a> { + pub fn new(traps: &'a [TrapSite]) -> TrapManifest<'_> { + TrapManifest { traps } + } + pub fn lookup_addr(&self, addr: u32) -> Option<TrapCode> { + // predicate to find the trapsite for the addr via binary search + let f = |ts: &TrapSite| ts.offset.cmp(&addr); + + if let Ok(i) 
/// Builds a `Signature` value from a compact, function-type-like notation.
///
/// Parameter and return types are written as bare `ValueType` variant names. The expansion
/// refers to `$crate::Signature` and `$crate::ValueType`, so both must be reachable at the
/// root of this crate.
///
/// Accepted forms:
///
/// * `lucet_signature!((I32, I64) -> F64)` gives `params: vec![ValueType::I32, ValueType::I64]`
///   and `ret_ty: Some(ValueType::F64)`.
/// * `lucet_signature!((I32) -> ())` gives one parameter and `ret_ty: None`.
/// * `lucet_signature!(() -> ())` gives no parameters and `ret_ty: None`.
/// * `lucet_signature!((() -> ()))` is the same empty signature with the whole type wrapped
///   in an extra pair of parentheses.
///
/// The parameter list does not accept a trailing comma.
#[macro_export]
macro_rules! lucet_signature {
    // Fully parenthesized empty signature: `(() -> ())`.
    ((() -> ())) => {
        $crate::Signature {
            params: vec![],
            ret_ty: None
        }
    };
    // Zero or more parameters, no return value.
    (($($arg_ty:ident),*) -> ()) => {
        $crate::Signature {
            params: vec![$($crate::ValueType::$arg_ty),*],
            ret_ty: None,
        }
    };
    // Zero or more parameters and a single return type.
    (($($arg_ty:ident),*) -> $ret_ty:ident) => {
        $crate::Signature {
            params: vec![$($crate::ValueType::$arg_ty),*],
            ret_ty: Some($crate::ValueType::$ret_ty),
        }
    };
}