author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000
commit    | 2aa4a82499d4becd2284cdb482213d541b8804dd
tree      | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/lucet-runtime-internals-wasmsbx/src
parent    | Initial commit.
Adding upstream version 86.0.1. (upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
30 files changed, 8355 insertions, 0 deletions
diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/alloc/mod.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/alloc/mod.rs new file mode 100644 index 0000000000..b6d1290195 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/alloc/mod.rs @@ -0,0 +1,448 @@ +use crate::error::Error; +use crate::module::Module; +use crate::region::RegionInternal; +use libc::c_void; +use lucet_module::GlobalValue; +use nix::unistd::{sysconf, SysconfVar}; +use std::sync::{Arc, Once, Weak}; + +pub const HOST_PAGE_SIZE_EXPECTED: usize = 4096; +static mut HOST_PAGE_SIZE: usize = 0; +static HOST_PAGE_SIZE_INIT: Once = Once::new(); + +/// Our host is Linux x86_64, which should always use a 4K page. +/// +/// We double check the expected value using `sysconf` at runtime. +pub fn host_page_size() -> usize { + unsafe { + HOST_PAGE_SIZE_INIT.call_once(|| match sysconf(SysconfVar::PAGE_SIZE) { + Ok(Some(sz)) => { + if sz as usize == HOST_PAGE_SIZE_EXPECTED { + HOST_PAGE_SIZE = HOST_PAGE_SIZE_EXPECTED; + } else { + panic!( + "host page size was {}; expected {}", + sz, HOST_PAGE_SIZE_EXPECTED + ); + } + } + _ => panic!("could not get host page size from sysconf"), + }); + HOST_PAGE_SIZE + } +} + +pub fn instance_heap_offset() -> usize { + 1 * host_page_size() +} + +/// A set of pointers into virtual memory that can be allocated into an `Alloc`. +/// +/// The `'r` lifetime parameter represents the lifetime of the region that backs this virtual +/// address space. +/// +/// The memory layout in a `Slot` is meant to be reused in order to reduce overhead on region +/// implementations. To back the layout with real memory, use `Region::allocate_runtime`. +/// +/// To ensure a `Slot` can only be backed by one allocation at a time, it contains a mutex, but +/// otherwise can be freely copied. +#[repr(C)] +pub struct Slot { + /// The beginning of the contiguous virtual memory chunk managed by this `Alloc`. + /// + /// The first part of this memory, pointed to by `start`, is always backed by real memory, and + /// is used to store the lucet_instance structure. + pub start: *mut c_void, + + /// The next part of memory contains the heap and its guard pages. + /// + /// The heap is backed by real memory according to the `HeapSpec`. Guard pages trigger a sigsegv + /// when accessed. + pub heap: *mut c_void, + + /// The stack comes after the heap. + /// + /// Because the stack grows downwards, we get the added safety of ensuring that stack overflows + /// go into the guard pages, if the `Limits` specify guard pages. The stack is always the size + /// given by `Limits.stack_pages`. + pub stack: *mut c_void, + + /// The WebAssembly Globals follow the stack and a single guard page. + pub globals: *mut c_void, + + /// The signal handler stack follows the globals. + /// + /// Having a separate signal handler stack allows the signal handler to run in situations where + /// the normal stack has grown into the guard page. + pub sigstack: *mut c_void, + + /// Limits of the memory. + /// + /// Should not change through the lifetime of the `Alloc`. + pub limits: Limits, + + pub region: Weak<dyn RegionInternal>, +} + +// raw pointers require unsafe impl +unsafe impl Send for Slot {} +unsafe impl Sync for Slot {} + +impl Slot { + pub fn stack_top(&self) -> *mut c_void { + (self.stack as usize + self.limits.stack_size) as *mut c_void + } +} + +/// The structure that manages the allocations backing an `Instance`. 
+/// +/// `Alloc`s are not to be created directly, but rather are created by `Region`s during instance +/// creation. +pub struct Alloc { + pub heap_accessible_size: usize, + pub heap_inaccessible_size: usize, + pub slot: Option<Slot>, + pub region: Arc<dyn RegionInternal>, +} + +impl Drop for Alloc { + fn drop(&mut self) { + // eprintln!("Alloc::drop()"); + self.region.clone().drop_alloc(self); + } +} + +impl Alloc { + pub fn addr_in_guard_page(&self, addr: *const c_void) -> bool { + let addr = addr as usize; + let heap = self.slot().heap as usize; + let guard_start = heap + self.heap_accessible_size; + let guard_end = heap + self.slot().limits.heap_address_space_size; + // eprintln!( + // "addr = {:p}, guard_start = {:p}, guard_end = {:p}", + // addr, guard_start as *mut c_void, guard_end as *mut c_void + // ); + let stack_guard_end = self.slot().stack as usize; + let stack_guard_start = stack_guard_end - host_page_size(); + // eprintln!( + // "addr = {:p}, stack_guard_start = {:p}, stack_guard_end = {:p}", + // addr, stack_guard_start as *mut c_void, stack_guard_end as *mut c_void + // ); + let in_heap_guard = (addr >= guard_start) && (addr < guard_end); + let in_stack_guard = (addr >= stack_guard_start) && (addr < stack_guard_end); + + in_heap_guard || in_stack_guard + } + + pub fn expand_heap(&mut self, expand_bytes: u32, module: &dyn Module) -> Result<u32, Error> { + let slot = self.slot(); + + if expand_bytes == 0 { + // no expansion takes place, which is not an error + return Ok(self.heap_accessible_size as u32); + } + + let host_page_size = host_page_size() as u32; + + if self.heap_accessible_size as u32 % host_page_size != 0 { + lucet_bail!("heap is not page-aligned; this is a bug"); + } + + if expand_bytes > std::u32::MAX - host_page_size - 1 { + bail_limits_exceeded!("expanded heap would overflow address space"); + } + + // round the expansion up to a page boundary + let expand_pagealigned = + ((expand_bytes + host_page_size - 1) / host_page_size) * host_page_size; + + // `heap_inaccessible_size` tracks the size of the allocation that is addressible but not + // accessible. We cannot perform an expansion larger than this size. + if expand_pagealigned as usize > self.heap_inaccessible_size { + bail_limits_exceeded!("expanded heap would overflow addressable memory"); + } + + // the above makes sure this expression does not underflow + let guard_remaining = self.heap_inaccessible_size - expand_pagealigned as usize; + + if let Some(heap_spec) = module.heap_spec() { + // The compiler specifies how much guard (memory which traps on access) must be beyond the + // end of the accessible memory. We cannot perform an expansion that would make this region + // smaller than the compiler expected it to be. + if guard_remaining < heap_spec.guard_size as usize { + bail_limits_exceeded!("expansion would leave guard memory too small"); + } + + // The compiler indicates that the module has specified a maximum memory size. Don't let + // the heap expand beyond that: + if let Some(max_size) = heap_spec.max_size { + if self.heap_accessible_size + expand_pagealigned as usize > max_size as usize { + bail_limits_exceeded!( + "expansion would exceed module-specified heap limit: {:?}", + max_size + ); + } + } + } else { + return Err(Error::NoLinearMemory("cannot expand heap".to_owned())); + } + // The runtime sets a limit on how much of the heap can be backed by real memory. 
Don't let + // the heap expand beyond that: + if self.heap_accessible_size + expand_pagealigned as usize > slot.limits.heap_memory_size { + bail_limits_exceeded!( + "expansion would exceed runtime-specified heap limit: {:?}", + slot.limits + ); + } + + let newly_accessible = self.heap_accessible_size; + + self.region + .clone() + .expand_heap(slot, newly_accessible as u32, expand_pagealigned)?; + + self.heap_accessible_size += expand_pagealigned as usize; + self.heap_inaccessible_size -= expand_pagealigned as usize; + + Ok(newly_accessible as u32) + } + + pub fn reset_heap(&mut self, module: &dyn Module) -> Result<(), Error> { + self.region.clone().reset_heap(self, module) + } + + pub fn heap_len(&self) -> usize { + self.heap_accessible_size + } + + pub fn slot(&self) -> &Slot { + self.slot + .as_ref() + .expect("alloc missing its slot before drop") + } + + /// Return the heap as a byte slice. + pub unsafe fn heap(&self) -> &[u8] { + std::slice::from_raw_parts(self.slot().heap as *mut u8, self.heap_accessible_size) + } + + /// Return the heap as a mutable byte slice. + pub unsafe fn heap_mut(&mut self) -> &mut [u8] { + std::slice::from_raw_parts_mut(self.slot().heap as *mut u8, self.heap_accessible_size) + } + + /// Return the heap as a slice of 32-bit words. + pub unsafe fn heap_u32(&self) -> &[u32] { + assert!(self.slot().heap as usize % 4 == 0, "heap is 4-byte aligned"); + assert!( + self.heap_accessible_size % 4 == 0, + "heap size is multiple of 4-bytes" + ); + std::slice::from_raw_parts(self.slot().heap as *mut u32, self.heap_accessible_size / 4) + } + + /// Return the heap as a mutable slice of 32-bit words. + pub unsafe fn heap_u32_mut(&self) -> &mut [u32] { + assert!(self.slot().heap as usize % 4 == 0, "heap is 4-byte aligned"); + assert!( + self.heap_accessible_size % 4 == 0, + "heap size is multiple of 4-bytes" + ); + std::slice::from_raw_parts_mut(self.slot().heap as *mut u32, self.heap_accessible_size / 4) + } + + /// Return the heap as a slice of 64-bit words. + pub unsafe fn heap_u64(&self) -> &[u64] { + assert!(self.slot().heap as usize % 8 == 0, "heap is 8-byte aligned"); + assert!( + self.heap_accessible_size % 8 == 0, + "heap size is multiple of 8-bytes" + ); + std::slice::from_raw_parts(self.slot().heap as *mut u64, self.heap_accessible_size / 8) + } + + /// Return the heap as a mutable slice of 64-bit words. + pub unsafe fn heap_u64_mut(&mut self) -> &mut [u64] { + assert!(self.slot().heap as usize % 8 == 0, "heap is 8-byte aligned"); + assert!( + self.heap_accessible_size % 8 == 0, + "heap size is multiple of 8-bytes" + ); + std::slice::from_raw_parts_mut(self.slot().heap as *mut u64, self.heap_accessible_size / 8) + } + + /// Return the stack as a mutable byte slice. + /// + /// Since the stack grows down, `alloc.stack_mut()[0]` is the top of the stack, and + /// `alloc.stack_mut()[alloc.limits.stack_size - 1]` is the last byte at the bottom of the + /// stack. + pub unsafe fn stack_mut(&mut self) -> &mut [u8] { + std::slice::from_raw_parts_mut(self.slot().stack as *mut u8, self.slot().limits.stack_size) + } + + /// Return the stack as a mutable slice of 64-bit words. + /// + /// Since the stack grows down, `alloc.stack_mut()[0]` is the top of the stack, and + /// `alloc.stack_mut()[alloc.limits.stack_size - 1]` is the last word at the bottom of the + /// stack. 
+ pub unsafe fn stack_u64_mut(&mut self) -> &mut [u64] { + assert!( + self.slot().stack as usize % 8 == 0, + "stack is 8-byte aligned" + ); + assert!( + self.slot().limits.stack_size % 8 == 0, + "stack size is multiple of 8-bytes" + ); + std::slice::from_raw_parts_mut( + self.slot().stack as *mut u64, + self.slot().limits.stack_size / 8, + ) + } + + /// Return the globals as a slice. + pub unsafe fn globals(&self) -> &[GlobalValue] { + std::slice::from_raw_parts( + self.slot().globals as *const GlobalValue, + self.slot().limits.globals_size / std::mem::size_of::<GlobalValue>(), + ) + } + + /// Return the globals as a mutable slice. + pub unsafe fn globals_mut(&mut self) -> &mut [GlobalValue] { + std::slice::from_raw_parts_mut( + self.slot().globals as *mut GlobalValue, + self.slot().limits.globals_size / std::mem::size_of::<GlobalValue>(), + ) + } + + /// Return the sigstack as a mutable byte slice. + pub unsafe fn sigstack_mut(&mut self) -> &mut [u8] { + std::slice::from_raw_parts_mut( + self.slot().sigstack as *mut u8, + self.slot().limits.signal_stack_size, + ) + } + + pub fn mem_in_heap<T>(&self, ptr: *const T, len: usize) -> bool { + let start = ptr as usize; + let end = start + len; + + let heap_start = self.slot().heap as usize; + let heap_end = heap_start + self.heap_accessible_size; + + // TODO: check for off-by-ones + start <= end + && start >= heap_start + && start < heap_end + && end >= heap_start + && end <= heap_end + } +} + +/// Runtime limits for the various memories that back a Lucet instance. +/// +/// Each value is specified in bytes, and must be evenly divisible by the host page size (4K). +#[derive(Clone, Debug)] +#[repr(C)] +pub struct Limits { + /// Max size of the heap, which can be backed by real memory. (default 1M) + pub heap_memory_size: usize, + /// Size of total virtual memory. (default 8G) + pub heap_address_space_size: usize, + /// Size of the guest stack. (default 128K) + pub stack_size: usize, + /// Size of the globals region in bytes; each global uses 8 bytes. (default 4K) + pub globals_size: usize, + /// Size of the signal stack in bytes. (default SIGSTKSZ for release builds, 12K for debug builds) + /// + /// This difference is to account for the greatly increased stack size usage in the signal + /// handler when running without optimizations. + /// + /// Note that debug vs. release mode is determined by `cfg(debug_assertions)`, so if you are + /// specifically enabling debug assertions in your release builds, the default signal stack will + /// be larger. 
+ pub signal_stack_size: usize, +} + +#[cfg(debug_assertions)] +pub const DEFAULT_SIGNAL_STACK_SIZE: usize = 12 * 1024; +#[cfg(not(debug_assertions))] +pub const DEFAULT_SIGNAL_STACK_SIZE: usize = libc::SIGSTKSZ; + +impl Limits { + pub const fn default() -> Limits { + Limits { + heap_memory_size: 16 * 64 * 1024, + heap_address_space_size: 0x200000000, + stack_size: 128 * 1024, + globals_size: 4096, + signal_stack_size: DEFAULT_SIGNAL_STACK_SIZE, + } + } +} + +impl Limits { + pub fn total_memory_size(&self) -> usize { + // Memory is laid out as follows: + // * the instance (up to instance_heap_offset) + // * the heap, followed by guard pages + // * the stack (grows towards heap guard pages) + // * globals + // * one guard page (to catch signal stack overflow) + // * the signal stack + + [ + instance_heap_offset(), + self.heap_address_space_size, + host_page_size(), + self.stack_size, + self.globals_size, + host_page_size(), + self.signal_stack_size, + ] + .iter() + .try_fold(0usize, |acc, &x| acc.checked_add(x)) + .expect("total_memory_size doesn't overflow") + } + + /// Validate that the limits are aligned to page sizes, and that the stack is not empty. + pub fn validate(&self) -> Result<(), Error> { + if self.heap_memory_size % host_page_size() != 0 { + return Err(Error::InvalidArgument( + "memory size must be a multiple of host page size", + )); + } + if self.heap_address_space_size % host_page_size() != 0 { + return Err(Error::InvalidArgument( + "address space size must be a multiple of host page size", + )); + } + if self.heap_memory_size > self.heap_address_space_size { + return Err(Error::InvalidArgument( + "address space size must be at least as large as memory size", + )); + } + if self.stack_size % host_page_size() != 0 { + return Err(Error::InvalidArgument( + "stack size must be a multiple of host page size", + )); + } + if self.globals_size % host_page_size() != 0 { + return Err(Error::InvalidArgument( + "globals size must be a multiple of host page size", + )); + } + if self.stack_size <= 0 { + return Err(Error::InvalidArgument("stack size must be greater than 0")); + } + if self.signal_stack_size % host_page_size() != 0 { + return Err(Error::InvalidArgument( + "signal stack size must be a multiple of host page size", + )); + } + Ok(()) + } +} + +pub mod tests; diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/alloc/tests.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/alloc/tests.rs new file mode 100644 index 0000000000..0e7c1be4dc --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/alloc/tests.rs @@ -0,0 +1,685 @@ +#[macro_export] +macro_rules! 
alloc_tests { + ( $TestRegion:path ) => { + use libc::c_void; + use std::sync::Arc; + use $TestRegion as TestRegion; + use $crate::alloc::Limits; + use $crate::context::{Context, ContextHandle}; + use $crate::instance::InstanceInternal; + use $crate::module::{GlobalValue, HeapSpec, MockModuleBuilder}; + use $crate::region::Region; + use $crate::val::Val; + + const LIMITS_HEAP_MEM_SIZE: usize = 16 * 64 * 1024; + const LIMITS_HEAP_ADDRSPACE_SIZE: usize = 8 * 1024 * 1024; + const LIMITS_STACK_SIZE: usize = 64 * 1024; + const LIMITS_GLOBALS_SIZE: usize = 4 * 1024; + + const LIMITS: Limits = Limits { + heap_memory_size: LIMITS_HEAP_MEM_SIZE, + heap_address_space_size: LIMITS_HEAP_ADDRSPACE_SIZE, + stack_size: LIMITS_STACK_SIZE, + globals_size: LIMITS_GLOBALS_SIZE, + ..Limits::default() + }; + + const SPEC_HEAP_RESERVED_SIZE: u64 = LIMITS_HEAP_ADDRSPACE_SIZE as u64 / 2; + const SPEC_HEAP_GUARD_SIZE: u64 = LIMITS_HEAP_ADDRSPACE_SIZE as u64 / 2; + + // one wasm page, not host page + const ONEPAGE_INITIAL_SIZE: u64 = 64 * 1024; + const ONEPAGE_MAX_SIZE: u64 = 64 * 1024; + + const ONE_PAGE_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: SPEC_HEAP_GUARD_SIZE, + initial_size: ONEPAGE_INITIAL_SIZE, + max_size: Some(ONEPAGE_MAX_SIZE), + }; + + const THREEPAGE_INITIAL_SIZE: u64 = 64 * 1024; + const THREEPAGE_MAX_SIZE: u64 = 3 * 64 * 1024; + + const THREE_PAGE_MAX_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: 0, + initial_size: THREEPAGE_INITIAL_SIZE, + max_size: Some(THREEPAGE_MAX_SIZE), + }; + + /// This test shows an `AllocHandle` passed to `Region::allocate_runtime` will have its heap + /// and stack of the correct size and read/writability. + #[test] + fn allocate_runtime_works() { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let mut inst = region + .new_instance( + MockModuleBuilder::new() + .with_heap_spec(ONE_PAGE_HEAP) + .build(), + ) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, ONEPAGE_INITIAL_SIZE as usize); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + + assert_eq!(heap[0], 0); + heap[0] = 0xFF; + assert_eq!(heap[0], 0xFF); + + assert_eq!(heap[heap_len - 1], 0); + heap[heap_len - 1] = 0xFF; + assert_eq!(heap[heap_len - 1], 0xFF); + + let stack = unsafe { inst.alloc_mut().stack_mut() }; + assert_eq!(stack.len(), LIMITS_STACK_SIZE); + + assert_eq!(stack[0], 0); + stack[0] = 0xFF; + assert_eq!(stack[0], 0xFF); + + assert_eq!(stack[LIMITS_STACK_SIZE - 1], 0); + stack[LIMITS_STACK_SIZE - 1] = 0xFF; + assert_eq!(stack[LIMITS_STACK_SIZE - 1], 0xFF); + } + + /// This test shows the heap works properly after a single expand. 
+ #[test] + fn expand_heap_once() { + expand_heap_once_template(THREE_PAGE_MAX_HEAP) + } + + fn expand_heap_once_template(heap_spec: HeapSpec) { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let module = MockModuleBuilder::new() + .with_heap_spec(heap_spec.clone()) + .build(); + let mut inst = region + .new_instance(module.clone()) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, heap_spec.initial_size as usize); + + let new_heap_area = inst + .alloc_mut() + .expand_heap(64 * 1024, module.as_ref()) + .expect("expand_heap succeeds"); + assert_eq!(heap_len, new_heap_area as usize); + + let new_heap_len = inst.alloc().heap_len(); + assert_eq!(new_heap_len, heap_len + (64 * 1024)); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + assert_eq!(heap[new_heap_len - 1], 0); + heap[new_heap_len - 1] = 0xFF; + assert_eq!(heap[new_heap_len - 1], 0xFF); + } + + /// This test shows the heap works properly after two expands. + #[test] + fn expand_heap_twice() { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let module = MockModuleBuilder::new() + .with_heap_spec(THREE_PAGE_MAX_HEAP) + .build(); + let mut inst = region + .new_instance(module.clone()) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, THREEPAGE_INITIAL_SIZE as usize); + + let new_heap_area = inst + .alloc_mut() + .expand_heap(64 * 1024, module.as_ref()) + .expect("expand_heap succeeds"); + assert_eq!(heap_len, new_heap_area as usize); + + let new_heap_len = inst.alloc().heap_len(); + assert_eq!(new_heap_len, heap_len + (64 * 1024)); + + let second_new_heap_area = inst + .alloc_mut() + .expand_heap(64 * 1024, module.as_ref()) + .expect("expand_heap succeeds"); + assert_eq!(new_heap_len, second_new_heap_area as usize); + + let second_new_heap_len = inst.alloc().heap_len(); + assert_eq!(second_new_heap_len as u64, THREEPAGE_MAX_SIZE); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + assert_eq!(heap[new_heap_len - 1], 0); + heap[new_heap_len - 1] = 0xFF; + assert_eq!(heap[new_heap_len - 1], 0xFF); + } + + /// This test shows that if you try to expand past the max given by the heap spec, the + /// expansion fails, but the existing heap can still be used. This test uses a region with + /// multiple slots in order to exercise more edge cases with adjacent managed memory. 
+ #[test] + fn expand_past_spec_max() { + let region = TestRegion::create(10, &LIMITS).expect("region created"); + let module = MockModuleBuilder::new() + .with_heap_spec(THREE_PAGE_MAX_HEAP) + .build(); + let mut inst = region + .new_instance(module.clone()) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, THREEPAGE_INITIAL_SIZE as usize); + + let new_heap_area = inst + .alloc_mut() + .expand_heap(THREEPAGE_MAX_SIZE as u32, module.as_ref()); + assert!(new_heap_area.is_err(), "heap expansion past spec fails"); + + let new_heap_len = inst.alloc().heap_len(); + assert_eq!(new_heap_len, heap_len); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + assert_eq!(heap[new_heap_len - 1], 0); + heap[new_heap_len - 1] = 0xFF; + assert_eq!(heap[new_heap_len - 1], 0xFF); + } + + const EXPANDPASTLIMIT_INITIAL_SIZE: u64 = LIMITS_HEAP_MEM_SIZE as u64 - (64 * 1024); + const EXPANDPASTLIMIT_MAX_SIZE: u64 = LIMITS_HEAP_MEM_SIZE as u64 + (64 * 1024); + const EXPAND_PAST_LIMIT_SPEC: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: SPEC_HEAP_GUARD_SIZE, + initial_size: EXPANDPASTLIMIT_INITIAL_SIZE, + max_size: Some(EXPANDPASTLIMIT_MAX_SIZE), + }; + + /// This test shows that a heap refuses to grow past the alloc limits, even if the runtime + /// spec says it can grow bigger. This test uses a region with multiple slots in order to + /// exercise more edge cases with adjacent managed memory. + #[test] + fn expand_past_heap_limit() { + let region = TestRegion::create(10, &LIMITS).expect("region created"); + let module = MockModuleBuilder::new() + .with_heap_spec(EXPAND_PAST_LIMIT_SPEC) + .build(); + let mut inst = region + .new_instance(module.clone()) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, EXPANDPASTLIMIT_INITIAL_SIZE as usize); + + let new_heap_area = inst + .alloc_mut() + .expand_heap(64 * 1024, module.as_ref()) + .expect("expand_heap succeeds"); + assert_eq!(heap_len, new_heap_area as usize); + + let new_heap_len = inst.alloc().heap_len(); + assert_eq!(new_heap_len, LIMITS_HEAP_MEM_SIZE); + + let past_limit_heap_area = inst.alloc_mut().expand_heap(64 * 1024, module.as_ref()); + assert!( + past_limit_heap_area.is_err(), + "heap expansion past limit fails" + ); + + let still_heap_len = inst.alloc().heap_len(); + assert_eq!(still_heap_len, LIMITS_HEAP_MEM_SIZE); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + assert_eq!(heap[new_heap_len - 1], 0); + heap[new_heap_len - 1] = 0xFF; + assert_eq!(heap[new_heap_len - 1], 0xFF); + } + + const INITIAL_OVERSIZE_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: SPEC_HEAP_GUARD_SIZE, + initial_size: SPEC_HEAP_RESERVED_SIZE + (64 * 1024), + max_size: None, + }; + + /// This test shows that a heap refuses to grow past the alloc limits, even if the runtime + /// spec says it can grow bigger. This test uses a region with multiple slots in order to + /// exercise more edge cases with adjacent managed memory. 
+ #[test] + fn reject_initial_oversize_heap() { + let region = TestRegion::create(10, &LIMITS).expect("region created"); + let res = region.new_instance( + MockModuleBuilder::new() + .with_heap_spec(INITIAL_OVERSIZE_HEAP) + .build(), + ); + assert!(res.is_err(), "new_instance fails"); + } + + /// This test shows that we reject limits with a larger memory size than address space size + #[test] + fn reject_undersized_address_space() { + const LIMITS: Limits = Limits { + heap_memory_size: LIMITS_HEAP_ADDRSPACE_SIZE + 4096, + heap_address_space_size: LIMITS_HEAP_ADDRSPACE_SIZE, + stack_size: LIMITS_STACK_SIZE, + globals_size: LIMITS_GLOBALS_SIZE, + ..Limits::default() + }; + let res = TestRegion::create(10, &LIMITS); + assert!(res.is_err(), "region creation fails"); + } + + const SMALL_GUARD_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: SPEC_HEAP_GUARD_SIZE - 1, + initial_size: LIMITS_HEAP_MEM_SIZE as u64, + max_size: None, + }; + + /// This test shows that a heap spec with a guard size smaller than the limits is + /// allowed. + #[test] + fn accept_small_guard_heap() { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let _inst = region + .new_instance( + MockModuleBuilder::new() + .with_heap_spec(SMALL_GUARD_HEAP) + .build(), + ) + .expect("new_instance succeeds"); + } + + const LARGE_GUARD_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: SPEC_HEAP_GUARD_SIZE + 1, + initial_size: ONEPAGE_INITIAL_SIZE, + max_size: None, + }; + + /// This test shows that a `HeapSpec` with a guard size larger than the limits is not + /// allowed. + #[test] + fn reject_large_guard_heap() { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let res = region.new_instance( + MockModuleBuilder::new() + .with_heap_spec(LARGE_GUARD_HEAP) + .build(), + ); + assert!(res.is_err(), "new_instance fails"); + } + + /// This test shows that a `Slot` can be reused after an `AllocHandle` is dropped, and that + /// its memory is reset. 
+ #[test] + fn reuse_slot_works() { + fn peek_n_poke(region: &Arc<TestRegion>) { + let mut inst = region + .new_instance( + MockModuleBuilder::new() + .with_heap_spec(ONE_PAGE_HEAP) + .build(), + ) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, ONEPAGE_INITIAL_SIZE as usize); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + + assert_eq!(heap[0], 0); + heap[0] = 0xFF; + assert_eq!(heap[0], 0xFF); + + assert_eq!(heap[heap_len - 1], 0); + heap[heap_len - 1] = 0xFF; + assert_eq!(heap[heap_len - 1], 0xFF); + + let stack = unsafe { inst.alloc_mut().stack_mut() }; + assert_eq!(stack.len(), LIMITS_STACK_SIZE); + + assert_eq!(stack[0], 0); + stack[0] = 0xFF; + assert_eq!(stack[0], 0xFF); + + assert_eq!(stack[LIMITS_STACK_SIZE - 1], 0); + stack[LIMITS_STACK_SIZE - 1] = 0xFF; + assert_eq!(stack[LIMITS_STACK_SIZE - 1], 0xFF); + + let globals = unsafe { inst.alloc_mut().globals_mut() }; + assert_eq!( + globals.len(), + LIMITS_GLOBALS_SIZE / std::mem::size_of::<GlobalValue>() + ); + + unsafe { + assert_eq!(globals[0].i_64, 0); + globals[0].i_64 = 0xFF; + assert_eq!(globals[0].i_64, 0xFF); + } + + unsafe { + assert_eq!(globals[globals.len() - 1].i_64, 0); + globals[globals.len() - 1].i_64 = 0xFF; + assert_eq!(globals[globals.len() - 1].i_64, 0xFF); + } + + let sigstack = unsafe { inst.alloc_mut().sigstack_mut() }; + assert_eq!(sigstack.len(), LIMITS.signal_stack_size); + + assert_eq!(sigstack[0], 0); + sigstack[0] = 0xFF; + assert_eq!(sigstack[0], 0xFF); + + assert_eq!(sigstack[sigstack.len() - 1], 0); + sigstack[sigstack.len() - 1] = 0xFF; + assert_eq!(sigstack[sigstack.len() - 1], 0xFF); + } + + // with a region size of 1, the slot must be reused + let region = TestRegion::create(1, &LIMITS).expect("region created"); + + peek_n_poke(®ion); + peek_n_poke(®ion); + } + + /// This test shows that the reset method clears the heap and resets its protections. + #[test] + fn alloc_reset() { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let module = MockModuleBuilder::new() + .with_heap_spec(THREE_PAGE_MAX_HEAP) + .build(); + let mut inst = region + .new_instance(module.clone()) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, THREEPAGE_INITIAL_SIZE as usize); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + + assert_eq!(heap[0], 0); + heap[0] = 0xFF; + assert_eq!(heap[0], 0xFF); + + assert_eq!(heap[heap_len - 1], 0); + heap[heap_len - 1] = 0xFF; + assert_eq!(heap[heap_len - 1], 0xFF); + + // Making a new mock module here because the borrow checker doesn't like referencing + // `inst.module` while `inst.alloc()` is borrowed mutably. The `Instance` tests don't have + // this weirdness + inst.alloc_mut() + .reset_heap(module.as_ref()) + .expect("reset succeeds"); + + let reset_heap_len = inst.alloc().heap_len(); + assert_eq!(reset_heap_len, THREEPAGE_INITIAL_SIZE as usize); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + + assert_eq!(heap[0], 0); + heap[0] = 0xFF; + assert_eq!(heap[0], 0xFF); + + assert_eq!(heap[reset_heap_len - 1], 0); + heap[reset_heap_len - 1] = 0xFF; + assert_eq!(heap[reset_heap_len - 1], 0xFF); + } + + /// This test shows that the reset method clears the heap and restores it to the spec + /// initial size after growing the heap. 
+ #[test] + fn alloc_grow_reset() { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let module = MockModuleBuilder::new() + .with_heap_spec(THREE_PAGE_MAX_HEAP) + .build(); + let mut inst = region + .new_instance(module.clone()) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, THREEPAGE_INITIAL_SIZE as usize); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + + assert_eq!(heap[0], 0); + heap[0] = 0xFF; + assert_eq!(heap[0], 0xFF); + + assert_eq!(heap[heap_len - 1], 0); + heap[heap_len - 1] = 0xFF; + assert_eq!(heap[heap_len - 1], 0xFF); + + let new_heap_area = inst + .alloc_mut() + .expand_heap( + (THREEPAGE_MAX_SIZE - THREEPAGE_INITIAL_SIZE) as u32, + module.as_ref(), + ) + .expect("expand_heap succeeds"); + assert_eq!(heap_len, new_heap_area as usize); + + let new_heap_len = inst.alloc().heap_len(); + assert_eq!(new_heap_len, THREEPAGE_MAX_SIZE as usize); + + // Making a new mock module here because the borrow checker doesn't like referencing + // `inst.module` while `inst.alloc()` is borrowed mutably. The `Instance` tests don't have + // this weirdness + inst.alloc_mut() + .reset_heap(module.as_ref()) + .expect("reset succeeds"); + + let reset_heap_len = inst.alloc().heap_len(); + assert_eq!(reset_heap_len, THREEPAGE_INITIAL_SIZE as usize); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + + assert_eq!(heap[0], 0); + heap[0] = 0xFF; + assert_eq!(heap[0], 0xFF); + + assert_eq!(heap[reset_heap_len - 1], 0); + heap[reset_heap_len - 1] = 0xFF; + assert_eq!(heap[reset_heap_len - 1], 0xFF); + } + + const GUARDLESS_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: 0, + initial_size: ONEPAGE_INITIAL_SIZE, + max_size: None, + }; + + /// This test shows the alloc works even with a zero guard size. + #[test] + fn guardless_heap_create() { + let region = TestRegion::create(1, &LIMITS).expect("region created"); + let mut inst = region + .new_instance( + MockModuleBuilder::new() + .with_heap_spec(GUARDLESS_HEAP) + .build(), + ) + .expect("new_instance succeeds"); + + let heap_len = inst.alloc().heap_len(); + assert_eq!(heap_len, ONEPAGE_INITIAL_SIZE as usize); + + let heap = unsafe { inst.alloc_mut().heap_mut() }; + + assert_eq!(heap[0], 0); + heap[0] = 0xFF; + assert_eq!(heap[0], 0xFF); + + assert_eq!(heap[heap_len - 1], 0); + heap[heap_len - 1] = 0xFF; + assert_eq!(heap[heap_len - 1], 0xFF); + + let stack = unsafe { inst.alloc_mut().stack_mut() }; + assert_eq!(stack.len(), LIMITS_STACK_SIZE); + + assert_eq!(stack[0], 0); + stack[0] = 0xFF; + assert_eq!(stack[0], 0xFF); + + assert_eq!(stack[LIMITS_STACK_SIZE - 1], 0); + stack[LIMITS_STACK_SIZE - 1] = 0xFF; + assert_eq!(stack[LIMITS_STACK_SIZE - 1], 0xFF); + } + + /// This test shows a guardless heap works properly after a single expand. + #[test] + fn guardless_expand_heap_once() { + expand_heap_once_template(GUARDLESS_HEAP) + } + + const INITIAL_EMPTY_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: SPEC_HEAP_GUARD_SIZE, + initial_size: 0, + max_size: None, + }; + + /// This test shows an initially-empty heap works properly after a single expand. 
+ #[test] + fn initial_empty_expand_heap_once() { + expand_heap_once_template(INITIAL_EMPTY_HEAP) + } + + const INITIAL_EMPTY_GUARDLESS_HEAP: HeapSpec = HeapSpec { + reserved_size: SPEC_HEAP_RESERVED_SIZE, + guard_size: 0, + initial_size: 0, + max_size: None, + }; + + /// This test shows an initially-empty, guardless heap works properly after a single + /// expand. + #[test] + fn initial_empty_guardless_expand_heap_once() { + expand_heap_once_template(INITIAL_EMPTY_GUARDLESS_HEAP) + } + + const CONTEXT_TEST_LIMITS: Limits = Limits { + heap_memory_size: 4096, + heap_address_space_size: 2 * 4096, + stack_size: 4096, + globals_size: 4096, + ..Limits::default() + }; + const CONTEXT_TEST_INITIAL_SIZE: u64 = 4096; + const CONTEXT_TEST_HEAP: HeapSpec = HeapSpec { + reserved_size: 4096, + guard_size: 4096, + initial_size: CONTEXT_TEST_INITIAL_SIZE, + max_size: Some(4096), + }; + + /// This test shows that alloced memory will create a heap and a stack that child context + /// code can use. + #[test] + fn context_alloc_child() { + extern "C" fn heap_touching_child(heap: *mut u8) { + let heap = unsafe { + std::slice::from_raw_parts_mut(heap, CONTEXT_TEST_INITIAL_SIZE as usize) + }; + heap[0] = 123; + heap[4095] = 45; + } + + let region = TestRegion::create(1, &CONTEXT_TEST_LIMITS).expect("region created"); + let mut inst = region + .new_instance( + MockModuleBuilder::new() + .with_heap_spec(CONTEXT_TEST_HEAP) + .build(), + ) + .expect("new_instance succeeds"); + + let mut parent = ContextHandle::new(); + unsafe { + let heap_ptr = inst.alloc_mut().heap_mut().as_ptr() as *mut c_void; + let child = ContextHandle::create_and_init( + inst.alloc_mut().stack_u64_mut(), + &mut parent, + heap_touching_child as usize, + &[Val::CPtr(heap_ptr)], + ) + .expect("context init succeeds"); + Context::swap(&mut parent, &child); + assert_eq!(inst.alloc().heap()[0], 123); + assert_eq!(inst.alloc().heap()[4095], 45); + } + } + + /// This test shows that an alloced memory will create a heap and stack, the child code can + /// write a pattern to that stack, and we can read back that same pattern after it is done + /// running. 
+ #[test] + fn context_stack_pattern() { + const STACK_PATTERN_LENGTH: usize = 1024; + extern "C" fn stack_pattern_child(heap: *mut u64) { + let heap = unsafe { + std::slice::from_raw_parts_mut(heap, CONTEXT_TEST_INITIAL_SIZE as usize / 8) + }; + let mut onthestack = [0u8; STACK_PATTERN_LENGTH]; + for i in 0..STACK_PATTERN_LENGTH { + onthestack[i] = (i % 256) as u8; + } + heap[0] = onthestack.as_ptr() as u64; + } + + let region = TestRegion::create(1, &CONTEXT_TEST_LIMITS).expect("region created"); + let mut inst = region + .new_instance( + MockModuleBuilder::new() + .with_heap_spec(CONTEXT_TEST_HEAP) + .build(), + ) + .expect("new_instance succeeds"); + + let mut parent = ContextHandle::new(); + unsafe { + let heap_ptr = inst.alloc_mut().heap_mut().as_ptr() as *mut c_void; + let child = ContextHandle::create_and_init( + inst.alloc_mut().stack_u64_mut(), + &mut parent, + stack_pattern_child as usize, + &[Val::CPtr(heap_ptr)], + ) + .expect("context init succeeds"); + Context::swap(&mut parent, &child); + + let stack_pattern = inst.alloc().heap_u64()[0] as usize; + assert!(stack_pattern > inst.alloc().slot().stack as usize); + assert!( + stack_pattern + STACK_PATTERN_LENGTH < inst.alloc().slot().stack_top() as usize + ); + let stack_pattern = + std::slice::from_raw_parts(stack_pattern as *const u8, STACK_PATTERN_LENGTH); + for i in 0..STACK_PATTERN_LENGTH { + assert_eq!(stack_pattern[i], (i % 256) as u8); + } + } + } + + #[test] + fn drop_region_first() { + let region = TestRegion::create(1, &Limits::default()).expect("region can be created"); + let inst = region + .new_instance(MockModuleBuilder::new().build()) + .expect("new_instance succeeds"); + drop(region); + drop(inst); + } + }; +} + +#[cfg(test)] +alloc_tests!(crate::region::mmap::MmapRegion); diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/c_api.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/c_api.rs new file mode 100644 index 0000000000..cd3bb4867a --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/c_api.rs @@ -0,0 +1,668 @@ +#![allow(non_camel_case_types)] + +pub use self::lucet_result::*; +pub use self::lucet_val::*; + +use crate::alloc::Limits; +use crate::error::Error; +use crate::instance::signals::SignalBehavior; +use libc::{c_int, c_void}; +use num_derive::FromPrimitive; + +#[macro_export] +macro_rules! assert_nonnull { + ( $name:ident ) => { + if $name.is_null() { + return lucet_error::InvalidArgument; + } + }; +} + +/// Wrap up the management of `Arc`s that go across the FFI boundary. +/// +/// Trait objects must be wrapped in two `Arc`s in order to yield a thin pointer. +#[macro_export] +macro_rules! 
with_ffi_arcs { + ( [ $name:ident : dyn $ty:ident ], $body:block ) => {{ + assert_nonnull!($name); + let $name = Arc::from_raw($name as *const Arc<dyn $ty>); + let res = $body; + Arc::into_raw($name); + res + }}; + ( [ $name:ident : $ty:ty ], $body:block ) => {{ + assert_nonnull!($name); + let $name = Arc::from_raw($name as *const $ty); + let res = $body; + Arc::into_raw($name); + res + }}; + ( [ $name:ident : dyn $ty:ident, $($tail:tt)* ], $body:block ) => {{ + assert_nonnull!($name); + let $name = Arc::from_raw($name as *const Arc<dyn $ty>); + let rec = with_ffi_arcs!([$($tail)*], $body); + Arc::into_raw($name); + rec + }}; + ( [ $name:ident : $ty:ty, $($tail:tt)* ], $body:block ) => {{ + assert_nonnull!($name); + let $name = Arc::from_raw($name as *const $ty); + let rec = with_ffi_arcs!([$($tail)*], $body); + Arc::into_raw($name); + rec + }}; +} + +/// Marker type for the `vmctx` pointer argument. +/// +/// This type should only be used with [`Vmctx::from_raw()`](struct.Vmctx.html#method.from_raw) or +/// the C API. +#[repr(C)] +pub struct lucet_vmctx { + _unused: [u8; 0], +} + +#[repr(C)] +#[derive(Clone, Copy, Debug, FromPrimitive)] +pub enum lucet_error { + Ok, + InvalidArgument, + RegionFull, + Module, + LimitsExceeded, + NoLinearMemory, + SymbolNotFound, + FuncNotFound, + RuntimeFault, + RuntimeTerminated, + Dl, + InstanceNotReturned, + InstanceNotYielded, + StartYielded, + Internal, + Unsupported, +} + +impl From<Error> for lucet_error { + fn from(e: Error) -> lucet_error { + lucet_error::from(&e) + } +} + +impl From<&Error> for lucet_error { + fn from(e: &Error) -> lucet_error { + match e { + Error::InvalidArgument(_) => lucet_error::InvalidArgument, + Error::RegionFull(_) => lucet_error::RegionFull, + Error::ModuleError(_) => lucet_error::Module, + Error::LimitsExceeded(_) => lucet_error::LimitsExceeded, + Error::NoLinearMemory(_) => lucet_error::NoLinearMemory, + Error::SymbolNotFound(_) => lucet_error::SymbolNotFound, + Error::FuncNotFound(_, _) => lucet_error::FuncNotFound, + Error::RuntimeFault(_) => lucet_error::RuntimeFault, + Error::RuntimeTerminated(_) => lucet_error::RuntimeTerminated, + Error::DlError(_) => lucet_error::Dl, + Error::InstanceNotReturned => lucet_error::InstanceNotReturned, + Error::InstanceNotYielded => lucet_error::InstanceNotYielded, + Error::StartYielded => lucet_error::StartYielded, + Error::InternalError(_) => lucet_error::Internal, + Error::Unsupported(_) => lucet_error::Unsupported, + } + } +} + +#[repr(C)] +pub struct lucet_instance { + _unused: [u8; 0], +} + +#[repr(C)] +pub struct lucet_region { + _unused: [u8; 0], +} + +#[repr(C)] +pub struct lucet_dl_module { + _unused: [u8; 0], +} + +/// Runtime limits for the various memories that back a Lucet instance. +/// +/// Each value is specified in bytes, and must be evenly divisible by the host page size (4K). +#[derive(Clone, Debug)] +#[repr(C)] +pub struct lucet_alloc_limits { + /// Max size of the heap, which can be backed by real memory. (default 1M) + pub heap_memory_size: u64, + /// Size of total virtual memory. (default 8G) + pub heap_address_space_size: u64, + /// Size of the guest stack. (default 128K) + pub stack_size: u64, + /// Size of the globals region in bytes; each global uses 8 bytes. (default 4K) + pub globals_size: u64, + /// Size of the signal stack in bytes. 
(default SIGSTKSZ for Rust release builds, 12K for Rust + /// debug builds) + /// + /// This difference is to account for the greatly increased stack size usage in the signal + /// handler when running without optimizations. + /// + /// Note that debug vs. release mode is determined by `cfg(debug_assertions)`, so if you are + /// specifically enabling Rust debug assertions in your Cargo release builds, the default signal + /// stack will be larger. + pub signal_stack_size: u64, +} + +impl From<Limits> for lucet_alloc_limits { + fn from(limits: Limits) -> lucet_alloc_limits { + (&limits).into() + } +} + +impl From<&Limits> for lucet_alloc_limits { + fn from(limits: &Limits) -> lucet_alloc_limits { + lucet_alloc_limits { + heap_memory_size: limits.heap_memory_size as u64, + heap_address_space_size: limits.heap_address_space_size as u64, + stack_size: limits.stack_size as u64, + globals_size: limits.globals_size as u64, + signal_stack_size: limits.signal_stack_size as u64, + } + } +} + +impl From<lucet_alloc_limits> for Limits { + fn from(limits: lucet_alloc_limits) -> Limits { + (&limits).into() + } +} + +impl From<&lucet_alloc_limits> for Limits { + fn from(limits: &lucet_alloc_limits) -> Limits { + Limits { + heap_memory_size: limits.heap_memory_size as usize, + heap_address_space_size: limits.heap_address_space_size as usize, + stack_size: limits.stack_size as usize, + globals_size: limits.globals_size as usize, + signal_stack_size: limits.signal_stack_size as usize, + } + } +} + +#[repr(C)] +#[derive(Clone, Copy, Debug)] +pub enum lucet_signal_behavior { + Default, + Continue, + Terminate, +} + +impl From<lucet_signal_behavior> for SignalBehavior { + fn from(sb: lucet_signal_behavior) -> SignalBehavior { + sb.into() + } +} + +impl From<&lucet_signal_behavior> for SignalBehavior { + fn from(sb: &lucet_signal_behavior) -> SignalBehavior { + match sb { + lucet_signal_behavior::Default => SignalBehavior::Default, + lucet_signal_behavior::Continue => SignalBehavior::Continue, + lucet_signal_behavior::Terminate => SignalBehavior::Terminate, + } + } +} + +pub type lucet_signal_handler = unsafe extern "C" fn( + inst: *mut lucet_instance, + trap: lucet_result::lucet_trapcode, + signum: c_int, + siginfo: *const libc::siginfo_t, + context: *const c_void, +) -> lucet_signal_behavior; + +pub type lucet_fatal_handler = unsafe extern "C" fn(inst: *mut lucet_instance); + +pub struct CTerminationDetails { + pub details: *mut c_void, +} + +unsafe impl Send for CTerminationDetails {} +unsafe impl Sync for CTerminationDetails {} + +pub struct CYieldedVal { + pub val: *mut c_void, +} + +unsafe impl Send for CYieldedVal {} +unsafe impl Sync for CYieldedVal {} + +pub mod lucet_result { + use super::lucet_error; + use crate::c_api::{lucet_val, CTerminationDetails, CYieldedVal}; + use crate::error::Error; + use crate::instance::{RunResult, TerminationDetails}; + use crate::module::{AddrDetails, TrapCode}; + use libc::{c_uchar, c_void}; + use num_derive::FromPrimitive; + use std::ffi::CString; + use std::ptr; + + impl From<Result<RunResult, Error>> for lucet_result { + fn from(res: Result<RunResult, Error>) -> lucet_result { + match res { + Ok(RunResult::Returned(retval)) => lucet_result { + tag: lucet_result_tag::Returned, + val: lucet_result_val { + returned: retval.into(), + }, + }, + Ok(RunResult::Yielded(val)) => lucet_result { + tag: lucet_result_tag::Yielded, + val: lucet_result_val { + yielded: lucet_yielded { + val: val + .downcast_ref() + .map(|CYieldedVal { val }| *val) + .unwrap_or(ptr::null_mut()), + 
}, + }, + }, + // TODO: test this path; currently our C API tests don't include any faulting tests + Err(Error::RuntimeFault(details)) => lucet_result { + tag: lucet_result_tag::Faulted, + val: lucet_result_val { + fault: lucet_runtime_faulted { + fatal: details.fatal, + trapcode: details.trapcode.into(), + rip_addr: details.rip_addr, + rip_addr_details: details.rip_addr_details.into(), + }, + }, + }, + // TODO: test this path; currently our C API tests don't include any terminating tests + Err(Error::RuntimeTerminated(details)) => lucet_result { + tag: lucet_result_tag::Terminated, + val: lucet_result_val { + terminated: match details { + TerminationDetails::Signal => lucet_terminated { + reason: lucet_terminated_reason::Signal, + provided: ptr::null_mut(), + }, + TerminationDetails::CtxNotFound => lucet_terminated { + reason: lucet_terminated_reason::CtxNotFound, + provided: ptr::null_mut(), + }, + TerminationDetails::YieldTypeMismatch => lucet_terminated { + reason: lucet_terminated_reason::YieldTypeMismatch, + provided: ptr::null_mut(), + }, + TerminationDetails::BorrowError(_) => lucet_terminated { + reason: lucet_terminated_reason::BorrowError, + provided: ptr::null_mut(), + }, + TerminationDetails::Provided(p) => lucet_terminated { + reason: lucet_terminated_reason::Provided, + provided: p + .downcast_ref() + .map(|CTerminationDetails { details }| *details) + .unwrap_or(ptr::null_mut()), + }, + }, + }, + }, + Err(e) => lucet_result { + tag: lucet_result_tag::Errored, + val: lucet_result_val { errored: e.into() }, + }, + } + } + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub struct lucet_result { + pub tag: lucet_result_tag, + pub val: lucet_result_val, + } + + #[repr(C)] + #[derive(Clone, Copy, Debug, FromPrimitive)] + pub enum lucet_result_tag { + Returned, + Yielded, + Faulted, + Terminated, + Errored, + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub union lucet_result_val { + pub returned: lucet_val::lucet_untyped_retval, + pub yielded: lucet_yielded, + pub fault: lucet_runtime_faulted, + pub terminated: lucet_terminated, + pub errored: lucet_error, + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub struct lucet_terminated { + pub reason: lucet_terminated_reason, + pub provided: *mut c_void, + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub enum lucet_terminated_reason { + Signal, + CtxNotFound, + YieldTypeMismatch, + BorrowError, + Provided, + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub struct lucet_yielded { + pub val: *mut c_void, + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub struct lucet_runtime_faulted { + pub fatal: bool, + pub trapcode: lucet_trapcode, + pub rip_addr: libc::uintptr_t, + pub rip_addr_details: lucet_module_addr_details, + } + + #[repr(C)] + #[derive(Clone, Copy, Debug)] + pub enum lucet_trapcode { + StackOverflow, + HeapOutOfBounds, + OutOfBounds, + IndirectCallToNull, + BadSignature, + IntegerOverflow, + IntegerDivByZero, + BadConversionToInteger, + Interrupt, + TableOutOfBounds, + Unreachable, + Unknown, + } + + impl From<Option<TrapCode>> for lucet_trapcode { + fn from(ty: Option<TrapCode>) -> lucet_trapcode { + (&ty).into() + } + } + + impl From<&Option<TrapCode>> for lucet_trapcode { + fn from(ty: &Option<TrapCode>) -> lucet_trapcode { + if let Some(ty) = ty { + match ty { + TrapCode::StackOverflow => lucet_trapcode::StackOverflow, + TrapCode::HeapOutOfBounds => lucet_trapcode::HeapOutOfBounds, + TrapCode::OutOfBounds => lucet_trapcode::OutOfBounds, + TrapCode::IndirectCallToNull => lucet_trapcode::IndirectCallToNull, + TrapCode::BadSignature 
=> lucet_trapcode::BadSignature, + TrapCode::IntegerOverflow => lucet_trapcode::IntegerOverflow, + TrapCode::IntegerDivByZero => lucet_trapcode::IntegerDivByZero, + TrapCode::BadConversionToInteger => lucet_trapcode::BadConversionToInteger, + TrapCode::Interrupt => lucet_trapcode::Interrupt, + TrapCode::TableOutOfBounds => lucet_trapcode::TableOutOfBounds, + TrapCode::Unreachable => lucet_trapcode::Unreachable, + } + } else { + lucet_trapcode::Unknown + } + } + } + + const ADDR_DETAILS_NAME_LEN: usize = 256; + + /// Half a kilobyte is too substantial for `Copy`, but we must have it because [unions with + /// non-`Copy` fields are unstable](https://github.com/rust-lang/rust/issues/32836). + #[repr(C)] + #[derive(Clone, Copy)] + pub struct lucet_module_addr_details { + pub module_code_resolvable: bool, + pub in_module_code: bool, + pub file_name: [c_uchar; ADDR_DETAILS_NAME_LEN], + pub sym_name: [c_uchar; ADDR_DETAILS_NAME_LEN], + } + + impl Default for lucet_module_addr_details { + fn default() -> Self { + lucet_module_addr_details { + module_code_resolvable: false, + in_module_code: false, + file_name: [0; ADDR_DETAILS_NAME_LEN], + sym_name: [0; ADDR_DETAILS_NAME_LEN], + } + } + } + + impl From<Option<AddrDetails>> for lucet_module_addr_details { + fn from(details: Option<AddrDetails>) -> Self { + /// Convert a string into C-compatible bytes, truncate it to length + /// `ADDR_DETAILS_NAME_LEN`, and make sure it has a trailing nul. + fn trunc_c_str_bytes(s: &str) -> Vec<u8> { + let s = CString::new(s); + let mut bytes = s.ok().map(|s| s.into_bytes_with_nul()).unwrap_or(vec![0]); + bytes.truncate(ADDR_DETAILS_NAME_LEN); + // we always have at least the 0, so this `last` can be unwrapped + *bytes.last_mut().unwrap() = 0; + bytes + } + + let mut ret = details + .as_ref() + .map(|details| lucet_module_addr_details { + module_code_resolvable: true, + in_module_code: details.in_module_code, + file_name: [0; ADDR_DETAILS_NAME_LEN], + sym_name: [0; ADDR_DETAILS_NAME_LEN], + }) + .unwrap_or_default(); + + // get truncated C-compatible bytes for each string, or "\0" if they're not present + let file_name_bytes = details + .as_ref() + .and_then(|details| details.file_name.as_ref().map(|s| trunc_c_str_bytes(s))) + .unwrap_or_else(|| vec![0]); + let sym_name_bytes = details + .and_then(|details| details.sym_name.as_ref().map(|s| trunc_c_str_bytes(s))) + .unwrap_or_else(|| vec![0]); + + // copy the bytes into the array, making sure to copy only as many as are in the string + ret.file_name[0..file_name_bytes.len()].copy_from_slice(file_name_bytes.as_slice()); + ret.sym_name[0..sym_name_bytes.len()].copy_from_slice(sym_name_bytes.as_slice()); + + ret + } + } +} + +pub mod lucet_val { + use crate::val::{UntypedRetVal, UntypedRetValInternal, Val}; + use libc::{c_char, c_void}; + + // Note on the value associated with each type: the most significant bits represent the "class" + // of the type (1: a C pointer, 2: something unsigned that fits in 64 bits, 3: something signed + // that fits in 64 bits, 4: f32, 5: f64). The remain bits can be anything as long as it is + // unique. 
+ #[repr(C)] + #[derive(Clone, Copy, Debug)] + pub enum lucet_val_type { + C_Ptr, // = (1 << 16) | 0x0100, + GuestPtr, // = (2 << 16) | 0x0101, + U8, // = (2 << 16) | 0x0201, + U16, // = (2 << 16) | 0x0202, + U32, // = (2 << 16) | 0x0203, + U64, // = (2 << 16) | 0x0204, + I8, // = (3 << 16) | 0x0300, + I16, // = (3 << 16) | 0x0301, + I32, // = (3 << 16) | 0x0302, + I64, // = (3 << 16) | 0x0303, + USize, // = (2 << 16) | 0x0400, + ISize, // = (3 << 16) | 0x0401, + Bool, // = (2 << 16) | 0x0700, + F32, // = (4 << 16) | 0x0800, + F64, // = (5 << 16) | 0x0801, + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub union lucet_val_inner_val { + as_c_ptr: *mut c_void, // (1 << 16) + as_u64: u64, // (2 << 16) + as_i64: i64, // (3 << 16) + as_f32: f32, // (4 << 16) + as_f64: f64, // (5 << 16) + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub struct lucet_val { + ty: lucet_val_type, + inner_val: lucet_val_inner_val, + } + + impl From<lucet_val> for Val { + fn from(val: lucet_val) -> Val { + (&val).into() + } + } + + impl From<&lucet_val> for Val { + fn from(val: &lucet_val) -> Val { + match val.ty { + lucet_val_type::C_Ptr => Val::CPtr(unsafe { val.inner_val.as_u64 } as _), + lucet_val_type::GuestPtr => Val::GuestPtr(unsafe { val.inner_val.as_u64 } as _), + lucet_val_type::U8 => Val::U8(unsafe { val.inner_val.as_u64 } as _), + lucet_val_type::U16 => Val::U16(unsafe { val.inner_val.as_u64 } as _), + lucet_val_type::U32 => Val::U32(unsafe { val.inner_val.as_u64 } as _), + lucet_val_type::U64 => Val::U64(unsafe { val.inner_val.as_u64 } as _), + lucet_val_type::I8 => Val::I16(unsafe { val.inner_val.as_i64 } as _), + lucet_val_type::I16 => Val::I32(unsafe { val.inner_val.as_i64 } as _), + lucet_val_type::I32 => Val::I32(unsafe { val.inner_val.as_i64 } as _), + lucet_val_type::I64 => Val::I64(unsafe { val.inner_val.as_i64 } as _), + lucet_val_type::USize => Val::USize(unsafe { val.inner_val.as_u64 } as _), + lucet_val_type::ISize => Val::ISize(unsafe { val.inner_val.as_i64 } as _), + lucet_val_type::Bool => Val::Bool(unsafe { val.inner_val.as_u64 } != 0), + lucet_val_type::F32 => Val::F32(unsafe { val.inner_val.as_f32 } as _), + lucet_val_type::F64 => Val::F64(unsafe { val.inner_val.as_f64 } as _), + } + } + } + + impl From<Val> for lucet_val { + fn from(val: Val) -> Self { + (&val).into() + } + } + + impl From<&Val> for lucet_val { + fn from(val: &Val) -> Self { + match val { + Val::CPtr(a) => lucet_val { + ty: lucet_val_type::C_Ptr, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::GuestPtr(a) => lucet_val { + ty: lucet_val_type::GuestPtr, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::U8(a) => lucet_val { + ty: lucet_val_type::U8, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::U16(a) => lucet_val { + ty: lucet_val_type::U16, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::U32(a) => lucet_val { + ty: lucet_val_type::U32, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::U64(a) => lucet_val { + ty: lucet_val_type::U64, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::I8(a) => lucet_val { + ty: lucet_val_type::I8, + inner_val: lucet_val_inner_val { as_i64: *a as _ }, + }, + Val::I16(a) => lucet_val { + ty: lucet_val_type::I16, + inner_val: lucet_val_inner_val { as_i64: *a as _ }, + }, + Val::I32(a) => lucet_val { + ty: lucet_val_type::I32, + inner_val: lucet_val_inner_val { as_i64: *a as _ }, + }, + Val::I64(a) => lucet_val { + ty: lucet_val_type::I64, + inner_val: lucet_val_inner_val { as_i64: *a as _ }, 
+ }, + Val::USize(a) => lucet_val { + ty: lucet_val_type::USize, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::ISize(a) => lucet_val { + ty: lucet_val_type::ISize, + inner_val: lucet_val_inner_val { as_i64: *a as _ }, + }, + Val::Bool(a) => lucet_val { + ty: lucet_val_type::Bool, + inner_val: lucet_val_inner_val { as_u64: *a as _ }, + }, + Val::F32(a) => lucet_val { + ty: lucet_val_type::F32, + inner_val: lucet_val_inner_val { as_f32: *a as _ }, + }, + Val::F64(a) => lucet_val { + ty: lucet_val_type::F64, + inner_val: lucet_val_inner_val { as_f64: *a as _ }, + }, + } + } + } + + #[repr(C)] + #[derive(Clone, Copy, Debug)] + pub struct lucet_untyped_retval { + pub fp: [c_char; 16], + pub gp: [c_char; 8], + } + + #[repr(C)] + #[derive(Clone, Copy)] + pub union lucet_retval_gp { + pub as_untyped: [c_char; 8], + pub as_c_ptr: *mut c_void, + pub as_u64: u64, + pub as_i64: i64, + } + + impl From<UntypedRetVal> for lucet_untyped_retval { + fn from(retval: UntypedRetVal) -> lucet_untyped_retval { + let mut v = lucet_untyped_retval { + fp: [0; 16], + gp: [0; 8], + }; + unsafe { + core::arch::x86_64::_mm_storeu_ps( + v.fp.as_mut().as_mut_ptr() as *mut f32, + retval.fp(), + ); + *(v.gp.as_mut().as_mut_ptr() as *mut u64) = retval.gp(); + } + v + } + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/context_asm.S b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/context_asm.S new file mode 100644 index 0000000000..66fbfd4859 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/context_asm.S @@ -0,0 +1,186 @@ +/* + The lucet_context_swap function is taken from Xudong Huang's + generator-rs project. Its MIT license is provided below. + + Copyright (c) 2017 Xudong Huang + + Permission is hereby granted, free of charge, to any + person obtaining a copy of this software and associated + documentation files (the "Software"), to deal in the + Software without restriction, including without + limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software + is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice + shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + +*/ + +.text +.globl lucet_context_bootstrap +#ifdef __ELF__ +.type lucet_context_bootstrap,@function +#else +.globl _lucet_context_bootstrap +#endif +.align 16 +lucet_context_bootstrap: +_lucet_context_bootstrap: + /* Move each of the context-saved registers into the corresponding call + * argument register. 
See lucet_register enum for docs */ + mov %r12, %rsi + mov %r13, %rdx + mov %r14, %rcx + mov %r15, %r8 + mov %rbx, %r9 + /* the next thing on the stack is the guest function - return to it */ + ret +#ifdef __ELF__ +.size lucet_context_bootstrap,.-lucet_context_bootstrap +#endif + +.text +.globl lucet_context_backstop +#ifdef __ELF__ +.type lucet_context_backstop,@function +#else +.globl _lucet_context_backstop +#endif +.align 16 +lucet_context_backstop: +_lucet_context_backstop: + mov -16(%rbp), %rdi /* parent context to arg 1 */ + mov -8(%rbp), %rsi /* own context to arg 2 */ + mov %rax, (8*8 + 8*16 + 8*0)(%rdi) /* store return values before swapping back -- offset is offsetof(struct lucet_context, retvals) */ + mov %rdx, (8*8 + 8*16 + 8*1)(%rdi) + movdqu %xmm0, (8*8 + 8*16 + 8*2)(%rdi) /* floating-point return value */ +#ifdef __ELF__ + jmp lucet_context_swap@PLT +#else + jmp lucet_context_swap +#endif +#ifdef __ELF__ +.size lucet_context_backstop,.-lucet_context_backstop +#endif + +.text +.globl lucet_context_swap +#ifdef __ELF__ +.type lucet_context_swap,@function +#else +.globl _lucet_context_swap +#endif +.align 16 +lucet_context_swap: +_lucet_context_swap: + // store everything in offsets from rdi (1st arg) + mov %rbx, (0*8)(%rdi) + mov %rsp, (1*8)(%rdi) + mov %rbp, (2*8)(%rdi) + mov %rdi, (3*8)(%rdi) + mov %r12, (4*8)(%rdi) + mov %r13, (5*8)(%rdi) + mov %r14, (6*8)(%rdi) + mov %r15, (7*8)(%rdi) + + movdqu %xmm0, (8*8 + 0*16)(%rdi) + movdqu %xmm1, (8*8 + 1*16)(%rdi) + movdqu %xmm2, (8*8 + 2*16)(%rdi) + movdqu %xmm3, (8*8 + 3*16)(%rdi) + movdqu %xmm4, (8*8 + 4*16)(%rdi) + movdqu %xmm5, (8*8 + 5*16)(%rdi) + movdqu %xmm6, (8*8 + 6*16)(%rdi) + movdqu %xmm7, (8*8 + 7*16)(%rdi) + + // load everything from offsets from rsi (2nd arg) + mov (0*8)(%rsi), %rbx + mov (1*8)(%rsi), %rsp + mov (2*8)(%rsi), %rbp + mov (3*8)(%rsi), %rdi + mov (4*8)(%rsi), %r12 + mov (5*8)(%rsi), %r13 + mov (6*8)(%rsi), %r14 + mov (7*8)(%rsi), %r15 + + movdqu (8*8 + 0*16)(%rsi), %xmm0 + movdqu (8*8 + 1*16)(%rsi), %xmm1 + movdqu (8*8 + 2*16)(%rsi), %xmm2 + movdqu (8*8 + 3*16)(%rsi), %xmm3 + movdqu (8*8 + 4*16)(%rsi), %xmm4 + movdqu (8*8 + 5*16)(%rsi), %xmm5 + movdqu (8*8 + 6*16)(%rsi), %xmm6 + movdqu (8*8 + 7*16)(%rsi), %xmm7 + + ret +#ifdef __ELF__ +.size lucet_context_swap,.-lucet_context_swap +#endif + +.text +.globl lucet_context_set +#ifdef __ELF__ +.type lucet_context_set,@function +#else +.globl _lucet_context_set +#endif +.align 16 +lucet_context_set: +_lucet_context_set: + // load everything from offsets from rdi (1st arg) + mov (0*8)(%rdi), %rbx + mov (1*8)(%rdi), %rsp + mov (2*8)(%rdi), %rbp + mov (4*8)(%rdi), %r12 + mov (5*8)(%rdi), %r13 + mov (6*8)(%rdi), %r14 + mov (7*8)(%rdi), %r15 + + movdqu (8*8 + 0*16)(%rdi), %xmm0 + movdqu (8*8 + 1*16)(%rdi), %xmm1 + movdqu (8*8 + 2*16)(%rdi), %xmm2 + movdqu (8*8 + 3*16)(%rdi), %xmm3 + movdqu (8*8 + 4*16)(%rdi), %xmm4 + movdqu (8*8 + 5*16)(%rdi), %xmm5 + movdqu (8*8 + 6*16)(%rdi), %xmm6 + movdqu (8*8 + 7*16)(%rdi), %xmm7 + + // load rdi from itself last + mov (3*8)(%rdi), %rdi + ret +#ifdef __ELF__ +.size lucet_context_set,.-lucet_context_set +#endif + +.text +.globl lucet_get_current_stack_pointer +#ifdef __ELF__ +.type lucet_get_current_stack_pointer,@function +#else +.globl _lucet_get_current_stack_pointer +#endif +.align 16 +lucet_get_current_stack_pointer: +_lucet_get_current_stack_pointer: + mov %rsp, %rax + ret +#ifdef __ELF__ +.size lucet_get_current_stack_pointer,.-lucet_get_current_stack_pointer +#endif + +/* Mark that we don't need executable stack. 
*/ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/mod.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/mod.rs new file mode 100644 index 0000000000..247b6d9e1d --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/mod.rs @@ -0,0 +1,645 @@ +#![allow(improper_ctypes)] + +#[cfg(test)] +mod tests; + +use crate::val::{val_to_reg, val_to_stack, RegVal, UntypedRetVal, Val}; +use failure::Fail; +use nix; +use nix::sys::signal; +use std::arch::x86_64::{__m128, _mm_setzero_ps}; +use std::mem; +use std::ptr::NonNull; +use xfailure::xbail; + +/// Callee-saved general-purpose registers in the AMD64 ABI. +/// +/// # Layout +/// +/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at +/// hard-coded offsets. +/// +/// # TODOs +/// +/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon: +/// <https://doc.rust-lang.org/nomicon/other-reprs.html#reprpacked>. Since the members are all +/// `u64`, this should be fine? +#[repr(C)] +#[derive(Clone)] +struct GpRegs { + rbx: u64, + rsp: u64, + rbp: u64, + rdi: u64, + r12: u64, + r13: u64, + r14: u64, + r15: u64, +} + +impl GpRegs { + fn new() -> Self { + GpRegs { + rbx: 0, + rsp: 0, + rbp: 0, + rdi: 0, + r12: 0, + r13: 0, + r14: 0, + r15: 0, + } + } +} + +/// Floating-point argument registers in the AMD64 ABI. +/// +/// # Layout +/// +/// `repr(C)` is required to preserve the ordering of members, which are read by the assembly at +/// hard-coded offsets. +/// +/// # TODOs +/// +/// - Unlike the C code, this doesn't use the `packed` repr due to warnings in the Nomicon: +/// <https://doc.rust-lang.org/nomicon/other-reprs.html#reprpacked>. Since the members are all +/// `__m128`, this should be fine? +#[repr(C)] +#[derive(Clone)] +struct FpRegs { + xmm0: __m128, + xmm1: __m128, + xmm2: __m128, + xmm3: __m128, + xmm4: __m128, + xmm5: __m128, + xmm6: __m128, + xmm7: __m128, +} + +impl FpRegs { + fn new() -> Self { + let zero = unsafe { _mm_setzero_ps() }; + FpRegs { + xmm0: zero, + xmm1: zero, + xmm2: zero, + xmm3: zero, + xmm4: zero, + xmm5: zero, + xmm6: zero, + xmm7: zero, + } + } +} + +/// Everything we need to make a context switch: a signal mask, and the registers and return values +/// that are manipulated directly by assembly code. +/// +/// # Layout +/// +/// The `repr(C)` and order of fields in this struct are very important, as the assembly code reads +/// and writes hard-coded offsets from the base of the struct. Without `repr(C)`, Rust is free to +/// reorder the fields. +/// +/// Contexts are also `repr(align(64))` in order to align to cache lines and minimize contention +/// when running multiple threads. +/// +/// # Movement +/// +/// `Context` values must not be moved once they've been initialized. Contexts contain a pointer to +/// their stack, which in turn contains a pointer back to the context. If the context gets moved, +/// that pointer becomes invalid, and the behavior of returning from that context becomes undefined. +#[repr(C, align(64))] +#[derive(Clone)] +pub struct Context { + gpr: GpRegs, + fpr: FpRegs, + retvals_gp: [u64; 2], + retval_fp: __m128, + sigset: signal::SigSet, +} + +impl Context { + /// Create an all-zeroed `Context`. 
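One consequence of the layout rules above is that the hard-coded offsets in context_asm.S and the #[repr(C)] field order of `Context` must agree exactly. As a reading of the two files side by side (double-checked against the `context_offsets_correct` test later in this diff, not an authoritative statement of the ABI):

    (0*8) .. (7*8)(%rdi)               -> Context.gpr         GpRegs: 8 x u64    = 64 bytes
    (8*8 + 0*16) .. (8*8 + 7*16)(%rdi) -> Context.fpr         FpRegs: 8 x __m128 = 128 bytes
    (8*8 + 8*16 + 8*0), (.. + 8*1)     -> Context.retvals_gp  rax / rdx stored by the backstop
    (8*8 + 8*16 + 8*2)                 -> Context.retval_fp   xmm0 stored by the backstop

Reordering fields in `GpRegs`, `FpRegs`, or `Context` therefore requires updating the assembly and the offset test in lockstep.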
+ pub fn new() -> Self { + Context { + gpr: GpRegs::new(), + fpr: FpRegs::new(), + retvals_gp: [0; 2], + retval_fp: unsafe { _mm_setzero_ps() }, + sigset: signal::SigSet::empty(), + } + } +} + +/// A wrapper around a `Context`, primarily meant for use in test code. +/// +/// Users of this library interact with contexts implicitly via `Instance` values, but for testing +/// the context code independently, it is helpful to use contexts directly. +/// +/// # Movement of `ContextHandle` +/// +/// `ContextHandle` keeps a pointer to a `Context` rather than keeping all of the data directly as +/// fields in order to have better control over where that data lives in memory. We always want that +/// data to be heap-allocated, and to never move once it has been initialized. The `ContextHandle`, +/// by contrast, should be treated like a normal Rust value with no such restrictions. +/// +/// Until the `Unpin` marker trait arrives in stable Rust, it is difficult to enforce this with the +/// type system alone, so we use a bit of unsafety and (hopefully) clever API design to ensure that +/// the data cannot be moved. +/// +/// We create the `Context` within a box to allocate it on the heap, then convert it into a raw +/// pointer to relinquish ownership. When accessing the internal structure via the `DerefMut` trait, +/// data must not be moved out of the `Context` with functions like `mem::replace`. +/// +/// # Layout +/// +/// Foreign code accesses the `internal` pointer in tests, so it is important that it is the first +/// member, and that the struct is `repr(C)`. +#[repr(C)] +pub struct ContextHandle { + internal: NonNull<Context>, +} + +impl Drop for ContextHandle { + fn drop(&mut self) { + unsafe { + // create a box from the pointer so that it'll get dropped + // and we won't leak `Context`s + Box::from_raw(self.internal.as_ptr()); + } + } +} + +impl std::ops::Deref for ContextHandle { + type Target = Context; + fn deref(&self) -> &Self::Target { + unsafe { self.internal.as_ref() } + } +} + +impl std::ops::DerefMut for ContextHandle { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { self.internal.as_mut() } + } +} + +impl ContextHandle { + /// Create an all-zeroed `ContextHandle`. + pub fn new() -> Self { + let internal = NonNull::new(Box::into_raw(Box::new(Context::new()))) + .expect("Box::into_raw should never return NULL"); + ContextHandle { internal } + } + + pub fn create_and_init( + stack: &mut [u64], + parent: &mut ContextHandle, + fptr: usize, + args: &[Val], + ) -> Result<ContextHandle, Error> { + let mut child = ContextHandle::new(); + Context::init(stack, parent, &mut child, fptr, args)?; + Ok(child) + } +} + +impl Context { + /// Initialize a new child context. + /// + /// - `stack`: The stack for the child; *must be 16-byte aligned*. + /// + /// - `parent`: The context that the child will return to. Since `swap` initializes the fields + /// in its `from` argument, this will typically be an empty context from `ContextHandle::zero()` + /// that will later be passed to `swap`. + /// + /// - `child`: The context for the child. The fields of this structure will be overwritten by + /// `init`. + /// + /// - `fptr`: A pointer to the entrypoint for the child. Note that while the type signature here + /// is for a void function of no arguments (equivalent to `void (*fptr)(void)` in C), the + /// entrypoint actually can be a function of any argument or return type that corresponds to a + /// `val::Val` variant. + /// + /// - `args`: A slice of arguments for the `fptr` entrypoint. 
These must match the number and + /// types of `fptr`'s actual arguments exactly, otherwise swapping to this context will cause + /// undefined behavior. + /// + /// # Errors + /// + /// - `Error::UnalignedStack` if the _end_ of `stack` is not 16-byte aligned. + /// + /// # Examples + /// + /// ## C entrypoint + /// + /// This example initializes a context that will start in a C function `entrypoint` when first + /// swapped to. + /// + /// ```c + /// void entrypoint(uint64_t x, float y); + /// ``` + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// # use lucet_runtime_internals::val::Val; + /// extern "C" { fn entrypoint(x: u64, y: f32); } + /// // allocating an even number of `u64`s seems to reliably yield + /// // properly aligned stacks, but TODO do better + /// let mut stack = vec![0u64; 1024].into_boxed_slice(); + /// let mut parent = Context::new(); + /// let mut child = Context::new(); + /// let res = Context::init( + /// &mut *stack, + /// &mut parent, + /// &mut child, + /// entrypoint as usize, + /// &[Val::U64(120), Val::F32(3.14)], + /// ); + /// assert!(res.is_ok()); + /// ``` + /// + /// ## Rust entrypoint + /// + /// This example initializes a context that will start in a Rust function `entrypoint` when + /// first swapped to. Note that we mark `entrypoint` as `extern "C"` to make sure it is compiled + /// with C calling conventions. + /// + /// ```no_run + /// # use lucet_runtime_internals::context::{Context, ContextHandle}; + /// # use lucet_runtime_internals::val::Val; + /// extern "C" fn entrypoint(x: u64, y: f32) { } + /// // allocating an even number of `u64`s seems to reliably yield + /// // properly aligned stacks, but TODO do better + /// let mut stack = vec![0u64; 1024].into_boxed_slice(); + /// let mut parent = ContextHandle::new(); + /// let mut child = Context::new(); + /// let res = Context::init( + /// &mut *stack, + /// &mut parent, + /// &mut child, + /// entrypoint as usize, + /// &[Val::U64(120), Val::F32(3.14)], + /// ); + /// assert!(res.is_ok()); + /// ``` + pub fn init( + stack: &mut [u64], + parent: &mut Context, + child: &mut Context, + fptr: usize, + args: &[Val], + ) -> Result<(), Error> { + if !stack_is_aligned(stack) { + xbail!(Error::UnalignedStack); + } + + let mut gp_args_ix = 0; + let mut fp_args_ix = 0; + + let mut spilled_args = vec![]; + + for arg in args { + match val_to_reg(arg) { + RegVal::GpReg(v) => { + if gp_args_ix >= 6 { + spilled_args.push(arg); + } else { + child.bootstrap_gp_ix_arg(gp_args_ix, v); + gp_args_ix += 1; + } + } + RegVal::FpReg(v) => { + if fp_args_ix >= 8 { + spilled_args.push(arg); + } else { + child.bootstrap_fp_ix_arg(fp_args_ix, v); + fp_args_ix += 1; + } + } + } + } + + // the top of the stack; should not be used as an index, always subtracted from + let sp = stack.len(); + + let stack_start = 3 // the bootstrap ret addr, then guest func ret addr, then the backstop ret addr + + spilled_args.len() // then any args to guest func that don't fit in registers + + spilled_args.len() % 2 // padding to keep the stack 16-byte aligned when we spill an odd number of spilled arguments + + 4; // then the backstop args and terminator + + // stack-saved arguments start 3 below the top of the stack + // (TODO: a diagram would be great here) + let mut stack_args_ix = 3; + + // If there are more additional args to the guest function than available registers, they + // have to be pushed on the stack underneath the return address. 
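// A sketch of the finished stack (the diagram the TODO above asks for), read off
// the code below; indices are relative to `sp = stack.len()` and addresses grow
// downward toward `sp - stack_start`:
//
//   stack[sp - 1]                   0                        \ call-chain terminator
//   stack[sp - 2]                   0                        / (child.gpr.rbp points here)
//   stack[sp - 3]                   parent as *mut Context   \ frame "arguments" read by
//   stack[sp - 4]                   child  as *mut Context   /   lucet_context_backstop
//   [one unused word when an odd number of arguments spill, keeping 16-byte alignment]
//   stack[sp - stack_start + 3 ..]  spilled guest arguments (7th+ integer / 9th+ float)
//   stack[sp - stack_start + 2]     lucet_context_backstop   (guest function returns here)
//   stack[sp - stack_start + 1]     fptr                     (bootstrap returns into the guest)
//   stack[sp - stack_start + 0]     lucet_context_bootstrap  (child.gpr.rsp points here; the
//                                                             first swap to this context
//                                                             "returns" into the bootstrap)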
+ for arg in spilled_args { + let v = val_to_stack(arg); + stack[sp + stack_args_ix - stack_start] = v; + stack_args_ix += 1; + } + + // Prepare the stack for a swap context that lands in the bootstrap function swap will ret + // into the bootstrap function + stack[sp + 0 - stack_start] = lucet_context_bootstrap as u64; + + // The bootstrap function returns into the guest function, fptr + stack[sp + 1 - stack_start] = fptr as u64; + + // the guest function returns into lucet_context_backstop. + stack[sp + 2 - stack_start] = lucet_context_backstop as u64; + + // if fptr ever returns, it returns to the backstop func. backstop needs two arguments in + // its frame - first the context we are switching *out of* (which is also the one we are + // creating right now) and the ctx we switch back into. Note *parent might not be a valid + // ctx now, but it should be when this ctx is started. + stack[sp - 4] = child as *mut Context as u64; + stack[sp - 3] = parent as *mut Context as u64; + // Terminate the call chain. + stack[sp - 2] = 0; + stack[sp - 1] = 0; + + // RSP, RBP, and sigset still remain to be initialized. + // Stack pointer: this has the return address of the first function to be run on the swap. + child.gpr.rsp = &mut stack[sp - stack_start] as *mut u64 as u64; + // Frame pointer: this is only used by the backstop code. It uses it to locate the ctx and + // parent arguments set above. + child.gpr.rbp = &mut stack[sp - 2] as *mut u64 as u64; + + // Read the sigprocmask to be restored if we ever need to jump out of a signal handler. If + // this isn't possible, die. + signal::sigprocmask( + signal::SigmaskHow::SIG_SETMASK, + None, + Some(&mut child.sigset), + ) + .expect("sigprocmask could not be retrieved"); + + Ok(()) + } + + /// Save the current context, and swap to another context. + /// + /// - `from`: the current context is written here + /// - `to`: the context to read from and swap to + /// + /// The current registers, including the stack pointer, are saved to `from`. The current stack + /// pointer is then replaced by the value saved in `to.gpr.rsp`, so when `swap` returns, it will + /// return to the pointer saved in `to`'s stack. + /// + /// If `to` was freshly initialized by passing it as the child to `init`, `swap` will return to + /// the function that bootstraps arguments and then calls the entrypoint that was passed to + /// `init`. + /// + /// If `to` was previously passed as the `from` argument to another call to `swap`, the program + /// will return as if from that _first_ call to `swap`. + /// + /// # Safety + /// + /// The value in `to.gpr.rsp` must be a valid pointer into the stack that was originally passed + /// to `init` when the `to` context was initialized, or to the original stack created implicitly + /// by Rust. + /// + /// The registers saved in the `to` context must match the arguments expected by the entrypoint + /// of the function passed to `init`, or be unaltered from when they were previously written by + /// `swap`. 
+ /// + /// If `from` is never returned to, `swap`ped to, or `set` to, resources could leak due to + /// implicit `drop`s never being called: + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// fn f(x: Box<u64>, child: &Context) { + /// let mut xs = vec![187; 410757864530]; + /// xs[0] += *x; + /// + /// // manually drop here to avoid leaks + /// drop(x); + /// drop(xs); + /// + /// let mut parent = Context::new(); + /// unsafe { Context::swap(&mut parent, child); } + /// // implicit `drop(x)` and `drop(xs)` here never get called unless we swap back + /// } + /// ``` + /// + /// # Examples + /// + /// The typical case is to initialize a new child context, and then swap to it from a zeroed + /// parent context. + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// # extern "C" fn entrypoint() {} + /// # let mut stack = vec![0u64; 1024].into_boxed_slice(); + /// let mut parent = Context::new(); + /// let mut child = Context::new(); + /// Context::init( + /// &mut stack, + /// &mut parent, + /// &mut child, + /// entrypoint as usize, + /// &[], + /// ).unwrap(); + /// + /// unsafe { Context::swap(&mut parent, &child); } + /// ``` + #[inline] + pub unsafe fn swap(from: &mut Context, to: &Context) { + lucet_context_swap(from as *mut Context, to as *const Context); + } + + /// Swap to another context without saving the current context. + /// + /// - `to`: the context to read from and swap to + /// + /// The current registers, including the stack pointer, are discarded. The current stack pointer + /// is then replaced by the value saved in `to.gpr.rsp`, so when `swap` returns, it will return + /// to the pointer saved in `to`'s stack. + /// + /// If `to` was freshly initialized by passing it as the child to `init`, `swap` will return to + /// the function that bootstraps arguments and then calls the entrypoint that was passed to + /// `init`. + /// + /// If `to` was previously passed as the `from` argument to another call to `swap`, the program + /// will return as if from the call to `swap`. + /// + /// # Safety + /// + /// ## Stack and registers + /// + /// The value in `to.gpr.rsp` must be a valid pointer into the stack that was originally passed + /// to `init` when the context was initialized, or to the original stack created implicitly by + /// Rust. + /// + /// The registers saved in `to` must match the arguments expected by the entrypoint of the + /// function passed to `init`, or be unaltered from when they were previously written by `swap`. + /// + /// ## Returning + /// + /// If `to` is a context freshly initialized by `init`, at least one of the following must be + /// true, otherwise the program will return to a context with uninitialized registers: + /// + /// - The `fptr` argument to `init` is a function that never returns + /// + /// - The `parent` argument to `init` was passed as the `from` argument to `swap` before this + /// call to `set` + /// + /// ## Resource leaks + /// + /// Since control flow will not return to the calling context, care must be taken to ensure that + /// any resources owned by the calling context are manually dropped. 
The implicit `drop`s + /// inserted by Rust at the end of the calling scope will not be reached: + /// + /// ```no_run + /// # use lucet_runtime_internals::context::Context; + /// fn f(x: Box<u64>, child: &Context) { + /// let mut xs = vec![187; 410757864530]; + /// xs[0] += *x; + /// + /// // manually drop here to avoid leaks + /// drop(x); + /// drop(xs); + /// + /// unsafe { Context::set(child); } + /// // implicit `drop(x)` and `drop(xs)` here never get called + /// } + /// ``` + #[inline] + pub unsafe fn set(to: &Context) -> ! { + lucet_context_set(to as *const Context); + } + + #[inline] + pub unsafe fn get_current_stack_pointer() -> u64 { + return lucet_get_current_stack_pointer(); + } + + /// Like `set`, but also manages the return from a signal handler. + /// + /// TODO: the return type of this function should really be `Result<!, nix::Error>`, but using + /// `!` as a type like that is currently experimental. + #[inline] + pub unsafe fn set_from_signal(to: &Context) -> Result<(), nix::Error> { + signal::sigprocmask(signal::SigmaskHow::SIG_SETMASK, Some(&to.sigset), None)?; + Context::set(to) + } + + /// Clear (zero) return values. + pub fn clear_retvals(&mut self) { + self.retvals_gp = [0; 2]; + let zero = unsafe { _mm_setzero_ps() }; + self.retval_fp = zero; + } + + /// Get the general-purpose return value at index `idx`. + /// + /// If this method is called before the context has returned from its original entrypoint, the + /// result will be `0`. + pub fn get_retval_gp(&self, idx: usize) -> u64 { + self.retvals_gp[idx] + } + + /// Get the floating point return value. + /// + /// If this method is called before the context has returned from its original entrypoint, the + /// result will be `0.0`. + pub fn get_retval_fp(&self) -> __m128 { + self.retval_fp + } + + /// Get the return value as an `UntypedRetVal`. + /// + /// This combines the 0th general-purpose return value, and the single floating-point return value. + pub fn get_untyped_retval(&self) -> UntypedRetVal { + let gp = self.get_retval_gp(0); + let fp = self.get_retval_fp(); + UntypedRetVal::new(gp, fp) + } + + /// Put one of the first 6 general-purpose arguments into a `Context` register. + /// + /// Although these registers are callee-saved registers rather than argument registers, they get + /// moved into argument registers by `lucet_context_bootstrap`. + /// + /// - `ix`: ABI general-purpose argument number + /// - `arg`: argument value + fn bootstrap_gp_ix_arg(&mut self, ix: usize, arg: u64) { + match ix { + // rdi lives across bootstrap + 0 => self.gpr.rdi = arg, + // bootstraps into rsi + 1 => self.gpr.r12 = arg, + // bootstraps into rdx + 2 => self.gpr.r13 = arg, + // bootstraps into rcx + 3 => self.gpr.r14 = arg, + // bootstraps into r8 + 4 => self.gpr.r15 = arg, + // bootstraps into r9 + 5 => self.gpr.rbx = arg, + _ => panic!("unexpected gp register index {}", ix), + } + } + + /// Put one of the first 8 floating-point arguments into a `Context` register. + /// + /// - `ix`: ABI floating-point argument number + /// - `arg`: argument value + fn bootstrap_fp_ix_arg(&mut self, ix: usize, arg: __m128) { + match ix { + 0 => self.fpr.xmm0 = arg, + 1 => self.fpr.xmm1 = arg, + 2 => self.fpr.xmm2 = arg, + 3 => self.fpr.xmm3 = arg, + 4 => self.fpr.xmm4 = arg, + 5 => self.fpr.xmm5 = arg, + 6 => self.fpr.xmm6 = arg, + 7 => self.fpr.xmm7 = arg, + _ => panic!("unexpected fp register index {}", ix), + } + } +} + +/// Errors that may arise when working with contexts. 
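To see how the return-value accessors defined above fit together, here is a minimal sketch of running a child function to completion and reading its result back out of the child `Context`. It closely follows the `returning_add_u64` test later in this diff and the crate paths used in the doc examples above; it is illustrative, not additional library API.

    use lucet_runtime_internals::context::Context;
    use lucet_runtime_internals::val::Val;

    extern "C" fn add(x: u64, y: u64) -> u64 {
        x + y
    }

    fn run_add() -> u64 {
        // An even number of u64s tends to give a 16-byte-aligned stack end (see the
        // caveat in the doc examples above).
        let mut stack = vec![0u64; 1024].into_boxed_slice();
        let mut parent = Context::new();
        let mut child = Context::new();
        Context::init(
            &mut *stack,
            &mut parent,
            &mut child,
            add as usize,
            &[Val::U64(100), Val::U64(20)],
        )
        .expect("stack end is 16-byte aligned");

        // Runs `add`; when it returns, lucet_context_backstop stores rax/rdx/xmm0
        // into the child's retval fields and swaps back here.
        unsafe { Context::swap(&mut parent, &child) };

        // The 0th general-purpose return value holds the u64 result (here, 120).
        child.get_retval_gp(0)
    }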
+#[derive(Debug, Fail)] +pub enum Error { + /// Raised when the bottom of the stack provided to `Context::init` is not 16-byte aligned + #[fail(display = "context initialized with unaligned stack")] + UnalignedStack, +} + +/// Check whether the bottom (highest address) of the stack is 16-byte aligned, as required by the +/// ABI. +fn stack_is_aligned(stack: &[u64]) -> bool { + let size = stack.len(); + let last_elt_addr = &stack[size - 1] as *const u64 as usize; + let bottom_addr = last_elt_addr + mem::size_of::<u64>(); + bottom_addr % 16 == 0 +} + +extern "C" { + /// Bootstraps arguments and calls the entrypoint via returning; implemented in assembly. + /// + /// Loads general-purpose arguments from the callee-saved registers in a `Context` to the + /// appropriate argument registers for the AMD64 ABI, and then returns to the entrypoint. + fn lucet_context_bootstrap(); + + /// Stores return values into the parent context, and then swaps to it; implemented in assembly. + /// + /// This is where the entrypoint function returns to, so that we swap back to the parent on + /// return. + fn lucet_context_backstop(); + + /// Saves the current context and performs the context switch. Implemented in assembly. + fn lucet_context_swap(from: *mut Context, to: *const Context); + + /// Performs the context switch; implemented in assembly. + /// + /// Never returns because the current context is discarded. + fn lucet_context_set(to: *const Context) -> !; + + fn lucet_get_current_stack_pointer() -> u64; +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/c_child.c b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/c_child.c new file mode 100644 index 0000000000..66cefa65eb --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/c_child.c @@ -0,0 +1,130 @@ +#include <inttypes.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +char output_string[1024]; +void reset_output(void); +static void output(const char *fmt, ...); + +// These are pointers to ContextHandles, which in turn have pointers to a +// Context as their first field +void** parent_regs; +void** child_regs; + +void lucet_context_swap(void* from, void* to); +void lucet_context_set(void* to); + +void arg_printing_child(void *arg0, void *arg1) +{ + int arg0_val = *(int *) arg0; + int arg1_val = *(int *) arg1; + + output("hello from the child! my args were %d and %d\n", arg0_val, arg1_val); + + lucet_context_swap(*child_regs, *parent_regs); + + // Read the arguments again + arg0_val = *(int *) arg0; + arg1_val = *(int *) arg1; + + output("now they are %d and %d\n", arg0_val, arg1_val); + + lucet_context_swap(*child_regs, *parent_regs); +} + +// Use the lucet_context_set function to jump to the parent without saving +// the child +void context_set_child() +{ + output("hello from the child! setting context to parent...\n"); + lucet_context_set(*parent_regs); +} + +void returning_child() +{ + output("hello from the child! 
returning...\n"); +} + +void child_3_args(uint64_t arg1, uint64_t arg2, uint64_t arg3) +{ + output("the good three args boy %" PRId64 " %" PRId64 " %" PRId64 "\n", arg1, arg2, arg3); +} + +void child_4_args(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) +{ + output("the large four args boy %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", arg1, arg2, + arg3, arg4); +} + +void child_5_args(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) +{ + output("the big five args son %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", + arg1, arg2, arg3, arg4, arg5); +} + +void child_6_args(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6) +{ + output("6 args, hahaha long boy %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 + " %" PRId64 "\n", + arg1, arg2, arg3, arg4, arg5, arg6); +} + +void child_7_args(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, uint64_t arg7) +{ + output("7 args, hahaha long boy %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 + " %" PRId64 " %" PRId64 "\n", + arg1, arg2, arg3, arg4, arg5, arg6, arg7); +} + +void child_8_args(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, uint64_t arg7, uint64_t arg8) +{ + output("8 args, hahaha long boy %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 + " %" PRId64 " %" PRId64 " %" PRId64 "\n", + arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); +} + +void child_9_args(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, uint64_t arg7, uint64_t arg8, uint64_t arg9) +{ + output("9 args, hahaha long boy %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 + " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", + arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); +} + +void child_10_args(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, uint64_t arg7, uint64_t arg8, uint64_t arg9, uint64_t arg10) +{ + output("10 args, hahaha very long boy %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 + " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 "\n", + arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10); +} + +// Helpers: +static char * output_cursor; +static size_t output_cursor_len; + +void reset_output(void) +{ + memset(output_string, 0, sizeof(output_string)); + output_cursor = output_string; + output_cursor_len = sizeof(output_string); +} + +static void output(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + int res = vsnprintf(output_cursor, output_cursor_len, fmt, args); + if (res > 0) { + output_cursor += res; + output_cursor_len -= res; + } else { + abort(); + } + va_end(args); +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/c_child.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/c_child.rs new file mode 100644 index 0000000000..4bdb3bd142 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/c_child.rs @@ -0,0 +1,307 @@ +// The `__m128` is not defined to be FFI-compatible, so Rust doesn't +// like that we're `extern`ing the `Context`, which contains +// them. However the context is opaque on the C side, so this is okay. +#![allow(improper_ctypes)] + +//! 
A port of the tests from `lib/lucet-runtime-c/test/context_suite.c` + +use crate::context::{Context, ContextHandle}; +use crate::val::Val; +use lazy_static::lazy_static; +use std::ffi::CStr; +use std::os::raw::{c_char, c_int, c_void}; +use std::sync::Mutex; + +lazy_static! { + static ref TEST_GLOBAL_LOCK: Mutex<()> = Mutex::new(()); +} + +extern "C" { + static mut parent_regs: *mut ContextHandle; + static mut child_regs: *mut ContextHandle; +} + +fn new_stack() -> Box<[u64]> { + vec![0u64; 1024].into_boxed_slice() +} + +macro_rules! test_body { + ( $stack:ident, $body:block ) => { + let _lock = TEST_GLOBAL_LOCK.lock().unwrap(); + reset_output(); + + let mut $stack = new_stack(); + let parent = Box::into_raw(Box::new(ContextHandle::new())); + + unsafe { + parent_regs = parent; + } + + { + $body + } + + unsafe { + Box::from_raw(parent_regs); + Box::from_raw(child_regs); + } + }; +} + +macro_rules! init_and_swap { + ( $stack:ident, $fn:ident, [ $( $args:expr ),* ] ) => { + unsafe { + let child = Box::into_raw(Box::new(ContextHandle::create_and_init( + &mut *$stack, + parent_regs.as_mut().unwrap(), + $fn as usize, + &[$( $args ),*], + ).unwrap())); + + child_regs = child; + + Context::swap(parent_regs.as_mut().unwrap(), child_regs.as_ref().unwrap()); + } + } +} + +#[test] +fn call_child() { + test_body!(stack, { + extern "C" { + fn arg_printing_child(); + } + + let mut arg0_val: c_int = 123; + let mut arg1_val: c_int = 456; + let arg0 = Val::CPtr(&mut arg0_val as *mut c_int as *mut c_void); + let arg1 = Val::CPtr(&mut arg1_val as *mut c_int as *mut c_void); + + init_and_swap!(stack, arg_printing_child, [arg0, arg1]); + + assert_eq!( + "hello from the child! my args were 123 and 456\n", + &get_output() + ); + }); +} + +#[test] +#[allow(unused_assignments)] +fn call_child_twice() { + test_body!(stack, { + extern "C" { + fn arg_printing_child(); + } + + let mut arg0_val: c_int = 123; + let mut arg1_val: c_int = 456; + let arg0 = Val::CPtr(&mut arg0_val as *mut c_int as *mut c_void); + let arg1 = Val::CPtr(&mut arg1_val as *mut c_int as *mut c_void); + + init_and_swap!(stack, arg_printing_child, [arg0, arg1]); + + assert_eq!( + "hello from the child! my args were 123 and 456\n", + &get_output() + ); + + arg0_val = 9; + arg1_val = 10; + + unsafe { + Context::swap(parent_regs.as_mut().unwrap(), child_regs.as_ref().unwrap()); + } + + assert_eq!( + "hello from the child! my args were 123 and 456\n\ + now they are 9 and 10\n", + &get_output() + ); + }); +} + +#[test] +fn call_child_setcontext() { + test_body!(stack, { + extern "C" { + fn context_set_child(); + } + + init_and_swap!(stack, context_set_child, []); + + assert_eq!( + "hello from the child! setting context to parent...\n", + &get_output() + ); + }); +} + +#[test] +fn call_child_setcontext_twice() { + test_body!(stack, { + extern "C" { + fn context_set_child(); + } + + init_and_swap!(stack, context_set_child, []); + + assert_eq!( + "hello from the child! setting context to parent...\n", + &get_output() + ); + + init_and_swap!(stack, context_set_child, []); + + assert_eq!( + "hello from the child! setting context to parent...\n\ + hello from the child! setting context to parent...\n", + &get_output() + ); + }); +} + +#[test] +fn call_returning_child() { + test_body!(stack, { + extern "C" { + fn returning_child(); + } + + init_and_swap!(stack, returning_child, []); + + assert_eq!("hello from the child! returning...\n", &get_output()); + }); +} + +macro_rules! 
child_n_args { + ( $fn:ident, $prefix:expr, $( $arg:expr ),* ) => { + test_body!(stack, { + extern "C" { + fn $fn(); + } + + init_and_swap!(stack, $fn, [ $( Val::U64($arg) ),* ]); + + assert_eq!( + concat!($prefix, $( " ", $arg ),* , "\n"), + &get_output() + ); + }); + } +} + +#[test] +fn test_child_3_args() { + child_n_args!(child_3_args, "the good three args boy", 10, 11, 12); +} + +#[test] +fn test_child_4_args() { + child_n_args!(child_4_args, "the large four args boy", 20, 21, 22, 23); +} + +#[test] +fn test_child_5_args() { + child_n_args!(child_5_args, "the big five args son", 30, 31, 32, 33, 34); +} + +#[test] +fn test_child_6_args() { + child_n_args!( + child_6_args, + "6 args, hahaha long boy", + 40, + 41, + 42, + 43, + 44, + 45 + ); +} + +#[test] +fn test_child_7_args() { + child_n_args!( + child_7_args, + "7 args, hahaha long boy", + 50, + 51, + 52, + 53, + 54, + 55, + 56 + ); +} + +#[test] +fn test_child_8_args() { + child_n_args!( + child_8_args, + "8 args, hahaha long boy", + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67 + ); +} + +#[test] +fn test_child_9_args() { + child_n_args!( + child_9_args, + "9 args, hahaha long boy", + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78 + ); +} + +#[test] +fn test_child_10_args() { + child_n_args!( + child_10_args, + "10 args, hahaha very long boy", + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89 + ); +} + +fn get_output() -> String { + extern "C" { + static output_string: c_char; + } + unsafe { + CStr::from_ptr(&output_string as *const c_char) + .to_string_lossy() + .into_owned() + } +} + +fn reset_output() { + extern "C" { + fn reset_output(); + } + unsafe { + reset_output(); + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/mod.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/mod.rs new file mode 100644 index 0000000000..14bc5312cf --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/mod.rs @@ -0,0 +1,43 @@ +mod c_child; +mod rust_child; +use crate::context::{Context, ContextHandle, Error}; +use memoffset::offset_of; +use std::slice; + +#[test] +fn context_offsets_correct() { + assert_eq!(offset_of!(Context, gpr), 0); + assert_eq!(offset_of!(Context, fpr), 8 * 8); + assert_eq!(offset_of!(Context, retvals_gp), 8 * 8 + 8 * 16); + assert_eq!(offset_of!(Context, retval_fp), 8 * 8 + 8 * 16 + 8 * 2); +} + +#[test] +fn init_rejects_unaligned() { + extern "C" fn dummy() {} + // first we have to specially craft an unaligned slice, since + // a normal allocation of a [u64] often ends up 16-byte + // aligned + let mut len = 1024; + let mut stack = vec![0u64; len]; + let ptr = stack.as_mut_ptr(); + let skew = ptr as usize % 16; + + // we happened to be aligned already, so let's mess it up + if skew == 0 { + len -= 1; + } + + let mut stack_unaligned = unsafe { slice::from_raw_parts_mut(ptr, len) }; + + // now we have the unaligned stack, let's make sure it blows up right + let mut parent = ContextHandle::new(); + let res = + ContextHandle::create_and_init(&mut stack_unaligned, &mut parent, dummy as usize, &[]); + + if let Err(Error::UnalignedStack) = res { + assert!(true); + } else { + assert!(false, "init succeeded with unaligned stack"); + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/rust_child.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/rust_child.rs new file mode 100644 index 0000000000..7292114b9f --- /dev/null +++ 
b/third_party/rust/lucet-runtime-internals-wasmsbx/src/context/tests/rust_child.rs @@ -0,0 +1,480 @@ +//! A port of the tests from `lib/lucet-runtime-c/test/context_suite.c` + +use crate::context::{Context, ContextHandle}; +use crate::val::{Val, __m128_as_f32, __m128_as_f64}; +use lazy_static::lazy_static; +use std::cell::RefCell; +use std::fmt::Write; +use std::os::raw::{c_int, c_void}; +use std::sync::Mutex; + +lazy_static! { + static ref TEST_GLOBAL_LOCK: Mutex<()> = Mutex::new(()); + static ref OUTPUT_STRING: Mutex<String> = Mutex::new(String::new()); +} + +macro_rules! assert_output_eq { + ( $s:expr ) => { + assert_eq!($s, &*OUTPUT_STRING.lock().unwrap()) + }; +} + +fn reset_output() { + *OUTPUT_STRING.lock().unwrap() = String::with_capacity(1024); +} + +static mut PARENT: Option<ContextHandle> = None; +static mut CHILD: Option<ContextHandle> = None; + +fn new_stack() -> Box<[u64]> { + vec![0u64; 1024].into_boxed_slice() +} + +macro_rules! test_body { + ( $stack:ident, $body:block ) => { + let _lock = TEST_GLOBAL_LOCK.lock().unwrap(); + reset_output(); + + let mut $stack = new_stack(); + unsafe { + PARENT = Some(ContextHandle::new()); + } + + $body + }; +} + +macro_rules! init_and_swap { + ( $stack:ident, $fn:ident, [ $( $args:expr ),* ] ) => { + unsafe { + let child = ContextHandle::create_and_init( + &mut *$stack, + PARENT.as_mut().unwrap(), + $fn as usize, + &[$( $args ),*], + ).unwrap(); + CHILD = Some(child); + + Context::swap(PARENT.as_mut().unwrap(), CHILD.as_ref().unwrap()); + } + } +} + +extern "C" fn arg_printing_child(arg0: *mut c_void, arg1: *mut c_void) { + let arg0_val = unsafe { *(arg0 as *mut c_int) }; + let arg1_val = unsafe { *(arg1 as *mut c_int) }; + + write!( + OUTPUT_STRING.lock().unwrap(), + "hello from the child! my args were {} and {}\n", + arg0_val, + arg1_val + ) + .unwrap(); + + unsafe { Context::swap(CHILD.as_mut().unwrap(), PARENT.as_ref().unwrap()) }; + + // Read the arguments again + let arg0_val = unsafe { *(arg0 as *mut c_int) }; + let arg1_val = unsafe { *(arg1 as *mut c_int) }; + + write!( + OUTPUT_STRING.lock().unwrap(), + "now they are {} and {}\n", + arg0_val, + arg1_val + ) + .unwrap(); + + unsafe { Context::swap(CHILD.as_mut().unwrap(), PARENT.as_ref().unwrap()) }; +} + +#[test] +fn call_child() { + test_body!(stack, { + let mut arg0_val: c_int = 123; + let mut arg1_val: c_int = 456; + let arg0 = Val::CPtr(&mut arg0_val as *mut c_int as *mut c_void); + let arg1 = Val::CPtr(&mut arg1_val as *mut c_int as *mut c_void); + + init_and_swap!(stack, arg_printing_child, [arg0, arg1]); + + assert_output_eq!("hello from the child! my args were 123 and 456\n"); + }); +} + +#[test] +#[allow(unused_assignments)] +fn call_child_twice() { + test_body!(stack, { + let mut arg0_val: c_int = 123; + let mut arg1_val: c_int = 456; + let arg0 = Val::CPtr(&mut arg0_val as *mut c_int as *mut c_void); + let arg1 = Val::CPtr(&mut arg1_val as *mut c_int as *mut c_void); + + init_and_swap!(stack, arg_printing_child, [arg0, arg1]); + + assert_output_eq!("hello from the child! my args were 123 and 456\n"); + + arg0_val = 9; + arg1_val = 10; + + unsafe { + Context::swap(PARENT.as_mut().unwrap(), CHILD.as_ref().unwrap()); + } + + assert_output_eq!( + "hello from the child! my args were 123 and 456\n\ + now they are 9 and 10\n" + ); + }); +} + +extern "C" fn context_set_child() { + write!( + OUTPUT_STRING.lock().unwrap(), + "hello from the child! 
setting context to parent...\n", + ) + .unwrap(); + unsafe { + Context::set(PARENT.as_ref().unwrap()); + } +} + +#[test] +fn call_child_setcontext() { + test_body!(stack, { + init_and_swap!(stack, context_set_child, []); + + assert_output_eq!("hello from the child! setting context to parent...\n"); + }); +} + +#[test] +fn call_child_setcontext_twice() { + test_body!(stack, { + init_and_swap!(stack, context_set_child, []); + + assert_output_eq!("hello from the child! setting context to parent...\n"); + + init_and_swap!(stack, context_set_child, []); + assert_output_eq!( + "hello from the child! setting context to parent...\n\ + hello from the child! setting context to parent...\n" + ); + }); +} + +extern "C" fn returning_child() { + write!( + OUTPUT_STRING.lock().unwrap(), + "hello from the child! returning...\n", + ) + .unwrap(); +} + +#[test] +fn call_returning_child() { + test_body!(stack, { + init_and_swap!(stack, returning_child, []); + + assert_output_eq!("hello from the child! returning...\n"); + }); +} + +#[test] +fn returning_add_u32() { + extern "C" fn add(x: u32, y: u32) -> u32 { + x + y + } + + test_body!(stack, { + init_and_swap!(stack, add, [Val::U32(100), Val::U32(20)]); + + unsafe { + if let Some(ref child) = CHILD { + assert_eq!(child.get_retval_gp(0), 120); + } else { + panic!("no child context present after returning"); + } + } + }); +} + +#[test] +fn returning_add_u64() { + extern "C" fn add(x: u64, y: u64) -> u64 { + x + y + } + + test_body!(stack, { + init_and_swap!(stack, add, [Val::U64(100), Val::U64(20)]); + + unsafe { + assert_eq!(CHILD.as_ref().unwrap().get_retval_gp(0), 120); + } + }); +} + +#[test] +fn returning_add_f32() { + extern "C" fn add(x: f32, y: f32) -> f32 { + x + y + } + + test_body!(stack, { + init_and_swap!(stack, add, [Val::F32(100.0), Val::F32(20.0)]); + + unsafe { + let reg = CHILD.as_ref().unwrap().get_retval_fp(); + assert_eq!(__m128_as_f32(reg), 120.0); + } + }); +} + +#[test] +fn returning_add_f64() { + extern "C" fn add(x: f64, y: f64) -> f64 { + x + y + } + + test_body!(stack, { + init_and_swap!(stack, add, [Val::F64(100.0), Val::F64(20.0)]); + + unsafe { + let reg = CHILD.as_ref().unwrap().get_retval_fp(); + assert_eq!(__m128_as_f64(reg), 120.0); + } + }); +} + +macro_rules! 
child_n_args { + ( $name: ident, $prefix:expr, { $( $arg:ident : $val:expr ),* } ) => { + #[test] + fn $name() { + extern "C" fn child_n_args_gen( $( $arg: u64 ),*) { + let mut out = OUTPUT_STRING.lock().unwrap(); + write!(out, $prefix).unwrap(); + $( + write!(out, " {}", $arg).unwrap(); + );* + write!(out, "\n").unwrap(); + } + + test_body!(stack, { + init_and_swap!(stack, child_n_args_gen, [ $( Val::U64($val) ),* ]); + + assert_output_eq!( + concat!($prefix, $( " ", $val ),* , "\n") + ); + }); + } + } +} + +child_n_args!(child_3_args, "the good three args boy", { + arg1: 10, + arg2: 11, + arg3: 12 +}); + +child_n_args!(child_4_args, "the large four args boy", { + arg1: 20, + arg2: 21, + arg3: 22, + arg4: 23 +}); + +child_n_args!(child_5_args, "the big five args son", { + arg1: 30, + arg2: 31, + arg3: 32, + arg4: 33, + arg5: 34 +}); + +child_n_args!(child_6_args, "6 args, hahaha long boy", { + arg1: 40, + arg2: 41, + arg3: 42, + arg4: 43, + arg5: 44, + arg6: 45 +}); + +child_n_args!(child_7_args, "7 args, hahaha long boy", { + arg1: 50, + arg2: 51, + arg3: 52, + arg4: 53, + arg5: 54, + arg6: 55, + arg7: 56 +}); + +child_n_args!(child_8_args, "8 args, hahaha long boy", { + arg1: 60, + arg2: 61, + arg3: 62, + arg4: 63, + arg5: 64, + arg6: 65, + arg7: 66, + arg8: 67 +}); + +child_n_args!(child_9_args, "9 args, hahaha long boy", { + arg1: 70, + arg2: 71, + arg3: 72, + arg4: 73, + arg5: 74, + arg6: 75, + arg7: 76, + arg8: 77, + arg9: 78 +}); + +child_n_args!(child_10_args, "10 args, hahaha very long boy", { + arg1: 80, + arg2: 81, + arg3: 82, + arg4: 83, + arg5: 84, + arg6: 85, + arg7: 86, + arg8: 87, + arg9: 88, + arg10: 89 +}); + +macro_rules! child_n_fp_args { + ( $name: ident, $prefix:expr, { $( $arg:ident : $val:expr ),* } ) => { + #[test] + fn $name() { + extern "C" fn child_n_fp_args_gen( $( $arg: f64 ),*) { + let mut out = OUTPUT_STRING.lock().unwrap(); + write!(out, $prefix).unwrap(); + $( + write!(out, " {:.1}", $arg).unwrap(); + );* + write!(out, "\n").unwrap(); + } + + test_body!(stack, { + init_and_swap!(stack, child_n_fp_args_gen, [ $( Val::F64($val) ),* ]); + + assert_output_eq!( + concat!($prefix, $( " ", $val ),* , "\n") + ); + }); + } + } +} + +child_n_fp_args!(child_6_fp_args, "6 args, hahaha long boy", { + arg1: 40.0, + arg2: 41.0, + arg3: 42.0, + arg4: 43.0, + arg5: 44.0, + arg6: 45.0 +}); + +child_n_fp_args!(child_7_fp_args, "7 args, hahaha long boy", { + arg1: 50.0, + arg2: 51.0, + arg3: 52.0, + arg4: 53.0, + arg5: 54.0, + arg6: 55.0, + arg7: 56.0 +}); + +child_n_fp_args!(child_8_fp_args, "8 args, hahaha long boy", { + arg1: 60.0, + arg2: 61.0, + arg3: 62.0, + arg4: 63.0, + arg5: 64.0, + arg6: 65.0, + arg7: 66.0, + arg8: 67.0 +}); + +child_n_fp_args!(child_9_fp_args, "9 args, hahaha long boy", { + arg1: 70.0, + arg2: 71.0, + arg3: 72.0, + arg4: 73.0, + arg5: 74.0, + arg6: 75.0, + arg7: 76.0, + arg8: 77.0, + arg9: 78.0 +}); + +child_n_fp_args!(child_10_fp_args, "10 args, hahaha very long boy", { + arg1: 80.0, + arg2: 81.0, + arg3: 82.0, + arg4: 83.0, + arg5: 84.0, + arg6: 85.0, + arg7: 86.0, + arg8: 87.0, + arg9: 88.0, + arg10: 89.0 +}); + +#[test] +fn guest_realloc_string() { + extern "C" fn guest_fn( + _arg1: u64, + _arg2: u64, + _arg3: u64, + _arg4: u64, + _arg5: u64, + _arg6: u64, + _arg7: u64, + ) { + let mut s = String::with_capacity(0); + s.push_str("hello, I did some allocation"); + write!(OUTPUT_STRING.lock().unwrap(), "{}", s).unwrap(); + } + + test_body!(stack, { + init_and_swap!( + stack, + guest_fn, + [ + Val::U64(1), + Val::U64(2), + Val::U64(3), + 
Val::U64(4), + Val::U64(5), + Val::U64(6), + Val::U64(7) + ] + ); + assert_output_eq!("hello, I did some allocation"); + }); +} + +#[test] +fn guest_access_tls() { + thread_local! { + static TLS: RefCell<u64> = RefCell::new(0); + } + extern "C" fn guest_fn() { + TLS.with(|tls| { + write!(OUTPUT_STRING.lock().unwrap(), "tls = {}", tls.borrow()).unwrap(); + }); + } + + TLS.with(|tls| *tls.borrow_mut() = 42); + test_body!(stack, { + init_and_swap!(stack, guest_fn, []); + assert_output_eq!("tls = 42"); + }); +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/embed_ctx.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/embed_ctx.rs new file mode 100644 index 0000000000..8083905458 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/embed_ctx.rs @@ -0,0 +1,67 @@ +use std::any::{Any, TypeId}; +use std::cell::{BorrowError, BorrowMutError, Ref, RefCell, RefMut}; +use std::collections::HashMap; + +/// A map that holds at most one value of any type. +/// +/// This is similar to the type provided by the `anymap` crate, but we can get away with simpler +/// types on the methods due to our more specialized use case. +pub struct CtxMap { + map: HashMap<TypeId, RefCell<Box<dyn Any>>>, +} + +impl CtxMap { + pub fn clear(&mut self) { + self.map.clear(); + } + + pub fn contains<T: Any>(&self) -> bool { + self.map.contains_key(&TypeId::of::<T>()) + } + + pub fn try_get<T: Any>(&self) -> Option<Result<Ref<'_, T>, BorrowError>> { + self.map.get(&TypeId::of::<T>()).map(|x| { + x.try_borrow().map(|r| { + Ref::map(r, |b| { + b.downcast_ref::<T>() + .expect("value stored with TypeId::of::<T> is always type T") + }) + }) + }) + } + + pub fn try_get_mut<T: Any>(&mut self) -> Option<Result<RefMut<'_, T>, BorrowMutError>> { + self.map.get_mut(&TypeId::of::<T>()).map(|x| { + x.try_borrow_mut().map(|r| { + RefMut::map(r, |b| { + b.downcast_mut::<T>() + .expect("value stored with TypeId::of::<T> is always type T") + }) + }) + }) + } + + pub fn insert<T: Any>(&mut self, x: T) -> Option<T> { + self.map + .insert(TypeId::of::<T>(), RefCell::new(Box::new(x) as Box<dyn Any>)) + .map(|x_prev| { + *(x_prev.into_inner()) + .downcast::<T>() + .expect("value stored with TypeId::of::<T> is always type T") + }) + } + + pub fn new() -> Self { + CtxMap { + map: HashMap::new(), + } + } + + pub fn remove<T: Any>(&mut self) -> Option<T> { + self.map.remove(&TypeId::of::<T>()).map(|x| { + *(x.into_inner()) + .downcast::<T>() + .expect("value stored with TypeId::of::<T> is always type T") + }) + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/error.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/error.rs new file mode 100644 index 0000000000..9bd927a0f4 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/error.rs @@ -0,0 +1,156 @@ +use crate::instance::{FaultDetails, TerminationDetails}; +use failure::Fail; + +/// Lucet runtime errors. +#[derive(Debug, Fail)] +pub enum Error { + #[fail(display = "Invalid argument: {}", _0)] + InvalidArgument(&'static str), + + /// A [`Region`](trait.Region.html) cannot currently accommodate additional instances. + #[fail(display = "Region capacity reached: {} instances", _0)] + RegionFull(usize), + + /// A module error occurred. + #[fail(display = "Module error: {}", _0)] + ModuleError(ModuleError), + + /// A method call or module specification would exceed an instance's + /// [`Limit`s](struct.Limits.html). 
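Returning briefly to the `CtxMap` defined in embed_ctx.rs above, a short usage sketch of its one-value-per-type behavior. `RequestId` is a hypothetical embedder-defined type and the snippet assumes in-crate visibility; none of these names exist in the library.

    struct RequestId(u64);

    fn ctx_map_demo() {
        let mut ctx = CtxMap::new();
        assert!(!ctx.contains::<RequestId>());

        // `insert` returns the previously stored value of the same type, if any.
        assert!(ctx.insert(RequestId(7)).is_none());

        // `try_get` is None when no value of the type is present, and Err(BorrowError)
        // when the slot is already mutably borrowed.
        if let Some(Ok(id)) = ctx.try_get::<RequestId>() {
            assert_eq!(id.0, 7);
        }

        // `remove` takes the value back out by type.
        assert!(ctx.remove::<RequestId>().is_some());
    }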
+ #[fail(display = "Instance limits exceeded: {}", _0)] + LimitsExceeded(String), + + /// A method call attempted to modify linear memory for an instance that + /// does not have linear memory + #[fail(display = "No linear memory available: {}", _0)] + NoLinearMemory(String), + + /// An attempt to look up a WebAssembly function by its symbol name failed. + #[fail(display = "Symbol not found: {}", _0)] + SymbolNotFound(String), + + /// An attempt to look up a WebAssembly function by its table index failed. + #[fail(display = "Function not found: (table {}, func {})", _0, _1)] + FuncNotFound(u32, u32), + + /// An instance aborted due to a runtime fault. + #[fail(display = "Runtime fault: {:?}", _0)] + RuntimeFault(FaultDetails), + + /// An instance terminated, potentially with extra information about the termination. + /// + /// This condition can arise from a hostcall explicitly calling + /// [`Vmctx::terminate()`](vmctx/struct.Vmctx.html#method.terminate), or via a custom signal handler + /// that returns [`SignalBehavior::Terminate`](enum.SignalBehavior.html#variant.Terminate). + #[fail(display = "Runtime terminated")] + RuntimeTerminated(TerminationDetails), + + /// IO errors arising during dynamic loading with [`DlModule`](struct.DlModule.html). + #[fail(display = "Dynamic loading error: {}", _0)] + DlError(#[cause] std::io::Error), + + #[fail(display = "Instance not returned")] + InstanceNotReturned, + + #[fail(display = "Instance not yielded")] + InstanceNotYielded, + + #[fail(display = "Start function yielded")] + StartYielded, + + /// A catch-all for internal errors that are likely unrecoverable by the runtime user. + /// + /// As the API matures, these will likely become rarer, replaced by new variants of this enum, + /// or by panics for truly unrecoverable situations. + #[fail(display = "Internal error: {}", _0)] + InternalError(#[cause] failure::Error), + + /// An unsupported feature was used. + #[fail(display = "Unsupported feature: {}", _0)] + Unsupported(String), +} + +impl From<failure::Error> for Error { + fn from(e: failure::Error) -> Error { + Error::InternalError(e) + } +} + +impl From<crate::context::Error> for Error { + fn from(e: crate::context::Error) -> Error { + Error::InternalError(e.into()) + } +} + +impl From<nix::Error> for Error { + fn from(e: nix::Error) -> Error { + Error::InternalError(e.into()) + } +} + +impl From<std::ffi::IntoStringError> for Error { + fn from(e: std::ffi::IntoStringError) -> Error { + Error::InternalError(e.into()) + } +} + +impl From<lucet_module::Error> for Error { + fn from(e: lucet_module::Error) -> Error { + Error::ModuleError(ModuleError::ModuleDataError(e)) + } +} + +/// Lucet module errors. +#[derive(Debug, Fail)] +pub enum ModuleError { + /// An error was found in the definition of a Lucet module. + #[fail(display = "Incorrect module definition: {}", _0)] + IncorrectModule(String), + + /// An error occurred with the module data section, likely during deserialization. + #[fail(display = "Module data error: {}", _0)] + ModuleDataError(#[cause] lucet_module::Error), +} + +#[macro_export] +macro_rules! lucet_bail { + ($e:expr) => { + return Err(lucet_format_err!($e)); + }; + ($fmt:expr, $($arg:tt)*) => { + return Err(lucet_format_err!($fmt, $($arg)*)); + }; +} + +#[macro_export(local_inner_macros)] +macro_rules! lucet_ensure { + ($cond:expr, $e:expr) => { + if !($cond) { + lucet_bail!($e); + } + }; + ($cond:expr, $fmt:expr, $($arg:tt)*) => { + if !($cond) { + lucet_bail!($fmt, $($arg)*); + } + }; +} + +#[macro_export] +macro_rules! 
lucet_format_err { + ($($arg:tt)*) => { $crate::error::Error::InternalError(failure::format_err!($($arg)*)) } +} + +#[macro_export] +macro_rules! lucet_incorrect_module { + ($($arg:tt)*) => { + $crate::error::Error::ModuleError( + $crate::error::ModuleError::IncorrectModule(format!($($arg)*)) + ) + } +} + +#[macro_export] +macro_rules! bail_limits_exceeded { + ($($arg:tt)*) => { return Err($crate::error::Error::LimitsExceeded(format!($($arg)*))); } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/hostcall_macros.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/hostcall_macros.rs new file mode 100644 index 0000000000..3c11f88074 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/hostcall_macros.rs @@ -0,0 +1,93 @@ +/// The macro that surrounds definitions of Lucet hostcalls in Rust. +/// +/// It is important to use this macro for hostcalls, rather than exporting them directly, as it +/// installs unwind protection that prevents panics from unwinding into the guest stack. +/// +/// Since this is not yet a proc macro, the syntax is unfortunately fairly brittle. The functions it +/// encloses must be of the form: +/// +/// ```ignore +/// #[$attr1] +/// #[$attr2] +/// ... // any number of attributes are supported; in most cases you will want `#[no_mangle]` +/// pub unsafe extern "C" fn $name( // must be `pub unsafe extern "C"` +/// &mut $vmctx, +/// $arg1: $arg1_ty, +/// $arg2: $arg2_ty, +/// ... , // trailing comma must always be present +/// ) -> $ret_ty { // return type must always be present even if it is `()` +/// // body +/// } +/// ``` +#[macro_export] +macro_rules! lucet_hostcalls { + { + $( + $(#[$attr:meta])* + pub unsafe extern "C" fn $name:ident( + &mut $vmctx:ident + $(, $arg:ident : $arg_ty:ty )*, + ) -> $ret_ty:ty { + $($body:tt)* + } + )* + } => { + $( + $(#[$attr])* + pub unsafe extern "C" fn $name( + vmctx_raw: *mut $crate::vmctx::lucet_vmctx, + $( $arg: $arg_ty ),* + ) -> $ret_ty { + #[inline(always)] + unsafe fn hostcall_impl( + $vmctx: &mut $crate::vmctx::Vmctx, + $( $arg : $arg_ty ),* + ) -> $ret_ty { + $($body)* + } + let res = std::panic::catch_unwind(move || { + hostcall_impl(&mut $crate::vmctx::Vmctx::from_raw(vmctx_raw), $( $arg ),*) + }); + match res { + Ok(res) => res, + Err(e) => { + match e.downcast::<$crate::instance::TerminationDetails>() { + Ok(details) => { + let mut vmctx = $crate::vmctx::Vmctx::from_raw(vmctx_raw); + vmctx.terminate_no_unwind(*details) + }, + Err(e) => std::panic::resume_unwind(e), + } + } + } + } + )* + } +} + +/// Terminate an instance from within a hostcall, returning an optional value as an error. +/// +/// Use this instead of `panic!` when you want the instance to terminate, but not the entire host +/// program. Like `panic!`, you can pass a format string with arguments, a value that implements +/// `Any`, or nothing to return a default message. +/// +/// Upon termination, the call to `Instance::run()` will return with an +/// `Err(Error::RuntimeTerminated)` value containing the value you pass to this macro. +/// +/// This macro safely unwinds the hostcall stack out to the entrypoint of the hostcall, so any +/// resources that may have been acquired will be properly dropped. +#[macro_export] +macro_rules! 
lucet_hostcall_terminate { + () => { + lucet_hostcall_terminate!("lucet_hostcall_terminate") + }; + ( $payload:expr ) => { + panic!($crate::instance::TerminationDetails::provide($payload)) + }; + ( $payload:expr, ) => { + lucet_hostcall_terminate!($payload) + }; + ( $fmt:expr, $($arg:tt)+ ) => { + lucet_hostcall_terminate!(format!($fmt, $($arg),+)) + }; +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance.rs new file mode 100644 index 0000000000..a66fba2402 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance.rs @@ -0,0 +1,1313 @@ +mod siginfo_ext; +pub mod signals; +pub mod state; + +pub use crate::instance::signals::{signal_handler_none, SignalBehavior, SignalHandler}; +pub use crate::instance::state::State; + +use crate::alloc::{Alloc, HOST_PAGE_SIZE_EXPECTED}; +use crate::context::Context; +use crate::embed_ctx::CtxMap; +use crate::error::Error; +use crate::module::{self, FunctionHandle, FunctionPointer, Global, GlobalValue, Module, TrapCode}; +use crate::region::RegionInternal; +use crate::val::{UntypedRetVal, Val}; +use crate::WASM_PAGE_SIZE; +use libc::{c_void, siginfo_t, uintptr_t}; +use lucet_module::InstanceRuntimeData; +use memoffset::offset_of; +use std::any::Any; +use std::cell::{BorrowError, BorrowMutError, Ref, RefCell, RefMut, UnsafeCell}; +use std::marker::PhantomData; +use std::mem; +use std::ops::{Deref, DerefMut}; +use std::ptr::{self, NonNull}; +use std::sync::Arc; + +pub const LUCET_INSTANCE_MAGIC: u64 = 746932922; + +thread_local! { + /// The host context. + /// + /// Control returns here implicitly due to the setup in `Context::init()` when guest functions + /// return normally. Control can return here explicitly from signal handlers when the guest + /// program needs to be terminated. + /// + /// This is an `UnsafeCell` due to nested borrows. The context must be borrowed mutably when + /// swapping to the guest context, which means that borrow exists for the entire time the guest + /// function runs even though the mutation to the host context is done only at the beginning of + /// the swap. Meanwhile, the signal handler can run at any point during the guest function, and + /// so it also must be able to immutably borrow the host context if it needs to swap back. The + /// runtime borrowing constraints for a `RefCell` are therefore too strict for this variable. + pub(crate) static HOST_CTX: UnsafeCell<Context> = UnsafeCell::new(Context::new()); + + /// The currently-running `Instance`, if one exists. + pub(crate) static CURRENT_INSTANCE: RefCell<Option<NonNull<Instance>>> = RefCell::new(None); +} + +/// A smart pointer to an [`Instance`](struct.Instance.html) that properly manages cleanup when dropped. +/// +/// Instances are always stored in memory backed by a `Region`; we never want to create one directly +/// with the Rust allocator. This type allows us to abide by that rule while also having an owned +/// type that cleans up the instance when we are done with it. +/// +/// Since this type implements `Deref` and `DerefMut` to `Instance`, it can usually be treated as +/// though it were a `&mut Instance`. +pub struct InstanceHandle { + inst: NonNull<Instance>, + needs_inst_drop: bool, +} + +// raw pointer lint +unsafe impl Send for InstanceHandle {} + +/// Create a new `InstanceHandle`. +/// +/// This is not meant for public consumption, but rather is used to make implementations of +/// `Region`. 
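Looking back at the two hostcall macros above, a sketch of how they are meant to compose. The hostcall name, arguments, and overflow condition are hypothetical; only the invocation syntax comes from the macro definitions, and the snippet assumes a crate that uses these exported macros alongside the runtime.

    lucet_hostcalls! {
        #[no_mangle]
        pub unsafe extern "C" fn hypothetical_checked_add(
            &mut _vmctx,
            x: u64,
            y: u64,
        ) -> u64 {
            match x.checked_add(y) {
                Some(sum) => sum,
                // Unwinds only as far as the hostcall boundary; the embedder sees
                // Err(Error::RuntimeTerminated(..)) carrying this payload.
                None => lucet_hostcall_terminate!(format!("overflow: {} + {}", x, y)),
            }
        }
    }

The `catch_unwind` installed by `lucet_hostcalls!` is what turns the panic raised by `lucet_hostcall_terminate!` into a `terminate_no_unwind` on the instance rather than letting it unwind into guest frames.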
+/// +/// # Safety +/// +/// This function runs the guest code for the WebAssembly `start` section, and running any guest +/// code is potentially unsafe; see [`Instance::run()`](struct.Instance.html#method.run). +pub fn new_instance_handle( + instance: *mut Instance, + module: Arc<dyn Module>, + alloc: Alloc, + embed_ctx: CtxMap, +) -> Result<InstanceHandle, Error> { + let inst = NonNull::new(instance) + .ok_or(lucet_format_err!("instance pointer is null; this is a bug"))?; + + lucet_ensure!( + unsafe { inst.as_ref().magic } != LUCET_INSTANCE_MAGIC, + "created a new instance handle in memory with existing instance magic; this is a bug" + ); + + let mut handle = InstanceHandle { + inst, + needs_inst_drop: false, + }; + + let inst = Instance::new(alloc, module, embed_ctx); + + unsafe { + // this is wildly unsafe! you must be very careful to not let the drop impls run on the + // uninitialized fields; see + // <https://doc.rust-lang.org/std/mem/fn.forget.html#use-case-1> + + // write the whole struct into place over the uninitialized page + ptr::write(&mut *handle, inst); + }; + + handle.needs_inst_drop = true; + + handle.reset()?; + + Ok(handle) +} + +pub fn instance_handle_to_raw(mut inst: InstanceHandle) -> *mut Instance { + inst.needs_inst_drop = false; + inst.inst.as_ptr() +} + +pub unsafe fn instance_handle_from_raw( + ptr: *mut Instance, + needs_inst_drop: bool, +) -> InstanceHandle { + InstanceHandle { + inst: NonNull::new_unchecked(ptr), + needs_inst_drop, + } +} + +// Safety argument for these deref impls: the instance's `Alloc` field contains an `Arc` to the +// region that backs this memory, keeping the page containing the `Instance` alive as long as the +// region exists + +impl Deref for InstanceHandle { + type Target = Instance; + fn deref(&self) -> &Self::Target { + unsafe { self.inst.as_ref() } + } +} + +impl DerefMut for InstanceHandle { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { self.inst.as_mut() } + } +} + +impl Drop for InstanceHandle { + fn drop(&mut self) { + if self.needs_inst_drop { + unsafe { + let inst = self.inst.as_mut(); + + // Grab a handle to the region to ensure it outlives `inst`. + // + // This ensures that the region won't be dropped by `inst` being + // dropped, which could result in `inst` being unmapped by the + // Region *during* drop of the Instance's fields. + let region: Arc<dyn RegionInternal> = inst.alloc().region.clone(); + + // drop the actual instance + std::ptr::drop_in_place(inst); + + // and now we can drop what may be the last Arc<Region>. If it is + // it can safely do what it needs with memory; we're not running + // destructors on it anymore. + mem::drop(region); + } + } + } +} + +/// A Lucet program, together with its dedicated memory and signal handlers. +/// +/// This is the primary interface for running programs, examining return values, and accessing the +/// WebAssembly heap. +/// +/// `Instance`s are never created by runtime users directly, but rather are acquired from +/// [`Region`](../region/trait.Region.html)s and often accessed through +/// [`InstanceHandle`](../instance/struct.InstanceHandle.html) smart pointers. This guarantees that instances +/// and their fields are never moved in memory, otherwise raw pointers in the metadata could be +/// unsafely invalidated. +/// +/// An instance occupies one 4096-byte page in memory, with a layout like: +/// ```text +/// 0xXXXXX000: +/// Instance { +/// .magic +/// .embed_ctx +/// ... etc ... 
+/// } +/// +/// // unused space +/// +/// InstanceInternals { +/// .globals +/// .instruction_counter +/// } // last address *inside* `InstanceInternals` is 0xXXXXXFFF +/// 0xXXXXY000: // start of next page, VMContext points here +/// Heap { +/// .. +/// } +/// ``` +/// +/// This layout allows modules to tightly couple to a handful of fields related to the instance, +/// rather than possibly requiring compiler-side changes (and recompiles) whenever `Instance` +/// changes. +/// +/// It also obligates `Instance` to be immediately followed by the heap, but otherwise leaves the +/// locations of the stack, globals, and any other data, to be implementation-defined by the +/// `Region` that actually creates `Slot`s onto which `Instance` are mapped. +/// For information about the layout of all instance-related memory, see the documentation of +/// [MmapRegion](../region/mmap/struct.MmapRegion.html). +#[repr(C)] +#[repr(align(4096))] +pub struct Instance { + /// Used to catch bugs in pointer math used to find the address of the instance + magic: u64, + + /// The embedding context is a map containing embedder-specific values that are used to + /// implement hostcalls + pub(crate) embed_ctx: CtxMap, + + /// The program (WebAssembly module) that is the entrypoint for the instance. + module: Arc<dyn Module>, + + /// The `Context` in which the guest program runs + pub(crate) ctx: Context, + + /// Instance state and error information + pub(crate) state: State, + + /// The memory allocated for this instance + alloc: Alloc, + + /// Handler run for signals that do not arise from a known WebAssembly trap, or that involve + /// memory outside of the current instance. + fatal_handler: fn(&Instance) -> !, + + /// A fatal handler set from C + c_fatal_handler: Option<unsafe extern "C" fn(*mut Instance)>, + + /// Handler run when `SIGBUS`, `SIGFPE`, `SIGILL`, or `SIGSEGV` are caught by the instance thread. + signal_handler: Box< + dyn Fn( + &Instance, + &Option<TrapCode>, + libc::c_int, + *const siginfo_t, + *const c_void, + ) -> SignalBehavior, + >, + + /// Pointer to the function used as the entrypoint (for use in backtraces) + entrypoint: Option<FunctionPointer>, + + /// The value passed back to the guest when resuming a yielded instance. + pub(crate) resumed_val: Option<Box<dyn Any + 'static>>, + + /// `_padding` must be the last member of the structure. + /// This marks where the padding starts to make the structure exactly 4096 bytes long. + /// It is also used to compute the size of the structure up to that point, i.e. without padding. + _padding: (), +} + +/// Users of `Instance` must be very careful about when instances are dropped! +/// +/// Typically you will not have to worry about this, as InstanceHandle will robustly handle +/// Instance drop semantics. If an instance is dropped, and the Region it's in has already dropped, +/// it may contain the last reference counted pointer to its Region. If so, when Instance's +/// destructor runs, Region will be dropped, and may free or otherwise invalidate the memory that +/// this Instance exists in, *while* the Instance destructor is executing. +impl Drop for Instance { + fn drop(&mut self) { + // Reset magic to indicate this instance + // is no longer valid + self.magic = 0; + } +} + +/// The result of running or resuming an [`Instance`](struct.Instance.html). +#[derive(Debug)] +pub enum RunResult { + /// An instance returned with a value. 
+ /// + /// The actual type of the contained value depends on the return type of the guest function that + /// was called. For guest functions with no return value, it is undefined behavior to do + /// anything with this value. + Returned(UntypedRetVal), + /// An instance yielded, potentially with a value. + /// + /// This arises when a hostcall invokes one of the + /// [`Vmctx::yield_*()`](vmctx/struct.Vmctx.html#method.yield_) family of methods. Depending on which + /// variant is used, the `YieldedVal` may contain a value passed from the guest context to the + /// host. + /// + /// An instance that has yielded may only be resumed + /// ([with](struct.Instance.html#method.resume_with_val) or + /// [without](struct.Instance.html#method.resume) a value to returned to the guest), + /// [reset](struct.Instance.html#method.reset), or dropped. Attempting to run an instance from a + /// new entrypoint after it has yielded but without first resetting will result in an error. + Yielded(YieldedVal), +} + +impl RunResult { + /// Try to get a return value from a run result, returning `Error::InstanceNotReturned` if the + /// instance instead yielded. + pub fn returned(self) -> Result<UntypedRetVal, Error> { + match self { + RunResult::Returned(rv) => Ok(rv), + RunResult::Yielded(_) => Err(Error::InstanceNotReturned), + } + } + + /// Try to get a reference to a return value from a run result, returning + /// `Error::InstanceNotReturned` if the instance instead yielded. + pub fn returned_ref(&self) -> Result<&UntypedRetVal, Error> { + match self { + RunResult::Returned(rv) => Ok(rv), + RunResult::Yielded(_) => Err(Error::InstanceNotReturned), + } + } + + /// Returns `true` if the instance returned a value. + pub fn is_returned(&self) -> bool { + self.returned_ref().is_ok() + } + + /// Unwraps a run result into a return value. + /// + /// # Panics + /// + /// Panics if the instance instead yielded, with a panic message including the passed message. + pub fn expect_returned(self, msg: &str) -> UntypedRetVal { + self.returned().expect(msg) + } + + /// Unwraps a run result into a returned value. + /// + /// # Panics + /// + /// Panics if the instance instead yielded. + pub fn unwrap_returned(self) -> UntypedRetVal { + self.returned().unwrap() + } + + /// Try to get a yielded value from a run result, returning `Error::InstanceNotYielded` if the + /// instance instead returned. + pub fn yielded(self) -> Result<YieldedVal, Error> { + match self { + RunResult::Returned(_) => Err(Error::InstanceNotYielded), + RunResult::Yielded(yv) => Ok(yv), + } + } + + /// Try to get a reference to a yielded value from a run result, returning + /// `Error::InstanceNotYielded` if the instance instead returned. + pub fn yielded_ref(&self) -> Result<&YieldedVal, Error> { + match self { + RunResult::Returned(_) => Err(Error::InstanceNotYielded), + RunResult::Yielded(yv) => Ok(yv), + } + } + + /// Returns `true` if the instance yielded. + pub fn is_yielded(&self) -> bool { + self.yielded_ref().is_ok() + } + + /// Unwraps a run result into a yielded value. + /// + /// # Panics + /// + /// Panics if the instance instead returned, with a panic message including the passed message. + pub fn expect_yielded(self, msg: &str) -> YieldedVal { + self.yielded().expect(msg) + } + + /// Unwraps a run result into a yielded value. + /// + /// # Panics + /// + /// Panics if the instance instead returned. 
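+ ///
+ /// A minimal usage sketch for driving a yielding guest to completion (the entrypoint name
+ /// and the yielded payload type are illustrative assumptions, not part of this crate):
+ ///
+ /// ```ignore
+ /// // "coroutine_entry" is a hypothetical guest export that yields `u64` progress values
+ /// let mut res = instance.run("coroutine_entry", &[])?;
+ /// while res.is_yielded() {
+ ///     if let Some(progress) = res.yielded_ref()?.downcast_ref::<u64>() {
+ ///         println!("guest progress: {}", progress);
+ ///     }
+ ///     res = instance.resume()?;
+ /// }
+ /// let retval = res.unwrap_returned();
+ /// ```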
+ pub fn unwrap_yielded(self) -> YieldedVal { + self.yielded().unwrap() + } +} + +/// APIs that are internal, but useful to implementors of extension modules; you probably don't want +/// this trait! +/// +/// This is a trait rather than inherent `impl`s in order to keep the `lucet-runtime` API clean and +/// safe. +pub trait InstanceInternal { + fn alloc(&self) -> &Alloc; + fn alloc_mut(&mut self) -> &mut Alloc; + fn module(&self) -> &dyn Module; + fn state(&self) -> &State; + fn valid_magic(&self) -> bool; +} + +impl InstanceInternal for Instance { + /// Get a reference to the instance's `Alloc`. + fn alloc(&self) -> &Alloc { + &self.alloc + } + + /// Get a mutable reference to the instance's `Alloc`. + fn alloc_mut(&mut self) -> &mut Alloc { + &mut self.alloc + } + + /// Get a reference to the instance's `Module`. + fn module(&self) -> &dyn Module { + self.module.deref() + } + + /// Get a reference to the instance's `State`. + fn state(&self) -> &State { + &self.state + } + + /// Check whether the instance magic is valid. + fn valid_magic(&self) -> bool { + self.magic == LUCET_INSTANCE_MAGIC + } +} + +// Public API +impl Instance { + /// Run a function with arguments in the guest context at the given entrypoint. + /// + /// ```no_run + /// # use lucet_runtime_internals::instance::InstanceHandle; + /// # let instance: InstanceHandle = unimplemented!(); + /// // regular execution yields `Ok(UntypedRetVal)` + /// let retval = instance.run("factorial", &[5u64.into()]).unwrap().unwrap_returned(); + /// assert_eq!(u64::from(retval), 120u64); + /// + /// // runtime faults yield `Err(Error)` + /// let result = instance.run("faulting_function", &[]); + /// assert!(result.is_err()); + /// ``` + /// + /// # Safety + /// + /// This is unsafe in two ways: + /// + /// - The type of the entrypoint might not be correct. It might take a different number or + /// different types of arguments than are provided to `args`. It might not even point to a + /// function! We will likely add type information to `lucetc` output so we can dynamically check + /// the type in the future. + /// + /// - The entrypoint is foreign code. While we may be convinced that WebAssembly compiled to + /// native code by `lucetc` is safe, we do not have the same guarantee for the hostcalls that a + /// guest may invoke. They might be implemented in an unsafe language, so we must treat this + /// call as unsafe, just like any other FFI call. + /// + /// For the moment, we do not mark this as `unsafe` in the Rust type system, but that may change + /// in the future. + pub fn run(&mut self, entrypoint: &str, args: &[Val]) -> Result<RunResult, Error> { + let func = self.module.get_export_func(entrypoint)?; + self.run_func(func, &args) + } + + /// Run a function with arguments in the guest context from the [WebAssembly function + /// table](https://webassembly.github.io/spec/core/syntax/modules.html#tables). + /// + /// # Safety + /// + /// The same safety caveats of [`Instance::run()`](struct.Instance.html#method.run) apply. + pub fn run_func_idx( + &mut self, + table_idx: u32, + func_idx: u32, + args: &[Val], + ) -> Result<RunResult, Error> { + let func = self.module.get_func_from_idx(table_idx, func_idx)?; + self.run_func(func, &args) + } + + /// Resume execution of an instance that has yielded without providing a value to the guest. 
+ /// + /// This should only be used when the guest yielded with + /// [`Vmctx::yield_()`](vmctx/struct.Vmctx.html#method.yield_) or + /// [`Vmctx::yield_val()`](vmctx/struct.Vmctx.html#method.yield_val). Otherwise, this call will + /// fail with `Error::InvalidArgument`. + /// + /// # Safety + /// + /// The foreign code safety caveat of [`Instance::run()`](struct.Instance.html#method.run) + /// applies. + pub fn resume(&mut self) -> Result<RunResult, Error> { + self.resume_with_val(EmptyYieldVal) + } + + /// Resume execution of an instance that has yielded, providing a value to the guest. + /// + /// The type of the provided value must match the type expected by + /// [`Vmctx::yield_expecting_val()`](vmctx/struct.Vmctx.html#method.yield_expecting_val) or + /// [`Vmctx::yield_val_expecting_val()`](vmctx/struct.Vmctx.html#method.yield_val_expecting_val). + /// + /// The provided value will be dynamically typechecked against the type the guest expects to + /// receive, and if that check fails, this call will fail with `Error::InvalidArgument`. + /// + /// # Safety + /// + /// The foreign code safety caveat of [`Instance::run()`](struct.Instance.html#method.run) + /// applies. + pub fn resume_with_val<A: Any + 'static>(&mut self, val: A) -> Result<RunResult, Error> { + match &self.state { + State::Yielded { expecting, .. } => { + // make sure the resumed value is of the right type + if !expecting.is::<PhantomData<A>>() { + return Err(Error::InvalidArgument( + "type mismatch between yielded instance expected value and resumed value", + )); + } + } + _ => return Err(Error::InvalidArgument("can only resume a yielded instance")), + } + + self.resumed_val = Some(Box::new(val) as Box<dyn Any + 'static>); + + self.swap_and_return() + } + + /// Reset the instance's heap and global variables to their initial state. + /// + /// The WebAssembly `start` section will also be run, if one exists. + /// + /// The embedder contexts present at instance creation or added with + /// [`Instance::insert_embed_ctx()`](struct.Instance.html#method.insert_embed_ctx) are not + /// modified by this call; it is the embedder's responsibility to clear or reset their state if + /// necessary. + /// + /// # Safety + /// + /// This function runs the guest code for the WebAssembly `start` section, and running any guest + /// code is potentially unsafe; see [`Instance::run()`](struct.Instance.html#method.run). + pub fn reset(&mut self) -> Result<(), Error> { + self.alloc.reset_heap(self.module.as_ref())?; + let globals = unsafe { self.alloc.globals_mut() }; + let mod_globals = self.module.globals(); + for (i, v) in mod_globals.iter().enumerate() { + globals[i] = match v.global() { + Global::Import { .. } => { + return Err(Error::Unsupported(format!( + "global imports are unsupported; found: {:?}", + i + ))); + } + Global::Def(def) => def.init_val(), + }; + } + + self.state = State::Ready; + + self.run_start()?; + + Ok(()) + } + + /// Grow the guest memory by the given number of WebAssembly pages. + /// + /// On success, returns the number of pages that existed before the call. + pub fn grow_memory(&mut self, additional_pages: u32) -> Result<u32, Error> { + let additional_bytes = + additional_pages + .checked_mul(WASM_PAGE_SIZE) + .ok_or(lucet_format_err!( + "additional pages larger than wasm address space", + ))?; + let orig_len = self + .alloc + .expand_heap(additional_bytes, self.module.as_ref())?; + Ok(orig_len / WASM_PAGE_SIZE) + } + + /// Return the WebAssembly heap as a slice of bytes. 
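+ ///
+ /// For example, an embedder can copy data out of guest memory through this slice; the
+ /// offset and length below are illustrative, and guest-supplied pointers should be
+ /// bounds-checked against `heap().len()` first:
+ ///
+ /// ```ignore
+ /// let heap = instance.heap();
+ /// let guest_ptr = 0x100usize; // a guest "pointer" is an offset into the heap (illustrative)
+ /// let bytes = &heap[guest_ptr..guest_ptr + 16];
+ /// ```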
+ pub fn heap(&self) -> &[u8] { + unsafe { self.alloc.heap() } + } + + /// Return the WebAssembly heap as a mutable slice of bytes. + pub fn heap_mut(&mut self) -> &mut [u8] { + unsafe { self.alloc.heap_mut() } + } + + /// Return the WebAssembly heap as a slice of `u32`s. + pub fn heap_u32(&self) -> &[u32] { + unsafe { self.alloc.heap_u32() } + } + + /// Return the WebAssembly heap as a mutable slice of `u32`s. + pub fn heap_u32_mut(&mut self) -> &mut [u32] { + unsafe { self.alloc.heap_u32_mut() } + } + + /// Return the WebAssembly globals as a slice of `i64`s. + pub fn globals(&self) -> &[GlobalValue] { + unsafe { self.alloc.globals() } + } + + /// Return the WebAssembly globals as a mutable slice of `i64`s. + pub fn globals_mut(&mut self) -> &mut [GlobalValue] { + unsafe { self.alloc.globals_mut() } + } + + /// Check whether a given range in the host address space overlaps with the memory that backs + /// the instance heap. + pub fn check_heap<T>(&self, ptr: *const T, len: usize) -> bool { + self.alloc.mem_in_heap(ptr, len) + } + + /// Check whether a context value of a particular type exists. + pub fn contains_embed_ctx<T: Any>(&self) -> bool { + self.embed_ctx.contains::<T>() + } + + /// Get a reference to a context value of a particular type, if it exists. + pub fn get_embed_ctx<T: Any>(&self) -> Option<Result<Ref<'_, T>, BorrowError>> { + self.embed_ctx.try_get::<T>() + } + + /// Get a mutable reference to a context value of a particular type, if it exists. + pub fn get_embed_ctx_mut<T: Any>(&mut self) -> Option<Result<RefMut<'_, T>, BorrowMutError>> { + self.embed_ctx.try_get_mut::<T>() + } + + /// Insert a context value. + /// + /// If a context value of the same type already existed, it is returned. + /// + /// **Note**: this method is intended for embedder contexts that need to be added _after_ an + /// instance is created and initialized. To add a context for an instance's entire lifetime, + /// including the execution of its `start` section, see + /// [`Region::new_instance_builder()`](trait.Region.html#method.new_instance_builder). + pub fn insert_embed_ctx<T: Any>(&mut self, x: T) -> Option<T> { + self.embed_ctx.insert(x) + } + + /// Remove a context value of a particular type, returning it if it exists. + pub fn remove_embed_ctx<T: Any>(&mut self) -> Option<T> { + self.embed_ctx.remove::<T>() + } + + /// Set the handler run when `SIGBUS`, `SIGFPE`, `SIGILL`, or `SIGSEGV` are caught by the + /// instance thread. + /// + /// In most cases, these signals are unrecoverable for the instance that raised them, but do not + /// affect the rest of the process. + /// + /// The default signal handler returns + /// [`SignalBehavior::Default`](enum.SignalBehavior.html#variant.Default), which yields a + /// runtime fault error. + /// + /// The signal handler must be + /// [signal-safe](http://man7.org/linux/man-pages/man7/signal-safety.7.html). + pub fn set_signal_handler<H>(&mut self, handler: H) + where + H: 'static + + Fn( + &Instance, + &Option<TrapCode>, + libc::c_int, + *const siginfo_t, + *const c_void, + ) -> SignalBehavior, + { + self.signal_handler = Box::new(handler) as Box<SignalHandler>; + } + + /// Set the handler run for signals that do not arise from a known WebAssembly trap, or that + /// involve memory outside of the current instance. + /// + /// Fatal signals are not only unrecoverable for the instance that raised them, but may + /// compromise the correctness of the rest of the process if unhandled. + /// + /// The default fatal handler calls `panic!()`. 
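+ ///
+ /// A sketch of installing a custom handler (the handler below is hypothetical; the only
+ /// requirement is that it does not return):
+ ///
+ /// ```ignore
+ /// // hypothetical handler: log the instance address, then abort the process
+ /// fn my_fatal_handler(inst: &Instance) -> ! {
+ ///     eprintln!("instance {:p} hit a fatal error", inst);
+ ///     std::process::abort();
+ /// }
+ /// instance.set_fatal_handler(my_fatal_handler);
+ /// ```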
+ pub fn set_fatal_handler(&mut self, handler: fn(&Instance) -> !) { + self.fatal_handler = handler; + } + + /// Set the fatal handler to a C-compatible function. + /// + /// This is a separate interface, because C functions can't return the `!` type. Like the + /// regular `fatal_handler`, it is not expected to return, but we cannot enforce that through + /// types. + /// + /// When a fatal error occurs, this handler is run first, and then the regular `fatal_handler` + /// runs in case it returns. + pub fn set_c_fatal_handler(&mut self, handler: unsafe extern "C" fn(*mut Instance)) { + self.c_fatal_handler = Some(handler); + } + + #[inline] + pub fn get_instruction_count(&self) -> u64 { + self.get_instance_implicits().instruction_count + } + + #[inline] + pub fn set_instruction_count(&mut self, instruction_count: u64) { + self.get_instance_implicits_mut().instruction_count = instruction_count; + } +} + +// Private API +impl Instance { + fn new(alloc: Alloc, module: Arc<dyn Module>, embed_ctx: CtxMap) -> Self { + let globals_ptr = alloc.slot().globals as *mut i64; + let mut inst = Instance { + magic: LUCET_INSTANCE_MAGIC, + embed_ctx: embed_ctx, + module, + ctx: Context::new(), + state: State::Ready, + alloc, + fatal_handler: default_fatal_handler, + c_fatal_handler: None, + signal_handler: Box::new(signal_handler_none) as Box<SignalHandler>, + entrypoint: None, + resumed_val: None, + _padding: (), + }; + inst.set_globals_ptr(globals_ptr); + inst.set_instruction_count(0); + + assert_eq!(mem::size_of::<Instance>(), HOST_PAGE_SIZE_EXPECTED); + let unpadded_size = offset_of!(Instance, _padding); + assert!(unpadded_size <= HOST_PAGE_SIZE_EXPECTED - mem::size_of::<*mut i64>()); + inst + } + + // The globals pointer must be stored right before the end of the structure, padded to the page size, + // so that it is 8 bytes before the heap. + // For this reason, the alignment of the structure is set to 4096, and we define accessors that + // read/write the globals pointer as bytes [4096-8..4096] of that structure represented as raw bytes. + // InstanceRuntimeData is placed such that it ends at the end of the page this `Instance` starts + // on. So we can access it by *self + PAGE_SIZE - size_of::<InstanceRuntimeData> + #[inline] + fn get_instance_implicits(&self) -> &InstanceRuntimeData { + unsafe { + let implicits_ptr = (self as *const _ as *const u8) + .offset((HOST_PAGE_SIZE_EXPECTED - mem::size_of::<InstanceRuntimeData>()) as isize) + as *const InstanceRuntimeData; + mem::transmute::<*const InstanceRuntimeData, &InstanceRuntimeData>(implicits_ptr) + } + } + + #[inline] + fn get_instance_implicits_mut(&mut self) -> &mut InstanceRuntimeData { + unsafe { + let implicits_ptr = (self as *mut _ as *mut u8) + .offset((HOST_PAGE_SIZE_EXPECTED - mem::size_of::<InstanceRuntimeData>()) as isize) + as *mut InstanceRuntimeData; + mem::transmute::<*mut InstanceRuntimeData, &mut InstanceRuntimeData>(implicits_ptr) + } + } + + #[allow(dead_code)] + #[inline] + fn get_globals_ptr(&self) -> *mut i64 { + self.get_instance_implicits().globals_ptr + } + + #[inline] + fn set_globals_ptr(&mut self, globals_ptr: *mut i64) { + self.get_instance_implicits_mut().globals_ptr = globals_ptr + } + + /// Run a function in guest context at the given entrypoint. 
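+ ///
+ /// The instance's vmctx pointer (the base of the guest heap) is prepended to `args` as the
+ /// first native argument before switching into the guest context.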
+ fn run_func(&mut self, func: FunctionHandle, args: &[Val]) -> Result<RunResult, Error> { + if !(self.state.is_ready() || (self.state.is_fault() && !self.state.is_fatal())) { + return Err(Error::InvalidArgument( + "instance must be ready or non-fatally faulted", + )); + } + if func.ptr.as_usize() == 0 { + return Err(Error::InvalidArgument( + "entrypoint function cannot be null; this is probably a malformed module", + )); + } + + let sig = self.module.get_signature(func.id); + + // in typechecking these values, we can only really check that arguments are correct. + // in the future we might want to make return value use more type safe as well. + + if sig.params.len() != args.len() { + return Err(Error::InvalidArgument( + "entrypoint function signature mismatch (number of arguments is incorrect)", + )); + } + + for (param_ty, arg) in sig.params.iter().zip(args.iter()) { + if param_ty != &arg.value_type() { + return Err(Error::InvalidArgument( + "entrypoint function signature mismatch", + )); + } + } + + self.entrypoint = Some(func.ptr); + + let mut args_with_vmctx = vec![Val::from(self.alloc.slot().heap)]; + args_with_vmctx.extend_from_slice(args); + + HOST_CTX.with(|host_ctx| { + Context::init( + unsafe { self.alloc.stack_u64_mut() }, + unsafe { &mut *host_ctx.get() }, + &mut self.ctx, + func.ptr.as_usize(), + &args_with_vmctx, + ) + })?; + + self.swap_and_return() + } + + /// The core routine for context switching into a guest, and extracting a result. + /// + /// This must only be called for an instance in a ready, non-fatally faulted, or yielded + /// state. The public wrappers around this function should make sure the state is appropriate. + fn swap_and_return(&mut self) -> Result<RunResult, Error> { + debug_assert!( + self.state.is_ready() + || (self.state.is_fault() && !self.state.is_fatal()) + || self.state.is_yielded() + ); + self.state = State::Running; + + // there should never be another instance running on this thread when we enter this function + CURRENT_INSTANCE.with(|current_instance| { + let mut current_instance = current_instance.borrow_mut(); + assert!( + current_instance.is_none(), + "no other instance is running on this thread" + ); + // safety: `self` is not null if we are in this function + *current_instance = Some(unsafe { NonNull::new_unchecked(self) }); + }); + + self.with_signals_on(|i| { + HOST_CTX.with(|host_ctx| { + // Save the current context into `host_ctx`, and jump to the guest context. The + // lucet context is linked to host_ctx, so it will return here after it finishes, + // successfully or otherwise. 
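+ // Control can also come back here from the signal handler, which switches to the host
+ // context via `Context::set_from_signal` after recording a fault or termination.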
+ unsafe { Context::swap(&mut *host_ctx.get(), &mut i.ctx) }; + Ok(()) + }) + })?; + + CURRENT_INSTANCE.with(|current_instance| { + *current_instance.borrow_mut() = None; + }); + + // Sandbox has jumped back to the host process, indicating it has either: + // + // * returned: state should be `Running`; transition to `Ready` and return a RunResult + // * yielded: state should be `Yielding`; transition to `Yielded` and return a RunResult + // * trapped: state should be `Faulted`; populate details and return an error or call a handler as appropriate + // * terminated: state should be `Terminating`; transition to `Terminated` and return the termination details as an Err + // + // The state should never be `Ready`, `Terminated`, `Yielded`, or `Transitioning` at this point + + // Set transitioning state temporarily so that we can move values out of the current state + let st = mem::replace(&mut self.state, State::Transitioning); + + match st { + State::Running => { + let retval = self.ctx.get_untyped_retval(); + self.state = State::Ready; + Ok(RunResult::Returned(retval)) + } + State::Terminating { details, .. } => { + self.state = State::Terminated; + Err(Error::RuntimeTerminated(details)) + } + State::Yielding { val, expecting } => { + self.state = State::Yielded { expecting }; + Ok(RunResult::Yielded(val)) + } + State::Faulted { + mut details, + siginfo, + context, + } => { + // Sandbox is no longer runnable. It's unsafe to determine all error details in the signal + // handler, so we fill in extra details here. + // + // FIXME after lucet-module is complete it should be possible to fill this in without + // consulting the process symbol table + details.rip_addr_details = self + .module + .addr_details(details.rip_addr as *const c_void)?; + + // fill the state back in with the updated details in case fatal handlers need it + self.state = State::Faulted { + details: details.clone(), + siginfo, + context, + }; + + if details.fatal { + // Some errors indicate that the guest is not functioning correctly or that + // the loaded code violated some assumption, so bail out via the fatal + // handler. + + // Run the C-style fatal handler, if it exists. + self.c_fatal_handler + .map(|h| unsafe { h(self as *mut Instance) }); + + // If there is no C-style fatal handler, or if it (erroneously) returns, + // call the Rust handler that we know will not return + (self.fatal_handler)(self) + } else { + // leave the full fault details in the instance state, and return the + // higher-level info to the user + Err(Error::RuntimeFault(details)) + } + } + State::Ready | State::Terminated | State::Yielded { .. } | State::Transitioning => Err( + lucet_format_err!("\"impossible\" state found in `swap_and_return()`: {}", st), + ), + } + } + + pub fn set_current_instance(&mut self) + { + // there should never be another instance running on this thread when we enter this function + CURRENT_INSTANCE.with(|current_instance| { + let mut current_instance = current_instance.borrow_mut(); + // safety: `self` is not null if we are in this function + *current_instance = Some(unsafe { NonNull::new_unchecked(self) }); + }); + } + + pub fn clear_current_instance(&mut self) + { + CURRENT_INSTANCE.with(|current_instance| { + *current_instance.borrow_mut() = None; + }); + } + + /// Run a function in guest context at the given entrypoint. 
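+ ///
+ /// Unlike `run_func`, this entry point does not install the guest signal handlers, and it
+ /// supports re-entrant calls: if the instance is already `Running`, the new guest frame is
+ /// initialized on the unused portion of the guest stack below the current stack pointer, and
+ /// the saved host context is restored once the nested call returns.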
+ pub fn unsafe_run_func_fast( + &mut self, + func_ptr: FunctionPointer, + args: &[Val], + ) -> Result<RunResult, Error> { + let prev_entrypoint = self.entrypoint; + self.entrypoint = Some(func_ptr); + + let mut args_with_vmctx = vec![Val::from(self.alloc.slot().heap)]; + args_with_vmctx.extend_from_slice(args); + + let re_entrant = match &self.state { + State::Running => true, + _ => false, + }; + + let saved_host_ctx = HOST_CTX.with(|host_ctx| { + let chosen_stack_loc; + + if re_entrant { + let curr_stack_ptr = unsafe { Context::get_current_stack_pointer() }; + // Add some padding as we have to account for the stack used by the rest of the current function body + let padded_curr_stack_ptr = curr_stack_ptr - 2048; + + let stack_slice = unsafe { self.alloc.stack_mut() }; + let stack_ptr = stack_slice.as_ptr() as u64; + + assert!(padded_curr_stack_ptr >= stack_ptr); + + let mut rem_stack_len = ((padded_curr_stack_ptr - stack_ptr) / 8) as usize; + // align to 8 + if rem_stack_len % 8 != 0 { + rem_stack_len += 8 - rem_stack_len % 8; + } + + let computed_stack_loc = unsafe { + std::slice::from_raw_parts_mut(stack_slice.as_ptr() as *mut u64, rem_stack_len) + }; + + chosen_stack_loc = computed_stack_loc; + } else { + chosen_stack_loc = unsafe { self.alloc.stack_u64_mut() }; + } + + let host_ctx_copy = unsafe { (*host_ctx.get()).clone() }; + + Context::init( + chosen_stack_loc, + unsafe { &mut *host_ctx.get() }, + &mut self.ctx, + func_ptr.as_usize(), + &args_with_vmctx, + ) + .map(|_| host_ctx_copy) + })?; + + debug_assert!( + self.state.is_ready() + || self.state.is_running() + || (self.state.is_fault() && !self.state.is_fatal()) + || self.state.is_yielded() + ); + + if !re_entrant { + self.state = State::Running; + } + + // there should never be another instance running on this thread when we enter this function + let mut prev_instance = None; + CURRENT_INSTANCE.with(|current_instance| { + let mut current_instance = current_instance.borrow_mut(); + prev_instance = *current_instance; + // safety: `self` is not null if we are in this function + *current_instance = Some(unsafe { NonNull::new_unchecked(self) }); + }); + + // self.with_signals_on(|i| { + HOST_CTX.with(|host_ctx| { + // Save the current context into `host_ctx`, and jump to the guest context. The + // lucet context is linked to host_ctx, so it will return here after it finishes, + // successfully or otherwise. 
+ unsafe { Context::swap(&mut *host_ctx.get(), &mut self.ctx) }; + // Ok(()) + }); + // })?; + + if re_entrant { + HOST_CTX.with(|host_ctx| { + let host_ctx_ref = unsafe { &mut *host_ctx.get() }; + *host_ctx_ref = saved_host_ctx; + }); + } + + CURRENT_INSTANCE.with(|current_instance| { + *current_instance.borrow_mut() = prev_instance; + }); + + self.entrypoint = prev_entrypoint; + + // Sandbox has jumped back to the host process, indicating it has either: + // + // * returned: state should be `Running`; transition to `Ready` and return a RunResult + // * yielded: state should be `Yielding`; transition to `Yielded` and return a RunResult + // * trapped: state should be `Faulted`; populate details and return an error or call a handler as appropriate + // * terminated: state should be `Terminating`; transition to `Terminated` and return the termination details as an Err + // + // The state should never be `Ready`, `Terminated`, `Yielded`, or `Transitioning` at this point + + // Set transitioning state temporarily so that we can move values out of the current state + let st = mem::replace(&mut self.state, State::Transitioning); + + match st { + State::Running => { + let retval = self.ctx.get_untyped_retval(); + if !re_entrant { + self.state = State::Ready; + } else { + self.state = State::Running; + } + Ok(RunResult::Returned(retval)) + } + State::Terminating { details, .. } => { + self.state = State::Terminated; + Err(Error::RuntimeTerminated(details)) + } + State::Yielding { val, expecting } => { + self.state = State::Yielded { expecting }; + Ok(RunResult::Yielded(val)) + } + State::Faulted { + mut details, + siginfo, + context, + } => { + // Sandbox is no longer runnable. It's unsafe to determine all error details in the signal + // handler, so we fill in extra details here. + // + // FIXME after lucet-module is complete it should be possible to fill this in without + // consulting the process symbol table + details.rip_addr_details = self + .module + .addr_details(details.rip_addr as *const c_void)?; + + // fill the state back in with the updated details in case fatal handlers need it + self.state = State::Faulted { + details: details.clone(), + siginfo, + context, + }; + + if details.fatal { + // Some errors indicate that the guest is not functioning correctly or that + // the loaded code violated some assumption, so bail out via the fatal + // handler. + + // Run the C-style fatal handler, if it exists. + self.c_fatal_handler + .map(|h| unsafe { h(self as *mut Instance) }); + + // If there is no C-style fatal handler, or if it (erroneously) returns, + // call the Rust handler that we know will not return + (self.fatal_handler)(self) + } else { + // leave the full fault details in the instance state, and return the + // higher-level info to the user + Err(Error::RuntimeFault(details)) + } + } + State::Ready | State::Terminated | State::Yielded { .. } | State::Transitioning => Err( + lucet_format_err!("\"impossible\" state found in `swap_and_return()`: {}", st), + ), + } + } + + fn run_start(&mut self) -> Result<(), Error> { + if let Some(start) = self.module.get_start_func()? { + let res = self.run_func(start, &[])?; + if res.is_yielded() { + return Err(Error::StartYielded); + } + } + Ok(()) + } +} + +/// Information about a runtime fault. +/// +/// Runtime faults are raised implictly by signal handlers that return `SignalBehavior::Default` in +/// response to signals arising while a guest is running. 
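+///
+/// A non-fatal fault typically surfaces to the embedder as `Err(Error::RuntimeFault(details))`
+/// from `Instance::run()`; for example (the entrypoint name is an illustrative assumption):
+///
+/// ```ignore
+/// // "faulting_function" is a hypothetical guest export
+/// match instance.run("faulting_function", &[]) {
+///     Err(Error::RuntimeFault(details)) => {
+///         eprintln!("guest faulted at {:#x}: {:?}", details.rip_addr, details.trapcode);
+///     }
+///     _ => { /* returned, yielded, or some other error */ }
+/// }
+/// ```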
+#[derive(Clone, Debug)] +pub struct FaultDetails { + /// If true, the instance's `fatal_handler` will be called. + pub fatal: bool, + /// Information about the type of fault that occurred. + pub trapcode: Option<TrapCode>, + /// The instruction pointer where the fault occurred. + pub rip_addr: uintptr_t, + /// Extra information about the instruction pointer's location, if available. + pub rip_addr_details: Option<module::AddrDetails>, +} + +impl std::fmt::Display for FaultDetails { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.fatal { + write!(f, "fault FATAL ")?; + } else { + write!(f, "fault ")?; + } + + if let Some(trapcode) = self.trapcode { + write!(f, "{:?} ", trapcode)?; + } else { + write!(f, "TrapCode::UNKNOWN ")?; + } + + write!(f, "code at address {:p}", self.rip_addr as *const c_void)?; + + if let Some(ref addr_details) = self.rip_addr_details { + if let Some(ref fname) = addr_details.file_name { + let sname = addr_details + .sym_name + .as_ref() + .map(String::as_str) + .unwrap_or("<unknown>"); + write!(f, " (symbol {}:{})", fname, sname)?; + } + if addr_details.in_module_code { + write!(f, " (inside module code)") + } else { + write!(f, " (not inside module code)") + } + } else { + write!(f, " (unknown whether in module)") + } + } +} + +/// Information about a terminated guest. +/// +/// Guests are terminated either explicitly by `Vmctx::terminate()`, or implicitly by signal +/// handlers that return `SignalBehavior::Terminate`. It usually indicates that an unrecoverable +/// error has occurred in a hostcall, rather than in WebAssembly code. +pub enum TerminationDetails { + /// Returned when a signal handler terminates the instance. + Signal, + /// Returned when `get_embed_ctx` or `get_embed_ctx_mut` are used with a type that is not present. + CtxNotFound, + /// Returned when the type of the value passed to `Instance::resume_with_val()` does not match + /// the type expected by `Vmctx::yield_expecting_val()` or `Vmctx::yield_val_expecting_val`, or + /// if `Instance::resume()` was called when a value was expected. + /// + /// **Note**: If you see this termination value, please report it as a Lucet bug. The types of + /// resumed values are dynamically checked by `Instance::resume()` and + /// `Instance::resume_with_val()`, so this should never arise. + YieldTypeMismatch, + /// Returned when dynamic borrowing rules of methods like `Vmctx::heap()` are violated. + BorrowError(&'static str), + /// Calls to `lucet_hostcall_terminate` provide a payload for use by the embedder. 
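+ ///
+ /// For example, a hostcall defined with `lucet_hostcalls!` can produce this variant by
+ /// invoking `lucet_hostcall_terminate!` (the hostcall below is an illustrative sketch):
+ ///
+ /// ```ignore
+ /// lucet_hostcalls! {
+ ///     // hypothetical hostcall that rejects a zero argument
+ ///     #[no_mangle]
+ ///     pub unsafe extern "C" fn my_hostcall(
+ ///         &mut vmctx,
+ ///         arg: u64,
+ ///     ) -> () {
+ ///         if arg == 0 {
+ ///             lucet_hostcall_terminate!("invalid argument: {}", arg);
+ ///         }
+ ///     }
+ /// }
+ /// ```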
+ Provided(Box<dyn Any + 'static>), +} + +impl TerminationDetails { + pub fn provide<A: Any + 'static>(details: A) -> Self { + TerminationDetails::Provided(Box::new(details)) + } + pub fn provided_details(&self) -> Option<&dyn Any> { + match self { + TerminationDetails::Provided(a) => Some(a.as_ref()), + _ => None, + } + } +} + +// Because of deref coercions, the code above was tricky to get right- +// test that a string makes it through +#[test] +fn termination_details_any_typing() { + let hello = "hello, world".to_owned(); + let details = TerminationDetails::provide(hello.clone()); + let provided = details.provided_details().expect("got Provided"); + assert_eq!( + provided.downcast_ref::<String>().expect("right type"), + &hello + ); +} + +impl PartialEq for TerminationDetails { + fn eq(&self, rhs: &TerminationDetails) -> bool { + use TerminationDetails::*; + match (self, rhs) { + (Signal, Signal) => true, + (BorrowError(msg1), BorrowError(msg2)) => msg1 == msg2, + (CtxNotFound, CtxNotFound) => true, + // can't compare `Any` + _ => false, + } + } +} + +impl std::fmt::Debug for TerminationDetails { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "TerminationDetails::")?; + match self { + TerminationDetails::Signal => write!(f, "Signal"), + TerminationDetails::BorrowError(msg) => write!(f, "BorrowError({})", msg), + TerminationDetails::CtxNotFound => write!(f, "CtxNotFound"), + TerminationDetails::YieldTypeMismatch => write!(f, "YieldTypeMismatch"), + TerminationDetails::Provided(_) => write!(f, "Provided(Any)"), + } + } +} + +unsafe impl Send for TerminationDetails {} +unsafe impl Sync for TerminationDetails {} + +/// The value yielded by an instance through a [`Vmctx`](vmctx/struct.Vmctx.html) and returned to +/// the host. +pub struct YieldedVal { + val: Box<dyn Any + 'static>, +} + +impl std::fmt::Debug for YieldedVal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_none() { + write!(f, "YieldedVal {{ val: None }}") + } else { + write!(f, "YieldedVal {{ val: Some }}") + } + } +} + +impl YieldedVal { + pub(crate) fn new<A: Any + 'static>(val: A) -> Self { + YieldedVal { val: Box::new(val) } + } + + /// Returns `true` if the guest yielded without a value. + pub fn is_none(&self) -> bool { + self.val.is::<EmptyYieldVal>() + } + + /// Returns `true` if the guest yielded with a value. + pub fn is_some(&self) -> bool { + !self.is_none() + } + + /// Attempt to downcast the yielded value to a concrete type, returning the original + /// `YieldedVal` if unsuccessful. + pub fn downcast<A: Any + 'static + Send + Sync>(self) -> Result<Box<A>, YieldedVal> { + match self.val.downcast() { + Ok(val) => Ok(val), + Err(val) => Err(YieldedVal { val }), + } + } + + /// Returns a reference to the yielded value if it is present and of type `A`, or `None` if it + /// isn't. + pub fn downcast_ref<A: Any + 'static + Send + Sync>(&self) -> Option<&A> { + self.val.downcast_ref() + } +} + +/// A marker value to indicate a yield or resume with no value. +/// +/// This exists to unify the implementations of the various operators, and should only ever be +/// created by internal code. +#[derive(Debug)] +pub(crate) struct EmptyYieldVal; + +fn default_fatal_handler(inst: &Instance) -> ! 
{ + panic!("> instance {:p} had fatal error: {}", inst, inst.state); +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/siginfo_ext.c b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/siginfo_ext.c new file mode 100644 index 0000000000..4dd9b58b4c --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/siginfo_ext.c @@ -0,0 +1,6 @@ +#include <signal.h> + +void *siginfo_si_addr(siginfo_t *si) +{ + return si->si_addr; +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/siginfo_ext.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/siginfo_ext.rs new file mode 100644 index 0000000000..863d8a8684 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/siginfo_ext.rs @@ -0,0 +1,15 @@ +use libc::{c_void, siginfo_t}; + +extern "C" { + fn siginfo_si_addr(si: *const siginfo_t) -> *const c_void; +} + +pub trait SiginfoExt { + fn si_addr_ext(&self) -> *const c_void; +} + +impl SiginfoExt for siginfo_t { + fn si_addr_ext(&self) -> *const c_void { + unsafe { siginfo_si_addr(self as *const siginfo_t) } + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/signals.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/signals.rs new file mode 100644 index 0000000000..d52be927c7 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/signals.rs @@ -0,0 +1,447 @@ +use crate::context::Context; +use crate::error::Error; +use crate::instance::{ + siginfo_ext::SiginfoExt, FaultDetails, Instance, State, TerminationDetails, CURRENT_INSTANCE, + HOST_CTX, +}; +use crate::sysdeps::UContextPtr; +use lazy_static::lazy_static; +use libc::{c_int, c_void, siginfo_t, SIGBUS, SIGSEGV}; +use lucet_module::TrapCode; +use nix::sys::signal::{ + pthread_sigmask, raise, sigaction, SaFlags, SigAction, SigHandler, SigSet, SigmaskHow, Signal, +}; +use std::mem::MaybeUninit; +use std::panic; +use std::sync::{Arc, Mutex}; + +lazy_static! { + // TODO: work out an alternative to this that is signal-safe for `reraise_host_signal_in_handler` + static ref LUCET_SIGNAL_STATE: Mutex<Option<SignalState>> = Mutex::new(None); +} + +/// The value returned by +/// [`Instance.signal_handler`](struct.Instance.html#structfield.signal_handler) to determine the +/// outcome of a handled signal. +pub enum SignalBehavior { + /// Use default behavior, which switches back to the host with `State::Fault` populated. + Default, + /// Override default behavior and cause the instance to continue. + Continue, + /// Override default behavior and cause the instance to terminate. + Terminate, +} + +pub type SignalHandler = dyn Fn( + &Instance, + &Option<TrapCode>, + libc::c_int, + *const siginfo_t, + *const c_void, +) -> SignalBehavior; + +pub fn signal_handler_none( + _inst: &Instance, + _trapcode: &Option<TrapCode>, + _signum: libc::c_int, + _siginfo_ptr: *const siginfo_t, + _ucontext_ptr: *const c_void, +) -> SignalBehavior { + SignalBehavior::Default +} + +impl Instance { + pub(crate) fn with_signals_on<F, R>(&mut self, f: F) -> Result<R, Error> + where + F: FnOnce(&mut Instance) -> Result<R, Error>, + { + // Set up the signal stack for this thread. Note that because signal stacks are per-thread, + // rather than per-process, we do this for every run, while the signal handler is installed + // only once per process. 
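+ // The host's previous altstack (if any) is saved just below and restored once the guest
+ // body has finished, so the embedder's own signal stack configuration is left intact.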
+ let guest_sigstack = SigStack::new( + self.alloc.slot().sigstack, + SigStackFlags::empty(), + self.alloc.slot().limits.signal_stack_size, + ); + let previous_sigstack = unsafe { sigaltstack(Some(guest_sigstack)) } + .expect("enabling or changing the signal stack succeeds"); + if let Some(previous_sigstack) = previous_sigstack { + assert!( + !previous_sigstack + .flags() + .contains(SigStackFlags::SS_ONSTACK), + "an instance was created with a signal stack" + ); + } + let mut ostate = LUCET_SIGNAL_STATE.lock().unwrap(); + if let Some(ref mut state) = *ostate { + state.counter += 1; + } else { + unsafe { + setup_guest_signal_state(&mut ostate); + } + } + drop(ostate); + + // run the body + let res = f(self); + + let mut ostate = LUCET_SIGNAL_STATE.lock().unwrap(); + let counter_zero = if let Some(ref mut state) = *ostate { + state.counter -= 1; + if state.counter == 0 { + unsafe { + restore_host_signal_state(state); + } + true + } else { + false + } + } else { + panic!("signal handlers weren't installed at instance exit"); + }; + if counter_zero { + *ostate = None; + } + + unsafe { + // restore the host signal stack for this thread + if !altstack_flags() + .expect("the current stack flags could be retrieved") + .contains(SigStackFlags::SS_ONSTACK) + { + sigaltstack(previous_sigstack).expect("sigaltstack restoration succeeds"); + } + } + + res + } +} + +/// Signal handler installed during instance execution. +/// +/// This function is only designed to handle signals that are the direct result of execution of a +/// hardware instruction from the faulting WASM thread. It thus safely assumes the signal is +/// directed specifically at this thread (i.e. not a different thread or the process as a whole). +extern "C" fn handle_signal(signum: c_int, siginfo_ptr: *mut siginfo_t, ucontext_ptr: *mut c_void) { + let signal = Signal::from_c_int(signum).expect("signum is a valid signal"); + if !(signal == Signal::SIGBUS + || signal == Signal::SIGSEGV + || signal == Signal::SIGILL + || signal == Signal::SIGFPE) + { + panic!("unexpected signal in guest signal handler: {:?}", signal); + } + assert!(!siginfo_ptr.is_null(), "siginfo must not be null"); + + // Safety: when using a SA_SIGINFO sigaction, the third argument can be cast to a `ucontext_t` + // pointer per the manpage + assert!(!ucontext_ptr.is_null(), "ucontext_ptr must not be null"); + let ctx = UContextPtr::new(ucontext_ptr); + let rip = ctx.get_ip(); + + let switch_to_host = CURRENT_INSTANCE.with(|current_instance| { + let mut current_instance = current_instance.borrow_mut(); + + if current_instance.is_none() { + // If there is no current instance, we've caught a signal raised by a thread that's not + // running a lucet instance. Restore the host signal handler and reraise the signal, + // then return if the host handler returns + unsafe { + reraise_host_signal_in_handler(signal, signum, siginfo_ptr, ucontext_ptr); + } + // don't try context-switching + return false; + } + + // Safety: the memory pointed to by CURRENT_INSTANCE should be a valid instance. This is not + // a trivial property, but relies on the compiler not emitting guest programs that can + // overwrite the instance. 
+ let inst = unsafe { + current_instance + .as_mut() + .expect("current instance exists") + .as_mut() + }; + + let trapcode = inst.module.lookup_trapcode(rip); + + let behavior = (inst.signal_handler)(inst, &trapcode, signum, siginfo_ptr, ucontext_ptr); + match behavior { + SignalBehavior::Continue => { + // return to the guest context without making any modifications to the instance + false + } + SignalBehavior::Terminate => { + // set the state before jumping back to the host context + inst.state = State::Terminating { + details: TerminationDetails::Signal, + }; + true + } + SignalBehavior::Default => { + // safety: pointer is checked for null at the top of the function, and the + // manpage guarantees that a siginfo_t will be passed as the second argument + let siginfo = unsafe { *siginfo_ptr }; + let rip_addr = rip as usize; + // If the trap table lookup returned unknown, it is a fatal error + let unknown_fault = trapcode.is_none(); + + // If the trap was a segv or bus fault and the addressed memory was outside the + // guard pages, it is also a fatal error + let outside_guard = (siginfo.si_signo == SIGSEGV || siginfo.si_signo == SIGBUS) + && !inst.alloc.addr_in_guard_page(siginfo.si_addr_ext()); + + // record the fault and jump back to the host context + inst.state = State::Faulted { + details: FaultDetails { + fatal: unknown_fault || outside_guard, + trapcode: trapcode, + rip_addr, + // Details set to `None` here: have to wait until `verify_trap_safety` to + // fill in these details, because access may not be signal safe. + rip_addr_details: None, + }, + siginfo, + context: ctx.into(), + }; + true + } + } + }); + + if switch_to_host { + HOST_CTX.with(|host_ctx| unsafe { + Context::set_from_signal(&*host_ctx.get()) + .expect("can successfully switch back to the host context"); + }); + unreachable!() + } +} + +struct SignalState { + counter: usize, + saved_sigbus: SigAction, + saved_sigfpe: SigAction, + saved_sigill: SigAction, + saved_sigsegv: SigAction, + saved_panic_hook: Option<Arc<Box<dyn Fn(&panic::PanicInfo<'_>) + Sync + Send + 'static>>>, +} + +// raw pointers in the saved types +unsafe impl Send for SignalState {} + +unsafe fn setup_guest_signal_state(ostate: &mut Option<SignalState>) { + let mut masked_signals = SigSet::empty(); + masked_signals.add(Signal::SIGBUS); + masked_signals.add(Signal::SIGFPE); + masked_signals.add(Signal::SIGILL); + masked_signals.add(Signal::SIGSEGV); + + // setup signal handlers + let sa = SigAction::new( + SigHandler::SigAction(handle_signal), + SaFlags::SA_RESTART | SaFlags::SA_SIGINFO | SaFlags::SA_ONSTACK, + masked_signals, + ); + let saved_sigbus = sigaction(Signal::SIGBUS, &sa).expect("sigaction succeeds"); + let saved_sigfpe = sigaction(Signal::SIGFPE, &sa).expect("sigaction succeeds"); + let saved_sigill = sigaction(Signal::SIGILL, &sa).expect("sigaction succeeds"); + let saved_sigsegv = sigaction(Signal::SIGSEGV, &sa).expect("sigaction succeeds"); + + let saved_panic_hook = Some(setup_guest_panic_hook()); + + *ostate = Some(SignalState { + counter: 1, + saved_sigbus, + saved_sigfpe, + saved_sigill, + saved_sigsegv, + saved_panic_hook, + }); +} + +fn setup_guest_panic_hook() -> Arc<Box<dyn Fn(&panic::PanicInfo<'_>) + Sync + Send + 'static>> { + let saved_panic_hook = Arc::new(panic::take_hook()); + let closure_saved_panic_hook = saved_panic_hook.clone(); + std::panic::set_hook(Box::new(move |panic_info| { + if panic_info + .payload() + .downcast_ref::<TerminationDetails>() + .is_none() + { + closure_saved_panic_hook(panic_info); + } else { 
+ // this is a panic used to implement instance termination (such as + // `lucet_hostcall_terminate!`), so we don't want to print a backtrace; instead, we do + // nothing + } + })); + saved_panic_hook +} + +unsafe fn restore_host_signal_state(state: &mut SignalState) { + // restore signal handlers + sigaction(Signal::SIGBUS, &state.saved_sigbus).expect("sigaction succeeds"); + sigaction(Signal::SIGFPE, &state.saved_sigfpe).expect("sigaction succeeds"); + sigaction(Signal::SIGILL, &state.saved_sigill).expect("sigaction succeeds"); + sigaction(Signal::SIGSEGV, &state.saved_sigsegv).expect("sigaction succeeds"); + + // restore panic hook + drop(panic::take_hook()); + state + .saved_panic_hook + .take() + .map(|hook| Arc::try_unwrap(hook).map(|hook| panic::set_hook(hook))); +} + +unsafe fn reraise_host_signal_in_handler( + sig: Signal, + signum: libc::c_int, + siginfo_ptr: *mut libc::siginfo_t, + ucontext_ptr: *mut c_void, +) { + let saved_handler = { + // TODO: avoid taking a mutex here, probably by having some static muts just for this + // function + if let Some(ref state) = *LUCET_SIGNAL_STATE.lock().unwrap() { + match sig { + Signal::SIGBUS => state.saved_sigbus.clone(), + Signal::SIGFPE => state.saved_sigfpe.clone(), + Signal::SIGILL => state.saved_sigill.clone(), + Signal::SIGSEGV => state.saved_sigsegv.clone(), + sig => panic!( + "unexpected signal in reraise_host_signal_in_handler: {:?}", + sig + ), + } + } else { + // this case is very fishy; it can arise when the last lucet instance spins down and + // uninstalls the lucet handlers while a signal handler is running on this thread, but + // before taking the mutex above. The theory is that if this has happened, the host + // handler has been reinstalled, so we shouldn't end up back here if we reraise + + // unmask the signal to reraise; we don't have to restore it because the handler will return + // after this. If it signals again between here and now, that's a double fault and the + // process is going to die anyway + let mut unmask = SigSet::empty(); + unmask.add(sig); + pthread_sigmask(SigmaskHow::SIG_UNBLOCK, Some(&unmask), None) + .expect("pthread_sigmask succeeds"); + // if there's no current signal state, just re-raise and hope for the best + raise(sig).expect("raise succeeds"); + return; + } + }; + + match saved_handler.handler() { + SigHandler::SigDfl => { + // reinstall default signal handler and reraise the signal; this should terminate the + // program + sigaction(sig, &saved_handler).expect("sigaction succeeds"); + let mut unmask = SigSet::empty(); + unmask.add(sig); + pthread_sigmask(SigmaskHow::SIG_UNBLOCK, Some(&unmask), None) + .expect("pthread_sigmask succeeds"); + raise(sig).expect("raise succeeds"); + } + SigHandler::SigIgn => { + // don't do anything; if we hit this case, whatever program is hosting us is almost + // certainly doing something wrong, because our set of signals requires intervention to + // proceed + return; + } + SigHandler::Handler(f) => { + // call the saved handler directly so there is no altstack confusion + f(signum) + } + SigHandler::SigAction(f) => { + // call the saved handler directly so there is no altstack confusion + f(signum, siginfo_ptr, ucontext_ptr) + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// A collection of wrappers that will be upstreamed to the `nix` crate eventually. 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +use bitflags::bitflags; + +#[derive(Copy, Clone)] +pub struct SigStack { + stack: libc::stack_t, +} + +impl SigStack { + pub fn new(sp: *mut libc::c_void, flags: SigStackFlags, size: libc::size_t) -> SigStack { + let stack = libc::stack_t { + ss_sp: sp, + ss_flags: flags.bits(), + ss_size: size, + }; + SigStack { stack } + } + + pub fn disabled() -> SigStack { + let stack = libc::stack_t { + ss_sp: std::ptr::null_mut(), + ss_flags: SigStackFlags::SS_DISABLE.bits(), + ss_size: libc::SIGSTKSZ, + }; + SigStack { stack } + } + + pub fn flags(&self) -> SigStackFlags { + SigStackFlags::from_bits_truncate(self.stack.ss_flags) + } +} + +impl AsRef<libc::stack_t> for SigStack { + fn as_ref(&self) -> &libc::stack_t { + &self.stack + } +} + +impl AsMut<libc::stack_t> for SigStack { + fn as_mut(&mut self) -> &mut libc::stack_t { + &mut self.stack + } +} + +bitflags! { + pub struct SigStackFlags: libc::c_int { + const SS_ONSTACK = libc::SS_ONSTACK; + const SS_DISABLE = libc::SS_DISABLE; + } +} + +pub unsafe fn sigaltstack(new_sigstack: Option<SigStack>) -> nix::Result<Option<SigStack>> { + let mut previous_stack = MaybeUninit::<libc::stack_t>::uninit(); + let disabled_sigstack = SigStack::disabled(); + let new_stack = match new_sigstack { + None => &disabled_sigstack.stack, + Some(ref new_stack) => &new_stack.stack, + }; + let res = libc::sigaltstack( + new_stack as *const libc::stack_t, + previous_stack.as_mut_ptr(), + ); + nix::errno::Errno::result(res).map(|_| { + let sigstack = SigStack { + stack: previous_stack.assume_init(), + }; + if sigstack.flags().contains(SigStackFlags::SS_DISABLE) { + None + } else { + Some(sigstack) + } + }) +} + +pub unsafe fn altstack_flags() -> nix::Result<SigStackFlags> { + let mut current_stack = MaybeUninit::<libc::stack_t>::uninit(); + let res = libc::sigaltstack(std::ptr::null_mut(), current_stack.as_mut_ptr()); + nix::errno::Errno::result(res) + .map(|_| SigStackFlags::from_bits_truncate(current_stack.assume_init().ss_flags)) +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/state.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/state.rs new file mode 100644 index 0000000000..1be0346675 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/instance/state.rs @@ -0,0 +1,178 @@ +use crate::instance::siginfo_ext::SiginfoExt; +use crate::instance::{FaultDetails, TerminationDetails, YieldedVal}; +use crate::sysdeps::UContext; +use libc::{SIGBUS, SIGSEGV}; +use std::any::Any; +use std::ffi::{CStr, CString}; + +/// The representation of a Lucet instance's state machine. +pub enum State { + /// The instance is ready to run. + /// + /// Transitions to `Running` when the instance is run, or to `Ready` when it's reset. + Ready, + + /// The instance is running. + /// + /// Transitions to `Ready` when the guest function returns normally, or to `Faulted`, + /// `Terminating`, or `Yielding` if the instance faults, terminates, or yields. + Running, + + /// The instance has faulted, potentially fatally. + /// + /// Transitions to `Faulted` when filling in additional fault details, to `Running` if + /// re-running a non-fatally faulted instance, or to `Ready` when the instance is reset. + Faulted { + details: FaultDetails, + siginfo: libc::siginfo_t, + context: UContext, + }, + + /// The instance is in the process of terminating. 
+ /// + /// Transitions only to `Terminated`; the `TerminationDetails` are always extracted into a + /// `RunResult` before anything else happens to the instance. + Terminating { details: TerminationDetails }, + + /// The instance has terminated, and must be reset before running again. + /// + /// Transitions to `Ready` if the instance is reset. + Terminated, + + /// The instance is in the process of yielding. + /// + /// Transitions only to `Yielded`; the `YieldedVal` is always extracted into a + /// `RunResult` before anything else happens to the instance. + Yielding { + val: YieldedVal, + /// A phantom value carrying the type of the expected resumption value. + /// + /// Concretely, this should only ever be `Box<PhantomData<R>>` where `R` is the type + /// the guest expects upon resumption. + expecting: Box<dyn Any>, + }, + + /// The instance has yielded. + /// + /// Transitions to `Running` if the instance is resumed, or to `Ready` if the instance is reset. + Yielded { + /// A phantom value carrying the type of the expected resumption value. + /// + /// Concretely, this should only ever be `Box<PhantomData<R>>` where `R` is the type + /// the guest expects upon resumption. + expecting: Box<dyn Any>, + }, + + /// A placeholder state used with `std::mem::replace()` when a new state must be constructed by + /// moving values out of an old state. + /// + /// This is used so that we do not need a `Clone` impl for this type, which would add + /// unnecessary constraints to the types of values instances could yield or terminate with. + /// + /// It is an error for this state to appear outside of a transition between other states. + Transitioning, +} + +impl std::fmt::Display for State { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + State::Ready => write!(f, "ready"), + State::Running => write!(f, "running"), + State::Faulted { + details, siginfo, .. + } => { + write!(f, "{}", details)?; + write!( + f, + " triggered by {}: ", + strsignal_wrapper(siginfo.si_signo) + .into_string() + .expect("strsignal returns valid UTF-8") + )?; + + if siginfo.si_signo == SIGSEGV || siginfo.si_signo == SIGBUS { + // We know this is inside the heap guard, because by the time we get here, + // `lucet_error_verify_trap_safety` will have run and validated it. + write!( + f, + " accessed memory at {:p} (inside heap guard)", + siginfo.si_addr_ext() + )?; + } + Ok(()) + } + State::Terminated { .. } => write!(f, "terminated"), + State::Terminating { .. } => write!(f, "terminating"), + State::Yielding { .. } => write!(f, "yielding"), + State::Yielded { .. } => write!(f, "yielded"), + State::Transitioning { .. } => { + write!(f, "transitioning (IF YOU SEE THIS, THERE'S PROBABLY A BUG)") + } + } + } +} + +impl State { + pub fn is_ready(&self) -> bool { + if let State::Ready { .. } = self { + true + } else { + false + } + } + + pub fn is_running(&self) -> bool { + if let State::Running = self { + true + } else { + false + } + } + + pub fn is_fault(&self) -> bool { + if let State::Faulted { .. } = self { + true + } else { + false + } + } + + pub fn is_fatal(&self) -> bool { + if let State::Faulted { + details: FaultDetails { fatal, .. }, + .. + } = self + { + *fatal + } else { + false + } + } + + pub fn is_terminated(&self) -> bool { + if let State::Terminated { .. } = self { + true + } else { + false + } + } + + pub fn is_yielded(&self) -> bool { + if let State::Yielded { .. 
} = self { + true + } else { + false + } + } +} + +// TODO: PR into `libc` +extern "C" { + #[no_mangle] + fn strsignal(sig: libc::c_int) -> *mut libc::c_char; +} + +// TODO: PR into `nix` +fn strsignal_wrapper(sig: libc::c_int) -> CString { + unsafe { CStr::from_ptr(strsignal(sig)).to_owned() } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/lib.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/lib.rs new file mode 100644 index 0000000000..fb7103d4eb --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/lib.rs @@ -0,0 +1,28 @@ +//! `lucet-runtime` is a library for loading, running, and monitoring ahead-of-time compiled +//! WebAssembly modules in lightweight sandboxes. It is intended to work with modules compiled by +//! [`lucetc`](https://github.com/fastly/lucet/tree/master/lucetc). + +#![deny(bare_trait_objects)] + +#[macro_use] +pub mod error; +#[macro_use] +pub mod hostcall_macros; + +#[macro_use] +#[cfg(test)] +pub mod test_helpers; + +pub mod alloc; +pub mod c_api; +pub mod context; +pub mod embed_ctx; +pub mod instance; +pub mod module; +pub mod region; +pub mod sysdeps; +pub mod val; +pub mod vmctx; + +/// The size of a page in WebAssembly heaps. +pub const WASM_PAGE_SIZE: u32 = 64 * 1024; diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/module.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module.rs new file mode 100644 index 0000000000..4af275ab25 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module.rs @@ -0,0 +1,142 @@ +mod dl; +mod mock; +mod sparse_page_data; + +pub use crate::module::dl::DlModule; +pub use crate::module::mock::{MockExportBuilder, MockModuleBuilder}; +pub use lucet_module::{ + FunctionHandle, FunctionIndex, FunctionPointer, FunctionSpec, Global, GlobalSpec, GlobalValue, + HeapSpec, Signature, TableElement, TrapCode, TrapManifest, ValueType, +}; + +use crate::alloc::Limits; +use crate::error::Error; +use libc::c_void; + +/// Details about a program address. +/// +/// It is possible to determine whether an address lies within the module code if the module is +/// loaded from a shared object. Statically linked modules are not resolvable. Best effort is made +/// to resolve the symbol the address is found inside, and the file that symbol is found in. See +/// `dladdr(3)` for more details. +#[derive(Clone, Debug)] +pub struct AddrDetails { + pub in_module_code: bool, + pub file_name: Option<String>, + pub sym_name: Option<String>, +} + +/// The read-only parts of a Lucet program, including its code and initial heap configuration. +/// +/// Types that implement this trait are suitable for use with +/// [`Region::new_instance()`](trait.Region.html#method.new_instance). +pub trait Module: ModuleInternal { + /// Calculate the initial size in bytes of the module's Wasm globals. + fn initial_globals_size(&self) -> usize { + self.globals().len() * std::mem::size_of::<u64>() + } +} + +pub trait ModuleInternal: Send + Sync { + fn heap_spec(&self) -> Option<&HeapSpec>; + + /// Get the WebAssembly globals of the module. + /// + /// The indices into the returned slice correspond to the WebAssembly indices of the globals + /// (<https://webassembly.github.io/spec/core/syntax/modules.html#syntax-globalidx>) + fn globals(&self) -> &[GlobalSpec<'_>]; + + fn get_sparse_page_data(&self, page: usize) -> Option<&[u8]>; + + /// Get the number of pages in the sparse page data. + fn sparse_page_data_len(&self) -> usize; + + /// Get the table elements from the module. 
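+    ///
+    /// Implementations return the elements of the module's table index 0; the shared-object
+    /// loader (`DlModule`), for example, reports an error if that table is absent and rejects
+    /// any other `table_id` in `get_func_from_idx`.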
+ fn table_elements(&self) -> Result<&[TableElement], Error>; + + fn get_export_func(&self, sym: &str) -> Result<FunctionHandle, Error>; + + fn get_func_from_idx(&self, table_id: u32, func_id: u32) -> Result<FunctionHandle, Error>; + + fn get_start_func(&self) -> Result<Option<FunctionHandle>, Error>; + + fn function_manifest(&self) -> &[FunctionSpec]; + + fn addr_details(&self, addr: *const c_void) -> Result<Option<AddrDetails>, Error>; + + fn get_signature(&self, fn_id: FunctionIndex) -> &Signature; + + fn get_signatures(&self) -> &[Signature]; + + fn function_handle_from_ptr(&self, ptr: FunctionPointer) -> FunctionHandle { + let id = self + .function_manifest() + .iter() + .enumerate() + .find(|(_, fn_spec)| fn_spec.ptr() == ptr) + .map(|(fn_id, _)| FunctionIndex::from_u32(fn_id as u32)) + .expect("valid function pointer"); + + FunctionHandle { ptr, id } + } + + /// Look up an instruction pointer in the trap manifest. + /// + /// This function must be signal-safe. + fn lookup_trapcode(&self, rip: *const c_void) -> Option<TrapCode> { + for fn_spec in self.function_manifest() { + if let Some(offset) = fn_spec.relative_addr(rip as u64) { + // the address falls in this trap manifest's function. + // `rip` can only lie in one function, so either + // there's a trap site in this manifest, and that's + // the one we want, or there's none + return fn_spec.traps().and_then(|traps| traps.lookup_addr(offset)); + } + } + None + } + + /// Check that the specifications of the WebAssembly module are valid given certain `Limit`s. + /// + /// Returns a `Result<(), Error>` rather than a boolean in order to provide a richer accounting + /// of what may be invalid. + fn validate_runtime_spec(&self, limits: &Limits) -> Result<(), Error> { + // Modules without heap specs will not access the heap + if let Some(heap) = self.heap_spec() { + // Assure that the total reserved + guard regions fit in the address space. + // First check makes sure they fit our 32-bit model, and ensures the second + // check doesn't overflow. 
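+            //
+            // For example (illustrative numbers): reserved_size = 0x1_0000_0000 (4 GiB) and
+            // guard_size = 0x8000_0000 (2 GiB) both pass the first check, since each is at most
+            // u32::MAX + 1 = 2^32. The second check then requires limits.heap_address_space_size
+            // to be at least 6 GiB, and its usize addition cannot overflow on a 64-bit host
+            // because each operand already fits in 33 bits.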
+ if heap.reserved_size > std::u32::MAX as u64 + 1 + || heap.guard_size > std::u32::MAX as u64 + 1 + { + return Err(lucet_incorrect_module!( + "heap spec sizes would overflow: {:?}", + heap + )); + } + + if heap.reserved_size as usize + heap.guard_size as usize + > limits.heap_address_space_size + { + bail_limits_exceeded!("heap spec reserved and guard size: {:?}", heap); + } + + if heap.initial_size as usize > limits.heap_memory_size { + bail_limits_exceeded!("heap spec initial size: {:?}", heap); + } + + if heap.initial_size > heap.reserved_size { + return Err(lucet_incorrect_module!( + "initial heap size greater than reserved size: {:?}", + heap + )); + } + } + + if self.globals().len() * std::mem::size_of::<u64>() > limits.globals_size { + bail_limits_exceeded!("globals exceed limits"); + } + + Ok(()) + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/dl.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/dl.rs new file mode 100644 index 0000000000..0cc4f3495b --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/dl.rs @@ -0,0 +1,314 @@ +use crate::error::Error; +use crate::module::{AddrDetails, GlobalSpec, HeapSpec, Module, ModuleInternal, TableElement}; +use libc::c_void; +use libloading::Library; +use lucet_module::{ + FunctionHandle, FunctionIndex, FunctionPointer, FunctionSpec, ModuleData, ModuleFeatures, + SerializedModule, Signature, LUCET_MODULE_SYM, +}; +#[cfg(feature = "signature_checking")] +use lucet_module::{ModuleSignature, PublicKey}; +use std::ffi::CStr; +use std::mem::MaybeUninit; +use std::path::Path; +use std::slice; +use std::slice::from_raw_parts; +use std::sync::Arc; + +use raw_cpuid::CpuId; + +fn check_feature_support(module_features: &ModuleFeatures) -> Result<(), Error> { + let cpuid = CpuId::new(); + + fn missing_feature(feature: &str) -> Error { + Error::Unsupported(format!( + "Module requires feature host does not support: {}", + feature + )) + } + + let info = cpuid + .get_feature_info() + .ok_or_else(|| Error::Unsupported("Unable to obtain host CPU feature info!".to_string()))?; + + if module_features.sse3 && !info.has_sse3() { + return Err(missing_feature("SSE3")); + } + if module_features.ssse3 && !info.has_ssse3() { + return Err(missing_feature("SSS3")); + } + if module_features.sse41 && !info.has_sse41() { + return Err(missing_feature("SSE4.1")); + } + if module_features.sse42 && !info.has_sse42() { + return Err(missing_feature("SSE4.2")); + } + if module_features.avx && !info.has_avx() { + return Err(missing_feature("AVX")); + } + if module_features.popcnt && !info.has_popcnt() { + return Err(missing_feature("POPCNT")); + } + + if module_features.bmi1 || module_features.bmi2 { + let info = cpuid.get_extended_feature_info().ok_or_else(|| { + Error::Unsupported("Unable to obtain host CPU extended feature info!".to_string()) + })?; + + if module_features.bmi1 && !info.has_bmi1() { + return Err(missing_feature("BMI1")); + } + + if module_features.bmi2 && !info.has_bmi2() { + return Err(missing_feature("BMI2")); + } + } + + if module_features.lzcnt { + let info = cpuid.get_extended_function_info().ok_or_else(|| { + Error::Unsupported("Unable to obtain host CPU extended function info!".to_string()) + })?; + + if module_features.lzcnt && !info.has_lzcnt() { + return Err(missing_feature("LZCNT")); + } + } + + // Features are fine, we're compatible! + Ok(()) +} + +/// A Lucet module backed by a dynamically-loaded shared object. 
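+///
+/// A sketch of typical use (the path and limits are illustrative; `MmapRegion` is defined in
+/// `region::mmap`):
+///
+/// ```ignore
+/// let module = DlModule::load("guest.so")?;
+/// let region = MmapRegion::create(1, &Limits::default())?;
+/// let inst = region.new_instance(module)?;
+/// ```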
+pub struct DlModule {
+    lib: Library,
+
+    /// Base address of the dynamically-loaded module
+    fbase: *const c_void,
+
+    /// Metadata decoded from inside the module
+    module: lucet_module::Module<'static>,
+}
+
+// for the one raw pointer only
+unsafe impl Send for DlModule {}
+unsafe impl Sync for DlModule {}
+
+impl DlModule {
+    /// Create a module, loading code from a shared object on the filesystem.
+    pub fn load<P: AsRef<Path>>(so_path: P) -> Result<Arc<Self>, Error> {
+        Self::load_and_maybe_verify(so_path, |_module_data| Ok(()))
+    }
+
+    /// Create a module, loading code from a shared object on the filesystem
+    /// and verifying it using a public key if one has been supplied.
+    #[cfg(feature = "signature_checking")]
+    pub fn load_and_verify<P: AsRef<Path>>(so_path: P, pk: PublicKey) -> Result<Arc<Self>, Error> {
+        Self::load_and_maybe_verify(so_path, |module_data| {
+            // Public key has been provided, verify the module signature
+            // The TOCTOU issue is unavoidable without reimplementing `dlopen(3)`
+            ModuleSignature::verify(so_path, &pk, &module_data)
+        })
+    }
+
+    fn load_and_maybe_verify<P: AsRef<Path>>(
+        so_path: P,
+        verifier: fn(&ModuleData) -> Result<(), Error>,
+        // pk: Option<PublicKey>,
+    ) -> Result<Arc<Self>, Error> {
+        // Load the dynamic library. The undefined symbols corresponding to the lucet_syscall_
+        // functions will be provided by the current executable. We trust our wasm->dylib compiler
+        // to make sure these function calls are the way the dylib can touch memory outside of its
+        // stack and heap.
+        // let abs_so_path = so_path.as_ref().canonicalize().map_err(Error::DlError)?;
+        let lib = Library::new(so_path.as_ref().as_os_str()).map_err(Error::DlError)?;
+
+        let serialized_module_ptr = unsafe {
+            lib.get::<*const SerializedModule>(LUCET_MODULE_SYM.as_bytes())
+                .map_err(|e| {
+                    lucet_incorrect_module!("error loading required symbol `lucet_module`: {}", e)
+                })?
+        };
+
+        let serialized_module: &SerializedModule =
+            unsafe { serialized_module_ptr.as_ref().unwrap() };
+
+        // Deserialize the slice into ModuleData, which will hold refs into the loaded
+        // shared object file in `module_data_slice`. Both of these get a 'static lifetime because
+        // Rust doesn't have a safe way to describe that their lifetime matches the containing
+        // struct (and the dll).
+        //
+        // The exposed lifetime of ModuleData will be the same as the lifetime of the
+        // dynamically loaded library. This makes the interface safe.
+ let module_data_slice: &'static [u8] = unsafe { + slice::from_raw_parts( + serialized_module.module_data_ptr as *const u8, + serialized_module.module_data_len as usize, + ) + }; + let module_data = ModuleData::deserialize(module_data_slice)?; + + check_feature_support(module_data.features())?; + + verifier(&module_data)?; + + let fbase = if let Some(dli) = + dladdr(serialized_module as *const SerializedModule as *const c_void) + { + dli.dli_fbase + } else { + std::ptr::null() + }; + + if serialized_module.tables_len > std::u32::MAX as u64 { + lucet_incorrect_module!("table segment too long: {}", serialized_module.tables_len); + } + let tables: &'static [&'static [TableElement]] = unsafe { + from_raw_parts( + serialized_module.tables_ptr as *const &[TableElement], + serialized_module.tables_len as usize, + ) + }; + + let function_manifest = if serialized_module.function_manifest_ptr != 0 { + unsafe { + from_raw_parts( + serialized_module.function_manifest_ptr as *const FunctionSpec, + serialized_module.function_manifest_len as usize, + ) + } + } else { + &[] + }; + + Ok(Arc::new(DlModule { + lib, + fbase, + module: lucet_module::Module { + module_data, + tables, + function_manifest, + }, + })) + } +} + +impl Module for DlModule {} + +impl ModuleInternal for DlModule { + fn heap_spec(&self) -> Option<&HeapSpec> { + self.module.module_data.heap_spec() + } + + fn globals(&self) -> &[GlobalSpec<'_>] { + self.module.module_data.globals_spec() + } + + fn get_sparse_page_data(&self, page: usize) -> Option<&[u8]> { + if let Some(ref sparse_data) = self.module.module_data.sparse_data() { + *sparse_data.get_page(page) + } else { + None + } + } + + fn sparse_page_data_len(&self) -> usize { + self.module + .module_data + .sparse_data() + .map(|d| d.len()) + .unwrap_or(0) + } + + fn table_elements(&self) -> Result<&[TableElement], Error> { + match self.module.tables.get(0) { + Some(table) => Ok(table), + None => Err(lucet_incorrect_module!("table 0 is not present")), + } + } + + fn get_export_func(&self, sym: &str) -> Result<FunctionHandle, Error> { + self.module + .module_data + .get_export_func_id(sym) + .ok_or_else(|| Error::SymbolNotFound(sym.to_string())) + .map(|id| { + let ptr = self.function_manifest()[id.as_u32() as usize].ptr(); + FunctionHandle { ptr, id } + }) + } + + fn get_func_from_idx(&self, table_id: u32, func_id: u32) -> Result<FunctionHandle, Error> { + if table_id != 0 { + return Err(Error::FuncNotFound(table_id, func_id)); + } + let table = self.table_elements()?; + let func = table + .get(func_id as usize) + .map(|element| element.function_pointer()) + .ok_or(Error::FuncNotFound(table_id, func_id))?; + + Ok(self.function_handle_from_ptr(func)) + } + + fn get_start_func(&self) -> Result<Option<FunctionHandle>, Error> { + // `guest_start` is a pointer to the function the module designates as the start function, + // since we can't have multiple symbols pointing to the same function and guest code might + // call it in the normal course of execution + if let Ok(start_func) = unsafe { self.lib.get::<*const extern "C" fn()>(b"guest_start") } { + if start_func.is_null() { + lucet_incorrect_module!("`guest_start` is defined but null"); + } + Ok(Some(self.function_handle_from_ptr( + FunctionPointer::from_usize(unsafe { **start_func } as usize), + ))) + } else { + Ok(None) + } + } + + fn function_manifest(&self) -> &[FunctionSpec] { + self.module.function_manifest + } + + fn addr_details(&self, addr: *const c_void) -> Result<Option<AddrDetails>, Error> { + if let Some(dli) = dladdr(addr) 
{ + let file_name = if dli.dli_fname.is_null() { + None + } else { + Some(unsafe { CStr::from_ptr(dli.dli_fname).to_owned().into_string()? }) + }; + let sym_name = if dli.dli_sname.is_null() { + None + } else { + Some(unsafe { CStr::from_ptr(dli.dli_sname).to_owned().into_string()? }) + }; + Ok(Some(AddrDetails { + in_module_code: dli.dli_fbase as *const c_void == self.fbase, + file_name, + sym_name, + })) + } else { + Ok(None) + } + } + + fn get_signature(&self, fn_id: FunctionIndex) -> &Signature { + self.module.module_data.get_signature(fn_id) + } + + fn get_signatures(&self) -> &[Signature] { + self.module.module_data.signatures() + } +} + +// TODO: PR to nix or libloading? +// TODO: possibly not safe to use without grabbing the mutex within libloading::Library? +fn dladdr(addr: *const c_void) -> Option<libc::Dl_info> { + let mut info = MaybeUninit::<libc::Dl_info>::uninit(); + let res = unsafe { libc::dladdr(addr, info.as_mut_ptr()) }; + if res != 0 { + Some(unsafe { info.assume_init() }) + } else { + None + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/mock.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/mock.rs new file mode 100644 index 0000000000..edca4d83a2 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/mock.rs @@ -0,0 +1,383 @@ +use crate::error::Error; +use crate::module::{AddrDetails, GlobalSpec, HeapSpec, Module, ModuleInternal, TableElement}; +use libc::c_void; +use lucet_module::owned::{ + OwnedExportFunction, OwnedFunctionMetadata, OwnedGlobalSpec, OwnedImportFunction, + OwnedLinearMemorySpec, OwnedModuleData, OwnedSparseData, +}; +use lucet_module::{ + FunctionHandle, FunctionIndex, FunctionPointer, FunctionSpec, ModuleData, ModuleFeatures, + Signature, TrapSite, UniqueSignatureIndex, +}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; + +#[derive(Default)] +pub struct MockModuleBuilder { + heap_spec: HeapSpec, + sparse_page_data: Vec<Option<Vec<u8>>>, + globals: BTreeMap<usize, OwnedGlobalSpec>, + table_elements: BTreeMap<usize, TableElement>, + export_funcs: HashMap<&'static str, FunctionPointer>, + func_table: HashMap<(u32, u32), FunctionPointer>, + start_func: Option<FunctionPointer>, + function_manifest: Vec<FunctionSpec>, + function_info: Vec<OwnedFunctionMetadata>, + imports: Vec<OwnedImportFunction>, + exports: Vec<OwnedExportFunction>, + signatures: Vec<Signature>, +} + +impl MockModuleBuilder { + pub fn new() -> Self { + const DEFAULT_HEAP_SPEC: HeapSpec = HeapSpec { + reserved_size: 4 * 1024 * 1024, + guard_size: 4 * 1024 * 1024, + initial_size: 64 * 1024, + max_size: Some(64 * 1024), + }; + MockModuleBuilder::default().with_heap_spec(DEFAULT_HEAP_SPEC) + } + + pub fn with_heap_spec(mut self, heap_spec: HeapSpec) -> Self { + self.heap_spec = heap_spec; + self + } + + pub fn with_initial_heap(mut self, heap: &[u8]) -> Self { + self.sparse_page_data = heap + .chunks(4096) + .map(|page| { + if page.iter().all(|b| *b == 0) { + None + } else { + let mut page = page.to_vec(); + if page.len() < 4096 { + page.resize(4096, 0); + } + Some(page) + } + }) + .collect(); + self + } + + pub fn with_global(mut self, idx: u32, init_val: i64) -> Self { + self.globals + .insert(idx as usize, OwnedGlobalSpec::new_def(init_val, vec![])); + self + } + + pub fn with_exported_global(mut self, idx: u32, init_val: i64, export_name: &str) -> Self { + self.globals.insert( + idx as usize, + OwnedGlobalSpec::new_def(init_val, vec![export_name.to_string()]), + ); + self + } + + pub fn 
with_import(mut self, idx: u32, import_module: &str, import_field: &str) -> Self { + self.globals.insert( + idx as usize, + OwnedGlobalSpec::new_import( + import_module.to_string(), + import_field.to_string(), + vec![], + ), + ); + self + } + + pub fn with_exported_import( + mut self, + idx: u32, + import_module: &str, + import_field: &str, + export_name: &str, + ) -> Self { + self.globals.insert( + idx as usize, + OwnedGlobalSpec::new_import( + import_module.to_string(), + import_field.to_string(), + vec![export_name.to_string()], + ), + ); + self + } + + pub fn with_table_element(mut self, idx: u32, element: &TableElement) -> Self { + self.table_elements.insert(idx as usize, element.clone()); + self + } + + fn record_sig(&mut self, sig: Signature) -> UniqueSignatureIndex { + let idx = self + .signatures + .iter() + .enumerate() + .find(|(_, v)| *v == &sig) + .map(|(key, _)| key) + .unwrap_or_else(|| { + self.signatures.push(sig); + self.signatures.len() - 1 + }); + UniqueSignatureIndex::from_u32(idx as u32) + } + + pub fn with_export_func(mut self, export: MockExportBuilder) -> Self { + self.export_funcs.insert(export.sym(), export.func()); + let sig_idx = self.record_sig(export.sig()); + self.function_info.push(OwnedFunctionMetadata { + signature: sig_idx, + name: Some(export.sym().to_string()), + }); + self.exports.push(OwnedExportFunction { + fn_idx: FunctionIndex::from_u32(self.function_manifest.len() as u32), + names: vec![export.sym().to_string()], + }); + self.function_manifest.push(FunctionSpec::new( + export.func().as_usize() as u64, + export.func_len() as u32, + export.traps().as_ptr() as u64, + export.traps().len() as u64, + )); + self + } + + pub fn with_exported_import_func( + mut self, + export_name: &'static str, + import_fn_ptr: FunctionPointer, + sig: Signature, + ) -> Self { + self.export_funcs.insert(export_name, import_fn_ptr); + let sig_idx = self.record_sig(sig); + self.function_info.push(OwnedFunctionMetadata { + signature: sig_idx, + name: Some(export_name.to_string()), + }); + self.exports.push(OwnedExportFunction { + fn_idx: FunctionIndex::from_u32(self.function_manifest.len() as u32), + names: vec![export_name.to_string()], + }); + self.function_manifest.push(FunctionSpec::new( + import_fn_ptr.as_usize() as u64, + 0u32, + 0u64, + 0u64, + )); + self + } + + pub fn with_table_func(mut self, table_idx: u32, func_idx: u32, func: FunctionPointer) -> Self { + self.func_table.insert((table_idx, func_idx), func); + self + } + + pub fn with_start_func(mut self, func: FunctionPointer) -> Self { + self.start_func = Some(func); + self + } + + pub fn build(self) -> Arc<dyn Module> { + assert!( + self.sparse_page_data.len() * 4096 <= self.heap_spec.initial_size as usize, + "heap must fit in heap spec initial size" + ); + + let table_elements = self + .table_elements + .into_iter() + .enumerate() + .map(|(expected_idx, (idx, te))| { + assert_eq!( + idx, expected_idx, + "table element indices must be contiguous starting from 0" + ); + te + }) + .collect(); + let globals_spec = self + .globals + .into_iter() + .enumerate() + .map(|(expected_idx, (idx, gs))| { + assert_eq!( + idx, expected_idx, + "global indices must be contiguous starting from 0" + ); + gs + }) + .collect(); + let owned_module_data = OwnedModuleData::new( + Some(OwnedLinearMemorySpec { + heap: self.heap_spec, + initializer: OwnedSparseData::new(self.sparse_page_data) + .expect("sparse data pages are valid"), + }), + globals_spec, + self.function_info.clone(), + self.imports, + self.exports, + self.signatures, + 
ModuleFeatures::none(), + ); + let serialized_module_data = owned_module_data + .to_ref() + .serialize() + .expect("serialization of module_data succeeds"); + let module_data = ModuleData::deserialize(&serialized_module_data) + .map(|md| unsafe { std::mem::transmute(md) }) + .expect("module data can be deserialized"); + let mock = MockModule { + serialized_module_data, + module_data, + table_elements, + export_funcs: self.export_funcs, + func_table: self.func_table, + start_func: self.start_func, + function_manifest: self.function_manifest, + }; + Arc::new(mock) + } +} + +pub struct MockModule { + #[allow(dead_code)] + serialized_module_data: Vec<u8>, + module_data: ModuleData<'static>, + pub table_elements: Vec<TableElement>, + pub export_funcs: HashMap<&'static str, FunctionPointer>, + pub func_table: HashMap<(u32, u32), FunctionPointer>, + pub start_func: Option<FunctionPointer>, + pub function_manifest: Vec<FunctionSpec>, +} + +unsafe impl Send for MockModule {} +unsafe impl Sync for MockModule {} + +impl Module for MockModule {} + +impl ModuleInternal for MockModule { + fn heap_spec(&self) -> Option<&HeapSpec> { + self.module_data.heap_spec() + } + + fn globals(&self) -> &[GlobalSpec<'_>] { + self.module_data.globals_spec() + } + + fn get_sparse_page_data(&self, page: usize) -> Option<&[u8]> { + if let Some(ref sparse_data) = self.module_data.sparse_data() { + *sparse_data.get_page(page) + } else { + None + } + } + + fn sparse_page_data_len(&self) -> usize { + self.module_data.sparse_data().map(|d| d.len()).unwrap_or(0) + } + + fn table_elements(&self) -> Result<&[TableElement], Error> { + Ok(&self.table_elements) + } + + fn get_export_func(&self, sym: &str) -> Result<FunctionHandle, Error> { + let ptr = *self + .export_funcs + .get(sym) + .ok_or(Error::SymbolNotFound(sym.to_string()))?; + + Ok(self.function_handle_from_ptr(ptr)) + } + + fn get_func_from_idx(&self, table_id: u32, func_id: u32) -> Result<FunctionHandle, Error> { + let ptr = self + .func_table + .get(&(table_id, func_id)) + .cloned() + .ok_or(Error::FuncNotFound(table_id, func_id))?; + + Ok(self.function_handle_from_ptr(ptr)) + } + + fn get_start_func(&self) -> Result<Option<FunctionHandle>, Error> { + Ok(self + .start_func + .map(|start| self.function_handle_from_ptr(start))) + } + + fn function_manifest(&self) -> &[FunctionSpec] { + &self.function_manifest + } + + fn addr_details(&self, _addr: *const c_void) -> Result<Option<AddrDetails>, Error> { + // we can call `dladdr` on Rust code, but unless we inspect the stack I don't think there's + // a way to determine whether or not we're in "module" code; punt for now + Ok(None) + } + + fn get_signature(&self, fn_id: FunctionIndex) -> &Signature { + self.module_data.get_signature(fn_id) + } + + fn get_signatures(&self) -> &[Signature] { + self.module_data.signatures() + } +} + +pub struct MockExportBuilder { + sym: &'static str, + func: FunctionPointer, + func_len: Option<usize>, + traps: Option<&'static [TrapSite]>, + sig: Signature, +} + +impl MockExportBuilder { + pub fn new(name: &'static str, func: FunctionPointer) -> MockExportBuilder { + MockExportBuilder { + sym: name, + func: func, + func_len: None, + traps: None, + sig: Signature { + params: vec![], + ret_ty: None, + }, + } + } + + pub fn with_func_len(mut self, len: usize) -> MockExportBuilder { + self.func_len = Some(len); + self + } + + pub fn with_traps(mut self, traps: &'static [TrapSite]) -> MockExportBuilder { + self.traps = Some(traps); + self + } + + pub fn with_sig(mut self, sig: Signature) -> 
MockExportBuilder { + self.sig = sig; + self + } + + pub fn sym(&self) -> &'static str { + self.sym + } + pub fn func(&self) -> FunctionPointer { + self.func + } + pub fn func_len(&self) -> usize { + self.func_len.unwrap_or(1) + } + pub fn traps(&self) -> &'static [TrapSite] { + self.traps.unwrap_or(&[]) + } + pub fn sig(&self) -> Signature { + self.sig.clone() + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/sparse_page_data.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/sparse_page_data.rs new file mode 100644 index 0000000000..7b7ce7710b --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/module/sparse_page_data.rs @@ -0,0 +1,70 @@ +#[macro_export] +macro_rules! sparse_page_data_tests { + ( $TestRegion:path ) => { + use std::sync::Arc; + use $TestRegion as TestRegion; + use $crate::alloc::{host_page_size, Limits}; + use $crate::instance::InstanceInternal; + use $crate::module::{MockModuleBuilder, Module}; + use $crate::region::Region; + + const FIRST_MESSAGE: &'static [u8] = b"hello from mock_sparse_module!"; + const SECOND_MESSAGE: &'static [u8] = b"hello again from mock_sparse_module!"; + + fn mock_sparse_module() -> Arc<dyn Module> { + let mut initial_heap = FIRST_MESSAGE.to_vec(); + // zero remainder of the first page, and the whole second page + initial_heap.resize(4096 * 2, 0); + let mut third_page = SECOND_MESSAGE.to_vec(); + third_page.resize(4096, 0); + initial_heap.append(&mut third_page); + MockModuleBuilder::new() + .with_initial_heap(initial_heap.as_slice()) + .build() + } + + #[test] + fn valid_sparse_page_data() { + let module = mock_sparse_module(); + + assert_eq!(module.sparse_page_data_len(), 3); + + let mut first_page_expected: Vec<u8> = FIRST_MESSAGE.to_vec(); + first_page_expected.resize(host_page_size(), 0); + let mut third_page_expected: Vec<u8> = SECOND_MESSAGE.to_vec(); + third_page_expected.resize(host_page_size(), 0); + + let first_page: &[u8] = module.get_sparse_page_data(0).unwrap(); + assert_eq!(first_page, first_page_expected.as_slice()); + + assert!(module.get_sparse_page_data(1).is_none()); + + let third_page: &[u8] = module.get_sparse_page_data(2).unwrap(); + assert_eq!(third_page, third_page_expected.as_slice()); + } + + #[test] + fn instantiate_valid_sparse_data() { + let module = mock_sparse_module(); + let region = TestRegion::create(1, &Limits::default()).expect("region can be created"); + let inst = region + .new_instance(module) + .expect("instance can be created"); + + // The test data initializers result in two strings getting copied into linear memory; see + // `lucet-runtime-c/test/data_segment/valid_data_seg.c` for details + let heap = unsafe { inst.alloc().heap() }; + assert_eq!(&heap[0..FIRST_MESSAGE.len()], FIRST_MESSAGE.as_ref()); + let second_message_start = 2 * host_page_size(); + assert_eq!( + &heap[second_message_start..second_message_start + SECOND_MESSAGE.len()], + SECOND_MESSAGE.as_ref() + ); + } + }; +} + +#[cfg(test)] +mod tests { + sparse_page_data_tests!(crate::region::mmap::MmapRegion); +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/region/mmap.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/region/mmap.rs new file mode 100644 index 0000000000..c3da81353b --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/region/mmap.rs @@ -0,0 +1,457 @@ +use crate::alloc::{host_page_size, instance_heap_offset, Alloc, Limits, Slot}; +use crate::embed_ctx::CtxMap; +use crate::error::Error; +use 
crate::instance::{new_instance_handle, Instance, InstanceHandle}; +use crate::module::Module; +use crate::region::{Region, RegionCreate, RegionInternal}; +use libc::c_void; +#[cfg(not(target_os = "linux"))] +use libc::memset; +use nix::sys::mman::{madvise, mmap, munmap, MapFlags, MmapAdvise, ProtFlags}; +use std::ptr; +use std::sync::{Arc, Mutex, Weak}; + +/// A [`Region`](../trait.Region.html) backed by `mmap`. +/// +/// `MmapRegion` lays out memory for instances in a contiguous block, +/// with an Instance's space reserved, followed by heap, stack, globals, and sigstack. +/// +/// This results in an actual layout of an instance on an `MmapRegion`-produced `Slot` being: +/// ```text +/// 0x0000: +-----------------------+ <-- Instance +/// 0x0000: | .magic | +/// 0x0008: | ... | +/// 0x000X: | ... | +/// 0x0XXX: | .alloc -> Alloc { | +/// 0x0XXX: | .start = 0x0000 | +/// 0x0XXX: | .heap = 0x1000 | +/// 0x0XXX: | .stack = 0xN000 | +/// 0x0XXX: | .globals = 0xM000 | +/// 0x0XXX: | .sigstack = 0xS000 | +/// 0x0XXX: | } | +/// 0x0XXX: | ... | +/// 0x0XXX: ~ ~padding~ ~ +/// 0x0XXX: | ... | +/// 0x0XXX: | .globals = 0xM000 | <-- InstanceRuntimeData +/// 0x0XXX: | .inst_count = 0x0000 | +/// 0x1000: +-----------------------+ <-- Heap, and `lucet_vmctx`. One page into the allocation. +/// 0x1XXX: | | +/// 0xXXXX: ~ .......heap....... ~ // heap size is governed by limits.heap_address_space_size +/// 0xXXXX: | | +/// 0xN000: +-----------------------| <-- Stack (at heap_start + limits.heap_address_space_size) +/// 0xNXXX: --- stack guard page ---- +/// 0xNXXX: | | +/// 0xXXXX: ~ .......stack...... ~ // stack size is governed by limits.stack_size +/// 0xXXXX: | | +/// 0xM000: +-----------------------| <-- Globals (at stack_start + limits.stack_size + PAGE_SIZE) +/// 0xMXXX: | | +/// 0xXXXX: ~ ......globals..... ~ +/// 0xXXXX: | | +/// 0xXXXX --- global guard page --- +/// 0xS000: +-----------------------| <-- Sigstack (at globals_start + globals_size + PAGE_SIZE) +/// 0xSXXX: | ......sigstack.... | // sigstack is governed by limits.signal_stack_size +/// 0xSXXX: +-----------------------| +/// ``` +pub struct MmapRegion { + capacity: usize, + freelist: Mutex<Vec<Slot>>, + limits: Limits, + min_heap_alignment: usize, +} + +impl Region for MmapRegion {} + +impl RegionInternal for MmapRegion { + fn new_instance_with( + &self, + module: Arc<dyn Module>, + embed_ctx: CtxMap, + ) -> Result<InstanceHandle, Error> { + let slot = self + .freelist + .lock() + .unwrap() + .pop() + .ok_or(Error::RegionFull(self.capacity))?; + + if slot.heap as usize % host_page_size() != 0 { + lucet_bail!("heap is not page-aligned; this is a bug"); + } + + let limits = &slot.limits; + module.validate_runtime_spec(limits)?; + + for (ptr, len) in [ + // make the stack read/writable + (slot.stack, limits.stack_size), + // make the globals read/writable + (slot.globals, limits.globals_size), + // make the sigstack read/writable + (slot.sigstack, limits.signal_stack_size), + ] + .iter() + { + // eprintln!("setting r/w {:p}[{:x}]", *ptr, len); + unsafe { mprotect(*ptr, *len, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE)? 
}; + } + + // note: the initial heap will be made read/writable when `new_instance_handle` calls `reset` + + let inst_ptr = slot.start as *mut Instance; + + // upgrade the slot's weak region pointer so the region can't get dropped while the instance + // exists + let region = slot + .region + .upgrade() + // if this precondition isn't met, something is deeply wrong as some other region's slot + // ended up in our freelist + .expect("backing region of slot (`self`) exists"); + + let alloc = Alloc { + heap_accessible_size: 0, // the `reset` call in `new_instance_handle` will set this + heap_inaccessible_size: slot.limits.heap_address_space_size, + slot: Some(slot), + region, + }; + + let inst = new_instance_handle(inst_ptr, module, alloc, embed_ctx)?; + + Ok(inst) + } + + fn drop_alloc(&self, alloc: &mut Alloc) { + let slot = alloc + .slot + .take() + .expect("alloc didn't have a slot during drop; dropped twice?"); + + if slot.heap as usize % host_page_size() != 0 { + panic!("heap is not page-aligned"); + } + + // clear and disable access to the heap, stack, globals, and sigstack + for (ptr, len) in [ + // We don't ever shrink the heap, so we only need to zero up until the accessible size + (slot.heap, alloc.heap_accessible_size), + (slot.stack, slot.limits.stack_size), + (slot.globals, slot.limits.globals_size), + (slot.sigstack, slot.limits.signal_stack_size), + ] + .iter() + { + // eprintln!("setting none {:p}[{:x}]", *ptr, len); + unsafe { + // MADV_DONTNEED is not guaranteed to clear pages on non-Linux systems + #[cfg(not(target_os = "linux"))] + { + mprotect(*ptr, *len, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE) + .expect("mprotect succeeds during drop"); + memset(*ptr, 0, *len); + } + mprotect(*ptr, *len, ProtFlags::PROT_NONE).expect("mprotect succeeds during drop"); + madvise(*ptr, *len, MmapAdvise::MADV_DONTNEED) + .expect("madvise succeeds during drop"); + } + } + + self.freelist.lock().unwrap().push(slot); + } + + fn expand_heap(&self, slot: &Slot, start: u32, len: u32) -> Result<(), Error> { + unsafe { + mprotect( + (slot.heap as usize + start as usize) as *mut c_void, + len as usize, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + )?; + } + Ok(()) + } + + fn reset_heap(&self, alloc: &mut Alloc, module: &dyn Module) -> Result<(), Error> { + let heap = alloc.slot().heap; + + if alloc.heap_accessible_size > 0 { + // zero the whole heap, if any of it is currently accessible + let heap_size = alloc.slot().limits.heap_address_space_size; + + unsafe { + // `mprotect()` and `madvise()` are sufficient to zero a page on Linux, + // but not necessarily on all POSIX operating systems, and on macOS in particular. + #[cfg(not(target_os = "linux"))] + { + mprotect( + heap, + alloc.heap_accessible_size, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + )?; + memset(heap, 0, alloc.heap_accessible_size); + } + mprotect(heap, heap_size, ProtFlags::PROT_NONE)?; + madvise(heap, heap_size, MmapAdvise::MADV_DONTNEED)?; + } + } + + let initial_size = module + .heap_spec() + .map(|h| h.initial_size as usize) + .unwrap_or(0); + + // reset the heap to the initial size, and mprotect those pages appropriately + if initial_size > 0 { + unsafe { + mprotect( + heap, + initial_size, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + )? + }; + } + alloc.heap_accessible_size = initial_size; + alloc.heap_inaccessible_size = alloc.slot().limits.heap_address_space_size - initial_size; + + // Initialize the heap using the module sparse page data. 
There cannot be more pages in the + // sparse page data than will fit in the initial heap size. + // + // Pages with a corresponding Some entry in the sparse page data are initialized with + // the contents of that data. + // + // Any pages which don't have an entry in the sparse page data, either because their entry + // is None, or because the sparse data has fewer pages than the initial heap, are zeroed. + let heap = unsafe { alloc.heap_mut() }; + let initial_pages = + initial_size + .checked_div(host_page_size()) + .ok_or(lucet_incorrect_module!( + "initial heap size {} is not divisible by host page size ({})", + initial_size, + host_page_size() + ))?; + for page_num in 0..initial_pages { + let page_base = page_num * host_page_size(); + if heap.len() < page_base { + return Err(lucet_incorrect_module!( + "sparse page data length exceeded initial heap size" + )); + } + if let Some(contents) = module.get_sparse_page_data(page_num) { + // otherwise copy in the page data + heap[page_base..page_base + host_page_size()].copy_from_slice(contents); + } + } + + Ok(()) + } + + fn as_dyn_internal(&self) -> &dyn RegionInternal { + self + } +} + +impl Drop for MmapRegion { + fn drop(&mut self) { + for slot in self.freelist.get_mut().unwrap().drain(0..) { + Self::free_slot(slot); + } + } +} + +impl RegionCreate for MmapRegion { + const TYPE_NAME: &'static str = "MmapRegion"; + + fn create(instance_capacity: usize, limits: &Limits) -> Result<Arc<Self>, Error> { + MmapRegion::create(instance_capacity, limits) + } +} + +impl MmapRegion { + /// Create a new `MmapRegion` that can support a given number instances, each subject to the + /// same runtime limits. + /// + /// The region is returned in an `Arc`, because any instances created from it carry a reference + /// back to the region. + pub fn create(instance_capacity: usize, limits: &Limits) -> Result<Arc<Self>, Error> { + limits.validate()?; + + let region = Arc::new(MmapRegion { + capacity: instance_capacity, + freelist: Mutex::new(Vec::with_capacity(instance_capacity)), + limits: limits.clone(), + min_heap_alignment: 0, // No constaints on heap alignment by default + }); + { + let mut freelist = region.freelist.lock().unwrap(); + for _ in 0..instance_capacity { + freelist.push(MmapRegion::create_slot(®ion)?); + } + } + + Ok(region) + } + + /// Create a new `MmapRegion` that can support a given number instances, each subject to the + /// same runtime limits. Additionally, ensure that the heap is aligned at least to the + /// specified amount. heap_alignment must be a power of 2. + /// + /// The region is returned in an `Arc`, because any instances created from it carry a reference + /// back to the region. 
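+    ///
+    /// A usage sketch (the capacity and the 2 MiB alignment are illustrative):
+    ///
+    /// ```ignore
+    /// let region = MmapRegion::create_aligned(4, &Limits::default(), 2 * 1024 * 1024)?;
+    /// ```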
+ pub fn create_aligned( + instance_capacity: usize, + limits: &Limits, + heap_alignment: usize, + ) -> Result<Arc<Self>, Error> { + limits.validate()?; + + let is_power_of_2 = (heap_alignment & (heap_alignment - 1)) == 0; + + if !is_power_of_2 { + return Err(Error::InvalidArgument( + "heap_alignment must be a power of 2", + )); + } + + let region = Arc::new(MmapRegion { + capacity: instance_capacity, + freelist: Mutex::new(Vec::with_capacity(instance_capacity)), + limits: limits.clone(), + min_heap_alignment: heap_alignment, + }); + { + let mut freelist = region.freelist.lock().unwrap(); + for _ in 0..instance_capacity { + freelist.push(MmapRegion::create_slot(®ion)?); + } + } + + Ok(region) + } + + fn create_slot(region: &Arc<MmapRegion>) -> Result<Slot, Error> { + // get the chunk of virtual memory that the `Slot` will manage + let mem = if region.min_heap_alignment == 0 { + unsafe { + mmap( + ptr::null_mut(), + region.limits.total_memory_size(), + ProtFlags::PROT_NONE, + MapFlags::MAP_ANON | MapFlags::MAP_PRIVATE, + 0, + 0, + )? + } + } else { + unsafe { + mmap_aligned( + region.limits.total_memory_size(), + ProtFlags::PROT_NONE, + MapFlags::MAP_ANON | MapFlags::MAP_PRIVATE, + region.min_heap_alignment, // requested alignment + instance_heap_offset(), // offset that must be aligned + )? + } + }; + + // set the first part of the memory to read/write so that the `Instance` can be stored there + // TODO: post slot refactor, is this necessary/desirable? + unsafe { + mprotect( + mem, + instance_heap_offset(), + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + )? + }; + + // lay out the other sections in memory + let heap = mem as usize + instance_heap_offset(); + let stack = heap + region.limits.heap_address_space_size + host_page_size(); + let globals = stack + region.limits.stack_size; + let sigstack = globals + region.limits.globals_size + host_page_size(); + + Ok(Slot { + start: mem, + heap: heap as *mut c_void, + stack: stack as *mut c_void, + globals: globals as *mut c_void, + sigstack: sigstack as *mut c_void, + limits: region.limits.clone(), + region: Arc::downgrade(region) as Weak<dyn RegionInternal>, + }) + } + + fn free_slot(slot: Slot) { + // eprintln!( + // "unmapping {:p}[{:x}]", + // slot.start, + // slot.limits.total_memory_size() + // ); + let res = unsafe { munmap(slot.start, slot.limits.total_memory_size()) }; + res.expect("munmap succeeded"); + } +} + +// Note alignment must be a power of 2 +unsafe fn mmap_aligned( + requested_length: usize, + prot: ProtFlags, + flags: MapFlags, + alignment: usize, + alignment_offset: usize, +) -> Result<*mut c_void, Error> { + let addr = ptr::null_mut(); + let fd = 0; + let offset = 0; + + let padded_length = requested_length + alignment + alignment_offset; + let unaligned = mmap(addr, padded_length, prot, flags, fd, offset)? 
as usize; + + // Round up the next address that has addr % alignment = 0 + let aligned_nonoffset = (unaligned + (alignment - 1)) & !(alignment - 1); + + // Currently offset 0 is aligned according to alignment + // Alignment needs to be enforced at the given offset + let aligned = if aligned_nonoffset - alignment_offset >= unaligned { + aligned_nonoffset - alignment_offset + } else { + aligned_nonoffset - alignment_offset + alignment + }; + + //Sanity check + if aligned < unaligned + || (aligned + (requested_length - 1)) > (unaligned + (padded_length - 1)) + || (aligned + alignment_offset) % alignment != 0 + { + // explicitly ignore failures now, as this is just a best-effort clean up after the last fail + let _ = munmap(unaligned as *mut c_void, padded_length); + return Err(Error::Unsupported("Could not align memory".to_string())); + } + + { + let unused_front = aligned - unaligned; + if unused_front != 0 { + if munmap(unaligned as *mut c_void, unused_front).is_err() { + // explicitly ignore failures now, as this is just a best-effort clean up after the last fail + let _ = munmap(unaligned as *mut c_void, padded_length); + return Err(Error::Unsupported("Could not align memory".to_string())); + } + } + } + + { + let unused_back = (unaligned + (padded_length - 1)) - (aligned + (requested_length - 1)); + if unused_back != 0 { + if munmap((aligned + requested_length) as *mut c_void, unused_back).is_err() { + // explicitly ignore failures now, as this is just a best-effort clean up after the last fail + let _ = munmap(unaligned as *mut c_void, padded_length); + return Err(Error::Unsupported("Could not align memory".to_string())); + } + } + } + + return Ok(aligned as *mut c_void); +} + +// TODO: remove this once `nix` PR https://github.com/nix-rust/nix/pull/991 is merged +unsafe fn mprotect(addr: *mut c_void, length: libc::size_t, prot: ProtFlags) -> nix::Result<()> { + nix::errno::Errno::result(libc::mprotect(addr, length, prot.bits())).map(drop) +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/region/mod.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/region/mod.rs new file mode 100644 index 0000000000..7099ed639b --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/region/mod.rs @@ -0,0 +1,105 @@ +pub mod mmap; + +use crate::alloc::{Alloc, Limits, Slot}; +use crate::embed_ctx::CtxMap; +use crate::error::Error; +use crate::instance::InstanceHandle; +use crate::module::Module; +use std::any::Any; +use std::sync::Arc; + +/// A memory region in which Lucet instances are created and run. +/// +/// These methods return an [`InstanceHandle`](struct.InstanceHandle.html) smart pointer rather than +/// the `Instance` itself. This allows the region implementation complete control of where the +/// instance metadata is stored. +pub trait Region: RegionInternal { + /// Create a new instance within the region. + /// + /// Calling `region.new_instance(module)` is shorthand for + /// `region.new_instance_builder(module).build()` for use when further customization is + /// unnecessary. + /// + /// # Safety + /// + /// This function runs the guest code for the WebAssembly `start` section, and running any guest + /// code is potentially unsafe; see [`Instance::run()`](struct.Instance.html#method.run). + fn new_instance(&self, module: Arc<dyn Module>) -> Result<InstanceHandle, Error> { + self.new_instance_builder(module).build() + } + + /// Return an [`InstanceBuilder`](struct.InstanceBuilder.html) for the given module. 
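+    ///
+    /// For example (a sketch; `MyEmbedCtx` is a hypothetical embedder context type):
+    ///
+    /// ```ignore
+    /// let inst = region
+    ///     .new_instance_builder(module)
+    ///     .with_embed_ctx(MyEmbedCtx::default())
+    ///     .build()?;
+    /// ```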
+ fn new_instance_builder<'a>(&'a self, module: Arc<dyn Module>) -> InstanceBuilder<'a> { + InstanceBuilder::new(self.as_dyn_internal(), module) + } +} + +/// A `RegionInternal` is a collection of `Slot`s which are managed as a whole. +pub trait RegionInternal: Send + Sync { + fn new_instance_with( + &self, + module: Arc<dyn Module>, + embed_ctx: CtxMap, + ) -> Result<InstanceHandle, Error>; + + /// Unmaps the heap, stack, and globals of an `Alloc`, while retaining the virtual address + /// ranges in its `Slot`. + fn drop_alloc(&self, alloc: &mut Alloc); + + /// Expand the heap for the given slot to include the given range. + fn expand_heap(&self, slot: &Slot, start: u32, len: u32) -> Result<(), Error>; + + fn reset_heap(&self, alloc: &mut Alloc, module: &dyn Module) -> Result<(), Error>; + + fn as_dyn_internal(&self) -> &dyn RegionInternal; +} + +/// A trait for regions that are created with a fixed capacity and limits. +/// +/// This is not part of [`Region`](trait.Region.html) so that `Region` types can be made into trait +/// objects. +pub trait RegionCreate: Region { + /// The type name of the region; useful for testing. + const TYPE_NAME: &'static str; + + /// Create a new `Region` that can support a given number instances, each subject to the same + /// runtime limits. + fn create(instance_capacity: usize, limits: &Limits) -> Result<Arc<Self>, Error>; +} + +/// A builder for instances; created by +/// [`Region::new_instance_builder()`](trait.Region.html#method.new_instance_builder). +pub struct InstanceBuilder<'a> { + region: &'a dyn RegionInternal, + module: Arc<dyn Module>, + embed_ctx: CtxMap, +} + +impl<'a> InstanceBuilder<'a> { + fn new(region: &'a dyn RegionInternal, module: Arc<dyn Module>) -> Self { + InstanceBuilder { + region, + module, + embed_ctx: CtxMap::new(), + } + } + + /// Add an embedder context to the built instance. + /// + /// Up to one context value of any particular type may exist in the instance. If a context value + /// of the same type already exists, it is replaced by the new value. + pub fn with_embed_ctx<T: Any>(mut self, ctx: T) -> Self { + self.embed_ctx.insert(ctx); + self + } + + /// Build the instance. + /// + /// # Safety + /// + /// This function runs the guest code for the WebAssembly `start` section, and running any guest + /// code is potentially unsafe; see [`Instance::run()`](struct.Instance.html#method.run). 
+ pub fn build(self) -> Result<InstanceHandle, Error> { + self.region.new_instance_with(self.module, self.embed_ctx) + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/linux.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/linux.rs new file mode 100644 index 0000000000..fb576d8a19 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/linux.rs @@ -0,0 +1,50 @@ +use libc::{c_void, ucontext_t, REG_RIP}; + +#[derive(Clone, Copy, Debug)] +pub struct UContextPtr(*const ucontext_t); + +impl UContextPtr { + #[inline] + pub fn new(ptr: *const c_void) -> Self { + assert!(!ptr.is_null(), "non-null context"); + UContextPtr(ptr as *const ucontext_t) + } + + #[inline] + pub fn get_ip(self) -> *const c_void { + let mcontext = &unsafe { *(self.0) }.uc_mcontext; + mcontext.gregs[REG_RIP as usize] as *const _ + } +} + +#[repr(C)] +#[derive(Clone, Copy)] +pub struct UContext { + context: ucontext_t, +} + +impl UContext { + #[inline] + pub fn new(ptr: *const c_void) -> Self { + UContext { + context: *unsafe { + (ptr as *const ucontext_t) + .as_ref() + .expect("non-null context") + }, + } + } + + pub fn as_ptr(&mut self) -> UContextPtr { + UContextPtr::new(&self.context as *const _ as *const _) + } +} + +impl Into<UContext> for UContextPtr { + #[inline] + fn into(self) -> UContext { + UContext { + context: unsafe { *(self.0) }, + } + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/macos.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/macos.rs new file mode 100644 index 0000000000..3ea2e1c861 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/macos.rs @@ -0,0 +1,171 @@ +use libc::{c_int, c_short, c_void, sigset_t, size_t}; +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct sigaltstack { + pub ss_sp: *const c_void, + pub ss_size: size_t, + pub ss_flags: c_int, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct x86_exception_state64 { + pub trapno: u16, + pub cpu: u16, + pub err: u32, + pub faultvaddr: u64, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct x86_thread_state64 { + pub rax: u64, + pub rbx: u64, + pub rcx: u64, + pub rdx: u64, + pub rdi: u64, + pub rsi: u64, + pub rbp: u64, + pub rsp: u64, + pub r8: u64, + pub r9: u64, + pub r10: u64, + pub r11: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, + pub rip: u64, + pub rflags: u64, + pub cs: u64, + pub fs: u64, + pub gs: u64, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct mmst_reg { + pub mmst_reg: [u8; 10], + pub rsrv: [u8; 6], +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct xmm_reg([u8; 16]); + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct x86_float_state64 { + pub fpu_reserved: [c_int; 2], + pub fpu_fcw: c_short, + pub fpu_fsw: c_short, + pub fpu_ftw: u8, + pub fpu_rsrv1: u8, + pub fpu_fop: u16, + pub fpu_ip: u32, + pub fpu_cs: u16, + pub fpu_rsrv2: u16, + pub fpu_dp: u32, + pub fpu_ds: u16, + pub fpu_rsrv3: u16, + pub fpu_mxcsr: u32, + pub fpu_mxcsrmask: u32, + pub fpu_stmm0: mmst_reg, + pub fpu_stmm1: mmst_reg, + pub fpu_stmm2: mmst_reg, + pub fpu_stmm3: mmst_reg, + pub fpu_stmm4: mmst_reg, + pub fpu_stmm5: mmst_reg, + pub fpu_stmm6: mmst_reg, + pub fpu_stmm7: mmst_reg, + pub fpu_xmm0: xmm_reg, + pub fpu_xmm1: xmm_reg, + pub fpu_xmm2: xmm_reg, + pub fpu_xmm3: xmm_reg, + pub fpu_xmm4: xmm_reg, + pub fpu_xmm5: xmm_reg, + pub fpu_xmm6: xmm_reg, + pub fpu_xmm7: xmm_reg, + pub fpu_xmm8: xmm_reg, + pub fpu_xmm9: xmm_reg, + pub 
fpu_xmm10: xmm_reg, + pub fpu_xmm11: xmm_reg, + pub fpu_xmm12: xmm_reg, + pub fpu_xmm13: xmm_reg, + pub fpu_xmm14: xmm_reg, + pub fpu_xmm15: xmm_reg, + pub fpu_rsrv4_0: [u8; 16], + pub fpu_rsrv4_1: [u8; 16], + pub fpu_rsrv4_2: [u8; 16], + pub fpu_rsrv4_3: [u8; 16], + pub fpu_rsrv4_4: [u8; 16], + pub fpu_rsrv4_5: [u8; 16], + pub fpu_reserved1: c_int, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct mcontext64 { + pub es: x86_exception_state64, + pub ss: x86_thread_state64, + pub fs: x86_float_state64, +} +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct ucontext_t { + pub uc_onstack: c_int, + pub uc_sigmask: sigset_t, + pub uc_stack: sigaltstack, + pub uc_link: *const ucontext_t, + pub uc_mcsize: size_t, + pub uc_mcontext: *const mcontext64, +} + +#[derive(Clone, Copy, Debug)] +pub struct UContextPtr(*const ucontext_t); + +impl UContextPtr { + #[inline] + pub fn new(ptr: *const c_void) -> Self { + assert!(!ptr.is_null(), "non-null context"); + UContextPtr(ptr as *const ucontext_t) + } + + #[inline] + pub fn get_ip(self) -> *const c_void { + let mcontext = &unsafe { *(*self.0).uc_mcontext }; + mcontext.ss.rip as *const _ + } +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub struct UContext { + context: ucontext_t, + mcontext: mcontext64, +} + +impl UContext { + #[inline] + pub fn new(ptr: *const c_void) -> Self { + let context = *unsafe { + (ptr as *const ucontext_t) + .as_ref() + .expect("non-null context") + }; + let mcontext = unsafe { *context.uc_mcontext }; + UContext { context, mcontext } + } + + pub fn as_ptr(&mut self) -> UContextPtr { + self.context.uc_mcontext = &self.mcontext; + UContextPtr::new(&self.context as *const _ as *const _) + } +} + +impl Into<UContext> for UContextPtr { + #[inline] + fn into(self) -> UContext { + UContext::new(self.0 as *const _) + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/mod.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/mod.rs new file mode 100644 index 0000000000..8f9247c4f1 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/sysdeps/mod.rs @@ -0,0 +1,11 @@ +#[cfg(target_os = "macos")] +mod macos; + +#[cfg(target_os = "linux")] +mod linux; + +#[cfg(target_os = "macos")] +pub use macos::*; + +#[cfg(target_os = "linux")] +pub use linux::*; diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/test_helpers.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/test_helpers.rs new file mode 100644 index 0000000000..94c24d09fb --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/test_helpers.rs @@ -0,0 +1,20 @@ +use crate::error::Error; +use crate::module::DlModule; +use std::env; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +fn guest_module_path<P: AsRef<Path>>(path: P) -> PathBuf { + if let Some(prefix) = env::var_os("GUEST_MODULE_PREFIX") { + Path::new(&prefix).join(path) + } else { + // default to the `devenv` path convention + Path::new("/lucet").join(path) + } +} + +impl DlModule { + pub fn load_test<P: AsRef<Path>>(so_path: P) -> Result<Arc<Self>, Error> { + DlModule::load(guest_module_path(so_path)) + } +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/val.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/val.rs new file mode 100644 index 0000000000..c2f2fe51c9 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/val.rs @@ -0,0 +1,318 @@ +//! Typed values for passing into and returning from sandboxed +//! programs. 
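+//!
+//! A sketch of typical host-side use (assuming `retval` is the `UntypedRetVal` produced by a
+//! completed guest call; the concrete values are illustrative):
+//!
+//! ```ignore
+//! // arguments are built from native values via the `From` impls below
+//! let args = [Val::from(5u32), Val::GuestPtr(0x1000), Val::F64(1.5)];
+//!
+//! // return values are read back with the `as_*` accessors (or `From` impls), using the type
+//! // promised by the guest function's signature
+//! let n: u32 = retval.as_u32();
+//! let x: f64 = retval.as_f64();
+//! ```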
+ +use libc::c_void; +use std::arch::x86_64::{ + __m128, _mm_castpd_ps, _mm_castps_pd, _mm_load_pd1, _mm_load_ps1, _mm_setzero_ps, + _mm_storeu_pd, _mm_storeu_ps, +}; + +use lucet_module::ValueType; + +impl Val { + pub fn value_type(&self) -> ValueType { + match self { + // USize, ISize, and CPtr are all as fits for definitions on the target architecture + // (wasm) which is all 32-bit. + Val::USize(_) | Val::ISize(_) | Val::CPtr(_) => ValueType::I32, + Val::GuestPtr(_) => ValueType::I32, + Val::I8(_) | Val::U8(_) | Val::I16(_) | Val::U16(_) | Val::I32(_) | Val::U32(_) => { + ValueType::I32 + } + Val::I64(_) | Val::U64(_) => ValueType::I64, + Val::Bool(_) => ValueType::I32, + Val::F32(_) => ValueType::F32, + Val::F64(_) => ValueType::F64, + } + } +} + +/// Typed values used for passing arguments into guest functions. +#[derive(Clone, Copy, Debug)] +pub enum Val { + CPtr(*const c_void), + /// A WebAssembly linear memory address + GuestPtr(u32), + U8(u8), + U16(u16), + U32(u32), + U64(u64), + I8(i8), + I16(i16), + I32(i32), + I64(i64), + USize(usize), + ISize(isize), + Bool(bool), + F32(f32), + F64(f64), +} + +// the pointer variant is just a wrapper; the caller will know they're still responsible for their +// safety +unsafe impl Send for Val {} +unsafe impl Sync for Val {} + +impl<T> From<*const T> for Val { + fn from(x: *const T) -> Val { + Val::CPtr(x as *const c_void) + } +} + +impl<T> From<*mut T> for Val { + fn from(x: *mut T) -> Val { + Val::CPtr(x as *mut c_void) + } +} + +macro_rules! impl_from_scalars { + ( { $( $ctor:ident : $ty:ty ),* } ) => { + $( + impl From<$ty> for Val { + fn from(x: $ty) -> Val { + Val::$ctor(x) + } + } + )* + }; +} + +// Since there is overlap in these enum variants, we can't have instances for all of them, such as +// GuestPtr +impl_from_scalars!({ + U8: u8, + U16: u16, + U32: u32, + U64: u64, + I8: i8, + I16: i16, + I32: i32, + I64: i64, + USize: usize, + ISize: isize, + Bool: bool, + F32: f32, + F64: f64 +}); + +/// Register representation of `Val`. +/// +/// When mapping `Val`s to x86_64 registers, we map floating point +/// values into the SSE registers _xmmN_, and all other values into +/// general-purpose (integer) registers. +pub enum RegVal { + GpReg(u64), + FpReg(__m128), +} + +/// Convert a `Val` to its representation when stored in an +/// argument register. +pub fn val_to_reg(val: &Val) -> RegVal { + use self::RegVal::*; + use self::Val::*; + match *val { + CPtr(v) => GpReg(v as u64), + GuestPtr(v) => GpReg(v as u64), + U8(v) => GpReg(v as u64), + U16(v) => GpReg(v as u64), + U32(v) => GpReg(v as u64), + U64(v) => GpReg(v as u64), + I8(v) => GpReg(v as u64), + I16(v) => GpReg(v as u64), + I32(v) => GpReg(v as u64), + I64(v) => GpReg(v as u64), + USize(v) => GpReg(v as u64), + ISize(v) => GpReg(v as u64), + Bool(false) => GpReg(0u64), + Bool(true) => GpReg(1u64), + Val::F32(v) => FpReg(unsafe { _mm_load_ps1(&v as *const f32) }), + Val::F64(v) => FpReg(unsafe { _mm_castpd_ps(_mm_load_pd1(&v as *const f64)) }), + } +} + +/// Convert a `Val` to its representation when spilled onto the +/// stack. 
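
The `val_to_reg` mapping above splits arguments into two register classes: floating-point values go to an SSE register, everything else is widened into a 64-bit general-purpose register. The companion `val_to_stack` below does the analogous widening for stack-spilled arguments. A standalone sketch (not the crate's API) of that widening rule, showing that floats are spilled by bit pattern and signed integers by sign extension:

    // Every spilled argument occupies one 64-bit stack slot.
    fn spill_f32(v: f32) -> u64 {
        // bit pattern, not numeric conversion
        v.to_bits() as u64
    }

    fn spill_i8(v: i8) -> u64 {
        // `as u64` on a signed integer sign-extends, matching the match arms below
        v as u64
    }

    fn main() {
        assert_eq!(spill_f32(1.0), 0x3f80_0000);
        assert_eq!(spill_i8(-1), u64::MAX);
    }
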
+pub fn val_to_stack(val: &Val) -> u64 { + use self::Val::*; + match *val { + CPtr(v) => v as u64, + GuestPtr(v) => v as u64, + U8(v) => v as u64, + U16(v) => v as u64, + U32(v) => v as u64, + U64(v) => v as u64, + I8(v) => v as u64, + I16(v) => v as u64, + I32(v) => v as u64, + I64(v) => v as u64, + USize(v) => v as u64, + ISize(v) => v as u64, + Bool(false) => 0u64, + Bool(true) => 1u64, + F32(v) => v.to_bits() as u64, + F64(v) => v.to_bits(), + } +} + +/// A value returned by a guest function. +/// +/// Since the Rust type system cannot know the type of the returned value, the user must use the +/// appropriate `From` implementation or `as_T` method. +#[derive(Clone, Copy, Debug)] +pub struct UntypedRetVal { + fp: __m128, + gp: u64, +} + +impl std::fmt::Display for UntypedRetVal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<untyped return value>") + } +} + +impl UntypedRetVal { + pub(crate) fn new(gp: u64, fp: __m128) -> UntypedRetVal { + UntypedRetVal { gp, fp } + } +} + +impl From<RegVal> for UntypedRetVal { + fn from(reg: RegVal) -> UntypedRetVal { + match reg { + RegVal::GpReg(r) => UntypedRetVal::new(r, unsafe { _mm_setzero_ps() }), + RegVal::FpReg(r) => UntypedRetVal::new(0, r), + } + } +} + +impl<T: Into<Val>> From<T> for UntypedRetVal { + fn from(v: T) -> UntypedRetVal { + val_to_reg(&v.into()).into() + } +} + +macro_rules! impl_from_fp { + ( $ty:ty, $f:ident, $as:ident ) => { + impl From<UntypedRetVal> for $ty { + fn from(retval: UntypedRetVal) -> $ty { + $f(retval.fp) + } + } + + impl From<&UntypedRetVal> for $ty { + fn from(retval: &UntypedRetVal) -> $ty { + $f(retval.fp) + } + } + + impl UntypedRetVal { + pub fn $as(&self) -> $ty { + $f(self.fp) + } + } + }; +} + +impl_from_fp!(f32, __m128_as_f32, as_f32); +impl_from_fp!(f64, __m128_as_f64, as_f64); + +macro_rules! impl_from_gp { + ( $ty:ty, $as:ident ) => { + impl From<UntypedRetVal> for $ty { + fn from(retval: UntypedRetVal) -> $ty { + retval.gp as $ty + } + } + + impl From<&UntypedRetVal> for $ty { + fn from(retval: &UntypedRetVal) -> $ty { + retval.gp as $ty + } + } + + impl UntypedRetVal { + pub fn $as(&self) -> $ty { + self.gp as $ty + } + } + }; +} + +impl_from_gp!(u8, as_u8); +impl_from_gp!(u16, as_u16); +impl_from_gp!(u32, as_u32); +impl_from_gp!(u64, as_u64); + +impl_from_gp!(i8, as_i8); +impl_from_gp!(i16, as_i16); +impl_from_gp!(i32, as_i32); +impl_from_gp!(i64, as_i64); + +impl From<UntypedRetVal> for bool { + fn from(retval: UntypedRetVal) -> bool { + retval.gp != 0 + } +} + +impl From<&UntypedRetVal> for bool { + fn from(retval: &UntypedRetVal) -> bool { + retval.gp != 0 + } +} + +impl UntypedRetVal { + pub fn as_bool(&self) -> bool { + self.gp != 0 + } + + pub fn as_ptr<T>(&self) -> *const T { + self.gp as *const T + } + + pub fn as_mut<T>(&self) -> *mut T { + self.gp as *mut T + } +} + +impl Default for UntypedRetVal { + fn default() -> UntypedRetVal { + let fp = unsafe { _mm_setzero_ps() }; + UntypedRetVal { fp, gp: 0 } + } +} + +pub trait UntypedRetValInternal { + fn fp(&self) -> __m128; + fn gp(&self) -> u64; +} + +impl UntypedRetValInternal for UntypedRetVal { + fn fp(&self) -> __m128 { + self.fp + } + + fn gp(&self) -> u64 { + self.gp + } +} + +// Helpers that we might want to put in a utils module someday + +/// Interpret the contents of a `__m128` register as an `f32`. 
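
On the return path, `UntypedRetVal` above keeps both the general-purpose and the floating-point return registers, and the `impl_from_gp!` / `impl_from_fp!` macros let the caller pick the interpretation. A simplified, standalone sketch of that idea (a stand-in struct, not the crate's type; the real code stores a full `__m128` rather than raw bits):

    struct RawRet {
        gp: u64,
        fp_bits: u64, // stand-in for the low lane of the xmm register
    }

    impl RawRet {
        fn as_i32(&self) -> i32 {
            self.gp as i32 // integers truncate from the 64-bit GP slot
        }
        fn as_bool(&self) -> bool {
            self.gp != 0
        }
        fn as_f64(&self) -> f64 {
            f64::from_bits(self.fp_bits) // floats are recovered from their bit pattern
        }
    }

    fn main() {
        let r = RawRet { gp: 1, fp_bits: 2.5f64.to_bits() };
        assert_eq!(r.as_i32(), 1);
        assert!(r.as_bool());
        assert_eq!(r.as_f64(), 2.5);
    }
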
+pub fn __m128_as_f32(v: __m128) -> f32 { + let mut out: [f32; 4] = [0.0; 4]; + unsafe { + _mm_storeu_ps(&mut out[0] as *mut f32, v); + } + out[0] +} + +/// Interpret the contents of a `__m128` register as an `f64`. +pub fn __m128_as_f64(v: __m128) -> f64 { + let mut out: [f64; 2] = [0.0; 2]; + unsafe { + let vd = _mm_castps_pd(v); + _mm_storeu_pd(&mut out[0] as *mut f64, vd); + } + out[0] +} diff --git a/third_party/rust/lucet-runtime-internals-wasmsbx/src/vmctx.rs b/third_party/rust/lucet-runtime-internals-wasmsbx/src/vmctx.rs new file mode 100644 index 0000000000..61897a38e8 --- /dev/null +++ b/third_party/rust/lucet-runtime-internals-wasmsbx/src/vmctx.rs @@ -0,0 +1,419 @@ +//! Interfaces for accessing instance data from hostcalls. +//! +//! This module contains both a Rust-friendly API ([`Vmctx`](struct.Vmctx.html)) as well as C-style +//! exports for compatibility with hostcalls written against `lucet-runtime-c`. + +pub use crate::c_api::lucet_vmctx; + +use crate::alloc::instance_heap_offset; +use crate::context::Context; +use crate::error::Error; +use crate::instance::{ + EmptyYieldVal, Instance, InstanceInternal, State, TerminationDetails, YieldedVal, + CURRENT_INSTANCE, HOST_CTX, +}; +use lucet_module::{FunctionHandle, GlobalValue}; +use std::any::Any; +use std::borrow::{Borrow, BorrowMut}; +use std::cell::{Ref, RefCell, RefMut}; +use std::marker::PhantomData; + +/// An opaque handle to a running instance's context. +#[derive(Debug)] +pub struct Vmctx { + vmctx: *mut lucet_vmctx, + /// A view of the underlying instance's heap. + /// + /// This must never be dropped automatically, as the view does not own the heap. Rather, this is + /// a value used to implement dynamic borrowing of the heap contents that are owned and managed + /// by the instance and its `Alloc`. + heap_view: RefCell<Box<[u8]>>, + /// A view of the underlying instance's globals. + /// + /// This must never be dropped automatically, as the view does not own the globals. Rather, this + /// is a value used to implement dynamic borrowing of the globals that are owned and managed by + /// the instance and its `Alloc`. + globals_view: RefCell<Box<[GlobalValue]>>, +} + +impl Drop for Vmctx { + fn drop(&mut self) { + let heap_view = self.heap_view.replace(Box::new([])); + let globals_view = self.globals_view.replace(Box::new([])); + // as described in the definition of `Vmctx`, we cannot allow the boxed views of the heap + // and globals to be dropped + Box::leak(heap_view); + Box::leak(globals_view); + } +} + +pub trait VmctxInternal { + /// Get a reference to the `Instance` for this guest. + fn instance(&self) -> &Instance; + + /// Get a mutable reference to the `Instance` for this guest. + /// + /// ### Safety + /// + /// Using this method, you could hold on to multiple mutable references to the same + /// `Instance`. Only use one at a time! This method does not take `&mut self` because otherwise + /// you could not use orthogonal `&mut` refs that come from `Vmctx`, like the heap or + /// terminating the instance. + unsafe fn instance_mut(&self) -> &mut Instance; + + /// Try to take and return the value passed to `Instance::resume_with_val()`. + /// + /// If there is no resumed value, or if the dynamic type check of the value fails, this returns + /// `None`. + fn try_take_resumed_val<R: Any + 'static>(&self) -> Option<R>; + + /// Suspend the instance, returning a value in + /// [`RunResult::Yielded`](../enum.RunResult.html#variant.Yielded) to where the instance was run + /// or resumed. 
+ /// + /// After suspending, the instance may be resumed by calling + /// [`Instance::resume_with_val()`](../struct.Instance.html#method.resume_with_val) from the + /// host with a value of type `R`. If resumed with a value of some other type, this returns + /// `None`. + /// + /// The dynamic type checks used by the other yield methods should make this explicit option + /// type redundant, however this interface is used to avoid exposing a panic to the C API. + fn yield_val_try_val<A: Any + 'static, R: Any + 'static>(&self, val: A) -> Option<R>; +} + +impl VmctxInternal for Vmctx { + fn instance(&self) -> &Instance { + unsafe { instance_from_vmctx(self.vmctx) } + } + + unsafe fn instance_mut(&self) -> &mut Instance { + instance_from_vmctx(self.vmctx) + } + + fn try_take_resumed_val<R: Any + 'static>(&self) -> Option<R> { + let inst = unsafe { self.instance_mut() }; + if let Some(val) = inst.resumed_val.take() { + match val.downcast() { + Ok(val) => Some(*val), + Err(val) => { + inst.resumed_val = Some(val); + None + } + } + } else { + None + } + } + + fn yield_val_try_val<A: Any + 'static, R: Any + 'static>(&self, val: A) -> Option<R> { + self.yield_impl::<A, R>(val); + self.try_take_resumed_val() + } +} + +impl Vmctx { + /// Create a `Vmctx` from the compiler-inserted `vmctx` argument in a guest function. + /// + /// This is almost certainly not what you want to use to get a `Vmctx`; instead use the `&mut + /// Vmctx` argument to a `lucet_hostcalls!`-wrapped function. + pub unsafe fn from_raw(vmctx: *mut lucet_vmctx) -> Vmctx { + let inst = instance_from_vmctx(vmctx); + assert!(inst.valid_magic()); + + let res = Vmctx { + vmctx, + heap_view: RefCell::new(Box::<[u8]>::from_raw(inst.heap_mut())), + globals_view: RefCell::new(Box::<[GlobalValue]>::from_raw(inst.globals_mut())), + }; + res + } + + /// Return the underlying `vmctx` pointer. + pub fn as_raw(&self) -> *mut lucet_vmctx { + self.vmctx + } + + /// Return the WebAssembly heap as a slice of bytes. + /// + /// If the heap is already mutably borrowed by `heap_mut()`, the instance will + /// terminate with `TerminationDetails::BorrowError`. + pub fn heap(&self) -> Ref<'_, [u8]> { + unsafe { + self.reconstitute_heap_view_if_needed(); + } + let r = self + .heap_view + .try_borrow() + .unwrap_or_else(|_| panic!(TerminationDetails::BorrowError("heap"))); + Ref::map(r, |b| b.borrow()) + } + + /// Return the WebAssembly heap as a mutable slice of bytes. + /// + /// If the heap is already borrowed by `heap()` or `heap_mut()`, the instance will terminate + /// with `TerminationDetails::BorrowError`. + pub fn heap_mut(&self) -> RefMut<'_, [u8]> { + unsafe { + self.reconstitute_heap_view_if_needed(); + } + let r = self + .heap_view + .try_borrow_mut() + .unwrap_or_else(|_| panic!(TerminationDetails::BorrowError("heap_mut"))); + RefMut::map(r, |b| b.borrow_mut()) + } + + /// Check whether the heap has grown, and replace the heap view if it has. + /// + /// This handles the case where `Vmctx::grow_memory()` and `Vmctx::heap()` are called in + /// sequence. Since `Vmctx::grow_memory()` takes `&mut self`, heap references cannot live across + /// it. + /// + /// TODO: There is still an unsound case, though, when a heap reference is held across a call + /// back into the guest via `Vmctx::get_func_from_idx()`. That guest code may grow the heap as + /// well, causing any outstanding heap references to become invalid. We will address this when + /// we rework the interface for calling back into the guest. 
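
The `heap()` / `heap_mut()` accessors above enforce a dynamic borrow discipline: the heap view lives in a `RefCell`, overlapping borrows are detected with `try_borrow`/`try_borrow_mut`, and a failed borrow is turned into a `TerminationDetails::BorrowError` for the instance rather than a process-wide panic. A minimal standalone sketch of that runtime borrow check (illustrative names only):

    use std::cell::RefCell;

    fn main() {
        let heap_view: RefCell<Vec<u8>> = RefCell::new(vec![0; 16]);

        let shared = heap_view.try_borrow().expect("no outstanding &mut borrow");
        // A second shared borrow is fine...
        assert!(heap_view.try_borrow().is_ok());
        // ...but a mutable borrow while `shared` is alive is reported as an error,
        // which the real code maps to TerminationDetails::BorrowError.
        assert!(heap_view.try_borrow_mut().is_err());

        drop(shared);
        assert!(heap_view.try_borrow_mut().is_ok());
    }
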
+ unsafe fn reconstitute_heap_view_if_needed(&self) { + let inst = self.instance_mut(); + if inst.heap_mut().len() != self.heap_view.borrow().len() { + let old_heap_view = self + .heap_view + .replace(Box::<[u8]>::from_raw(inst.heap_mut())); + // as described in the definition of `Vmctx`, we cannot allow the boxed view of the heap + // to be dropped + Box::leak(old_heap_view); + } + } + + /// Check whether a given range in the host address space overlaps with the memory that backs + /// the instance heap. + pub fn check_heap<T>(&self, ptr: *const T, len: usize) -> bool { + self.instance().check_heap(ptr, len) + } + + /// Check whether a context value of a particular type exists. + pub fn contains_embed_ctx<T: Any>(&self) -> bool { + self.instance().contains_embed_ctx::<T>() + } + + /// Get a reference to a context value of a particular type. + /// + /// If a context of that type does not exist, the instance will terminate with + /// `TerminationDetails::CtxNotFound`. + /// + /// If the context is already mutably borrowed by `get_embed_ctx_mut`, the instance will + /// terminate with `TerminationDetails::BorrowError`. + pub fn get_embed_ctx<T: Any>(&self) -> Ref<'_, T> { + match self.instance().embed_ctx.try_get::<T>() { + Some(Ok(t)) => t, + Some(Err(_)) => panic!(TerminationDetails::BorrowError("get_embed_ctx")), + None => panic!(TerminationDetails::CtxNotFound), + } + } + + /// Get a mutable reference to a context value of a particular type. + /// + /// If a context of that type does not exist, the instance will terminate with + /// `TerminationDetails::CtxNotFound`. + /// + /// If the context is already borrowed by some other use of `get_embed_ctx` or + /// `get_embed_ctx_mut`, the instance will terminate with `TerminationDetails::BorrowError`. + pub fn get_embed_ctx_mut<T: Any>(&self) -> RefMut<'_, T> { + match unsafe { self.instance_mut().embed_ctx.try_get_mut::<T>() } { + Some(Ok(t)) => t, + Some(Err(_)) => panic!(TerminationDetails::BorrowError("get_embed_ctx_mut")), + None => panic!(TerminationDetails::CtxNotFound), + } + } + + /// Terminate this guest and return to the host context without unwinding. + /// + /// This is almost certainly not what you want to use to terminate an instance from a hostcall, + /// as any resources currently in scope will not be dropped. Instead, use + /// `lucet_hostcall_terminate!` which unwinds to the enclosing hostcall body. + pub unsafe fn terminate_no_unwind(&mut self, details: TerminationDetails) -> ! { + self.instance_mut().terminate(details) + } + + /// Grow the guest memory by the given number of WebAssembly pages. + /// + /// On success, returns the number of pages that existed before the call. + pub fn grow_memory(&mut self, additional_pages: u32) -> Result<u32, Error> { + unsafe { self.instance_mut().grow_memory(additional_pages) } + } + + /// Return the WebAssembly globals as a slice of `i64`s. + /// + /// If the globals are already mutably borrowed by `globals_mut()`, the instance will terminate + /// with `TerminationDetails::BorrowError`. + pub fn globals(&self) -> Ref<'_, [GlobalValue]> { + let r = self + .globals_view + .try_borrow() + .unwrap_or_else(|_| panic!(TerminationDetails::BorrowError("globals"))); + Ref::map(r, |b| b.borrow()) + } + + /// Return the WebAssembly globals as a mutable slice of `i64`s. + /// + /// If the globals are already borrowed by `globals()` or `globals_mut()`, the instance will + /// terminate with `TerminationDetails::BorrowError`. 
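
The `get_embed_ctx` / `get_embed_ctx_mut` accessors above look host-provided context values up by their concrete Rust type. A simplified standalone sketch of that type-keyed lookup, using `Any` and `TypeId` directly (a stand-in for the instance's context map, not its actual implementation):

    use std::any::{Any, TypeId};
    use std::collections::HashMap;

    struct EmbedCtx {
        map: HashMap<TypeId, Box<dyn Any>>,
    }

    impl EmbedCtx {
        fn new() -> Self {
            EmbedCtx { map: HashMap::new() }
        }
        fn insert<T: Any>(&mut self, val: T) {
            self.map.insert(TypeId::of::<T>(), Box::new(val));
        }
        fn get<T: Any>(&self) -> Option<&T> {
            // downcast succeeds only if a value of exactly type T was stored
            self.map.get(&TypeId::of::<T>()).and_then(|b| b.downcast_ref())
        }
    }

    fn main() {
        let mut ctx = EmbedCtx::new();
        ctx.insert(String::from("host state"));
        assert_eq!(ctx.get::<String>().map(String::as_str), Some("host state"));
        assert!(ctx.get::<u32>().is_none());
    }
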
+ pub fn globals_mut(&self) -> RefMut<'_, [GlobalValue]> { + let r = self + .globals_view + .try_borrow_mut() + .unwrap_or_else(|_| panic!(TerminationDetails::BorrowError("globals_mut"))); + RefMut::map(r, |b| b.borrow_mut()) + } + + /// Get a function pointer by WebAssembly table and function index. + /// + /// This is useful when a hostcall takes a function pointer as its argument, as WebAssembly uses + /// table indices as its runtime representation of function pointers. + /// + /// We do not currently reflect function type information into the Rust type system, so callers + /// of the returned function must take care to cast it to the correct type before calling. The + /// correct type will include the `vmctx` argument, which the caller is responsible for passing + /// from its own context. + /// + /// ```no_run + /// use lucet_runtime_internals::{lucet_hostcalls, lucet_hostcall_terminate}; + /// use lucet_runtime_internals::vmctx::{lucet_vmctx, Vmctx}; + /// + /// lucet_hostcalls! { + /// #[no_mangle] + /// pub unsafe extern "C" fn hostcall_call_binop( + /// &mut vmctx, + /// binop_table_idx: u32, + /// binop_func_idx: u32, + /// operand1: u32, + /// operand2: u32, + /// ) -> u32 { + /// if let Ok(binop) = vmctx.get_func_from_idx(binop_table_idx, binop_func_idx) { + /// let typed_binop = std::mem::transmute::< + /// usize, + /// extern "C" fn(*mut lucet_vmctx, u32, u32) -> u32 + /// >(binop.ptr.as_usize()); + /// unsafe { (typed_binop)(vmctx.as_raw(), operand1, operand2) } + /// } else { + /// lucet_hostcall_terminate!("invalid function index") + /// } + /// } + /// } + pub fn get_func_from_idx( + &self, + table_idx: u32, + func_idx: u32, + ) -> Result<FunctionHandle, Error> { + self.instance() + .module() + .get_func_from_idx(table_idx, func_idx) + } + + /// Suspend the instance, returning an empty + /// [`RunResult::Yielded`](../enum.RunResult.html#variant.Yielded) to where the instance was run + /// or resumed. + /// + /// After suspending, the instance may be resumed by the host using + /// [`Instance::resume()`](../struct.Instance.html#method.resume). + /// + /// (The reason for the trailing underscore in the name is that Rust reserves `yield` as a + /// keyword for future use.) + pub fn yield_(&self) { + self.yield_val_expecting_val::<EmptyYieldVal, EmptyYieldVal>(EmptyYieldVal); + } + + /// Suspend the instance, returning an empty + /// [`RunResult::Yielded`](../enum.RunResult.html#variant.Yielded) to where the instance was run + /// or resumed. + /// + /// After suspending, the instance may be resumed by calling + /// [`Instance::resume_with_val()`](../struct.Instance.html#method.resume_with_val) from the + /// host with a value of type `R`. + pub fn yield_expecting_val<R: Any + 'static>(&self) -> R { + self.yield_val_expecting_val::<EmptyYieldVal, R>(EmptyYieldVal) + } + + /// Suspend the instance, returning a value in + /// [`RunResult::Yielded`](../enum.RunResult.html#variant.Yielded) to where the instance was run + /// or resumed. + /// + /// After suspending, the instance may be resumed by the host using + /// [`Instance::resume()`](../struct.Instance.html#method.resume). + pub fn yield_val<A: Any + 'static>(&self, val: A) { + self.yield_val_expecting_val::<A, EmptyYieldVal>(val); + } + + /// Suspend the instance, returning a value in + /// [`RunResult::Yielded`](../enum.RunResult.html#variant.Yielded) to where the instance was run + /// or resumed. 
+ /// + /// After suspending, the instance may be resumed by calling + /// [`Instance::resume_with_val()`](../struct.Instance.html#method.resume_with_val) from the + /// host with a value of type `R`. + pub fn yield_val_expecting_val<A: Any + 'static, R: Any + 'static>(&self, val: A) -> R { + self.yield_impl::<A, R>(val); + self.take_resumed_val() + } + + fn yield_impl<A: Any + 'static, R: Any + 'static>(&self, val: A) { + let inst = unsafe { self.instance_mut() }; + let expecting: Box<PhantomData<R>> = Box::new(PhantomData); + inst.state = State::Yielding { + val: YieldedVal::new(val), + expecting: expecting as Box<dyn Any>, + }; + HOST_CTX.with(|host_ctx| unsafe { Context::swap(&mut inst.ctx, &mut *host_ctx.get()) }); + } + + /// Take and return the value passed to + /// [`Instance::resume_with_val()`](../struct.Instance.html#method.resume_with_val), terminating + /// the instance if there is no value present, or the dynamic type check of the value fails. + fn take_resumed_val<R: Any + 'static>(&self) -> R { + self.try_take_resumed_val() + .unwrap_or_else(|| panic!(TerminationDetails::YieldTypeMismatch)) + } +} + +/// Get an `Instance` from the `vmctx` pointer. +/// +/// Only safe to call from within the guest context. +pub unsafe fn instance_from_vmctx<'a>(vmctx: *mut lucet_vmctx) -> &'a mut Instance { + assert!(!vmctx.is_null(), "vmctx is not null"); + + let inst_ptr = (vmctx as usize - instance_heap_offset()) as *mut Instance; + + // We shouldn't actually need to access the thread local, only the exception handler should + // need to. But, as long as the thread local exists, we should make sure that the guest + // hasn't pulled any shenanigans and passed a bad vmctx. (Codegen should ensure the guest + // cant pull any shenanigans but there have been bugs before.) + CURRENT_INSTANCE.with(|current_instance| { + if let Some(current_inst_ptr) = current_instance.borrow().map(|nn| nn.as_ptr()) { + assert_eq!( + inst_ptr, current_inst_ptr, + "vmctx corresponds to current instance" + ); + } else { + panic!( + "current instance is not set; thread local storage failure can indicate \ + dynamic linking issues" + ); + } + }); + + let inst = inst_ptr.as_mut().unwrap(); + assert!(inst.valid_magic()); + inst +} + +impl Instance { + /// Terminate the guest and swap back to the host context without unwinding. + /// + /// This is almost certainly not what you want to use to terminate from a hostcall; use panics + /// with `TerminationDetails` instead. + unsafe fn terminate(&mut self, details: TerminationDetails) -> ! { + self.state = State::Terminating { details }; + #[allow(unused_unsafe)] // The following unsafe will be incorrectly warned as unused + HOST_CTX.with(|host_ctx| unsafe { Context::set(&*host_ctx.get()) }) + } +} |
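
`instance_from_vmctx` above recovers the `Instance` pointer by subtracting a fixed offset from the guest's `vmctx` pointer: the instance header sits at the start of the slot, and the heap (which is what the guest sees as `vmctx`) begins one host page further up, per `instance_heap_offset()`. A standalone sketch of that address arithmetic, with the 4 KiB page size stated as an assumption matching the expected host page size:

    // Assumed layout: [ Instance header (1 page) | heap ... ]
    const PAGE_SIZE: usize = 4096;

    fn instance_base_from_heap(heap_base: usize) -> usize {
        // invert "heap = instance + instance_heap_offset()"
        heap_base - PAGE_SIZE
    }

    fn main() {
        let instance_base = 0x7f00_0000_0000usize;
        let heap_base = instance_base + PAGE_SIZE; // what the guest receives as vmctx
        assert_eq!(instance_base_from_heap(heap_base), instance_base);
    }

The runtime additionally cross-checks the recovered pointer against the thread-local `CURRENT_INSTANCE` and the instance's magic value before trusting it, so a guest cannot substitute an arbitrary `vmctx`.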