//! WebAssembly module and function translation state. //! //! The `ModuleTranslationState` struct defined in this module is used to keep track of data about //! the whole WebAssembly module, such as the decoded type signatures. //! //! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly //! value and control stacks during the translation of a single function. use crate::environ::{FuncEnvironment, GlobalVariable, WasmResult}; use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, TableIndex, TypeIndex}; use crate::{HashMap, Occupied, Vacant}; use cranelift_codegen::ir::{self, Block, Inst, Value}; use std::vec::Vec; /// Information about the presence of an associated `else` for an `if`, or the /// lack thereof. #[derive(Debug)] pub enum ElseData { /// The `if` does not already have an `else` block. /// /// This doesn't mean that it will never have an `else`, just that we /// haven't seen it yet. NoElse { /// If we discover that we need an `else` block, this is the jump /// instruction that needs to be fixed up to point to the new `else` /// block rather than the destination block after the `if...end`. branch_inst: Inst, }, /// We have already allocated an `else` block. /// /// Usually we don't know whether we will hit an `if .. end` or an `if /// .. else .. end`, but sometimes we can tell based on the block's type /// signature that the signature is not valid if there isn't an `else`. In /// these cases, we pre-allocate the `else` block. WithElse { /// This is the `else` block. else_block: Block, }, } /// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following /// fields: /// /// - `destination`: reference to the `Block` that will hold the code after the control block; /// - `num_return_values`: number of values returned by the control block; /// - `original_stack_size`: size of the value stack at the beginning of the control block. /// /// Moreover, the `if` frame has the `branch_inst` field that points to the `brz` instruction /// separating the `true` and `false` branch. The `loop` frame has a `header` field that references /// the `Block` that contains the beginning of the body of the loop. #[derive(Debug)] pub enum ControlStackFrame { If { destination: Block, else_data: ElseData, num_param_values: usize, num_return_values: usize, original_stack_size: usize, exit_is_branched_to: bool, blocktype: wasmparser::TypeOrFuncType, /// Was the head of the `if` reachable? head_is_reachable: bool, /// What was the reachability at the end of the consequent? /// /// This is `None` until we're finished translating the consequent, and /// is set to `Some` either by hitting an `else` when we will begin /// translating the alternative, or by hitting an `end` in which case /// there is no alternative. consequent_ends_reachable: Option, // Note: no need for `alternative_ends_reachable` because that is just // `state.reachable` when we hit the `end` in the `if .. else .. end`. }, Block { destination: Block, num_param_values: usize, num_return_values: usize, original_stack_size: usize, exit_is_branched_to: bool, }, Loop { destination: Block, header: Block, num_param_values: usize, num_return_values: usize, original_stack_size: usize, }, } /// Helper methods for the control stack objects. impl ControlStackFrame { pub fn num_return_values(&self) -> usize { match *self { Self::If { num_return_values, .. } | Self::Block { num_return_values, .. } | Self::Loop { num_return_values, .. } => num_return_values, } } pub fn num_param_values(&self) -> usize { match *self { Self::If { num_param_values, .. } | Self::Block { num_param_values, .. } | Self::Loop { num_param_values, .. } => num_param_values, } } pub fn following_code(&self) -> Block { match *self { Self::If { destination, .. } | Self::Block { destination, .. } | Self::Loop { destination, .. } => destination, } } pub fn br_destination(&self) -> Block { match *self { Self::If { destination, .. } | Self::Block { destination, .. } => destination, Self::Loop { header, .. } => header, } } /// Private helper. Use `truncate_value_stack_to_else_params()` or /// `truncate_value_stack_to_original_size()` to restore value-stack state. fn original_stack_size(&self) -> usize { match *self { Self::If { original_stack_size, .. } | Self::Block { original_stack_size, .. } | Self::Loop { original_stack_size, .. } => original_stack_size, } } pub fn is_loop(&self) -> bool { match *self { Self::If { .. } | Self::Block { .. } => false, Self::Loop { .. } => true, } } pub fn exit_is_branched_to(&self) -> bool { match *self { Self::If { exit_is_branched_to, .. } | Self::Block { exit_is_branched_to, .. } => exit_is_branched_to, Self::Loop { .. } => false, } } pub fn set_branched_to_exit(&mut self) { match *self { Self::If { ref mut exit_is_branched_to, .. } | Self::Block { ref mut exit_is_branched_to, .. } => *exit_is_branched_to = true, Self::Loop { .. } => {} } } /// Pop values from the value stack so that it is left at the /// input-parameters to an else-block. pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec) { debug_assert!(matches!(self, &ControlStackFrame::If { .. })); stack.truncate(self.original_stack_size()); } /// Pop values from the value stack so that it is left at the state it was /// before this control-flow frame. pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec) { // The "If" frame pushes its parameters twice, so they're available to the else block // (see also `FuncTranslationState::push_if`). // Yet, the original_stack_size member accounts for them only once, so that the else // block can see the same number of parameters as the consequent block. As a matter of // fact, we need to substract an extra number of parameter values for if blocks. let num_duplicated_params = match self { &ControlStackFrame::If { num_param_values, .. } => { debug_assert!(num_param_values <= self.original_stack_size()); num_param_values } _ => 0, }; stack.truncate(self.original_stack_size() - num_duplicated_params); } } /// Contains information passed along during a function's translation and that records: /// /// - The current value and control stacks. /// - The depth of the two unreachable control blocks stacks, that are manipulated when translating /// unreachable code; pub struct FuncTranslationState { /// A stack of values corresponding to the active values in the input wasm function at this /// point. pub(crate) stack: Vec, /// A stack of active control flow operations at this point in the input wasm function. pub(crate) control_stack: Vec, /// Is the current translation state still reachable? This is false when translating operators /// like End, Return, or Unreachable. pub(crate) reachable: bool, // Map of global variables that have already been created by `FuncEnvironment::make_global`. globals: HashMap, // Map of heaps that have been created by `FuncEnvironment::make_heap`. heaps: HashMap, // Map of tables that have been created by `FuncEnvironment::make_table`. pub(crate) tables: HashMap, // Map of indirect call signatures that have been created by // `FuncEnvironment::make_indirect_sig()`. // Stores both the signature reference and the number of WebAssembly arguments signatures: HashMap, // Imported and local functions that have been created by // `FuncEnvironment::make_direct_func()`. // Stores both the function reference and the number of WebAssembly arguments functions: HashMap, } // Public methods that are exposed to non-`cranelift_wasm` API consumers. impl FuncTranslationState { /// True if the current translation state expresses reachable code, false if it is unreachable. #[inline] pub fn reachable(&self) -> bool { self.reachable } } impl FuncTranslationState { /// Construct a new, empty, `FuncTranslationState` pub(crate) fn new() -> Self { Self { stack: Vec::new(), control_stack: Vec::new(), reachable: true, globals: HashMap::new(), heaps: HashMap::new(), tables: HashMap::new(), signatures: HashMap::new(), functions: HashMap::new(), } } fn clear(&mut self) { debug_assert!(self.stack.is_empty()); debug_assert!(self.control_stack.is_empty()); self.reachable = true; self.globals.clear(); self.heaps.clear(); self.tables.clear(); self.signatures.clear(); self.functions.clear(); } /// Initialize the state for compiling a function with the given signature. /// /// This resets the state to containing only a single block representing the whole function. /// The exit block is the last block in the function which will contain the return instruction. pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) { self.clear(); self.push_block( exit_block, 0, sig.returns .iter() .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal) .count(), ); } /// Push a value. pub(crate) fn push1(&mut self, val: Value) { self.stack.push(val); } /// Push multiple values. pub(crate) fn pushn(&mut self, vals: &[Value]) { self.stack.extend_from_slice(vals); } /// Pop one value. pub(crate) fn pop1(&mut self) -> Value { self.stack .pop() .expect("attempted to pop a value from an empty stack") } /// Peek at the top of the stack without popping it. pub(crate) fn peek1(&self) -> Value { *self .stack .last() .expect("attempted to peek at a value on an empty stack") } /// Pop two values. Return them in the order they were pushed. pub(crate) fn pop2(&mut self) -> (Value, Value) { let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); (v1, v2) } /// Pop three values. Return them in the order they were pushed. pub(crate) fn pop3(&mut self) -> (Value, Value, Value) { let v3 = self.stack.pop().unwrap(); let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); (v1, v2, v3) } /// Helper to ensure the the stack size is at least as big as `n`; note that due to /// `debug_assert` this will not execute in non-optimized builds. #[inline] fn ensure_length_is_at_least(&self, n: usize) { debug_assert!( n <= self.stack.len(), "attempted to access {} values but stack only has {} values", n, self.stack.len() ) } /// Pop the top `n` values on the stack. /// /// The popped values are not returned. Use `peekn` to look at them before popping. pub(crate) fn popn(&mut self, n: usize) { self.ensure_length_is_at_least(n); let new_len = self.stack.len() - n; self.stack.truncate(new_len); } /// Peek at the top `n` values on the stack in the order they were pushed. pub(crate) fn peekn(&self, n: usize) -> &[Value] { self.ensure_length_is_at_least(n); &self.stack[self.stack.len() - n..] } /// Peek at the top `n` values on the stack in the order they were pushed. pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] { self.ensure_length_is_at_least(n); let len = self.stack.len(); &mut self.stack[len - n..] } /// Push a block on the control stack. pub(crate) fn push_block( &mut self, following_code: Block, num_param_types: usize, num_result_types: usize, ) { debug_assert!(num_param_types <= self.stack.len()); self.control_stack.push(ControlStackFrame::Block { destination: following_code, original_stack_size: self.stack.len() - num_param_types, num_param_values: num_param_types, num_return_values: num_result_types, exit_is_branched_to: false, }); } /// Push a loop on the control stack. pub(crate) fn push_loop( &mut self, header: Block, following_code: Block, num_param_types: usize, num_result_types: usize, ) { debug_assert!(num_param_types <= self.stack.len()); self.control_stack.push(ControlStackFrame::Loop { header, destination: following_code, original_stack_size: self.stack.len() - num_param_types, num_param_values: num_param_types, num_return_values: num_result_types, }); } /// Push an if on the control stack. pub(crate) fn push_if( &mut self, destination: Block, else_data: ElseData, num_param_types: usize, num_result_types: usize, blocktype: wasmparser::TypeOrFuncType, ) { debug_assert!(num_param_types <= self.stack.len()); // Push a second copy of our `if`'s parameters on the stack. This lets // us avoid saving them on the side in the `ControlStackFrame` for our // `else` block (if it exists), which would require a second heap // allocation. See also the comment in `translate_operator` for // `Operator::Else`. self.stack.reserve(num_param_types); for i in (self.stack.len() - num_param_types)..self.stack.len() { let val = self.stack[i]; self.stack.push(val); } self.control_stack.push(ControlStackFrame::If { destination, else_data, original_stack_size: self.stack.len() - num_param_types, num_param_values: num_param_types, num_return_values: num_result_types, exit_is_branched_to: false, head_is_reachable: self.reachable, consequent_ends_reachable: None, blocktype, }); } } /// Methods for handling entity references. impl FuncTranslationState { /// Get the `GlobalVariable` reference that should be used to access the global variable /// `index`. Create the reference if necessary. /// Also return the WebAssembly type of the global. pub(crate) fn get_global( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult { let index = GlobalIndex::from_u32(index); match self.globals.entry(index) { Occupied(entry) => Ok(*entry.get()), Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)), } } /// Get the `Heap` reference that should be used to access linear memory `index`. /// Create the reference if necessary. pub(crate) fn get_heap( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult { let index = MemoryIndex::from_u32(index); match self.heaps.entry(index) { Occupied(entry) => Ok(*entry.get()), Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)), } } /// Get the `Table` reference that should be used to access table `index`. /// Create the reference if necessary. pub(crate) fn get_or_create_table( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult { let index = TableIndex::from_u32(index); match self.tables.entry(index) { Occupied(entry) => Ok(*entry.get()), Vacant(entry) => Ok(*entry.insert(environ.make_table(func, index)?)), } } /// Get the `SigRef` reference that should be used to make an indirect call with signature /// `index`. Also return the number of WebAssembly arguments in the signature. /// /// Create the signature if necessary. pub(crate) fn get_indirect_sig( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::SigRef, usize)> { let index = TypeIndex::from_u32(index); match self.signatures.entry(index) { Occupied(entry) => Ok(*entry.get()), Vacant(entry) => { let sig = environ.make_indirect_sig(func, index)?; Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig])))) } } } /// Get the `FuncRef` reference that should be used to make a direct call to function /// `index`. Also return the number of WebAssembly arguments in the signature. /// /// Create the function reference if necessary. pub(crate) fn get_direct_func( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::FuncRef, usize)> { let index = FuncIndex::from_u32(index); match self.functions.entry(index) { Occupied(entry) => Ok(*entry.get()), Vacant(entry) => { let fref = environ.make_direct_func(func, index)?; let sig = func.dfg.ext_funcs[fref].signature; Ok(*entry.insert(( fref, num_wasm_parameters(environ, &func.dfg.signatures[sig]), ))) } } } } fn num_wasm_parameters( environ: &FE, signature: &ir::Signature, ) -> usize { (0..signature.params.len()) .filter(|index| environ.is_wasm_parameter(signature, *index)) .count() }