Diffstat (limited to 'third_party/rust/cranelift-wasm/src/code_translator.rs')
-rw-r--r-- | third_party/rust/cranelift-wasm/src/code_translator.rs | 2754
1 file changed, 2754 insertions(+), 0 deletions(-)
diff --git a/third_party/rust/cranelift-wasm/src/code_translator.rs b/third_party/rust/cranelift-wasm/src/code_translator.rs
new file mode 100644
index 0000000000..4a8beaec3b
--- /dev/null
+++ b/third_party/rust/cranelift-wasm/src/code_translator.rs
@@ -0,0 +1,2754 @@
+//! This module contains the bulk of the interesting code performing the translation between
+//! WebAssembly and Cranelift IR.
+//!
+//! The translation is done in one pass, opcode by opcode. Two main data structures are used
+//! during code translation: the value stack and the control stack. The value stack mimics the
+//! execution of the WebAssembly stack machine: each instruction result is pushed onto the stack
+//! and instruction arguments are popped off the stack. Similarly, when encountering a control
+//! flow block, it is pushed onto the control stack and popped off when encountering the
+//! corresponding `End`.
+//!
+//! Another data structure, the translation state, records information concerning unreachable
+//! code status and about whether inserting a return at the end of the function is necessary.
+//!
+//! Some of the WebAssembly instructions need information about the environment for which they
+//! are being translated:
+//!
+//! - the loads and stores need the memory base address;
+//! - the `get_global` and `set_global` instructions depend on how the globals are implemented;
+//! - `memory.size` and `memory.grow` are runtime functions;
+//! - `call_indirect` has to translate the function index into the address of the corresponding
+//!   function;
+//!
+//! That is why `translate_function_body` takes an object implementing the `FuncEnvironment`
+//! trait as argument.
+//!
+//! There is extra complexity associated with translation of 128-bit SIMD instructions.
+//! Wasm only considers there to be a single 128-bit vector type. But CLIF's type system
+//! distinguishes different lane configurations, so considers 8X16, 16X8, 32X4 and 64X2 to be
+//! different types. The result is that, in wasm, it's perfectly OK to take the output of (eg)
+//! an `add.16x8` and use that as an operand of a `sub.32x4`, without using any cast. But when
+//! translated into CLIF, that will cause a verifier error due to the apparent type mismatch.
+//!
+//! This file works around that problem by liberally inserting `bitcast` instructions in many
+//! places -- mostly, before the use of vector values, either as arguments to CLIF instructions
+//! or as block actual parameters. These are no-op casts which nevertheless have different
+//! input and output types, and are used (mostly) to "convert" 16X8, 32X4 and 64X2-typed vectors
+//! to the "canonical" type, 8X16. Hence the functions `optionally_bitcast_vector`,
+//! `bitcast_arguments`, `pop*_with_bitcast`, `canonicalise_then_jump`,
+//! `canonicalise_then_br{z,nz}`, `is_non_canonical_v128` and `canonicalise_v128_values`.
+//! Note that the `bitcast*` functions are occasionally used to convert to some type other than
+//! 8X16, but the `canonicalise*` functions always convert to type 8X16.
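+//!
+//! For instance, feeding the I16X8-typed result of an `iadd` to a block whose parameter has
+//! the canonical I8X16 type requires a cast first; this is, in essence, what
+//! `canonicalise_then_jump` does on the caller's behalf (`sum_i16x8` and `destination` are
+//! illustrative names, not items defined in this file):
+//!
+//!     let canonical = builder.ins().raw_bitcast(I8X16, sum_i16x8);
+//!     builder.ins().jump(destination, &[canonical]);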
+//!
+//! Be careful when adding support for new vector instructions. And when adding new jumps, even
+//! if they apparently don't have any connection to vectors. Never generate any kind of
+//! (inter-block) jump directly. Instead use `canonicalise_then_jump` and
+//! `canonicalise_then_br{z,nz}`.
+//!
+//! The use of bitcasts is ugly and inefficient, but currently unavoidable:
+//!
+//! * they make the logic in this file fragile: miss out a bitcast for any reason, and there is
+//!   the risk of the system failing in the verifier, at least for debug builds.
+//!
+//! * in the new backends, they potentially interfere with pattern matching on CLIF -- the
+//!   patterns need to take into account the presence of bitcast nodes.
+//!
+//! * in the new backends, they get translated into machine-level vector-register-copy
+//!   instructions, none of which are actually necessary. We then depend on the register
+//!   allocator to coalesce them all out.
+//!
+//! * they increase the total number of CLIF nodes that have to be processed, hence slowing down
+//!   the compilation pipeline. Also, the extra coalescing work generates a slowdown.
+//!
+//! A better solution, which would avoid all four problems, would be to remove the 8X16, 16X8,
+//! 32X4 and 64X2 types from CLIF and instead have a single V128 type.
+//!
+//! For further background see also:
+//!   https://github.com/bytecodealliance/wasmtime/issues/1147
+//!     ("Too many raw_bitcasts in SIMD code")
+//!   https://github.com/bytecodealliance/cranelift/pull/1251
+//!     ("Add X128 type to represent WebAssembly's V128 type")
+//!   https://github.com/bytecodealliance/cranelift/pull/1236
+//!     ("Relax verification to allow I8X16 to act as a default vector type")
+
+use super::{hash_map, HashMap};
+use crate::environ::{FuncEnvironment, GlobalVariable, ReturnMode, WasmResult};
+use crate::state::{ControlStackFrame, ElseData, FuncTranslationState};
+use crate::translation_utils::{
+    block_with_params, blocktype_params_results, f32_translation, f64_translation,
+};
+use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, TableIndex, TypeIndex};
+use crate::wasm_unsupported;
+use core::convert::TryInto;
+use core::{i32, u32};
+use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
+use cranelift_codegen::ir::immediates::Offset32;
+use cranelift_codegen::ir::types::*;
+use cranelift_codegen::ir::{
+    self, AtomicRmwOp, ConstantData, InstBuilder, JumpTableData, MemFlags, Value, ValueLabel,
+};
+use cranelift_codegen::packed_option::ReservedValue;
+use cranelift_frontend::{FunctionBuilder, Variable};
+use smallvec::SmallVec;
+use std::cmp;
+use std::convert::TryFrom;
+use std::vec::Vec;
+use wasmparser::{FuncValidator, MemoryImmediate, Operator, WasmModuleResources};
+
+// Clippy warns about "align: _" but it's important to document that the flags field is ignored.
+#[cfg_attr(
+    feature = "cargo-clippy",
+    allow(clippy::unneeded_field_pattern, clippy::cognitive_complexity)
+)]
+/// Translates wasm operators into Cranelift IR instructions.
+pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
+    validator: &mut FuncValidator<impl WasmModuleResources>,
+    op: &Operator,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    if !state.reachable {
+        translate_unreachable_operator(validator, &op, builder, state, environ)?;
+        return Ok(());
+    }
+
+    // This big match treats all Wasm code operators.
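+    // For example, translating `(i32.add (local.get 0) (local.get 1))` visits three
+    // operators, and the value stack mirrors the wasm stack machine at each step:
+    //
+    //     let a = builder.use_var(Variable::with_u32(0)); // local.get 0
+    //     state.push1(a);
+    //     let b = builder.use_var(Variable::with_u32(1)); // local.get 1
+    //     state.push1(b);
+    //     let (x, y) = state.pop2();                      // i32.add
+    //     state.push1(builder.ins().iadd(x, y));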
+    match op {
+        /********************************** Locals ****************************************
+         * `get_local` and `set_local` are treated as non-SSA variables and will completely
+         * disappear in the Cranelift code.
+         ***********************************************************************************/
+        Operator::LocalGet { local_index } => {
+            let val = builder.use_var(Variable::with_u32(*local_index));
+            state.push1(val);
+            let label = ValueLabel::from_u32(*local_index);
+            builder.set_val_label(val, label);
+        }
+        Operator::LocalSet { local_index } => {
+            let mut val = state.pop1();
+
+            // Ensure SIMD values are cast to their default Cranelift type, I8X16.
+            let ty = builder.func.dfg.value_type(val);
+            if ty.is_vector() {
+                val = optionally_bitcast_vector(val, I8X16, builder);
+            }
+
+            builder.def_var(Variable::with_u32(*local_index), val);
+            let label = ValueLabel::from_u32(*local_index);
+            builder.set_val_label(val, label);
+        }
+        Operator::LocalTee { local_index } => {
+            let mut val = state.peek1();
+
+            // Ensure SIMD values are cast to their default Cranelift type, I8X16.
+            let ty = builder.func.dfg.value_type(val);
+            if ty.is_vector() {
+                val = optionally_bitcast_vector(val, I8X16, builder);
+            }
+
+            builder.def_var(Variable::with_u32(*local_index), val);
+            let label = ValueLabel::from_u32(*local_index);
+            builder.set_val_label(val, label);
+        }
+        /********************************** Globals ****************************************
+         * `get_global` and `set_global` are handled by the environment.
+         ***********************************************************************************/
+        Operator::GlobalGet { global_index } => {
+            let val = match state.get_global(builder.func, *global_index, environ)? {
+                GlobalVariable::Const(val) => val,
+                GlobalVariable::Memory { gv, offset, ty } => {
+                    let addr = builder.ins().global_value(environ.pointer_type(), gv);
+                    let flags = ir::MemFlags::trusted();
+                    builder.ins().load(ty, flags, addr, offset)
+                }
+                GlobalVariable::Custom => environ.translate_custom_global_get(
+                    builder.cursor(),
+                    GlobalIndex::from_u32(*global_index),
+                )?,
+            };
+            state.push1(val);
+        }
+        Operator::GlobalSet { global_index } => {
+            match state.get_global(builder.func, *global_index, environ)? {
+                GlobalVariable::Const(_) => panic!("global #{} is a constant", *global_index),
+                GlobalVariable::Memory { gv, offset, ty } => {
+                    let addr = builder.ins().global_value(environ.pointer_type(), gv);
+                    let flags = ir::MemFlags::trusted();
+                    let mut val = state.pop1();
+                    // Ensure SIMD values are cast to their default Cranelift type, I8X16.
+                    if ty.is_vector() {
+                        val = optionally_bitcast_vector(val, I8X16, builder);
+                    }
+                    debug_assert_eq!(ty, builder.func.dfg.value_type(val));
+                    builder.ins().store(flags, val, addr, offset);
+                }
+                GlobalVariable::Custom => {
+                    let val = state.pop1();
+                    environ.translate_custom_global_set(
+                        builder.cursor(),
+                        GlobalIndex::from_u32(*global_index),
+                        val,
+                    )?;
+                }
+            }
+        }
+        /********************************* Stack misc ***************************************
+         * `drop`, `nop`, `unreachable` and `select`.
+         ***********************************************************************************/
+        Operator::Drop => {
+            state.pop1();
+        }
+        Operator::Select => {
+            let (arg1, arg2, cond) = state.pop3();
+            state.push1(builder.ins().select(cond, arg1, arg2));
+        }
+        Operator::TypedSelect { ty: _ } => {
+            // We ignore the explicit type parameter as it is only needed for
+            // validation, which we require to have been performed before
+            // translation.
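+            // E.g. `(select (result f64) $a $b $cond)` translates exactly like the untyped
+            // `select` above: pop the condition and the two operands, then emit
+            // `select cond, a, b`.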
+            let (arg1, arg2, cond) = state.pop3();
+            state.push1(builder.ins().select(cond, arg1, arg2));
+        }
+        Operator::Nop => {
+            // We do nothing
+        }
+        Operator::Unreachable => {
+            builder.ins().trap(ir::TrapCode::UnreachableCodeReached);
+            state.reachable = false;
+        }
+        /***************************** Control flow blocks **********************************
+         * When starting a control flow block, we create a new `Block` that will hold the code
+         * after the block, and we push a frame on the control stack. Depending on the type
+         * of block, we create a new `Block` for the body of the block with an associated
+         * jump instruction.
+         *
+         * The `End` instruction pops the last control frame from the control stack, seals
+         * the destination block (since `br` instructions targeting it only appear inside the
+         * block and have already been translated) and modifies the value stack to use the
+         * possible `Block` argument values.
+         ***********************************************************************************/
+        Operator::Block { ty } => {
+            let (params, results) = blocktype_params_results(validator, *ty)?;
+            let next = block_with_params(builder, results.clone(), environ)?;
+            state.push_block(next, params.len(), results.len());
+        }
+        Operator::Loop { ty } => {
+            let (params, results) = blocktype_params_results(validator, *ty)?;
+            let loop_body = block_with_params(builder, params.clone(), environ)?;
+            let next = block_with_params(builder, results.clone(), environ)?;
+            canonicalise_then_jump(builder, loop_body, state.peekn(params.len()));
+            state.push_loop(loop_body, next, params.len(), results.len());
+
+            // Pop the initial `Block` actuals and replace them with the `Block`'s
+            // params since control flow joins at the top of the loop.
+            state.popn(params.len());
+            state
+                .stack
+                .extend_from_slice(builder.block_params(loop_body));
+
+            builder.switch_to_block(loop_body);
+            environ.translate_loop_header(builder.cursor())?;
+        }
+        Operator::If { ty } => {
+            let val = state.pop1();
+
+            let (params, results) = blocktype_params_results(validator, *ty)?;
+            let (destination, else_data) = if params.clone().eq(results.clone()) {
+                // It is possible there is no `else` block, so we will only
+                // allocate a block for it if/when we find the `else`. For now,
+                // if the condition isn't true, we jump directly to the
+                // destination block following the whole `if...end`. If we do end
+                // up discovering an `else`, then we will allocate a block for it
+                // and go back and patch the jump.
+                let destination = block_with_params(builder, results.clone(), environ)?;
+                let branch_inst =
+                    canonicalise_then_brz(builder, val, destination, state.peekn(params.len()));
+                (destination, ElseData::NoElse { branch_inst })
+            } else {
+                // The `if` type signature is not valid without an `else` block,
+                // so we eagerly allocate the `else` block here.
+                let destination = block_with_params(builder, results.clone(), environ)?;
+                let else_block = block_with_params(builder, params.clone(), environ)?;
+                canonicalise_then_brz(builder, val, else_block, state.peekn(params.len()));
+                builder.seal_block(else_block);
+                (destination, ElseData::WithElse { else_block })
+            };
+
+            let next_block = builder.create_block();
+            canonicalise_then_jump(builder, next_block, &[]);
+            builder.seal_block(next_block); // Only predecessor is the current block.
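+            // `canonicalise_then_jump` and `canonicalise_then_br{z,nz}` wrap the raw branch
+            // instructions, first rewriting any non-canonical (I16X8, I32X4, I64X2) vector
+            // arguments to I8X16. A sketch of the jump case, assuming the helper shapes used
+            // later in this file:
+            //
+            //     let mut tmp: SmallVec<[Value; 16]> = SmallVec::new();
+            //     let canonical = canonicalise_v128_values(&mut tmp, builder, params);
+            //     builder.ins().jump(destination, canonical)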
+            builder.switch_to_block(next_block);
+
+            // Here we append an argument to a `Block` targeted by an argumentless jump
+            // instruction. But in fact there are two cases:
+            // - either the `If` does not have an `Else` clause, in which case ty = EmptyBlock
+            //   and we add nothing;
+            // - or the `If` has an `Else` clause, in which case the destination of this jump
+            //   instruction will be changed later when we translate the `Else` operator.
+            state.push_if(destination, else_data, params.len(), results.len(), *ty);
+        }
+        Operator::Else => {
+            let i = state.control_stack.len() - 1;
+            match state.control_stack[i] {
+                ControlStackFrame::If {
+                    ref else_data,
+                    head_is_reachable,
+                    ref mut consequent_ends_reachable,
+                    num_return_values,
+                    blocktype,
+                    destination,
+                    ..
+                } => {
+                    // We finished the consequent, so record its final
+                    // reachability state.
+                    debug_assert!(consequent_ends_reachable.is_none());
+                    *consequent_ends_reachable = Some(state.reachable);
+
+                    if head_is_reachable {
+                        // We have a branch from the head of the `if` to the `else`.
+                        state.reachable = true;
+
+                        // Ensure we have a block for the `else` block (it may have
+                        // already been pre-allocated, see `ElseData` for details).
+                        let else_block = match *else_data {
+                            ElseData::NoElse { branch_inst } => {
+                                let (params, _results) =
+                                    blocktype_params_results(validator, blocktype)?;
+                                debug_assert_eq!(params.len(), num_return_values);
+                                let else_block =
+                                    block_with_params(builder, params.clone(), environ)?;
+                                canonicalise_then_jump(
+                                    builder,
+                                    destination,
+                                    state.peekn(params.len()),
+                                );
+                                state.popn(params.len());
+
+                                builder.change_jump_destination(branch_inst, else_block);
+                                builder.seal_block(else_block);
+                                else_block
+                            }
+                            ElseData::WithElse { else_block } => {
+                                canonicalise_then_jump(
+                                    builder,
+                                    destination,
+                                    state.peekn(num_return_values),
+                                );
+                                state.popn(num_return_values);
+                                else_block
+                            }
+                        };
+
+                        // You might be expecting that we push the parameters for this
+                        // `else` block here, something like this:
+                        //
+                        //     state.pushn(&control_stack_frame.params);
+                        //
+                        // We don't do that because they are already on the top of the stack
+                        // for us: we pushed the parameters twice when we saw the initial
+                        // `if` so that we wouldn't have to save the parameters in the
+                        // `ControlStackFrame` as another `Vec` allocation.
+
+                        builder.switch_to_block(else_block);
+
+                        // We don't bother updating the control frame's `ElseData`
+                        // to `WithElse` because nothing else will read it.
+                    }
+                }
+                _ => unreachable!(),
+            }
+        }
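+        // To make the `If`/`Else`/`End` triple concrete: a reachable
+        // `(if (result i32) ... (else ...))` produces CLIF with roughly this shape:
+        //
+        //     block_head:               brz v_cond, block_else; jump block_consequent
+        //     block_consequent:         ...; jump block_dest(v_then)
+        //     block_else:               ...; jump block_dest(v_else)
+        //     block_dest(v_res: i32):   ...
+        //
+        // When the `else` is absent, the initial `brz` targets `block_dest` directly and is
+        // patched via `change_jump_destination` only if an `Else` operator shows up after all.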
+        Operator::End => {
+            let frame = state.control_stack.pop().unwrap();
+            let next_block = frame.following_code();
+
+            if !builder.is_unreachable() || !builder.is_pristine() {
+                let return_count = frame.num_return_values();
+                let return_args = state.peekn_mut(return_count);
+                canonicalise_then_jump(builder, frame.following_code(), return_args);
+                // You might expect that if we just finished an `if` block that
+                // didn't have a corresponding `else` block, then we would clean
+                // up our duplicate set of parameters that we pushed earlier
+                // right here. However, we don't have to explicitly do that,
+                // since we truncate the stack back to the original height
+                // below.
+            }
+
+            builder.switch_to_block(next_block);
+            builder.seal_block(next_block);
+
+            // If it is a loop we also have to seal the body loop block.
+            if let ControlStackFrame::Loop { header, .. } = frame {
+                builder.seal_block(header)
+            }
+
+            frame.truncate_value_stack_to_original_size(&mut state.stack);
+            state
+                .stack
+                .extend_from_slice(builder.block_params(next_block));
+        }
+        /**************************** Branch instructions *********************************
+         * The branch instructions all have as arguments a target nesting level, which
+         * corresponds to how many control stack frames we have to pop to get the
+         * destination `Block`.
+         *
+         * Once the destination `Block` is found, we sometimes have to declare a certain depth
+         * of the stack unreachable, because some branch instructions are terminators.
+         *
+         * The `br_table` case is much more complicated because Cranelift's `br_table` instruction
+         * does not support jump arguments like all the other branch instructions. That is why, in
+         * the case where we would use jump arguments for every other branch instruction, we
+         * need to split the critical edges leaving the `br_table` by creating one `Block` per
+         * table destination; the `br_table` will point to these newly created `Block`s, and these
+         * `Block`s contain only a jump instruction pointing to the final destination, this time
+         * with jump arguments.
+         *
+         * This system is also implemented in Cranelift's SSA construction algorithm, because
+         * `use_var` located in a destination `Block` of a `br_table` might trigger the addition
+         * of jump arguments in each predecessor branch instruction, one of which might be a
+         * `br_table`.
+         ***********************************************************************************/
+        Operator::Br { relative_depth } => {
+            let i = state.control_stack.len() - 1 - (*relative_depth as usize);
+            let (return_count, br_destination) = {
+                let frame = &mut state.control_stack[i];
+                // We signal that all the code that follows until the next End is unreachable.
+                frame.set_branched_to_exit();
+                let return_count = if frame.is_loop() {
+                    frame.num_param_values()
+                } else {
+                    frame.num_return_values()
+                };
+                (return_count, frame.br_destination())
+            };
+            let destination_args = state.peekn_mut(return_count);
+            canonicalise_then_jump(builder, br_destination, destination_args);
+            state.popn(return_count);
+            state.reachable = false;
+        }
+        Operator::BrIf { relative_depth } => translate_br_if(*relative_depth, builder, state),
+        Operator::BrTable { table } => {
+            let mut depths = table.targets().collect::<Result<Vec<_>, _>>()?;
+            let default = depths.pop().unwrap().0;
+            let mut min_depth = default;
+            for (depth, _) in depths.iter() {
+                if *depth < min_depth {
+                    min_depth = *depth;
+                }
+            }
+            let jump_args_count = {
+                let i = state.control_stack.len() - 1 - (min_depth as usize);
+                let min_depth_frame = &state.control_stack[i];
+                if min_depth_frame.is_loop() {
+                    min_depth_frame.num_param_values()
+                } else {
+                    min_depth_frame.num_return_values()
+                }
+            };
+            let val = state.pop1();
+            let mut data = JumpTableData::with_capacity(depths.len());
+            if jump_args_count == 0 {
+                // No jump arguments
+                for (depth, _) in depths.iter() {
+                    let block = {
+                        let i = state.control_stack.len() - 1 - (*depth as usize);
+                        let frame = &mut state.control_stack[i];
+                        frame.set_branched_to_exit();
+                        frame.br_destination()
+                    };
+                    data.push_entry(block);
+                }
+                let jt = builder.create_jump_table(data);
+                let block = {
+                    let i = state.control_stack.len() - 1 - (default as usize);
+                    let frame = &mut state.control_stack[i];
+                    frame.set_branched_to_exit();
+                    frame.br_destination()
+                };
+                builder.ins().br_table(val, block, jt);
+            } else {
+                // Here we have jump arguments, but Cranelift's `br_table` doesn't support them.
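+                // The split-edge arrangement built below has roughly this shape (one
+                // trampoline block per distinct depth):
+                //
+                //     br_table v_index, block_default_tramp, jt(block_tramp0, block_tramp1, ...)
+                //     block_tramp0: jump block_real_dest0(v_args...)
+                //     block_tramp1: jump block_real_dest1(v_args...)
+                //
+                // Each trampoline carries the jump arguments that `br_table` itself cannot.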
+                // We then proceed to split the edges going out of the br_table.
+                let return_count = jump_args_count;
+                let mut dest_block_sequence = vec![];
+                let mut dest_block_map = HashMap::new();
+                for (depth, _) in depths.iter() {
+                    let branch_block = match dest_block_map.entry(*depth as usize) {
+                        hash_map::Entry::Occupied(entry) => *entry.get(),
+                        hash_map::Entry::Vacant(entry) => {
+                            let block = builder.create_block();
+                            dest_block_sequence.push((*depth as usize, block));
+                            *entry.insert(block)
+                        }
+                    };
+                    data.push_entry(branch_block);
+                }
+                let default_branch_block = match dest_block_map.entry(default as usize) {
+                    hash_map::Entry::Occupied(entry) => *entry.get(),
+                    hash_map::Entry::Vacant(entry) => {
+                        let block = builder.create_block();
+                        dest_block_sequence.push((default as usize, block));
+                        *entry.insert(block)
+                    }
+                };
+                let jt = builder.create_jump_table(data);
+                builder.ins().br_table(val, default_branch_block, jt);
+                for (depth, dest_block) in dest_block_sequence {
+                    builder.switch_to_block(dest_block);
+                    builder.seal_block(dest_block);
+                    let real_dest_block = {
+                        let i = state.control_stack.len() - 1 - depth;
+                        let frame = &mut state.control_stack[i];
+                        frame.set_branched_to_exit();
+                        frame.br_destination()
+                    };
+                    let destination_args = state.peekn_mut(return_count);
+                    canonicalise_then_jump(builder, real_dest_block, destination_args);
+                }
+                state.popn(return_count);
+            }
+            state.reachable = false;
+        }
+        Operator::Return => {
+            let (return_count, br_destination) = {
+                let frame = &mut state.control_stack[0];
+                if environ.return_mode() == ReturnMode::FallthroughReturn {
+                    frame.set_branched_to_exit();
+                }
+                let return_count = frame.num_return_values();
+                (return_count, frame.br_destination())
+            };
+            {
+                let return_args = state.peekn_mut(return_count);
+                let return_types = wasm_param_types(&builder.func.signature.returns, |i| {
+                    environ.is_wasm_return(&builder.func.signature, i)
+                });
+                bitcast_arguments(return_args, &return_types, builder);
+                match environ.return_mode() {
+                    ReturnMode::NormalReturns => builder.ins().return_(return_args),
+                    ReturnMode::FallthroughReturn => {
+                        canonicalise_then_jump(builder, br_destination, return_args)
+                    }
+                };
+            }
+            state.popn(return_count);
+            state.reachable = false;
+        }
+        /************************************ Calls ****************************************
+         * The call instructions pop off their arguments from the stack and append their
+         * return values to it. `call_indirect` needs environment support because there is an
+         * argument referring to an index in the external functions table of the module.
+         ************************************************************************************/
+        Operator::Call { function_index } => {
+            let (fref, num_args) = state.get_direct_func(builder.func, *function_index, environ)?;
+
+            // Bitcast any vector arguments to their default type, I8X16, before calling.
+ let callee_signature = + &builder.func.dfg.signatures[builder.func.dfg.ext_funcs[fref].signature]; + let args = state.peekn_mut(num_args); + let types = wasm_param_types(&callee_signature.params, |i| { + environ.is_wasm_parameter(&callee_signature, i) + }); + bitcast_arguments(args, &types, builder); + + let call = environ.translate_call( + builder.cursor(), + FuncIndex::from_u32(*function_index), + fref, + args, + )?; + let inst_results = builder.inst_results(call); + debug_assert_eq!( + inst_results.len(), + builder.func.dfg.signatures[builder.func.dfg.ext_funcs[fref].signature] + .returns + .len(), + "translate_call results should match the call signature" + ); + state.popn(num_args); + state.pushn(inst_results); + } + Operator::CallIndirect { index, table_index } => { + // `index` is the index of the function's signature and `table_index` is the index of + // the table to search the function in. + let (sigref, num_args) = state.get_indirect_sig(builder.func, *index, environ)?; + let table = state.get_or_create_table(builder.func, *table_index, environ)?; + let callee = state.pop1(); + + // Bitcast any vector arguments to their default type, I8X16, before calling. + let callee_signature = &builder.func.dfg.signatures[sigref]; + let args = state.peekn_mut(num_args); + let types = wasm_param_types(&callee_signature.params, |i| { + environ.is_wasm_parameter(&callee_signature, i) + }); + bitcast_arguments(args, &types, builder); + + let call = environ.translate_call_indirect( + builder.cursor(), + TableIndex::from_u32(*table_index), + table, + TypeIndex::from_u32(*index), + sigref, + callee, + state.peekn(num_args), + )?; + let inst_results = builder.inst_results(call); + debug_assert_eq!( + inst_results.len(), + builder.func.dfg.signatures[sigref].returns.len(), + "translate_call_indirect results should match the call signature" + ); + state.popn(num_args); + state.pushn(inst_results); + } + /******************************* Memory management *********************************** + * Memory management is handled by environment. It is usually translated into calls to + * special functions. + ************************************************************************************/ + Operator::MemoryGrow { mem, mem_byte: _ } => { + // The WebAssembly MVP only supports one linear memory, but we expect the reserved + // argument to be a memory index. + let heap_index = MemoryIndex::from_u32(*mem); + let heap = state.get_heap(builder.func, *mem, environ)?; + let val = state.pop1(); + state.push1(environ.translate_memory_grow(builder.cursor(), heap_index, heap, val)?) + } + Operator::MemorySize { mem, mem_byte: _ } => { + let heap_index = MemoryIndex::from_u32(*mem); + let heap = state.get_heap(builder.func, *mem, environ)?; + state.push1(environ.translate_memory_size(builder.cursor(), heap_index, heap)?); + } + /******************************* Load instructions *********************************** + * Wasm specifies an integer alignment flag but we drop it in Cranelift. + * The memory base address is provided by the environment. 
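+         *
+         * For example, `i32.load8_u` lowers along the lines of the following sketch of what
+         * `translate_load` does (the `1` is the number of bytes accessed):
+         *
+         *     let (flags, base, offset) = prepare_load(memarg, 1, builder, state, environ)?;
+         *     state.push1(builder.ins().uload8(I32, flags, base, offset));
+         *
+         * where `prepare_load` pops the address operand, performs the heap bounds check and
+         * returns the computed base plus a constant `Offset32`.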
+ ************************************************************************************/ + Operator::I32Load8U { memarg } => { + translate_load(memarg, ir::Opcode::Uload8, I32, builder, state, environ)?; + } + Operator::I32Load16U { memarg } => { + translate_load(memarg, ir::Opcode::Uload16, I32, builder, state, environ)?; + } + Operator::I32Load8S { memarg } => { + translate_load(memarg, ir::Opcode::Sload8, I32, builder, state, environ)?; + } + Operator::I32Load16S { memarg } => { + translate_load(memarg, ir::Opcode::Sload16, I32, builder, state, environ)?; + } + Operator::I64Load8U { memarg } => { + translate_load(memarg, ir::Opcode::Uload8, I64, builder, state, environ)?; + } + Operator::I64Load16U { memarg } => { + translate_load(memarg, ir::Opcode::Uload16, I64, builder, state, environ)?; + } + Operator::I64Load8S { memarg } => { + translate_load(memarg, ir::Opcode::Sload8, I64, builder, state, environ)?; + } + Operator::I64Load16S { memarg } => { + translate_load(memarg, ir::Opcode::Sload16, I64, builder, state, environ)?; + } + Operator::I64Load32S { memarg } => { + translate_load(memarg, ir::Opcode::Sload32, I64, builder, state, environ)?; + } + Operator::I64Load32U { memarg } => { + translate_load(memarg, ir::Opcode::Uload32, I64, builder, state, environ)?; + } + Operator::I32Load { memarg } => { + translate_load(memarg, ir::Opcode::Load, I32, builder, state, environ)?; + } + Operator::F32Load { memarg } => { + translate_load(memarg, ir::Opcode::Load, F32, builder, state, environ)?; + } + Operator::I64Load { memarg } => { + translate_load(memarg, ir::Opcode::Load, I64, builder, state, environ)?; + } + Operator::F64Load { memarg } => { + translate_load(memarg, ir::Opcode::Load, F64, builder, state, environ)?; + } + Operator::V128Load { memarg } => { + translate_load(memarg, ir::Opcode::Load, I8X16, builder, state, environ)?; + } + Operator::V128Load8x8S { memarg } => { + let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().sload8x8(flags, base, offset); + state.push1(loaded); + } + Operator::V128Load8x8U { memarg } => { + let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().uload8x8(flags, base, offset); + state.push1(loaded); + } + Operator::V128Load16x4S { memarg } => { + let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().sload16x4(flags, base, offset); + state.push1(loaded); + } + Operator::V128Load16x4U { memarg } => { + let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().uload16x4(flags, base, offset); + state.push1(loaded); + } + Operator::V128Load32x2S { memarg } => { + let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().sload32x2(flags, base, offset); + state.push1(loaded); + } + Operator::V128Load32x2U { memarg } => { + let (flags, base, offset) = prepare_load(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().uload32x2(flags, base, offset); + state.push1(loaded); + } + /****************************** Store instructions *********************************** + * Wasm specifies an integer alignment flag but we drop it in Cranelift. + * The memory base address is provided by the environment. 
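+         *
+         * A store pops its operands value-first: e.g. `i64.store32` becomes, roughly,
+         *
+         *     let val = state.pop1();
+         *     // ...compute (flags, base, offset) from memarg and the popped address...
+         *     builder.ins().istore32(flags, val, base, offset);
+         *
+         * with the arms below only choosing the Cranelift opcode before deferring to
+         * `translate_store`.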
+ ************************************************************************************/ + Operator::I32Store { memarg } + | Operator::I64Store { memarg } + | Operator::F32Store { memarg } + | Operator::F64Store { memarg } => { + translate_store(memarg, ir::Opcode::Store, builder, state, environ)?; + } + Operator::I32Store8 { memarg } | Operator::I64Store8 { memarg } => { + translate_store(memarg, ir::Opcode::Istore8, builder, state, environ)?; + } + Operator::I32Store16 { memarg } | Operator::I64Store16 { memarg } => { + translate_store(memarg, ir::Opcode::Istore16, builder, state, environ)?; + } + Operator::I64Store32 { memarg } => { + translate_store(memarg, ir::Opcode::Istore32, builder, state, environ)?; + } + Operator::V128Store { memarg } => { + translate_store(memarg, ir::Opcode::Store, builder, state, environ)?; + } + /****************************** Nullary Operators ************************************/ + Operator::I32Const { value } => state.push1(builder.ins().iconst(I32, i64::from(*value))), + Operator::I64Const { value } => state.push1(builder.ins().iconst(I64, *value)), + Operator::F32Const { value } => { + state.push1(builder.ins().f32const(f32_translation(*value))); + } + Operator::F64Const { value } => { + state.push1(builder.ins().f64const(f64_translation(*value))); + } + /******************************* Unary Operators *************************************/ + Operator::I32Clz | Operator::I64Clz => { + let arg = state.pop1(); + state.push1(builder.ins().clz(arg)); + } + Operator::I32Ctz | Operator::I64Ctz => { + let arg = state.pop1(); + state.push1(builder.ins().ctz(arg)); + } + Operator::I32Popcnt | Operator::I64Popcnt => { + let arg = state.pop1(); + state.push1(builder.ins().popcnt(arg)); + } + Operator::I64ExtendI32S => { + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + Operator::I64ExtendI32U => { + let val = state.pop1(); + state.push1(builder.ins().uextend(I64, val)); + } + Operator::I32WrapI64 => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I32, val)); + } + Operator::F32Sqrt | Operator::F64Sqrt => { + let arg = state.pop1(); + state.push1(builder.ins().sqrt(arg)); + } + Operator::F32Ceil | Operator::F64Ceil => { + let arg = state.pop1(); + state.push1(builder.ins().ceil(arg)); + } + Operator::F32Floor | Operator::F64Floor => { + let arg = state.pop1(); + state.push1(builder.ins().floor(arg)); + } + Operator::F32Trunc | Operator::F64Trunc => { + let arg = state.pop1(); + state.push1(builder.ins().trunc(arg)); + } + Operator::F32Nearest | Operator::F64Nearest => { + let arg = state.pop1(); + state.push1(builder.ins().nearest(arg)); + } + Operator::F32Abs | Operator::F64Abs => { + let val = state.pop1(); + state.push1(builder.ins().fabs(val)); + } + Operator::F32Neg | Operator::F64Neg => { + let arg = state.pop1(); + state.push1(builder.ins().fneg(arg)); + } + Operator::F64ConvertI64U | Operator::F64ConvertI32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_from_uint(F64, val)); + } + Operator::F64ConvertI64S | Operator::F64ConvertI32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_from_sint(F64, val)); + } + Operator::F32ConvertI64S | Operator::F32ConvertI32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_from_sint(F32, val)); + } + Operator::F32ConvertI64U | Operator::F32ConvertI32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_from_uint(F32, val)); + } + Operator::F64PromoteF32 => { + let val = state.pop1(); + 
state.push1(builder.ins().fpromote(F64, val)); + } + Operator::F32DemoteF64 => { + let val = state.pop1(); + state.push1(builder.ins().fdemote(F32, val)); + } + Operator::I64TruncF64S | Operator::I64TruncF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint(I64, val)); + } + Operator::I32TruncF64S | Operator::I32TruncF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint(I32, val)); + } + Operator::I64TruncF64U | Operator::I64TruncF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint(I64, val)); + } + Operator::I32TruncF64U | Operator::I32TruncF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint(I32, val)); + } + Operator::I64TruncSatF64S | Operator::I64TruncSatF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint_sat(I64, val)); + } + Operator::I32TruncSatF64S | Operator::I32TruncSatF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint_sat(I32, val)); + } + Operator::I64TruncSatF64U | Operator::I64TruncSatF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint_sat(I64, val)); + } + Operator::I32TruncSatF64U | Operator::I32TruncSatF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint_sat(I32, val)); + } + Operator::F32ReinterpretI32 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(F32, val)); + } + Operator::F64ReinterpretI64 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(F64, val)); + } + Operator::I32ReinterpretF32 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(I32, val)); + } + Operator::I64ReinterpretF64 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(I64, val)); + } + Operator::I32Extend8S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I8, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I32, val)); + } + Operator::I32Extend16S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I16, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I32, val)); + } + Operator::I64Extend8S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I8, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + Operator::I64Extend16S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I16, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + Operator::I64Extend32S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I32, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + /****************************** Binary Operators ************************************/ + Operator::I32Add | Operator::I64Add => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().iadd(arg1, arg2)); + } + Operator::I32And | Operator::I64And => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().band(arg1, arg2)); + } + Operator::I32Or | Operator::I64Or => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().bor(arg1, arg2)); + } + Operator::I32Xor | Operator::I64Xor => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().bxor(arg1, arg2)); + } + Operator::I32Shl | Operator::I64Shl => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().ishl(arg1, arg2)); + } + Operator::I32ShrS | Operator::I64ShrS => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().sshr(arg1, arg2)); + } + Operator::I32ShrU | 
Operator::I64ShrU => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().ushr(arg1, arg2)); + } + Operator::I32Rotl | Operator::I64Rotl => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().rotl(arg1, arg2)); + } + Operator::I32Rotr | Operator::I64Rotr => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().rotr(arg1, arg2)); + } + Operator::F32Add | Operator::F64Add => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fadd(arg1, arg2)); + } + Operator::I32Sub | Operator::I64Sub => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().isub(arg1, arg2)); + } + Operator::F32Sub | Operator::F64Sub => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fsub(arg1, arg2)); + } + Operator::I32Mul | Operator::I64Mul => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().imul(arg1, arg2)); + } + Operator::F32Mul | Operator::F64Mul => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fmul(arg1, arg2)); + } + Operator::F32Div | Operator::F64Div => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fdiv(arg1, arg2)); + } + Operator::I32DivS | Operator::I64DivS => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().sdiv(arg1, arg2)); + } + Operator::I32DivU | Operator::I64DivU => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().udiv(arg1, arg2)); + } + Operator::I32RemS | Operator::I64RemS => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().srem(arg1, arg2)); + } + Operator::I32RemU | Operator::I64RemU => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().urem(arg1, arg2)); + } + Operator::F32Min | Operator::F64Min => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fmin(arg1, arg2)); + } + Operator::F32Max | Operator::F64Max => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fmax(arg1, arg2)); + } + Operator::F32Copysign | Operator::F64Copysign => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fcopysign(arg1, arg2)); + } + /**************************** Comparison Operators **********************************/ + Operator::I32LtS | Operator::I64LtS => { + translate_icmp(IntCC::SignedLessThan, builder, state) + } + Operator::I32LtU | Operator::I64LtU => { + translate_icmp(IntCC::UnsignedLessThan, builder, state) + } + Operator::I32LeS | Operator::I64LeS => { + translate_icmp(IntCC::SignedLessThanOrEqual, builder, state) + } + Operator::I32LeU | Operator::I64LeU => { + translate_icmp(IntCC::UnsignedLessThanOrEqual, builder, state) + } + Operator::I32GtS | Operator::I64GtS => { + translate_icmp(IntCC::SignedGreaterThan, builder, state) + } + Operator::I32GtU | Operator::I64GtU => { + translate_icmp(IntCC::UnsignedGreaterThan, builder, state) + } + Operator::I32GeS | Operator::I64GeS => { + translate_icmp(IntCC::SignedGreaterThanOrEqual, builder, state) + } + Operator::I32GeU | Operator::I64GeU => { + translate_icmp(IntCC::UnsignedGreaterThanOrEqual, builder, state) + } + Operator::I32Eqz | Operator::I64Eqz => { + let arg = state.pop1(); + let val = builder.ins().icmp_imm(IntCC::Equal, arg, 0); + state.push1(builder.ins().bint(I32, val)); + } + Operator::I32Eq | Operator::I64Eq => translate_icmp(IntCC::Equal, builder, state), + Operator::F32Eq | Operator::F64Eq => translate_fcmp(FloatCC::Equal, builder, state), + Operator::I32Ne | Operator::I64Ne => translate_icmp(IntCC::NotEqual, builder, state), + Operator::F32Ne | Operator::F64Ne => translate_fcmp(FloatCC::NotEqual, builder, 
state), + Operator::F32Gt | Operator::F64Gt => translate_fcmp(FloatCC::GreaterThan, builder, state), + Operator::F32Ge | Operator::F64Ge => { + translate_fcmp(FloatCC::GreaterThanOrEqual, builder, state) + } + Operator::F32Lt | Operator::F64Lt => translate_fcmp(FloatCC::LessThan, builder, state), + Operator::F32Le | Operator::F64Le => { + translate_fcmp(FloatCC::LessThanOrEqual, builder, state) + } + Operator::RefNull { ty } => { + state.push1(environ.translate_ref_null(builder.cursor(), (*ty).try_into()?)?) + } + Operator::RefIsNull => { + let value = state.pop1(); + state.push1(environ.translate_ref_is_null(builder.cursor(), value)?); + } + Operator::RefFunc { function_index } => { + let index = FuncIndex::from_u32(*function_index); + state.push1(environ.translate_ref_func(builder.cursor(), index)?); + } + Operator::MemoryAtomicWait32 { memarg } | Operator::MemoryAtomicWait64 { memarg } => { + // The WebAssembly MVP only supports one linear memory and + // wasmparser will ensure that the memory indices specified are + // zero. + let implied_ty = match op { + Operator::MemoryAtomicWait64 { .. } => I64, + Operator::MemoryAtomicWait32 { .. } => I32, + _ => unreachable!(), + }; + let heap_index = MemoryIndex::from_u32(memarg.memory); + let heap = state.get_heap(builder.func, memarg.memory, environ)?; + let timeout = state.pop1(); // 64 (fixed) + let expected = state.pop1(); // 32 or 64 (per the `Ixx` in `IxxAtomicWait`) + let addr = state.pop1(); // 32 (fixed) + assert!(builder.func.dfg.value_type(expected) == implied_ty); + // `fn translate_atomic_wait` can inspect the type of `expected` to figure out what + // code it needs to generate, if it wants. + let res = environ.translate_atomic_wait( + builder.cursor(), + heap_index, + heap, + addr, + expected, + timeout, + )?; + state.push1(res); + } + Operator::MemoryAtomicNotify { memarg } => { + let heap_index = MemoryIndex::from_u32(memarg.memory); + let heap = state.get_heap(builder.func, memarg.memory, environ)?; + let count = state.pop1(); // 32 (fixed) + let addr = state.pop1(); // 32 (fixed) + let res = + environ.translate_atomic_notify(builder.cursor(), heap_index, heap, addr, count)?; + state.push1(res); + } + Operator::I32AtomicLoad { memarg } => { + translate_atomic_load(I32, I32, memarg, builder, state, environ)? + } + Operator::I64AtomicLoad { memarg } => { + translate_atomic_load(I64, I64, memarg, builder, state, environ)? + } + Operator::I32AtomicLoad8U { memarg } => { + translate_atomic_load(I32, I8, memarg, builder, state, environ)? + } + Operator::I32AtomicLoad16U { memarg } => { + translate_atomic_load(I32, I16, memarg, builder, state, environ)? + } + Operator::I64AtomicLoad8U { memarg } => { + translate_atomic_load(I64, I8, memarg, builder, state, environ)? + } + Operator::I64AtomicLoad16U { memarg } => { + translate_atomic_load(I64, I16, memarg, builder, state, environ)? + } + Operator::I64AtomicLoad32U { memarg } => { + translate_atomic_load(I64, I32, memarg, builder, state, environ)? + } + + Operator::I32AtomicStore { memarg } => { + translate_atomic_store(I32, memarg, builder, state, environ)? + } + Operator::I64AtomicStore { memarg } => { + translate_atomic_store(I64, memarg, builder, state, environ)? + } + Operator::I32AtomicStore8 { memarg } => { + translate_atomic_store(I8, memarg, builder, state, environ)? + } + Operator::I32AtomicStore16 { memarg } => { + translate_atomic_store(I16, memarg, builder, state, environ)? 
+ } + Operator::I64AtomicStore8 { memarg } => { + translate_atomic_store(I8, memarg, builder, state, environ)? + } + Operator::I64AtomicStore16 { memarg } => { + translate_atomic_store(I16, memarg, builder, state, environ)? + } + Operator::I64AtomicStore32 { memarg } => { + translate_atomic_store(I32, memarg, builder, state, environ)? + } + + Operator::I32AtomicRmwAdd { memarg } => { + translate_atomic_rmw(I32, I32, AtomicRmwOp::Add, memarg, builder, state, environ)? + } + Operator::I64AtomicRmwAdd { memarg } => { + translate_atomic_rmw(I64, I64, AtomicRmwOp::Add, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw8AddU { memarg } => { + translate_atomic_rmw(I32, I8, AtomicRmwOp::Add, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw16AddU { memarg } => { + translate_atomic_rmw(I32, I16, AtomicRmwOp::Add, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw8AddU { memarg } => { + translate_atomic_rmw(I64, I8, AtomicRmwOp::Add, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw16AddU { memarg } => { + translate_atomic_rmw(I64, I16, AtomicRmwOp::Add, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw32AddU { memarg } => { + translate_atomic_rmw(I64, I32, AtomicRmwOp::Add, memarg, builder, state, environ)? + } + + Operator::I32AtomicRmwSub { memarg } => { + translate_atomic_rmw(I32, I32, AtomicRmwOp::Sub, memarg, builder, state, environ)? + } + Operator::I64AtomicRmwSub { memarg } => { + translate_atomic_rmw(I64, I64, AtomicRmwOp::Sub, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw8SubU { memarg } => { + translate_atomic_rmw(I32, I8, AtomicRmwOp::Sub, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw16SubU { memarg } => { + translate_atomic_rmw(I32, I16, AtomicRmwOp::Sub, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw8SubU { memarg } => { + translate_atomic_rmw(I64, I8, AtomicRmwOp::Sub, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw16SubU { memarg } => { + translate_atomic_rmw(I64, I16, AtomicRmwOp::Sub, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw32SubU { memarg } => { + translate_atomic_rmw(I64, I32, AtomicRmwOp::Sub, memarg, builder, state, environ)? + } + + Operator::I32AtomicRmwAnd { memarg } => { + translate_atomic_rmw(I32, I32, AtomicRmwOp::And, memarg, builder, state, environ)? + } + Operator::I64AtomicRmwAnd { memarg } => { + translate_atomic_rmw(I64, I64, AtomicRmwOp::And, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw8AndU { memarg } => { + translate_atomic_rmw(I32, I8, AtomicRmwOp::And, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw16AndU { memarg } => { + translate_atomic_rmw(I32, I16, AtomicRmwOp::And, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw8AndU { memarg } => { + translate_atomic_rmw(I64, I8, AtomicRmwOp::And, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw16AndU { memarg } => { + translate_atomic_rmw(I64, I16, AtomicRmwOp::And, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw32AndU { memarg } => { + translate_atomic_rmw(I64, I32, AtomicRmwOp::And, memarg, builder, state, environ)? + } + + Operator::I32AtomicRmwOr { memarg } => { + translate_atomic_rmw(I32, I32, AtomicRmwOp::Or, memarg, builder, state, environ)? + } + Operator::I64AtomicRmwOr { memarg } => { + translate_atomic_rmw(I64, I64, AtomicRmwOp::Or, memarg, builder, state, environ)? 
+ } + Operator::I32AtomicRmw8OrU { memarg } => { + translate_atomic_rmw(I32, I8, AtomicRmwOp::Or, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw16OrU { memarg } => { + translate_atomic_rmw(I32, I16, AtomicRmwOp::Or, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw8OrU { memarg } => { + translate_atomic_rmw(I64, I8, AtomicRmwOp::Or, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw16OrU { memarg } => { + translate_atomic_rmw(I64, I16, AtomicRmwOp::Or, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw32OrU { memarg } => { + translate_atomic_rmw(I64, I32, AtomicRmwOp::Or, memarg, builder, state, environ)? + } + + Operator::I32AtomicRmwXor { memarg } => { + translate_atomic_rmw(I32, I32, AtomicRmwOp::Xor, memarg, builder, state, environ)? + } + Operator::I64AtomicRmwXor { memarg } => { + translate_atomic_rmw(I64, I64, AtomicRmwOp::Xor, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw8XorU { memarg } => { + translate_atomic_rmw(I32, I8, AtomicRmwOp::Xor, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw16XorU { memarg } => { + translate_atomic_rmw(I32, I16, AtomicRmwOp::Xor, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw8XorU { memarg } => { + translate_atomic_rmw(I64, I8, AtomicRmwOp::Xor, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw16XorU { memarg } => { + translate_atomic_rmw(I64, I16, AtomicRmwOp::Xor, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw32XorU { memarg } => { + translate_atomic_rmw(I64, I32, AtomicRmwOp::Xor, memarg, builder, state, environ)? + } + + Operator::I32AtomicRmwXchg { memarg } => { + translate_atomic_rmw(I32, I32, AtomicRmwOp::Xchg, memarg, builder, state, environ)? + } + Operator::I64AtomicRmwXchg { memarg } => { + translate_atomic_rmw(I64, I64, AtomicRmwOp::Xchg, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw8XchgU { memarg } => { + translate_atomic_rmw(I32, I8, AtomicRmwOp::Xchg, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw16XchgU { memarg } => { + translate_atomic_rmw(I32, I16, AtomicRmwOp::Xchg, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw8XchgU { memarg } => { + translate_atomic_rmw(I64, I8, AtomicRmwOp::Xchg, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw16XchgU { memarg } => { + translate_atomic_rmw(I64, I16, AtomicRmwOp::Xchg, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw32XchgU { memarg } => { + translate_atomic_rmw(I64, I32, AtomicRmwOp::Xchg, memarg, builder, state, environ)? + } + + Operator::I32AtomicRmwCmpxchg { memarg } => { + translate_atomic_cas(I32, I32, memarg, builder, state, environ)? + } + Operator::I64AtomicRmwCmpxchg { memarg } => { + translate_atomic_cas(I64, I64, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw8CmpxchgU { memarg } => { + translate_atomic_cas(I32, I8, memarg, builder, state, environ)? + } + Operator::I32AtomicRmw16CmpxchgU { memarg } => { + translate_atomic_cas(I32, I16, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw8CmpxchgU { memarg } => { + translate_atomic_cas(I64, I8, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw16CmpxchgU { memarg } => { + translate_atomic_cas(I64, I16, memarg, builder, state, environ)? + } + Operator::I64AtomicRmw32CmpxchgU { memarg } => { + translate_atomic_cas(I64, I32, memarg, builder, state, environ)? + } + + Operator::AtomicFence { .. 
} => { + builder.ins().fence(); + } + Operator::MemoryCopy { src, dst } => { + let src_index = MemoryIndex::from_u32(*src); + let dst_index = MemoryIndex::from_u32(*dst); + let src_heap = state.get_heap(builder.func, *src, environ)?; + let dst_heap = state.get_heap(builder.func, *dst, environ)?; + let len = state.pop1(); + let src_pos = state.pop1(); + let dst_pos = state.pop1(); + environ.translate_memory_copy( + builder.cursor(), + src_index, + src_heap, + dst_index, + dst_heap, + dst_pos, + src_pos, + len, + )?; + } + Operator::MemoryFill { mem } => { + let heap_index = MemoryIndex::from_u32(*mem); + let heap = state.get_heap(builder.func, *mem, environ)?; + let len = state.pop1(); + let val = state.pop1(); + let dest = state.pop1(); + environ.translate_memory_fill(builder.cursor(), heap_index, heap, dest, val, len)?; + } + Operator::MemoryInit { segment, mem } => { + let heap_index = MemoryIndex::from_u32(*mem); + let heap = state.get_heap(builder.func, *mem, environ)?; + let len = state.pop1(); + let src = state.pop1(); + let dest = state.pop1(); + environ.translate_memory_init( + builder.cursor(), + heap_index, + heap, + *segment, + dest, + src, + len, + )?; + } + Operator::DataDrop { segment } => { + environ.translate_data_drop(builder.cursor(), *segment)?; + } + Operator::TableSize { table: index } => { + let table = state.get_or_create_table(builder.func, *index, environ)?; + state.push1(environ.translate_table_size( + builder.cursor(), + TableIndex::from_u32(*index), + table, + )?); + } + Operator::TableGrow { table: index } => { + let table_index = TableIndex::from_u32(*index); + let table = state.get_or_create_table(builder.func, *index, environ)?; + let delta = state.pop1(); + let init_value = state.pop1(); + state.push1(environ.translate_table_grow( + builder.cursor(), + table_index, + table, + delta, + init_value, + )?); + } + Operator::TableGet { table: index } => { + let table_index = TableIndex::from_u32(*index); + let table = state.get_or_create_table(builder.func, *index, environ)?; + let index = state.pop1(); + state.push1(environ.translate_table_get(builder, table_index, table, index)?); + } + Operator::TableSet { table: index } => { + let table_index = TableIndex::from_u32(*index); + let table = state.get_or_create_table(builder.func, *index, environ)?; + let value = state.pop1(); + let index = state.pop1(); + environ.translate_table_set(builder, table_index, table, value, index)?; + } + Operator::TableCopy { + dst_table: dst_table_index, + src_table: src_table_index, + } => { + let dst_table = state.get_or_create_table(builder.func, *dst_table_index, environ)?; + let src_table = state.get_or_create_table(builder.func, *src_table_index, environ)?; + let len = state.pop1(); + let src = state.pop1(); + let dest = state.pop1(); + environ.translate_table_copy( + builder.cursor(), + TableIndex::from_u32(*dst_table_index), + dst_table, + TableIndex::from_u32(*src_table_index), + src_table, + dest, + src, + len, + )?; + } + Operator::TableFill { table } => { + let table_index = TableIndex::from_u32(*table); + let len = state.pop1(); + let val = state.pop1(); + let dest = state.pop1(); + environ.translate_table_fill(builder.cursor(), table_index, dest, val, len)?; + } + Operator::TableInit { + segment, + table: table_index, + } => { + let table = state.get_or_create_table(builder.func, *table_index, environ)?; + let len = state.pop1(); + let src = state.pop1(); + let dest = state.pop1(); + environ.translate_table_init( + builder.cursor(), + *segment, + 
TableIndex::from_u32(*table_index), + table, + dest, + src, + len, + )?; + } + Operator::ElemDrop { segment } => { + environ.translate_elem_drop(builder.cursor(), *segment)?; + } + Operator::V128Const { value } => { + let data = value.bytes().to_vec().into(); + let handle = builder.func.dfg.constants.insert(data); + let value = builder.ins().vconst(I8X16, handle); + // the v128.const is typed in CLIF as a I8x16 but raw_bitcast to a different type + // before use + state.push1(value) + } + Operator::I8x16Splat | Operator::I16x8Splat => { + let reduced = builder.ins().ireduce(type_of(op).lane_type(), state.pop1()); + let splatted = builder.ins().splat(type_of(op), reduced); + state.push1(splatted) + } + Operator::I32x4Splat + | Operator::I64x2Splat + | Operator::F32x4Splat + | Operator::F64x2Splat => { + let splatted = builder.ins().splat(type_of(op), state.pop1()); + state.push1(splatted) + } + Operator::V128Load8Splat { memarg } + | Operator::V128Load16Splat { memarg } + | Operator::V128Load32Splat { memarg } + | Operator::V128Load64Splat { memarg } => { + let opcode = ir::Opcode::LoadSplat; + let result_ty = type_of(op); + let (flags, base, offset) = prepare_load( + memarg, + mem_op_size(opcode, result_ty.lane_type()), + builder, + state, + environ, + )?; + let (load, dfg) = builder.ins().Load(opcode, result_ty, flags, offset, base); + state.push1(dfg.first_result(load)) + } + Operator::V128Load32Zero { memarg } | Operator::V128Load64Zero { memarg } => { + translate_load( + memarg, + ir::Opcode::Load, + type_of(op).lane_type(), + builder, + state, + environ, + )?; + let as_vector = builder.ins().scalar_to_vector(type_of(op), state.pop1()); + state.push1(as_vector) + } + Operator::I8x16ExtractLaneS { lane } | Operator::I16x8ExtractLaneS { lane } => { + let vector = pop1_with_bitcast(state, type_of(op), builder); + let extracted = builder.ins().extractlane(vector, lane.clone()); + state.push1(builder.ins().sextend(I32, extracted)) + } + Operator::I8x16ExtractLaneU { lane } | Operator::I16x8ExtractLaneU { lane } => { + let vector = pop1_with_bitcast(state, type_of(op), builder); + let extracted = builder.ins().extractlane(vector, lane.clone()); + state.push1(builder.ins().uextend(I32, extracted)); + // On x86, PEXTRB zeroes the upper bits of the destination register of extractlane so + // uextend could be elided; for now, uextend is needed for Cranelift's type checks to + // work. + } + Operator::I32x4ExtractLane { lane } + | Operator::I64x2ExtractLane { lane } + | Operator::F32x4ExtractLane { lane } + | Operator::F64x2ExtractLane { lane } => { + let vector = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().extractlane(vector, lane.clone())) + } + Operator::I8x16ReplaceLane { lane } | Operator::I16x8ReplaceLane { lane } => { + let (vector, replacement) = state.pop2(); + let ty = type_of(op); + let reduced = builder.ins().ireduce(ty.lane_type(), replacement); + let vector = optionally_bitcast_vector(vector, ty, builder); + state.push1(builder.ins().insertlane(vector, reduced, *lane)) + } + Operator::I32x4ReplaceLane { lane } + | Operator::I64x2ReplaceLane { lane } + | Operator::F32x4ReplaceLane { lane } + | Operator::F64x2ReplaceLane { lane } => { + let (vector, replacement) = state.pop2(); + let vector = optionally_bitcast_vector(vector, type_of(op), builder); + state.push1(builder.ins().insertlane(vector, replacement, *lane)) + } + Operator::I8x16Shuffle { lanes, .. 
} => { + let (a, b) = pop2_with_bitcast(state, I8X16, builder); + let lanes = ConstantData::from(lanes.as_ref()); + let mask = builder.func.dfg.immediates.push(lanes); + let shuffled = builder.ins().shuffle(a, b, mask); + state.push1(shuffled) + // At this point the original types of a and b are lost; users of this value (i.e. this + // WASM-to-CLIF translator) may need to raw_bitcast for type-correctness. This is due + // to WASM using the less specific v128 type for certain operations and more specific + // types (e.g. i8x16) for others. + } + Operator::I8x16Swizzle => { + let (a, b) = pop2_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().swizzle(I8X16, a, b)) + } + Operator::I8x16Add | Operator::I16x8Add | Operator::I32x4Add | Operator::I64x2Add => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().iadd(a, b)) + } + Operator::I8x16AddSatS | Operator::I16x8AddSatS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().sadd_sat(a, b)) + } + Operator::I8x16AddSatU | Operator::I16x8AddSatU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().uadd_sat(a, b)) + } + Operator::I8x16Sub | Operator::I16x8Sub | Operator::I32x4Sub | Operator::I64x2Sub => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().isub(a, b)) + } + Operator::I8x16SubSatS | Operator::I16x8SubSatS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().ssub_sat(a, b)) + } + Operator::I8x16SubSatU | Operator::I16x8SubSatU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().usub_sat(a, b)) + } + Operator::I8x16MinS | Operator::I16x8MinS | Operator::I32x4MinS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().imin(a, b)) + } + Operator::I8x16MinU | Operator::I16x8MinU | Operator::I32x4MinU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().umin(a, b)) + } + Operator::I8x16MaxS | Operator::I16x8MaxS | Operator::I32x4MaxS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().imax(a, b)) + } + Operator::I8x16MaxU | Operator::I16x8MaxU | Operator::I32x4MaxU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().umax(a, b)) + } + Operator::I8x16RoundingAverageU | Operator::I16x8RoundingAverageU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().avg_round(a, b)) + } + Operator::I8x16Neg | Operator::I16x8Neg | Operator::I32x4Neg | Operator::I64x2Neg => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().ineg(a)) + } + Operator::I8x16Abs | Operator::I16x8Abs | Operator::I32x4Abs => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().iabs(a)) + } + Operator::I16x8Mul | Operator::I32x4Mul | Operator::I64x2Mul => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().imul(a, b)) + } + Operator::V128Or => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().bor(a, b)) + } + Operator::V128Xor => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().bxor(a, b)) + } + Operator::V128And => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + 
state.push1(builder.ins().band(a, b)) + } + Operator::V128AndNot => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().band_not(a, b)) + } + Operator::V128Not => { + let a = state.pop1(); + state.push1(builder.ins().bnot(a)); + } + Operator::I8x16Shl | Operator::I16x8Shl | Operator::I32x4Shl | Operator::I64x2Shl => { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder); + let bitwidth = i64::from(type_of(op).lane_bits()); + // The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width + // we do `b AND 15`; this means fewer instructions than `iconst + urem`. + let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1); + state.push1(builder.ins().ishl(bitcast_a, b_mod_bitwidth)) + } + Operator::I8x16ShrU | Operator::I16x8ShrU | Operator::I32x4ShrU | Operator::I64x2ShrU => { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder); + let bitwidth = i64::from(type_of(op).lane_bits()); + // The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width + // we do `b AND 15`; this means fewer instructions than `iconst + urem`. + let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1); + state.push1(builder.ins().ushr(bitcast_a, b_mod_bitwidth)) + } + Operator::I8x16ShrS | Operator::I16x8ShrS | Operator::I32x4ShrS | Operator::I64x2ShrS => { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder); + let bitwidth = i64::from(type_of(op).lane_bits()); + // The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width + // we do `b AND 15`; this means fewer instructions than `iconst + urem`. + let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1); + state.push1(builder.ins().sshr(bitcast_a, b_mod_bitwidth)) + } + Operator::V128Bitselect => { + let (a, b, c) = state.pop3(); + let bitcast_a = optionally_bitcast_vector(a, I8X16, builder); + let bitcast_b = optionally_bitcast_vector(b, I8X16, builder); + let bitcast_c = optionally_bitcast_vector(c, I8X16, builder); + // The CLIF operand ordering is slightly different and the types of all three + // operands must match (hence the bitcast). 
+ state.push1(builder.ins().bitselect(bitcast_c, bitcast_a, bitcast_b)) + } + Operator::I8x16AnyTrue | Operator::I16x8AnyTrue | Operator::I32x4AnyTrue => { + let a = pop1_with_bitcast(state, type_of(op), builder); + let bool_result = builder.ins().vany_true(a); + state.push1(builder.ins().bint(I32, bool_result)) + } + Operator::I8x16AllTrue | Operator::I16x8AllTrue | Operator::I32x4AllTrue => { + let a = pop1_with_bitcast(state, type_of(op), builder); + let bool_result = builder.ins().vall_true(a); + state.push1(builder.ins().bint(I32, bool_result)) + } + Operator::I8x16Bitmask | Operator::I16x8Bitmask | Operator::I32x4Bitmask => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().vhigh_bits(I32, a)); + } + Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq => { + translate_vector_icmp(IntCC::Equal, type_of(op), builder, state) + } + Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne => { + translate_vector_icmp(IntCC::NotEqual, type_of(op), builder, state) + } + Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS => { + translate_vector_icmp(IntCC::SignedGreaterThan, type_of(op), builder, state) + } + Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS => { + translate_vector_icmp(IntCC::SignedLessThan, type_of(op), builder, state) + } + Operator::I8x16GtU | Operator::I16x8GtU | Operator::I32x4GtU => { + translate_vector_icmp(IntCC::UnsignedGreaterThan, type_of(op), builder, state) + } + Operator::I8x16LtU | Operator::I16x8LtU | Operator::I32x4LtU => { + translate_vector_icmp(IntCC::UnsignedLessThan, type_of(op), builder, state) + } + Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS => { + translate_vector_icmp(IntCC::SignedGreaterThanOrEqual, type_of(op), builder, state) + } + Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS => { + translate_vector_icmp(IntCC::SignedLessThanOrEqual, type_of(op), builder, state) + } + Operator::I8x16GeU | Operator::I16x8GeU | Operator::I32x4GeU => translate_vector_icmp( + IntCC::UnsignedGreaterThanOrEqual, + type_of(op), + builder, + state, + ), + Operator::I8x16LeU | Operator::I16x8LeU | Operator::I32x4LeU => { + translate_vector_icmp(IntCC::UnsignedLessThanOrEqual, type_of(op), builder, state) + } + Operator::F32x4Eq | Operator::F64x2Eq => { + translate_vector_fcmp(FloatCC::Equal, type_of(op), builder, state) + } + Operator::F32x4Ne | Operator::F64x2Ne => { + translate_vector_fcmp(FloatCC::NotEqual, type_of(op), builder, state) + } + Operator::F32x4Lt | Operator::F64x2Lt => { + translate_vector_fcmp(FloatCC::LessThan, type_of(op), builder, state) + } + Operator::F32x4Gt | Operator::F64x2Gt => { + translate_vector_fcmp(FloatCC::GreaterThan, type_of(op), builder, state) + } + Operator::F32x4Le | Operator::F64x2Le => { + translate_vector_fcmp(FloatCC::LessThanOrEqual, type_of(op), builder, state) + } + Operator::F32x4Ge | Operator::F64x2Ge => { + translate_vector_fcmp(FloatCC::GreaterThanOrEqual, type_of(op), builder, state) + } + Operator::F32x4Add | Operator::F64x2Add => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fadd(a, b)) + } + Operator::F32x4Sub | Operator::F64x2Sub => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fsub(a, b)) + } + Operator::F32x4Mul | Operator::F64x2Mul => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmul(a, b)) + } + Operator::F32x4Div | Operator::F64x2Div => { + let (a, b) = 
pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fdiv(a, b)) + } + Operator::F32x4Max | Operator::F64x2Max => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmax(a, b)) + } + Operator::F32x4Min | Operator::F64x2Min => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmin(a, b)) + } + Operator::F32x4PMax | Operator::F64x2PMax => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmax_pseudo(a, b)) + } + Operator::F32x4PMin | Operator::F64x2PMin => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmin_pseudo(a, b)) + } + Operator::F32x4Sqrt | Operator::F64x2Sqrt => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().sqrt(a)) + } + Operator::F32x4Neg | Operator::F64x2Neg => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fneg(a)) + } + Operator::F32x4Abs | Operator::F64x2Abs => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fabs(a)) + } + Operator::F32x4ConvertI32x4S => { + let a = pop1_with_bitcast(state, I32X4, builder); + state.push1(builder.ins().fcvt_from_sint(F32X4, a)) + } + Operator::F32x4ConvertI32x4U => { + let a = pop1_with_bitcast(state, I32X4, builder); + state.push1(builder.ins().fcvt_from_uint(F32X4, a)) + } + Operator::I32x4TruncSatF32x4S => { + let a = pop1_with_bitcast(state, F32X4, builder); + state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a)) + } + Operator::I32x4TruncSatF32x4U => { + let a = pop1_with_bitcast(state, F32X4, builder); + state.push1(builder.ins().fcvt_to_uint_sat(I32X4, a)) + } + Operator::I8x16NarrowI16x8S => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().snarrow(a, b)) + } + Operator::I16x8NarrowI32x4S => { + let (a, b) = pop2_with_bitcast(state, I32X4, builder); + state.push1(builder.ins().snarrow(a, b)) + } + Operator::I8x16NarrowI16x8U => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().unarrow(a, b)) + } + Operator::I16x8NarrowI32x4U => { + let (a, b) = pop2_with_bitcast(state, I32X4, builder); + state.push1(builder.ins().unarrow(a, b)) + } + Operator::I16x8WidenLowI8x16S => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().swiden_low(a)) + } + Operator::I16x8WidenHighI8x16S => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().swiden_high(a)) + } + Operator::I16x8WidenLowI8x16U => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().uwiden_low(a)) + } + Operator::I16x8WidenHighI8x16U => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().uwiden_high(a)) + } + Operator::I32x4WidenLowI16x8S => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().swiden_low(a)) + } + Operator::I32x4WidenHighI16x8S => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().swiden_high(a)) + } + Operator::I32x4WidenLowI16x8U => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().uwiden_low(a)) + } + Operator::I32x4WidenHighI16x8U => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().uwiden_high(a)) + } + + Operator::F32x4Ceil | Operator::F64x2Ceil => { + // This is something of a misuse of `type_of`, because that produces the return type + // of 
`op`. In this case we want the arg type, but we know it's the same as the + // return type. Same for the 3 cases below. + let arg = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().ceil(arg)); + } + Operator::F32x4Floor | Operator::F64x2Floor => { + let arg = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().floor(arg)); + } + Operator::F32x4Trunc | Operator::F64x2Trunc => { + let arg = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().trunc(arg)); + } + Operator::F32x4Nearest | Operator::F64x2Nearest => { + let arg = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().nearest(arg)); + } + + Operator::I32x4DotI16x8S => { + let (a, b) = pop2_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().widening_pairwise_dot_product_s(a, b)); + } + + Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => { + return Err(wasm_unsupported!("proposed tail-call operator {:?}", op)); + } + }; + Ok(()) +} + +// Clippy warns us of some fields we are deliberately ignoring +#[cfg_attr(feature = "cargo-clippy", allow(clippy::unneeded_field_pattern))] +/// Deals with a Wasm instruction located in an unreachable portion of the code. Most of them +/// are dropped but special ones like `End` or `Else` signal the potential end of the unreachable +/// portion so the translation state must be updated accordingly. +fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>( + validator: &FuncValidator<impl WasmModuleResources>, + op: &Operator, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<()> { + debug_assert!(!state.reachable); + match *op { + Operator::If { ty } => { + // Push a placeholder control stack entry. The if isn't reachable, + // so we don't have any branches anywhere. + state.push_if( + ir::Block::reserved_value(), + ElseData::NoElse { + branch_inst: ir::Inst::reserved_value(), + }, + 0, + 0, + ty, + ); + } + Operator::Loop { ty: _ } | Operator::Block { ty: _ } => { + state.push_block(ir::Block::reserved_value(), 0, 0); + } + Operator::Else => { + let i = state.control_stack.len() - 1; + match state.control_stack[i] { + ControlStackFrame::If { + ref else_data, + head_is_reachable, + ref mut consequent_ends_reachable, + blocktype, + .. + } => { + debug_assert!(consequent_ends_reachable.is_none()); + *consequent_ends_reachable = Some(state.reachable); + + if head_is_reachable { + // We have a branch from the head of the `if` to the `else`. + state.reachable = true; + + let else_block = match *else_data { + ElseData::NoElse { branch_inst } => { + let (params, _results) = + blocktype_params_results(validator, blocktype)?; + let else_block = block_with_params(builder, params, environ)?; + let frame = state.control_stack.last().unwrap(); + frame.truncate_value_stack_to_else_params(&mut state.stack); + + // We change the target of the branch instruction. + builder.change_jump_destination(branch_inst, else_block); + builder.seal_block(else_block); + else_block + } + ElseData::WithElse { else_block } => { + let frame = state.control_stack.last().unwrap(); + frame.truncate_value_stack_to_else_params(&mut state.stack); + else_block + } + }; + + builder.switch_to_block(else_block); + + // Again, no need to push the parameters for the `else`, + // since we already did when we saw the original `if`. See + // the comment for translating `Operator::Else` in + // `translate_operator` for details. 
+ } + } + _ => unreachable!(), + } + } + Operator::End => { + let stack = &mut state.stack; + let control_stack = &mut state.control_stack; + let frame = control_stack.pop().unwrap(); + + // Pop unused parameters from stack. + frame.truncate_value_stack_to_original_size(stack); + + let reachable_anyway = match frame { + // If it is a loop we also have to seal the body loop block + ControlStackFrame::Loop { header, .. } => { + builder.seal_block(header); + // And loops can't have branches to the end. + false + } + // If we never set `consequent_ends_reachable` then that means + // we are finishing the consequent now, and there was no + // `else`. Whether the following block is reachable depends only + // on if the head was reachable. + ControlStackFrame::If { + head_is_reachable, + consequent_ends_reachable: None, + .. + } => head_is_reachable, + // Since we are only in this function when in unreachable code, + // we know that the alternative just ended unreachable. Whether + // the following block is reachable depends on if the consequent + // ended reachable or not. + ControlStackFrame::If { + head_is_reachable, + consequent_ends_reachable: Some(consequent_ends_reachable), + .. + } => head_is_reachable && consequent_ends_reachable, + // All other control constructs are already handled. + _ => false, + }; + + if frame.exit_is_branched_to() || reachable_anyway { + builder.switch_to_block(frame.following_code()); + builder.seal_block(frame.following_code()); + + // And add the return values of the block but only if the next block is reachable + // (which corresponds to testing if the stack depth is 1) + stack.extend_from_slice(builder.block_params(frame.following_code())); + state.reachable = true; + } + } + _ => { + // We don't translate because this is unreachable code + } + } + + Ok(()) +} + +/// Get the address+offset to use for a heap access. +fn get_heap_addr( + heap: ir::Heap, + addr32: ir::Value, + offset: u32, + width: u32, + addr_ty: Type, + builder: &mut FunctionBuilder, +) -> (ir::Value, i32) { + let offset_guard_size: u64 = builder.func.heaps[heap].offset_guard_size.into(); + + // How exactly the bounds check is performed here and what it's performed + // on is a bit tricky. Generally we want to rely on access violations (e.g. + // segfaults) to generate traps since that means we don't have to bounds + // check anything explicitly. + // + // If we don't have a guard page of unmapped memory, though, then we can't + // rely on this trapping behavior through segfaults. Instead we need to + // bounds-check the entire memory access here which is everything from + // `addr32 + offset` to `addr32 + offset + width` (not inclusive). In this + // scenario our adjusted offset that we're checking is `offset + width`. + // + // If we have a guard page, however, then we can perform a further + // optimization of the generated code by only checking multiples of the + // offset-guard size to be more CSE-friendly. Knowing that we have at least + // 1 page of a guard page we're then able to disregard the `width` since we + // know it's always less than one page. Our bounds check will be for the + // first byte which will either succeed and be guaranteed to fault if it's + // actually out of bounds, or the bounds check itself will fail. In any case + // we assert that the width is reasonably small for now so this assumption + // can be adjusted in the future if we get larger widths. 
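+    //
+    // As a concrete (purely illustrative) example, assume a 4 KiB guard
+    // (offset_guard_size = 0x1000) and a load at wasm offset 0x1804. Then
+    //
+    //     adjusted_offset = 0x1804 / 0x1000 * 0x1000 = 0x1000
+    //
+    // so we bounds-check `addr32 + 0x1000`; if that check passes, the real
+    // access at `addr32 + 0x1804` is less than one guard size beyond the
+    // checked address, and is therefore either in bounds or caught by the
+    // guard page.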
+    //
+    // Put another way we can say, where `y < offset_guard_size`:
+    //
+    //     n * offset_guard_size + y = offset
+    //
+    // We'll then pass `n * offset_guard_size` as the bounds check value. If
+    // this traps then our `offset` would have trapped anyway. If this check
+    // passes we know
+    //
+    //     addr32 + n * offset_guard_size < bound
+    //
+    // which means
+    //
+    //     addr32 + n * offset_guard_size + y < bound + offset_guard_size
+    //
+    // because `y < offset_guard_size`, which then means:
+    //
+    //     addr32 + offset < bound + offset_guard_size
+    //
+    // Since we know that the guard-size bytes are all unmapped, we're
+    // guaranteed that `offset` and the `width` bytes after it are either
+    // in-bounds or will hit the guard page, giving us the desired semantics.
+    //
+    // As one final comment on the guard-size arithmetic here, another goal
+    // of this is to hit an optimization in `heap_addr` where if the heap size
+    // minus the offset is >= 4GB then bounds checks are 100% eliminated. This
+    // means that with huge guard regions (e.g. our 2GB default) most adjusted
+    // offsets we're checking here are zero. This means that we'll hit the fast
+    // path and emit zero conditional traps for bounds checks.
+    let adjusted_offset = if offset_guard_size == 0 {
+        u64::from(offset) + u64::from(width)
+    } else {
+        assert!(width < 1024);
+        cmp::max(u64::from(offset) / offset_guard_size * offset_guard_size, 1)
+    };
+    debug_assert!(adjusted_offset > 0); // want to bounds check at least 1 byte
+    let check_size = u32::try_from(adjusted_offset).unwrap_or(u32::MAX);
+    let base = builder.ins().heap_addr(addr_ty, heap, addr32, check_size);
+
+    // Native load/store instructions take a signed `Offset32` immediate, so adjust the base
+    // pointer if necessary.
+    if offset > i32::MAX as u32 {
+        // The offset doesn't fit in the load/store instruction.
+        let adj = builder.ins().iadd_imm(base, i64::from(i32::MAX) + 1);
+        (adj, (offset - (i32::MAX as u32 + 1)) as i32)
+    } else {
+        (base, offset as i32)
+    }
+}
+
+/// Prepare for a load; factors out common functionality between load and load_extend operations.
+fn prepare_load<FE: FuncEnvironment + ?Sized>(
+    memarg: &MemoryImmediate,
+    loaded_bytes: u32,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<(MemFlags, Value, Offset32)> {
+    let addr32 = state.pop1();
+
+    let heap = state.get_heap(builder.func, memarg.memory, environ)?;
+    let (base, offset) = get_heap_addr(
+        heap,
+        addr32,
+        memarg.offset,
+        loaded_bytes,
+        environ.pointer_type(),
+        builder,
+    );
+
+    // Note that we don't set `is_aligned` here, even if the load instruction's
+    // alignment immediate says it's aligned, because WebAssembly's immediate
+    // field is just a hint, while Cranelift's aligned flag needs a guarantee.
+    let flags = MemFlags::new();
+
+    Ok((flags, base, offset.into()))
+}
+
+/// Translate a load instruction.
+fn translate_load<FE: FuncEnvironment + ?Sized>(
+    memarg: &MemoryImmediate,
+    opcode: ir::Opcode,
+    result_ty: Type,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    let (flags, base, offset) = prepare_load(
+        memarg,
+        mem_op_size(opcode, result_ty),
+        builder,
+        state,
+        environ,
+    )?;
+    let (load, dfg) = builder.ins().Load(opcode, result_ty, flags, offset, base);
+    state.push1(dfg.first_result(load));
+    Ok(())
+}
+
+/// Translate a store instruction.
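+///
+/// As a rough, purely illustrative sketch (not the verbatim output): a Wasm
+/// `i64.store16 offset=8` pops an address and an `i64` value and becomes
+/// approximately
+///
+/// ```text
+/// v2 = heap_addr.i64 heap0, v0, 10  ; bounds-check offset + access width
+/// istore16 v1, v2+8                 ; `Istore16` stores the low 16 bits
+/// ```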
+fn translate_store<FE: FuncEnvironment + ?Sized>( + memarg: &MemoryImmediate, + opcode: ir::Opcode, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<()> { + let (addr32, val) = state.pop2(); + let val_ty = builder.func.dfg.value_type(val); + + let heap = state.get_heap(builder.func, memarg.memory, environ)?; + let (base, offset) = get_heap_addr( + heap, + addr32, + memarg.offset, + mem_op_size(opcode, val_ty), + environ.pointer_type(), + builder, + ); + // See the comments in `prepare_load` about the flags. + let flags = MemFlags::new(); + builder + .ins() + .Store(opcode, val_ty, flags, offset.into(), val, base); + Ok(()) +} + +fn mem_op_size(opcode: ir::Opcode, ty: Type) -> u32 { + match opcode { + ir::Opcode::Istore8 | ir::Opcode::Sload8 | ir::Opcode::Uload8 => 1, + ir::Opcode::Istore16 | ir::Opcode::Sload16 | ir::Opcode::Uload16 => 2, + ir::Opcode::Istore32 | ir::Opcode::Sload32 | ir::Opcode::Uload32 => 4, + ir::Opcode::Store | ir::Opcode::Load | ir::Opcode::LoadSplat => ty.bytes(), + _ => panic!("unknown size of mem op for {:?}", opcode), + } +} + +fn translate_icmp(cc: IntCC, builder: &mut FunctionBuilder, state: &mut FuncTranslationState) { + let (arg0, arg1) = state.pop2(); + let val = builder.ins().icmp(cc, arg0, arg1); + state.push1(builder.ins().bint(I32, val)); +} + +// For an atomic memory operation, emit an alignment check for the linear memory address, +// and then compute the final effective address. +fn finalise_atomic_mem_addr<FE: FuncEnvironment + ?Sized>( + linear_mem_addr: Value, + memarg: &MemoryImmediate, + access_ty: Type, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<Value> { + // Check the alignment of `linear_mem_addr`. + let access_ty_bytes = access_ty.bytes(); + let final_lma = builder + .ins() + .iadd_imm(linear_mem_addr, i64::from(memarg.offset)); + if access_ty_bytes != 1 { + assert!(access_ty_bytes == 2 || access_ty_bytes == 4 || access_ty_bytes == 8); + let final_lma_misalignment = builder + .ins() + .band_imm(final_lma, i64::from(access_ty_bytes - 1)); + let f = builder + .ins() + .ifcmp_imm(final_lma_misalignment, i64::from(0)); + builder + .ins() + .trapif(IntCC::NotEqual, f, ir::TrapCode::HeapMisaligned); + } + + // Compute the final effective address. + let heap = state.get_heap(builder.func, memarg.memory, environ)?; + let (base, offset) = get_heap_addr( + heap, + final_lma, + /*offset=*/ 0, + access_ty.bytes(), + environ.pointer_type(), + builder, + ); + + let final_effective_address = builder.ins().iadd_imm(base, i64::from(offset)); + Ok(final_effective_address) +} + +fn translate_atomic_rmw<FE: FuncEnvironment + ?Sized>( + widened_ty: Type, + access_ty: Type, + op: AtomicRmwOp, + memarg: &MemoryImmediate, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<()> { + let (linear_mem_addr, mut arg2) = state.pop2(); + let arg2_ty = builder.func.dfg.value_type(arg2); + + // The operation is performed at type `access_ty`, and the old value is zero-extended + // to type `widened_ty`. 
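+    //
+    // For example (illustrative): `i64.atomic_rmw8.add_u` reaches here with
+    // `access_ty == I8` and `widened_ty == I64`; the popped `arg2` is
+    // `ireduce`d from I64 to I8, the `atomic_rmw` itself operates at I8, and
+    // the old 8-bit value is then `uextend`ed back to I64 before being pushed.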
+ match access_ty { + I8 | I16 | I32 | I64 => {} + _ => { + return Err(wasm_unsupported!( + "atomic_rmw: unsupported access type {:?}", + access_ty + )) + } + }; + let w_ty_ok = match widened_ty { + I32 | I64 => true, + _ => false, + }; + assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes()); + + assert!(arg2_ty.bytes() >= access_ty.bytes()); + if arg2_ty.bytes() > access_ty.bytes() { + arg2 = builder.ins().ireduce(access_ty, arg2); + } + + let final_effective_address = + finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; + + // See the comments in `prepare_load` about the flags. + let flags = MemFlags::new(); + let mut res = builder + .ins() + .atomic_rmw(access_ty, flags, op, final_effective_address, arg2); + if access_ty != widened_ty { + res = builder.ins().uextend(widened_ty, res); + } + state.push1(res); + Ok(()) +} + +fn translate_atomic_cas<FE: FuncEnvironment + ?Sized>( + widened_ty: Type, + access_ty: Type, + memarg: &MemoryImmediate, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<()> { + let (linear_mem_addr, mut expected, mut replacement) = state.pop3(); + let expected_ty = builder.func.dfg.value_type(expected); + let replacement_ty = builder.func.dfg.value_type(replacement); + + // The compare-and-swap is performed at type `access_ty`, and the old value is zero-extended + // to type `widened_ty`. + match access_ty { + I8 | I16 | I32 | I64 => {} + _ => { + return Err(wasm_unsupported!( + "atomic_cas: unsupported access type {:?}", + access_ty + )) + } + }; + let w_ty_ok = match widened_ty { + I32 | I64 => true, + _ => false, + }; + assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes()); + + assert!(expected_ty.bytes() >= access_ty.bytes()); + if expected_ty.bytes() > access_ty.bytes() { + expected = builder.ins().ireduce(access_ty, expected); + } + assert!(replacement_ty.bytes() >= access_ty.bytes()); + if replacement_ty.bytes() > access_ty.bytes() { + replacement = builder.ins().ireduce(access_ty, replacement); + } + + let final_effective_address = + finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; + + // See the comments in `prepare_load` about the flags. + let flags = MemFlags::new(); + let mut res = builder + .ins() + .atomic_cas(flags, final_effective_address, expected, replacement); + if access_ty != widened_ty { + res = builder.ins().uextend(widened_ty, res); + } + state.push1(res); + Ok(()) +} + +fn translate_atomic_load<FE: FuncEnvironment + ?Sized>( + widened_ty: Type, + access_ty: Type, + memarg: &MemoryImmediate, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<()> { + let linear_mem_addr = state.pop1(); + + // The load is performed at type `access_ty`, and the loaded value is zero extended + // to `widened_ty`. + match access_ty { + I8 | I16 | I32 | I64 => {} + _ => { + return Err(wasm_unsupported!( + "atomic_load: unsupported access type {:?}", + access_ty + )) + } + }; + let w_ty_ok = match widened_ty { + I32 | I64 => true, + _ => false, + }; + assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes()); + + let final_effective_address = + finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; + + // See the comments in `prepare_load` about the flags. 
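+    // As there, the flags stay empty; the alignment guarantee that the atomic
+    // needs is enforced separately by the `HeapMisaligned` trap emitted in
+    // `finalise_atomic_mem_addr`.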
+ let flags = MemFlags::new(); + let mut res = builder + .ins() + .atomic_load(access_ty, flags, final_effective_address); + if access_ty != widened_ty { + res = builder.ins().uextend(widened_ty, res); + } + state.push1(res); + Ok(()) +} + +fn translate_atomic_store<FE: FuncEnvironment + ?Sized>( + access_ty: Type, + memarg: &MemoryImmediate, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<()> { + let (linear_mem_addr, mut data) = state.pop2(); + let data_ty = builder.func.dfg.value_type(data); + + // The operation is performed at type `access_ty`, and the data to be stored may first + // need to be narrowed accordingly. + match access_ty { + I8 | I16 | I32 | I64 => {} + _ => { + return Err(wasm_unsupported!( + "atomic_store: unsupported access type {:?}", + access_ty + )) + } + }; + let d_ty_ok = match data_ty { + I32 | I64 => true, + _ => false, + }; + assert!(d_ty_ok && data_ty.bytes() >= access_ty.bytes()); + + if data_ty.bytes() > access_ty.bytes() { + data = builder.ins().ireduce(access_ty, data); + } + + let final_effective_address = + finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; + + // See the comments in `prepare_load` about the flags. + let flags = MemFlags::new(); + builder + .ins() + .atomic_store(flags, data, final_effective_address); + Ok(()) +} + +fn translate_vector_icmp( + cc: IntCC, + needed_type: Type, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, +) { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, needed_type, builder); + let bitcast_b = optionally_bitcast_vector(b, needed_type, builder); + state.push1(builder.ins().icmp(cc, bitcast_a, bitcast_b)) +} + +fn translate_fcmp(cc: FloatCC, builder: &mut FunctionBuilder, state: &mut FuncTranslationState) { + let (arg0, arg1) = state.pop2(); + let val = builder.ins().fcmp(cc, arg0, arg1); + state.push1(builder.ins().bint(I32, val)); +} + +fn translate_vector_fcmp( + cc: FloatCC, + needed_type: Type, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, +) { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, needed_type, builder); + let bitcast_b = optionally_bitcast_vector(b, needed_type, builder); + state.push1(builder.ins().fcmp(cc, bitcast_a, bitcast_b)) +} + +fn translate_br_if( + relative_depth: u32, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, +) { + let val = state.pop1(); + let (br_destination, inputs) = translate_br_if_args(relative_depth, state); + canonicalise_then_brnz(builder, val, br_destination, inputs); + + let next_block = builder.create_block(); + canonicalise_then_jump(builder, next_block, &[]); + builder.seal_block(next_block); // The only predecessor is the current block. 
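+
+    // Schematically, the emitted control flow is (a sketch; the canonicalising
+    // helpers may also insert `raw_bitcast`s just before the branches):
+    //
+    //     brnz v_cond, br_destination(inputs...)
+    //     jump next_block
+    // next_block:
+    //     <translation continues here>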
+ builder.switch_to_block(next_block); +} + +fn translate_br_if_args( + relative_depth: u32, + state: &mut FuncTranslationState, +) -> (ir::Block, &mut [ir::Value]) { + let i = state.control_stack.len() - 1 - (relative_depth as usize); + let (return_count, br_destination) = { + let frame = &mut state.control_stack[i]; + // The values returned by the branch are still available for the reachable + // code that comes after it + frame.set_branched_to_exit(); + let return_count = if frame.is_loop() { + frame.num_param_values() + } else { + frame.num_return_values() + }; + (return_count, frame.br_destination()) + }; + let inputs = state.peekn_mut(return_count); + (br_destination, inputs) +} + +/// Determine the returned value type of a WebAssembly operator +fn type_of(operator: &Operator) -> Type { + match operator { + Operator::V128Load { .. } + | Operator::V128Store { .. } + | Operator::V128Const { .. } + | Operator::V128Not + | Operator::V128And + | Operator::V128AndNot + | Operator::V128Or + | Operator::V128Xor + | Operator::V128Bitselect => I8X16, // default type representing V128 + + Operator::I8x16Shuffle { .. } + | Operator::I8x16Splat + | Operator::V128Load8Splat { .. } + | Operator::I8x16ExtractLaneS { .. } + | Operator::I8x16ExtractLaneU { .. } + | Operator::I8x16ReplaceLane { .. } + | Operator::I8x16Eq + | Operator::I8x16Ne + | Operator::I8x16LtS + | Operator::I8x16LtU + | Operator::I8x16GtS + | Operator::I8x16GtU + | Operator::I8x16LeS + | Operator::I8x16LeU + | Operator::I8x16GeS + | Operator::I8x16GeU + | Operator::I8x16Neg + | Operator::I8x16Abs + | Operator::I8x16AnyTrue + | Operator::I8x16AllTrue + | Operator::I8x16Shl + | Operator::I8x16ShrS + | Operator::I8x16ShrU + | Operator::I8x16Add + | Operator::I8x16AddSatS + | Operator::I8x16AddSatU + | Operator::I8x16Sub + | Operator::I8x16SubSatS + | Operator::I8x16SubSatU + | Operator::I8x16MinS + | Operator::I8x16MinU + | Operator::I8x16MaxS + | Operator::I8x16MaxU + | Operator::I8x16RoundingAverageU + | Operator::I8x16Bitmask => I8X16, + + Operator::I16x8Splat + | Operator::V128Load16Splat { .. } + | Operator::I16x8ExtractLaneS { .. } + | Operator::I16x8ExtractLaneU { .. } + | Operator::I16x8ReplaceLane { .. } + | Operator::I16x8Eq + | Operator::I16x8Ne + | Operator::I16x8LtS + | Operator::I16x8LtU + | Operator::I16x8GtS + | Operator::I16x8GtU + | Operator::I16x8LeS + | Operator::I16x8LeU + | Operator::I16x8GeS + | Operator::I16x8GeU + | Operator::I16x8Neg + | Operator::I16x8Abs + | Operator::I16x8AnyTrue + | Operator::I16x8AllTrue + | Operator::I16x8Shl + | Operator::I16x8ShrS + | Operator::I16x8ShrU + | Operator::I16x8Add + | Operator::I16x8AddSatS + | Operator::I16x8AddSatU + | Operator::I16x8Sub + | Operator::I16x8SubSatS + | Operator::I16x8SubSatU + | Operator::I16x8MinS + | Operator::I16x8MinU + | Operator::I16x8MaxS + | Operator::I16x8MaxU + | Operator::I16x8RoundingAverageU + | Operator::I16x8Mul + | Operator::I16x8Bitmask => I16X8, + + Operator::I32x4Splat + | Operator::V128Load32Splat { .. } + | Operator::I32x4ExtractLane { .. } + | Operator::I32x4ReplaceLane { .. 
+        }
+        | Operator::I32x4Eq
+        | Operator::I32x4Ne
+        | Operator::I32x4LtS
+        | Operator::I32x4LtU
+        | Operator::I32x4GtS
+        | Operator::I32x4GtU
+        | Operator::I32x4LeS
+        | Operator::I32x4LeU
+        | Operator::I32x4GeS
+        | Operator::I32x4GeU
+        | Operator::I32x4Neg
+        | Operator::I32x4Abs
+        | Operator::I32x4AnyTrue
+        | Operator::I32x4AllTrue
+        | Operator::I32x4Shl
+        | Operator::I32x4ShrS
+        | Operator::I32x4ShrU
+        | Operator::I32x4Add
+        | Operator::I32x4Sub
+        | Operator::I32x4Mul
+        | Operator::I32x4MinS
+        | Operator::I32x4MinU
+        | Operator::I32x4MaxS
+        | Operator::I32x4MaxU
+        | Operator::F32x4ConvertI32x4S
+        | Operator::F32x4ConvertI32x4U
+        | Operator::I32x4Bitmask
+        | Operator::V128Load32Zero { .. } => I32X4,
+
+        Operator::I64x2Splat
+        | Operator::V128Load64Splat { .. }
+        | Operator::I64x2ExtractLane { .. }
+        | Operator::I64x2ReplaceLane { .. }
+        | Operator::I64x2Neg
+        | Operator::I64x2Shl
+        | Operator::I64x2ShrS
+        | Operator::I64x2ShrU
+        | Operator::I64x2Add
+        | Operator::I64x2Sub
+        | Operator::I64x2Mul
+        | Operator::V128Load64Zero { .. } => I64X2,
+
+        Operator::F32x4Splat
+        | Operator::F32x4ExtractLane { .. }
+        | Operator::F32x4ReplaceLane { .. }
+        | Operator::F32x4Eq
+        | Operator::F32x4Ne
+        | Operator::F32x4Lt
+        | Operator::F32x4Gt
+        | Operator::F32x4Le
+        | Operator::F32x4Ge
+        | Operator::F32x4Abs
+        | Operator::F32x4Neg
+        | Operator::F32x4Sqrt
+        | Operator::F32x4Add
+        | Operator::F32x4Sub
+        | Operator::F32x4Mul
+        | Operator::F32x4Div
+        | Operator::F32x4Min
+        | Operator::F32x4Max
+        | Operator::F32x4PMin
+        | Operator::F32x4PMax
+        | Operator::I32x4TruncSatF32x4S
+        | Operator::I32x4TruncSatF32x4U
+        | Operator::F32x4Ceil
+        | Operator::F32x4Floor
+        | Operator::F32x4Trunc
+        | Operator::F32x4Nearest => F32X4,
+
+        Operator::F64x2Splat
+        | Operator::F64x2ExtractLane { .. }
+        | Operator::F64x2ReplaceLane { .. }
+        | Operator::F64x2Eq
+        | Operator::F64x2Ne
+        | Operator::F64x2Lt
+        | Operator::F64x2Gt
+        | Operator::F64x2Le
+        | Operator::F64x2Ge
+        | Operator::F64x2Abs
+        | Operator::F64x2Neg
+        | Operator::F64x2Sqrt
+        | Operator::F64x2Add
+        | Operator::F64x2Sub
+        | Operator::F64x2Mul
+        | Operator::F64x2Div
+        | Operator::F64x2Min
+        | Operator::F64x2Max
+        | Operator::F64x2PMin
+        | Operator::F64x2PMax
+        | Operator::F64x2Ceil
+        | Operator::F64x2Floor
+        | Operator::F64x2Trunc
+        | Operator::F64x2Nearest => F64X2,
+
+        _ => unimplemented!(
+            "Currently only SIMD instructions are mapped to their return type; the \
+             following instruction is not mapped: {:?}",
+            operator
+        ),
+    }
+}
+
+/// Some SIMD operations only operate on I8X16 in CLIF; this will convert them to that type by
+/// adding a raw_bitcast if necessary.
+fn optionally_bitcast_vector(
+    value: Value,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+) -> Value {
+    if builder.func.dfg.value_type(value) != needed_type {
+        builder.ins().raw_bitcast(needed_type, value)
+    } else {
+        value
+    }
+}
+
+#[inline(always)]
+fn is_non_canonical_v128(ty: ir::Type) -> bool {
+    match ty {
+        B8X16 | B16X8 | B32X4 | B64X2 | I64X2 | I32X4 | I16X8 | F32X4 | F64X2 => true,
+        _ => false,
+    }
+}
+
+/// Cast any vector values in `values` that are of "non-canonical" type (meaning, not I8X16) to
+/// I8X16, and return them in a slice. A pre-scan is made to determine whether any casts are
+/// actually necessary, and if not, the original slice is returned. Otherwise the cast values
+/// are returned in a slice that belongs to the caller-supplied `SmallVec`.
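+///
+/// A sketch of the intended calling pattern (this is essentially what
+/// `canonicalise_then_jump` below does): the caller owns the scratch
+/// `SmallVec` so that the common no-cast case can borrow `values` directly.
+///
+/// ```ignore
+/// let mut tmp = SmallVec::<[ir::Value; 16]>::new();
+/// let args = canonicalise_v128_values(&mut tmp, builder, params);
+/// builder.ins().jump(destination, args);
+/// ```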
+fn canonicalise_v128_values<'a>(
+    tmp_canonicalised: &'a mut SmallVec<[ir::Value; 16]>,
+    builder: &mut FunctionBuilder,
+    values: &'a [ir::Value],
+) -> &'a [ir::Value] {
+    debug_assert!(tmp_canonicalised.is_empty());
+    // First figure out if any of the parameters need to be cast. Mostly they don't need to be.
+    let any_non_canonical = values
+        .iter()
+        .any(|v| is_non_canonical_v128(builder.func.dfg.value_type(*v)));
+    // Hopefully we take this exit most of the time, hence doing no heap allocation.
+    if !any_non_canonical {
+        return values;
+    }
+    // Otherwise we'll have to cast, and push the resulting `Value`s into `tmp_canonicalised`.
+    for v in values {
+        tmp_canonicalised.push(if is_non_canonical_v128(builder.func.dfg.value_type(*v)) {
+            builder.ins().raw_bitcast(I8X16, *v)
+        } else {
+            *v
+        });
+    }
+    tmp_canonicalised.as_slice()
+}
+
+/// Generate a `jump` instruction, but first cast all 128-bit vector values to I8X16 if they
+/// don't have that type. This is done in a somewhat roundabout way so as to ensure that we
+/// almost never have to do any heap allocation.
+fn canonicalise_then_jump(
+    builder: &mut FunctionBuilder,
+    destination: ir::Block,
+    params: &[ir::Value],
+) -> ir::Inst {
+    let mut tmp_canonicalised = SmallVec::<[ir::Value; 16]>::new();
+    let canonicalised = canonicalise_v128_values(&mut tmp_canonicalised, builder, params);
+    builder.ins().jump(destination, canonicalised)
+}
+
+/// The same, but for a `brz` instruction.
+fn canonicalise_then_brz(
+    builder: &mut FunctionBuilder,
+    cond: ir::Value,
+    destination: ir::Block,
+    params: &[Value],
+) -> ir::Inst {
+    let mut tmp_canonicalised = SmallVec::<[ir::Value; 16]>::new();
+    let canonicalised = canonicalise_v128_values(&mut tmp_canonicalised, builder, params);
+    builder.ins().brz(cond, destination, canonicalised)
+}
+
+/// The same, but for a `brnz` instruction.
+fn canonicalise_then_brnz(
+    builder: &mut FunctionBuilder,
+    cond: ir::Value,
+    destination: ir::Block,
+    params: &[Value],
+) -> ir::Inst {
+    let mut tmp_canonicalised = SmallVec::<[ir::Value; 16]>::new();
+    let canonicalised = canonicalise_v128_values(&mut tmp_canonicalised, builder, params);
+    builder.ins().brnz(cond, destination, canonicalised)
+}
+
+/// A helper for popping and bitcasting a single value; since SIMD values can lose their type by
+/// using v128 (i.e. CLIF's I8X16) we must re-type the values using a bitcast to avoid CLIF
+/// typing issues.
+fn pop1_with_bitcast(
+    state: &mut FuncTranslationState,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+) -> Value {
+    optionally_bitcast_vector(state.pop1(), needed_type, builder)
+}
+
+/// A helper for popping and bitcasting two values; since SIMD values can lose their type by
+/// using v128 (i.e. CLIF's I8X16) we must re-type the values using a bitcast to avoid CLIF
+/// typing issues.
+fn pop2_with_bitcast(
+    state: &mut FuncTranslationState,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+) -> (Value, Value) {
+    let (a, b) = state.pop2();
+    let bitcast_a = optionally_bitcast_vector(a, needed_type, builder);
+    let bitcast_b = optionally_bitcast_vector(b, needed_type, builder);
+    (bitcast_a, bitcast_b)
+}
+
+/// A helper for bitcasting a sequence of values (e.g. function arguments). If a value is a
+/// vector type that does not match its expected type, this will modify the value in place to
+/// point to the result of a `raw_bitcast`.
+/// This conversion is necessary to translate Wasm code that
+/// uses `V128` as function parameters (or implicitly in block parameters) and still uses specific
+/// CLIF types (e.g. `I32X4`) in the function body.
+pub fn bitcast_arguments(
+    arguments: &mut [Value],
+    expected_types: &[Type],
+    builder: &mut FunctionBuilder,
+) {
+    assert_eq!(arguments.len(), expected_types.len());
+    for (i, t) in expected_types.iter().enumerate() {
+        if t.is_vector() {
+            assert!(
+                builder.func.dfg.value_type(arguments[i]).is_vector(),
+                "unexpected type mismatch: expected {}, argument {} was actually of type {}",
+                t,
+                arguments[i],
+                builder.func.dfg.value_type(arguments[i])
+            );
+            arguments[i] = optionally_bitcast_vector(arguments[i], *t, builder)
+        }
+    }
+}
+
+/// A helper to extract the `Type` of each parameter in `params`, but only for those parameters
+/// for which `is_wasm` returns true; `is_wasm` is typically `is_wasm_return` or
+/// `is_wasm_parameter`.
+pub fn wasm_param_types(params: &[ir::AbiParam], is_wasm: impl Fn(usize) -> bool) -> Vec<Type> {
+    let mut ret = Vec::with_capacity(params.len());
+    for (i, param) in params.iter().enumerate() {
+        if is_wasm(i) {
+            ret.push(param.value_type);
+        }
+    }
+    ret
+}